From 7ed796f12ccb0f5c41e725621f26afce2e7c5172 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20Gruszczy=C5=84ski?=
 <mateusz.gruszczynski@firma.interia.pl>
Date: Fri, 26 Jun 2026 11:01:55 +0200
Subject: [PATCH] first commit

---
 README.md           |   83 ++++
 config.example.json |   19 +
 varnish_exporter.py | 1092 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1194 insertions(+)
 create mode 100644 README.md
 create mode 100644 config.example.json
 create mode 100755 varnish_exporter.py

diff --git a/README.md b/README.md
new file mode 100644
index 0000000..e81427d
--- /dev/null
+++ b/README.md
@@ -0,0 +1,83 @@
+# Varnish Prometheus Business Exporter
+
+Lightweight Python exporter for Varnish. It exposes `/metrics` for Prometheus and focuses on per-domain business/operational statistics rather than only raw counters.
+
+## Requirements
+
+- Python 3
+- `varnishstat`
+- `varnishlog`
+- Access to Varnish shared memory/logs, usually by running as root or with proper permissions
+
+No external Python dependencies are required.
+
+## Quick start
+
+Only varnishstat:
+
+```bash
+python3 varnish_exporter.py --modules core,stat
+```
+
+Test VSL without sampling:
+
+```bash
+sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 1 --profile full
+```
+
+Recommended production mode:
+
+```bash
+sudo python3 varnish_exporter.py \
+  --port 9131 \
+  --enable-vsl \
+  --profile standard \
+  --vsl-sample 0.001
+```
+
+Then check:
+
+```bash
+curl -s http://127.0.0.1:9131/metrics | grep varnish_domain
+```
+
+## Profiles
+
+- `minimal`: rps, hit ratio, backend ratio, 5xx ratio, p95 latency
+- `standard`: recommended business-oriented domain statistics
+- `full`: more detailed derived statistics
+- `raw`: full plus raw HTTP counters/histograms
+
+## Modules
+
+- `core`: exporter self metrics
+- `stat`: `varnishstat -1 -j`
+- `vsl`: `varnishlog -g request`
+- `domain`: derived per-domain statistics
+- `raw`: raw request counters/histograms
+
+Example:
+
+```bash
+sudo python3 varnish_exporter.py --modules core,stat,vsl,domain --profile standard
+```
+
+## Domain grouping
+
+Use `config.example.json` as a template:
+
+```bash
+sudo python3 varnish_exporter.py \
+  --config ./config.example.json \
+  --enable-vsl \
+  --profile standard
+```
+
+If no config is provided, the exporter uses the real Host header as the `site` label.
+
+## Important notes
+
+- Varnish cannot measure real browser render time such as LCP/FCP/DOMContentLoaded.
+- Latency here means server-side response time as observed by Varnish.
+- For high traffic, avoid `--vsl-sample 1` in production.
+- Start production with `--vsl-sample 0.001` or `0.0001`.
diff --git a/config.example.json b/config.example.json
new file mode 100644
index 0000000..2b71541
--- /dev/null
+++ b/config.example.json
@@ -0,0 +1,19 @@
+{
+  "site_rules": [
+    {
+      "match": "(^|\\.)static\\.example\\.com$",
+      "site": "static_example_com"
+    },
+    {
+      "match": "(^|\\.)api\\.example\\.com$",
+      "site": "api_example_com"
+    },
+    {
+      "match": "(^|\\.)example\\.com$",
+      "site": "example_com"
+    }
+  ],
+  "default_site": "other",
+  "allowed_methods": ["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
+  "histogram_buckets": [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
+}
diff --git a/varnish_exporter.py b/varnish_exporter.py
new file mode 100755
index 0000000..fb0260a
--- /dev/null
+++ b/varnish_exporter.py
@@ -0,0 +1,1092 @@
+#!/usr/bin/env python3
+"""
+Varnish Prometheus Business Exporter
+
+Purpose:
+  A lightweight Prometheus exporter for Varnish focused not only on raw counters,
+  but also on ready-to-use business and operational statistics per domain/site.
+
+Data sources:
+  1. varnishstat -1 -j
+     - global Varnish metrics
+     - storage, backends, cache hits/misses, workers, locks, etc.
+
+  2. varnishlog -g request
+     - sampled request stream
+     - per-domain aggregation
+     - hit/miss/pass/backend ratios
+     - latency p50/p90/p95/p99
+     - error ratios
+     - RPS
+     - saved backend RPS
+
+Main domain metrics:
+  varnish_domain_rps
+  varnish_domain_hit_ratio
+  varnish_domain_miss_ratio
+  varnish_domain_pass_ratio
+  varnish_domain_backend_ratio
+  varnish_domain_cache_efficiency_ratio
+  varnish_domain_saved_backend_rps
+  varnish_domain_error_ratio
+  varnish_domain_4xx_ratio
+  varnish_domain_5xx_ratio
+  varnish_domain_avg_latency_seconds
+  varnish_domain_p50_latency_seconds
+  varnish_domain_p90_latency_seconds
+  varnish_domain_p95_latency_seconds
+  varnish_domain_p99_latency_seconds
+  varnish_domain_slow_100ms_ratio
+  varnish_domain_slow_250ms_ratio
+  varnish_domain_slow_500ms_ratio
+  varnish_domain_slow_1s_ratio
+
+Profiles:
+  minimal:
+    - exporter health
+    - varnishstat
+    - per-domain: rps, hit_ratio, backend_ratio, 5xx_ratio, p95
+
+  standard:
+    - minimal +
+    - miss/pass ratio
+    - cache efficiency
+    - backend rps
+    - saved backend rps
+    - error ratio
+    - avg/p50/p90/p99 latency
+    - slow ratios
+
+  full:
+    - standard +
+    - per-cache average latency
+    - 2xx/3xx/4xx/5xx rps/ratio
+    - pipe/synth/unknown ratios
+
+  raw:
+    - full +
+    - raw varnish_http_requests_total
+    - raw varnish_http_request_duration_seconds
+    - varnish_domain_response_time_seconds histogram
+
+Modules:
+  core:
+    - exporter self metrics
+
+  stat:
+    - varnishstat metrics
+
+  vsl:
+    - varnishlog collector
+
+  domain:
+    - derived per-domain metrics
+
+  raw:
+    - raw HTTP request counters/histograms
+
+Defaults:
+  --modules core,stat,vsl,domain
+  --profile standard
+  --vsl-sample 0.001
+
+Examples:
+
+  Local test without sampling:
+    sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 1 --profile full
+
+  Production:
+    sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 0.001 --profile standard
+
+  Only varnishstat:
+    python3 varnish_exporter.py --modules core,stat
+
+  Only domain aggregates, no raw HTTP metrics:
+    sudo python3 varnish_exporter.py --modules core,stat,vsl,domain --profile standard
+
+  Debug with raw metrics:
+    sudo python3 varnish_exporter.py --modules core,stat,vsl,domain,raw --profile raw --vsl-sample 1
+
+JSON config example:
+  {
+    "site_rules": [
+      {
+        "match": "(^|\\\\.)static\\\\.example\\\\.com$",
+        "site": "static_example_com"
+      },
+      {
+        "match": "(^|\\\\.)example\\\\.com$",
+        "site": "example_com"
+      }
+    ],
+    "default_site": "other",
+    "allowed_methods": ["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
+    "histogram_buckets": [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10]
+  }
+
+Notes:
+  - Varnish cannot see real browser render time.
+  - Latency here means server-side response time as seen by Varnish.
+  - For very high traffic, do not use --vsl-sample 1 in production.
+  - Sensible production values: 0.001 or 0.0001.
+"""
+
+import argparse
+import json
+import random
+import re
+import subprocess
+import threading
+import time
+from collections import defaultdict
+from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer
+
+
+DEFAULT_CONFIG = {  # used by load_config() when no path is given; also supplies missing method/bucket lists
+    "site_rules": [],
+    "default_site": "other",
+    "allowed_methods": ["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"],
+    "histogram_buckets": [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10],
+}
+
+
+VALID_MODULES = {"core", "stat", "vsl", "domain", "raw"}  # module names accepted by parse_modules()
+
+
+def load_config(path):
+    """Load exporter settings from a JSON file, or fall back to DEFAULT_CONFIG.
+
+    Missing keys are filled with defaults. The compiled site rules and the
+    allowed-method set are stored under "site_rules_compiled" / "allowed_methods_set".
+    """
+    if path:
+        with open(path, "r", encoding="utf-8") as handle:
+            cfg = json.load(handle)
+    else:
+        cfg = dict(DEFAULT_CONFIG)
+
+    for key in ("site_rules", "default_site", "allowed_methods", "histogram_buckets"):
+        cfg.setdefault(key, DEFAULT_CONFIG[key])
+
+    cfg["site_rules_compiled"] = [(re.compile(r["match"], re.I), r["site"]) for r in cfg["site_rules"]]
+    cfg["allowed_methods_set"] = set(cfg["allowed_methods"])
+    return cfg
+
+
+def parse_modules(value):
+    """Parse a comma-separated module list into a set of module names.
+
+    Items are trimmed and lower-cased, and empty items are ignored. The item
+    ``all`` short-circuits to every known module. Raises
+    argparse.ArgumentTypeError for an unknown module or an empty result.
+    """
+    selected = set()
+
+    for raw in value.split(","):
+        token = raw.strip().lower()
+        if token == "all":
+            return set(VALID_MODULES)
+        if token and token not in VALID_MODULES:
+            raise argparse.ArgumentTypeError(
+                f"unknown module: {token}; available modules: {','.join(sorted(VALID_MODULES))}"
+            )
+        if token:
+            selected.add(token)
+
+    if selected:
+        return selected
+    raise argparse.ArgumentTypeError("module list cannot be empty")
+
+
+def prom_escape(value):  # escape backslash, double quote and newline in a label value
+    return str(value).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n")
+
+
+def prom_name(name):
+    """Sanitize *name* into a lower-case [a-z0-9_] identifier ("unknown" if empty)."""
+    cleaned = re.sub(r"[^a-z0-9_]", "_", str(name).lower())
+    cleaned = re.sub(r"_+", "_", cleaned).strip("_")
+    if cleaned[:1].isdigit():
+        cleaned = "_" + cleaned
+    return cleaned if cleaned else "unknown"
+
+
+def status_class(status):
+    """Map an HTTP status to its class label, e.g. 404 -> "4xx".
+
+    Anything that is not an integer in 100..599 yields "unknown".
+    """
+    try:
+        code = int(status)
+    except Exception:
+        return "unknown"
+    return f"{code // 100}xx" if 100 <= code <= 599 else "unknown"
+
+
+def normalize_method(method, cfg):  # upper-case the method; anything not allowed collapses to "OTHER"
+    method = str(method or "UNKNOWN").upper()  # a falsy method becomes "UNKNOWN" before the lookup
+    return method if method in cfg["allowed_methods_set"] else "OTHER"
+
+
+def normalize_host(host):
+    """Return the lower-cased host name without port, brackets or trailing dot.
+
+    The trailing dot is stripped after the port is removed, so
+    "example.com.:8080" normalizes to "example.com" just like "example.com.".
+    """
+    host = str(host or "").lower().strip()
+
+    if host.startswith("[") and "]" in host:
+        # Bracketed IPv6 literal: keep the address, drop brackets and port.
+        return host[1:host.index("]")]
+
+    return host.partition(":")[0].rstrip(".")
+
+
+def site_group(host, cfg):
+    """Resolve a Host header value to its ``site`` label.
+
+    An empty host maps to cfg's default_site ("other" if unset). Otherwise the
+    first matching compiled rule wins, falling back to the normalized host.
+    """
+    name = normalize_host(host)
+    if not name:
+        return cfg.get("default_site", "other")
+    matches = (label for pattern, label in cfg["site_rules_compiled"] if pattern.search(name))
+    return next(matches, name)
+
+
+def format_metric(name, labels, value):
+    """Render one exposition line: ``name{k="v",...} value``.
+
+    Braces are omitted when *labels* is empty; pairs are sorted, values escaped.
+    """
+    if not labels:
+        return f"{name} {value}"
+    pairs = [f'{k}="{prom_escape(v)}"' for k, v in sorted(labels)]
+    return f"{name}{{{','.join(pairs)}}} {value}"
+
+
+def estimate_quantile_from_buckets(buckets, count, q):
+    """Estimate quantile *q* from cumulative histogram buckets.
+
+    *buckets* maps upper bounds to cumulative counts; *count* is the total.
+    The value is interpolated linearly inside the first finite bucket whose
+    count reaches ``count * q`` (the lowest bucket starts at 0). Returns 0.0
+    without data or finite buckets, and the largest finite bound if exceeded.
+    """
+    if count <= 0:
+        return 0.0
+    bounds = sorted(b for b in buckets if b != float("inf"))
+    if not bounds:
+        return 0.0
+
+    rank = count * q
+    lower_bound, lower_count = 0.0, 0.0
+    for upper_bound in bounds:
+        upper_count = buckets.get(upper_bound, 0.0)
+        if not (upper_count >= rank):
+            lower_bound, lower_count = upper_bound, upper_count
+            continue
+        if upper_count <= lower_count:
+            return upper_bound  # cumulative count did not grow: no span to interpolate
+        share = (rank - lower_count) / (upper_count - lower_count)
+        return lower_bound + ((upper_bound - lower_bound) * share)
+
+    return bounds[-1]
+
+
+class Metrics:
+    def __init__(self, buckets, max_series, window_seconds, bucket_seconds, profile, modules):  # store settings, create empty stores
+        self.lock = threading.RLock()  # taken by the methods below around the shared containers
+        self.buckets = buckets  # histogram upper bounds iterated by observe() and record_http()
+        self.max_series = max_series  # cap on distinct (metric, labels) series, see _allow_series()
+        self.window_seconds = window_seconds  # length of the per-site sliding window
+        self.bucket_seconds = bucket_seconds  # width of one time bucket inside that window
+        self.profile = profile  # compared against "minimal" / "standard" / "full" / "raw"
+        self.modules = modules  # collection of enabled module names
+
+        self.gauges = {}  # (name, labels_key) -> float
+        self.counters = {}  # (name, labels_key) -> float
+        self.histograms = defaultdict(lambda: {  # (name, labels_key) -> buckets/sum/count record
+            "buckets": defaultdict(float),
+            "sum": 0.0,
+            "count": 0.0,
+        })
+
+        self.series_seen = set()  # every (metric, labels_key) admitted so far
+        self.dropped_series = 0  # number of writes refused by the series cap
+
+        self.domain_window = defaultdict(lambda: defaultdict(float))  # (bucket_ts, site) -> accumulators
+
+    def module_enabled(self, name):  # True when module *name* is in self.modules
+        return name in self.modules
+
+    def emit_raw(self):  # raw metrics are on for profile "raw" or when the "raw" module is enabled
+        return self.profile == "raw" or self.module_enabled("raw")
+
+    def emit_minimal(self):  # True for any of the four listed profile names
+        return self.profile in ("minimal", "standard", "full", "raw")
+
+    def emit_standard(self):  # True for "standard" and the richer "full" / "raw" profiles
+        return self.profile in ("standard", "full", "raw")
+
+    def emit_full(self):  # True for the "full" and "raw" profiles
+        return self.profile in ("full", "raw")
+
+    def _label_key(self, labels):  # hashable form of a label dict: sorted (str(key), str(value)) pairs
+        return tuple(sorted((str(k), str(v)) for k, v in labels.items()))
+
+    def _allow_series(self, metric, labels):
+        """Admit a (metric, labels) series while under the max_series cap.
+
+        Known series always pass; a refused new series bumps dropped_series.
+        """
+        series = (metric, labels)
+        if series not in self.series_seen:
+            if len(self.series_seen) >= self.max_series:
+                self.dropped_series += 1
+                return False
+            self.series_seen.add(series)
+        return True
+
+    def set_gauge(self, name, value, **labels):
+        """Store *value* as a float gauge sample, unless the series cap refuses it."""
+        series = (name, self._label_key(labels))
+
+        with self.lock:
+            if self._allow_series(*series):
+                self.gauges[series] = float(value)
+
+    def set_counter(self, name, value, **labels):
+        """Overwrite counter *name* (suffix ``_total`` enforced) for this label set.
+
+        The write is dropped when the series cap refuses a new series.
+        """
+        metric = name if name.endswith("_total") else name + "_total"
+        series = (metric, self._label_key(labels))
+        with self.lock:
+            if self._allow_series(*series):
+                self.counters[series] = float(value)
+
+    def inc_counter(self, name, amount=1.0, **labels):
+        """Add *amount* to counter *name* (suffix ``_total`` enforced).
+
+        The increment is dropped when the series cap refuses a new series.
+        """
+        metric = name if name.endswith("_total") else name + "_total"
+        series = (metric, self._label_key(labels))
+        with self.lock:
+            if self._allow_series(*series):
+                self.counters[series] = self.counters.get(series, 0.0) + float(amount)
+
+    def observe(self, name, value, weight=1.0, **labels):
+        """Record a weighted observation in histogram *name* for this label set.
+
+        Buckets are cumulative: every configured bound >= value is incremented,
+        as is the +Inf bucket. Dropped when the series cap refuses the series.
+        """
+        series = (name, self._label_key(labels))
+        with self.lock:
+            if not self._allow_series(*series):
+                return
+            hist = self.histograms[series]
+            w = float(weight)
+            hist["sum"] += float(value) * w
+            hist["count"] += w
+            for bound in [b for b in self.buckets if value <= b] + [float("inf")]:
+                hist["buckets"][bound] += w
+
+    def record_http(self, site, method, status_class_value, cache, latency, weight):  # account one (weighted) request
+        raw_labels = {
+            "site": site,
+            "method": method,
+            "status_class": status_class_value,
+            "cache": cache,
+        }
+
+        if self.emit_raw():  # raw counter and histograms only for profile/module "raw"
+            self.inc_counter("varnish_http_requests_total", weight, **raw_labels)
+
+            if latency is not None:
+                self.observe(
+                    "varnish_http_request_duration_seconds",
+                    latency,
+                    weight=weight,
+                    **raw_labels,
+                )
+
+                self.observe(
+                    "varnish_domain_response_time_seconds",
+                    latency,
+                    weight=weight,
+                    site=site,
+                )
+
+        if not self.module_enabled("domain"):  # the sliding window is only kept for the "domain" module
+            return
+
+        now = int(time.time())
+        bucket_ts = now - (now % self.bucket_seconds)  # start of the current time bucket
+        key = (bucket_ts, site)
+
+        with self.lock:
+            b = self.domain_window[key]
+
+            b["total"] += weight
+            b[f"cache_{cache}"] += weight
+
+            if cache in ("hit", "miss"):  # "cacheable" counts hits and misses only
+                b["cacheable"] += weight
+
+            if cache in ("miss", "pass"):  # misses and passes are counted as backend requests
+                b["backend"] += weight
+
+            if status_class_value == "2xx":
+                b["2xx"] += weight
+            elif status_class_value == "3xx":
+                b["3xx"] += weight
+            elif status_class_value == "4xx":  # 4xx and 5xx both also count as errors
+                b["4xx"] += weight
+                b["errors"] += weight
+            elif status_class_value == "5xx":
+                b["5xx"] += weight
+                b["errors"] += weight
+
+            if latency is not None:
+                b["latency_sum"] += latency * weight
+                b["latency_count"] += weight
+                b[f"cache_{cache}_latency_sum"] += latency * weight
+                b[f"cache_{cache}_latency_count"] += weight
+
+                if latency > 0.05:  # the slow_* thresholds are strict (>) and cumulative
+                    b["slow_50ms"] += weight
+                if latency > 0.1:
+                    b["slow_100ms"] += weight
+                if latency > 0.25:
+                    b["slow_250ms"] += weight
+                if latency > 0.5:
+                    b["slow_500ms"] += weight
+                if latency > 1.0:
+                    b["slow_1s"] += weight
+                if latency > 2.5:
+                    b["slow_2500ms"] += weight
+                if latency > 5.0:
+                    b["slow_5s"] += weight
+
+                for le in self.buckets:  # cumulative latency buckets, later read back for quantiles
+                    if latency <= le:
+                        b[f"latency_le_{le}"] += weight
+
+                b["latency_le_inf"] += weight
+
+            self.cleanup_window_locked(now)  # expire old buckets on every recorded request
+
+    def cleanup_window_locked(self, now):
+        """Delete window buckets older than window_seconds + bucket_seconds.
+
+        Both visible callers invoke this while holding self.lock.
+        """
+        cutoff = now - self.window_seconds - self.bucket_seconds
+        for stale in [key for key in self.domain_window if key[0] < cutoff]:
+            del self.domain_window[stale]
+
+    def calculate_domain_window_stats(self):
+        """Aggregate the sliding window into derived per-site gauge values.
+
+        Returns a dict keyed by (metric_name, labels_tuple); it is empty when
+        the "domain" module is disabled. Sites without traffic are skipped.
+        """
+        if not self.module_enabled("domain"):
+            return {}
+
+        now = int(time.time())
+        min_ts = now - self.window_seconds
+        stats = defaultdict(lambda: defaultdict(float))
+
+        with self.lock:
+            self.cleanup_window_locked(now)
+            # Copy each bucket while holding the lock: record_http() keeps
+            # adding keys to the live dicts, which would break iteration below.
+            items = [(key, dict(values)) for key, values in self.domain_window.items()]
+
+        for (bucket_ts, site), values in items:
+            if bucket_ts < min_ts:
+                continue
+            s = stats[site]
+            for k, v in values.items():
+                s[k] += float(v)
+
+        out = {}
+
+        for site, s in stats.items():
+            total = s.get("total", 0.0)
+
+            if total <= 0:
+                continue
+
+            labels = (("site", site),)
+            window = float(self.window_seconds)
+
+            hit = s.get("cache_hit", 0.0)
+            miss = s.get("cache_miss", 0.0)
+            passed = s.get("cache_pass", 0.0)
+            pipe = s.get("cache_pipe", 0.0)
+            synth = s.get("cache_synth", 0.0)
+            unknown_cache = s.get("cache_unknown", 0.0)
+
+            backend = s.get("backend", 0.0)
+            cacheable = s.get("cacheable", 0.0)
+
+            e2xx = s.get("2xx", 0.0)
+            e3xx = s.get("3xx", 0.0)
+            e4xx = s.get("4xx", 0.0)
+            e5xx = s.get("5xx", 0.0)
+            errors = s.get("errors", 0.0)
+
+            out[("varnish_domain_rps", labels)] = total / window
+            out[("varnish_domain_hit_ratio", labels)] = hit / total
+            out[("varnish_domain_backend_ratio", labels)] = backend / total
+            out[("varnish_domain_5xx_ratio", labels)] = e5xx / total
+
+            latency_count = s.get("latency_count", 0.0)
+            latency_buckets = None
+            if latency_count > 0:
+                latency_buckets = {}
+                for le in self.buckets:
+                    latency_buckets[le] = s.get(f"latency_le_{le}", 0.0)
+                latency_buckets[float("inf")] = s.get("latency_le_inf", 0.0)
+
+                out[("varnish_domain_p95_latency_seconds", labels)] = estimate_quantile_from_buckets(
+                    latency_buckets,
+                    latency_count,
+                    0.95,
+                )
+
+            if self.emit_standard():
+                out[("varnish_domain_requests_per_second", labels)] = total / window
+                out[("varnish_domain_hit_rps", labels)] = hit / window
+                out[("varnish_domain_miss_rps", labels)] = miss / window
+                out[("varnish_domain_pass_rps", labels)] = passed / window
+                out[("varnish_domain_backend_rps", labels)] = backend / window
+                out[("varnish_domain_saved_backend_rps", labels)] = hit / window
+
+                out[("varnish_domain_miss_ratio", labels)] = miss / total
+                out[("varnish_domain_pass_ratio", labels)] = passed / total
+                out[("varnish_domain_backend_ratio", labels)] = backend / total
+                out[("varnish_domain_cacheable_ratio", labels)] = cacheable / total
+                out[("varnish_domain_4xx_ratio", labels)] = e4xx / total
+                out[("varnish_domain_error_ratio", labels)] = errors / total
+
+                if cacheable > 0:
+                    out[("varnish_domain_cache_efficiency_ratio", labels)] = hit / cacheable
+                else:
+                    out[("varnish_domain_cache_efficiency_ratio", labels)] = 0.0
+
+                if latency_count > 0 and latency_buckets is not None:
+                    out[("varnish_domain_avg_latency_seconds", labels)] = s.get("latency_sum", 0.0) / latency_count
+                    out[("varnish_domain_latency_observed_ratio", labels)] = latency_count / total
+
+                    out[("varnish_domain_p50_latency_seconds", labels)] = estimate_quantile_from_buckets(
+                        latency_buckets,
+                        latency_count,
+                        0.50,
+                    )
+                    out[("varnish_domain_p90_latency_seconds", labels)] = estimate_quantile_from_buckets(
+                        latency_buckets,
+                        latency_count,
+                        0.90,
+                    )
+                    out[("varnish_domain_p99_latency_seconds", labels)] = estimate_quantile_from_buckets(
+                        latency_buckets,
+                        latency_count,
+                        0.99,
+                    )
+
+                    out[("varnish_domain_slow_100ms_ratio", labels)] = s.get("slow_100ms", 0.0) / latency_count
+                    out[("varnish_domain_slow_250ms_ratio", labels)] = s.get("slow_250ms", 0.0) / latency_count
+                    out[("varnish_domain_slow_500ms_ratio", labels)] = s.get("slow_500ms", 0.0) / latency_count
+                    out[("varnish_domain_slow_1s_ratio", labels)] = s.get("slow_1s", 0.0) / latency_count
+
+            if self.emit_full():
+                out[("varnish_domain_pipe_rps", labels)] = pipe / window
+                out[("varnish_domain_synth_rps", labels)] = synth / window
+                out[("varnish_domain_unknown_cache_rps", labels)] = unknown_cache / window
+
+                out[("varnish_domain_pipe_ratio", labels)] = pipe / total
+                out[("varnish_domain_synth_ratio", labels)] = synth / total
+                out[("varnish_domain_unknown_cache_ratio", labels)] = unknown_cache / total
+
+                out[("varnish_domain_2xx_ratio", labels)] = e2xx / total
+                out[("varnish_domain_3xx_ratio", labels)] = e3xx / total
+                out[("varnish_domain_2xx_rps", labels)] = e2xx / window
+                out[("varnish_domain_3xx_rps", labels)] = e3xx / window
+                out[("varnish_domain_4xx_rps", labels)] = e4xx / window
+                out[("varnish_domain_5xx_rps", labels)] = e5xx / window
+                out[("varnish_domain_error_rps", labels)] = errors / window
+
+                if latency_count > 0:
+                    out[("varnish_domain_slow_50ms_ratio", labels)] = s.get("slow_50ms", 0.0) / latency_count
+                    out[("varnish_domain_slow_2500ms_ratio", labels)] = s.get("slow_2500ms", 0.0) / latency_count
+                    out[("varnish_domain_slow_5s_ratio", labels)] = s.get("slow_5s", 0.0) / latency_count
+
+                for cache_name in ("hit", "miss", "pass", "pipe", "synth", "unknown"):
+                    cache_latency_count = s.get(f"cache_{cache_name}_latency_count", 0.0)
+                    cache_latency_sum = s.get(f"cache_{cache_name}_latency_sum", 0.0)
+
+                    if cache_latency_count > 0:
+                        metric = f"varnish_domain_{cache_name}_avg_latency_seconds"
+                        out[(metric, labels)] = cache_latency_sum / cache_latency_count
+
+        return out
+
+    def render(self):
+        """Render all metrics in the Prometheus text exposition format.
+
+        Stored gauges, counters and histograms are snapshotted under the lock and
+        merged with the derived per-site gauges; core self-metrics come first.
+        """
+        with self.lock:
+            gauges = dict(self.gauges)
+            counters = dict(self.counters)
+            histograms = {
+                k: {
+                    "buckets": dict(v["buckets"]),
+                    "sum": v["sum"],
+                    "count": v["count"],
+                }
+                for k, v in self.histograms.items()
+            }
+            series_count = len(self.series_seen)
+            dropped_series = self.dropped_series
+
+        gauges.update(self.calculate_domain_window_stats())
+        out = []
+
+        if self.module_enabled("core"):
+            out.append("# TYPE varnish_exporter_series gauge")
+            out.append(f"varnish_exporter_series {series_count}")
+
+            out.append("# TYPE varnish_exporter_dropped_series_total counter")
+            out.append(f"varnish_exporter_dropped_series_total {dropped_series}")
+
+            out.append("# TYPE varnish_exporter_window_seconds gauge")
+            out.append(f"varnish_exporter_window_seconds {self.window_seconds}")
+
+            out.append("# TYPE varnish_exporter_profile gauge")
+            out.append(f'varnish_exporter_profile{{profile="{self.profile}"}} 1')
+
+            # Declared like its neighbours so scrapers do not see it as untyped.
+            out.append("# TYPE varnish_exporter_module_enabled gauge")
+            for module in sorted(self.modules):
+                out.append(f'varnish_exporter_module_enabled{{module="{module}"}} 1')
+
+        typed = set()
+
+        for (name, labels), value in sorted(gauges.items()):
+            if name not in typed:
+                out.append(f"# TYPE {name} gauge")
+                typed.add(name)
+            out.append(format_metric(name, labels, value))
+
+        for (name, labels), value in sorted(counters.items()):
+            if name not in typed:
+                out.append(f"# TYPE {name} counter")
+                typed.add(name)
+            out.append(format_metric(name, labels, value))
+
+        for (name, labels), h in sorted(histograms.items()):
+            if name not in typed:
+                out.append(f"# TYPE {name} histogram")
+                typed.add(name)
+            base_labels = dict(labels)
+            for b in self.buckets:
+                lb = dict(base_labels)
+                lb["le"] = str(b)
+                out.append(format_metric(name + "_bucket", tuple(sorted(lb.items())), h["buckets"].get(b, 0.0)))
+            lb = dict(base_labels)
+            lb["le"] = "+Inf"
+            out.append(format_metric(name + "_bucket", tuple(sorted(lb.items())), h["buckets"].get(float("inf"), 0.0)))
+            out.append(format_metric(name + "_sum", labels, h["sum"]))
+            out.append(format_metric(name + "_count", labels, h["count"]))
+
+        return "\n".join(out) + "\n"
+
+
+class VarnishStatCollector(threading.Thread):
+    def __init__(self, metrics, interval, instance):  # daemon thread that polls varnishstat
+        super().__init__(daemon=True)
+        self.metrics = metrics  # sink receiving set_gauge()/set_counter() calls
+        self.interval = interval  # sleep between polls, passed to time.sleep()
+        self.instance = instance  # optional value for varnishstat's -n option
+
+    def run(self):  # thread body: poll forever, publishing health and duration of each poll
+        while True:
+            started = time.time()
+
+            try:
+                self.collect()
+                self.metrics.set_gauge("varnish_exporter_collector_up", 1, collector="varnishstat")
+            except Exception:  # any failure marks the collector as down; the loop keeps going
+                self.metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishstat")
+
+            self.metrics.set_gauge(
+                "varnish_exporter_collector_duration_seconds",
+                time.time() - started,
+                collector="varnishstat",
+            )
+
+            time.sleep(self.interval)
+
+    def collect(self):
+        """Run ``varnishstat -1 -j`` once and publish every counter it reports.
+
+        Varnish 6.5+ nests the counters under a top-level "counters" object,
+        while older releases list them at the top level; both are accepted.
+        Entries flagged "c" become counters, everything else a gauge.
+        """
+        cmd = ["varnishstat", "-1", "-j"]
+        if self.instance:
+            cmd.extend(["-n", self.instance])
+
+        data = json.loads(subprocess.check_output(cmd, text=True, timeout=10))
+
+        for key, item in data.get("counters", data).items():
+            if not isinstance(item, dict) or "value" not in item:
+                continue
+            metric, labels = self.metric_from_key(key)
+            if item.get("flag", "g") == "c":
+                self.metrics.set_counter(metric, item["value"], **labels)
+            else:
+                self.metrics.set_gauge(metric, item["value"], **labels)
+
+    def metric_from_key(self, key):
+        """Translate a varnishstat key into ``(metric_name, labels)``.
+
+        "SECTION.field" has no labels. For VBE/SMA/SMF/MSE/LCK keys with three or
+        more parts, the middle parts become a backend/storage/lock label. Any
+        other key has its remaining parts joined into the metric name.
+        """
+        label_for_section = {
+            "VBE": "backend",
+            "SMA": "storage",
+            "SMF": "storage",
+            "MSE": "storage",
+            "LCK": "lock",
+        }
+        head, *tail = key.split(".")
+        prefix = f"varnish_{prom_name(head)}"
+
+        if len(tail) == 1:
+            return f"{prefix}_{prom_name(tail[0])}", {}
+        if head in label_for_section and len(tail) >= 2:
+            return f"{prefix}_{prom_name(tail[-1])}", {label_for_section[head]: ".".join(tail[:-1])}
+        return f"{prefix}_{prom_name('_'.join(tail))}", {}
+
+
+class VarnishLogCollector(threading.Thread):
+    def __init__(self, metrics, cfg, instance, sample_rate):  # daemon thread that tails varnishlog
+        super().__init__(daemon=True)
+        self.metrics = metrics  # sink receiving record_http() and set_gauge() calls
+        self.cfg = cfg  # passed to normalize_method() and site_group()
+        self.instance = instance  # optional value for varnishlog's -n option
+        self.sample_rate = sample_rate  # compared against random.random() per request
+        self.sample_weight = 1.0 / sample_rate if sample_rate > 0 else 0.0  # each sampled request counts for 1/rate
+
+    def run(self):  # thread body: keep (re)starting the varnishlog stream forever
+        while True:
+            try:
+                self.stream()
+            except Exception:  # any failure: report the collector as down, retry after 2 s
+                self.metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishlog")
+                time.sleep(2)
+
+    def stream(self):
+        """Run varnishlog and feed sampled request transactions to the metrics.
+
+        Blocks for as long as varnishlog keeps producing output. The child is
+        always killed and reaped on the way out so restarts do not leak
+        processes, and RuntimeError is raised at end of output so the caller's
+        error path (collector_up=0 plus back-off) also covers a clean exit.
+        """
+        cmd = [
+            "varnishlog",
+            "-g", "request",
+            "-i", "ReqMethod,ReqHeader,RespStatus,VCL_call,Timestamp,End",
+        ]
+        if self.instance:
+            cmd.extend(["-n", self.instance])
+
+        proc = subprocess.Popen(
+            cmd,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.DEVNULL,
+            text=True,
+            bufsize=1,
+        )
+        self.metrics.set_gauge("varnish_exporter_collector_up", 1, collector="varnishlog")
+
+        tx = None
+        sampled = False
+
+        try:
+            for line in proc.stdout:
+                line = line.rstrip("\n")
+
+                if "<< Request" in line:
+                    if tx:
+                        self.finish_tx(tx)
+                    sampled = random.random() < self.sample_rate
+                    tx = {} if sampled else None
+                    continue
+
+                if not sampled or tx is None:
+                    continue
+
+                parsed = self.parse_line(line)
+                if not parsed:
+                    continue
+                tag, value = parsed
+
+                if tag == "End":
+                    self.finish_tx(tx)
+                    tx = None
+                    sampled = False
+                    continue
+
+                if tag == "ReqMethod":
+                    tx["method"] = value.split()[0] if value else "UNKNOWN"
+                elif tag == "RespStatus":
+                    tx["status"] = value.split()[0] if value else "0"
+                elif tag == "ReqHeader":
+                    if value.lower().startswith("host:"):
+                        tx["host"] = value.split(":", 1)[1].strip()
+                elif tag == "VCL_call":
+                    cache = self.cache_state(value)
+                    if cache:
+                        tx["cache"] = cache
+                elif tag == "Timestamp":
+                    name, latency = self.parse_timestamp(value)
+                    if latency is not None:
+                        if name == "Resp":
+                            tx["response_time"] = latency
+                        elif name == "Fetch":
+                            tx["backend_time"] = latency
+                        else:
+                            tx["fallback_time"] = max(tx.get("fallback_time", 0.0), latency)
+
+            if tx:
+                self.finish_tx(tx)
+        finally:
+            proc.kill()
+            proc.wait()
+
+        raise RuntimeError("varnishlog exited")
+
+    def parse_line(self, line):
+        """Split one log record line into a ``(tag, value)`` pair.
+
+        A record is optional leading whitespace, a single ``-``, whitespace,
+        a tag made of letters, digits or underscores, and an optional value.
+
+        Returns:
+            ``(tag, value)`` with the value stripped (``""`` when the record
+            carries no value), or ``None`` when the line is not such a record.
+        """
+        record = re.match(r"^\s*-\s+([A-Za-z0-9_]+)(?:\s+(.*))?$", line)
+
+        if record is None:
+            return None
+
+        # groups(default="") turns a missing value group into an empty string.
+        tag, raw_value = record.groups(default="")
+        return tag, raw_value.strip()
+
+    def cache_state(self, value):
+        """Map a ``VCL_call`` value to a lower-case cache state label.
+
+        Returns the lower-cased name when the value, ignoring case and
+        surrounding whitespace, is one of HIT, MISS, PASS, PIPE or SYNTH;
+        otherwise ``None``.
+        """
+        normalized = value.upper().strip()
+        recognised = ("HIT", "MISS", "PASS", "PIPE", "SYNTH")
+        return normalized.lower() if normalized in recognised else None
+
+    def parse_timestamp(self, value):
+        """Extract the event label and its second number from a ``Timestamp`` value.
+
+        The value must start with a label of letters/underscores followed by
+        ``:``, then a decimal number, then a second numeric field; that second
+        field is what gets returned as a float.
+
+        Returns:
+            ``(label, seconds)`` on success, ``(None, None)`` when the value
+            does not have that shape or the second field is not a valid float.
+        """
+        found = re.match(r"^([A-Za-z_]+):\s+\d+\.\d+\s+([0-9.]+)", value)
+
+        if found is None:
+            return None, None
+
+        label, elapsed_text = found.groups()
+
+        # The pattern admits strings such as "1.2.3", which float() rejects.
+        try:
+            elapsed = float(elapsed_text)
+        except ValueError:
+            return None, None
+
+        return label, elapsed
+
+    def finish_tx(self, tx):
+        """Report one collected transaction to the metrics registry.
+
+        ``tx`` is the dict of fields gathered for a request. A ``None`` or
+        empty dict is ignored. Missing fields fall back to ``"UNKNOWN"``
+        (method), ``"0"`` (status), ``"unknown"`` (cache) and ``""`` (host).
+        The latency is ``response_time`` when that key is present, otherwise
+        ``fallback_time``, otherwise ``None``. The observation is recorded
+        with ``self.sample_weight`` as its weight.
+        """
+        if not tx:
+            return
+
+        field = tx.get
+
+        http_method = normalize_method(field("method", "UNKNOWN"), self.cfg)
+        status_bucket = status_class(field("status", "0"))
+        cache_label = field("cache", "unknown")
+        site_label = site_group(field("host", ""), self.cfg)
+
+        if "response_time" in tx:
+            observed_latency = tx["response_time"]
+        else:
+            observed_latency = field("fallback_time")
+
+        self.metrics.record_http(
+            site=site_label,
+            method=http_method,
+            status_class_value=status_bucket,
+            cache=cache_label,
+            latency=observed_latency,
+            weight=self.sample_weight,
+        )
+
+
+class Handler(BaseHTTPRequestHandler):
+    """HTTP request handler that serves the exporter's ``/metrics`` endpoint.
+
+    The class attribute ``metrics`` must be set to an object providing
+    ``render()`` (returns the exposition text) and ``set_gauge(name, value)``
+    before the server starts handling requests.
+    """
+
+    # Shared metrics object; assigned on the class, not per request.
+    metrics = None
+
+    def do_GET(self):
+        """Answer ``GET /metrics`` with the rendered metrics, anything else with 404."""
+        # self.path still carries the query string, so compare only the path
+        # component; otherwise a scrape of "/metrics?x=y" would be rejected.
+        route = self.path.split("?", 1)[0]
+
+        if route != "/metrics":
+            self.send_response(404)
+            self.end_headers()
+            return
+
+        started = time.time()
+        body_text = self.metrics.render()
+        duration = time.time() - started
+
+        # Stored after rendering, so the value becomes visible on the next scrape.
+        self.metrics.set_gauge("varnish_exporter_render_duration_seconds", duration)
+
+        body = body_text.encode("utf-8")
+
+        self.send_response(200)
+        self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8")
+        self.send_header("Content-Length", str(len(body)))
+        self.end_headers()
+        self.wfile.write(body)
+
+    def log_message(self, fmt, *args):
+        """Discard the per-request log line the base class would emit."""
+        return
+
+
+def build_parser():
+    """Create the command-line parser for the exporter.
+
+    The parser shows usage examples plus module and profile summaries as a
+    verbatim epilog, and registers the network, Varnish, module/profile,
+    VSL sampling and windowing options with their defaults.
+
+    Returns:
+        The configured ``argparse.ArgumentParser``.
+    """
+    usage_notes = """
+Examples:
+
+  Only varnishstat:
+    python3 varnish_exporter.py --modules core,stat
+
+  Test VSL without sampling:
+    sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 1 --profile full
+
+  Recommended production mode:
+    sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 0.001 --profile standard
+
+  Debug with raw metrics:
+    sudo python3 varnish_exporter.py --modules core,stat,vsl,domain,raw --profile raw --vsl-sample 1
+
+  With domain config:
+    sudo python3 varnish_exporter.py --config /etc/varnish-exporter/config.json --enable-vsl
+
+Modules:
+  core    exporter self metrics
+  stat    varnishstat -1 -j
+  vsl     varnishlog -g request
+  domain  derived per-domain statistics
+  raw     raw request counters/histograms
+
+Profiles:
+  minimal   small set: rps, hit ratio, backend ratio, 5xx, p95
+  standard  recommended: business-oriented domain statistics
+  full      more detailed statistics
+  raw       full + raw HTTP metrics
+"""
+
+    cli = argparse.ArgumentParser(
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        description="Varnish Prometheus Business Exporter - per-domain statistics from varnishstat and varnishlog.",
+        epilog=usage_notes,
+    )
+
+    # One entry per option, listed in the order they are registered
+    # (which is also the order shown by --help).
+    option_table = (
+        ("--listen", dict(default="0.0.0.0", help="HTTP listen address. Default: 0.0.0.0")),
+        ("--port", dict(type=int, default=9131, help="HTTP /metrics port. Default: 9131")),
+        ("--instance", dict(default="", help="Varnish instance for -n. Usually empty.")),
+        ("--config", dict(default="", help="Path to config.json with domain/site rules.")),
+        ("--stat-interval", dict(type=int, default=5, help="varnishstat interval in seconds. Default: 5")),
+        (
+            "--modules",
+            dict(
+                type=parse_modules,
+                default=parse_modules("core,stat,vsl,domain"),
+                help="Modules to enable: core,stat,vsl,domain,raw or all. Default: core,stat,vsl,domain",
+            ),
+        ),
+        (
+            "--profile",
+            dict(
+                choices=["minimal", "standard", "full", "raw"],
+                default="standard",
+                help="Domain metric detail level. Default: standard",
+            ),
+        ),
+        ("--enable-vsl", dict(action="store_true", help="Enable varnishlog/VSL collector.")),
+        ("--enable-varnishlog", dict(action="store_true", help="Alias for --enable-vsl.")),
+        ("--vsl-sample", dict(type=float, default=0.001, help="VSL sampling: 1=100%%, 0.001=0.1%%. Default: 0.001")),
+        ("--max-series", dict(type=int, default=10000, help="Maximum number of series in the exporter. Default: 10000")),
+        ("--window-seconds", dict(type=int, default=60, help="Window for domain statistics. Default: 60")),
+        ("--bucket-seconds", dict(type=int, default=5, help="Internal bucket size for the domain window. Default: 5")),
+    )
+
+    for flag, settings in option_table:
+        cli.add_argument(flag, **settings)
+
+    return cli
+
+
+def main():
+    """Parse the command line, start the enabled collectors and serve metrics.
+
+    Steps: resolve the module set and profile, validate the numeric options,
+    load the configuration, build the shared ``Metrics`` object, start the
+    ``stat`` and ``vsl`` collectors when their modules are enabled (a
+    disabled collector is reported with ``varnish_exporter_collector_up`` = 0),
+    then serve HTTP until interrupted.
+
+    Raises:
+        SystemExit: when ``--vsl-sample`` is not in ``0 < x <= 1`` (NaN
+            included), ``--window-seconds`` is below 10, or
+            ``--bucket-seconds`` is below 1.
+    """
+    parser = build_parser()
+    args = parser.parse_args()
+
+    modules = set(args.modules)
+
+    # Both flags are equivalent ways of switching the VSL collector on.
+    if args.enable_vsl or args.enable_varnishlog:
+        modules.add("vsl")
+
+    # Enabling the raw module forces the raw profile.
+    if "raw" in modules:
+        args.profile = "raw"
+
+    # Written as a positive range test so that NaN, which compares false
+    # against everything, is rejected instead of slipping through.
+    if not (0 < args.vsl_sample <= 1):
+        raise SystemExit("--vsl-sample must be in range 0 < x <= 1")
+
+    if args.window_seconds < 10:
+        raise SystemExit("--window-seconds must be >= 10")
+
+    if args.bucket_seconds < 1:
+        raise SystemExit("--bucket-seconds must be >= 1")
+
+    cfg = load_config(args.config)
+
+    metrics = Metrics(
+        buckets=cfg["histogram_buckets"],
+        max_series=args.max_series,
+        window_seconds=args.window_seconds,
+        bucket_seconds=args.bucket_seconds,
+        profile=args.profile,
+        modules=modules,
+    )
+
+    if "stat" in modules:
+        VarnishStatCollector(
+            metrics=metrics,
+            interval=args.stat_interval,
+            instance=args.instance,
+        ).start()
+    else:
+        metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishstat")
+
+    if "vsl" in modules:
+        VarnishLogCollector(
+            metrics=metrics,
+            cfg=cfg,
+            instance=args.instance,
+            sample_rate=args.vsl_sample,
+        ).start()
+    else:
+        metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishlog")
+
+    # The handler reads the metrics object from its class attribute.
+    Handler.metrics = metrics
+
+    # The context manager closes the listening socket on every exit path.
+    with ThreadingHTTPServer((args.listen, args.port), Handler) as server:
+        print(f"listening on http://{args.listen}:{args.port}/metrics")
+        print(f"profile={args.profile}")
+        print(f"modules={','.join(sorted(modules))}")
+
+        try:
+            server.serve_forever()
+        except KeyboardInterrupt:
+            # Ctrl-C is the expected way to stop a foreground run; leave
+            # without a traceback.
+            pass
+
+
+# Start the exporter only when this file is executed as a script.
+if __name__ == "__main__":
+    main()