#!/usr/bin/env python3 """ Varnish Prometheus Business Exporter Purpose: A lightweight Prometheus exporter for Varnish focused not only on raw counters, but also on ready-to-use business and operational statistics per domain/site. Data sources: 1. varnishstat -1 -j - global Varnish metrics - storage, backends, cache hits/misses, workers, locks, etc. 2. varnishlog -g request - sampled request stream - per-domain aggregation - hit/miss/pass/backend ratios - latency p50/p90/p95/p99 - error ratios - RPS - saved backend RPS Main domain metrics: varnish_domain_rps varnish_domain_hit_ratio varnish_domain_miss_ratio varnish_domain_pass_ratio varnish_domain_backend_ratio varnish_domain_cache_efficiency_ratio varnish_domain_saved_backend_rps varnish_domain_error_ratio varnish_domain_4xx_ratio varnish_domain_5xx_ratio varnish_domain_avg_latency_seconds varnish_domain_p50_latency_seconds varnish_domain_p90_latency_seconds varnish_domain_p95_latency_seconds varnish_domain_p99_latency_seconds varnish_domain_slow_100ms_ratio varnish_domain_slow_250ms_ratio varnish_domain_slow_500ms_ratio varnish_domain_slow_1s_ratio Profiles: minimal: - exporter health - varnishstat - per-domain: rps, hit_ratio, backend_ratio, 5xx_ratio, p95 standard: - minimal + - miss/pass ratio - cache efficiency - backend rps - saved backend rps - error ratio - avg/p50/p90/p99 latency - slow ratios full: - standard + - per-cache average latency - 2xx/3xx/4xx/5xx rps/ratio - pipe/synth/unknown ratios raw: - full + - raw varnish_http_requests_total - raw varnish_http_request_duration_seconds - varnish_domain_response_time_seconds histogram Modules: core: - exporter self metrics stat: - varnishstat metrics vsl: - varnishlog collector domain: - derived per-domain metrics raw: - raw HTTP request counters/histograms Defaults: --modules core,stat,vsl,domain --profile standard --vsl-sample 0.001 Examples: Local test without sampling: sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 1 --profile full Production: sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 0.001 --profile standard Only varnishstat: python3 varnish_exporter.py --modules core,stat Only domain aggregates, no raw HTTP metrics: sudo python3 varnish_exporter.py --modules core,stat,vsl,domain --profile standard Debug with raw metrics: sudo python3 varnish_exporter.py --modules core,stat,vsl,domain,raw --profile raw --vsl-sample 1 JSON config example: { "site_rules": [ { "match": "(^|\\\\.)example\\\\.com$", "site": "example_com" }, { "match": "(^|\\\\.)static\\\\.example\\\\.com$", "site": "static_example_com" } ], "default_site": "other", "allowed_methods": ["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"], "histogram_buckets": [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10] } Notes: - Varnish cannot see real browser render time. - Latency here means server-side response time as seen by Varnish. - For very high traffic, do not use --vsl-sample 1 in production. - Sensible production values: 0.001 or 0.0001. """ import argparse import json import random import re import subprocess import threading import time from collections import defaultdict from http.server import BaseHTTPRequestHandler, ThreadingHTTPServer DEFAULT_CONFIG = { "site_rules": [], "default_site": "other", "allowed_methods": ["GET", "HEAD", "POST", "PUT", "PATCH", "DELETE", "OPTIONS"], "histogram_buckets": [0.005, 0.01, 0.025, 0.05, 0.1, 0.25, 0.5, 1, 2.5, 5, 10], } VALID_MODULES = {"core", "stat", "vsl", "domain", "raw"} def load_config(path): if not path: cfg = dict(DEFAULT_CONFIG) else: with open(path, "r", encoding="utf-8") as f: cfg = json.load(f) cfg.setdefault("site_rules", []) cfg.setdefault("default_site", "other") cfg.setdefault("allowed_methods", DEFAULT_CONFIG["allowed_methods"]) cfg.setdefault("histogram_buckets", DEFAULT_CONFIG["histogram_buckets"]) cfg["site_rules_compiled"] = [ (re.compile(x["match"], re.I), x["site"]) for x in cfg.get("site_rules", []) ] cfg["allowed_methods_set"] = set(cfg["allowed_methods"]) return cfg def parse_modules(value): modules = set() for item in value.split(","): item = item.strip().lower() if not item: continue if item == "all": return set(VALID_MODULES) if item not in VALID_MODULES: raise argparse.ArgumentTypeError( f"unknown module: {item}; available modules: {','.join(sorted(VALID_MODULES))}" ) modules.add(item) if not modules: raise argparse.ArgumentTypeError("module list cannot be empty") return modules def prom_escape(value): return str(value).replace("\\", "\\\\").replace('"', '\\"').replace("\n", "\\n") def prom_name(name): name = str(name).lower() name = re.sub(r"[^a-z0-9_]", "_", name) name = re.sub(r"_+", "_", name).strip("_") if name and name[0].isdigit(): name = "_" + name return name or "unknown" def status_class(status): try: code = int(status) except Exception: return "unknown" if 100 <= code <= 599: return f"{code // 100}xx" return "unknown" def normalize_method(method, cfg): method = str(method or "UNKNOWN").upper() return method if method in cfg["allowed_methods_set"] else "OTHER" def normalize_host(host): host = str(host or "").lower().strip().rstrip(".") if not host: return "" if host.startswith("[") and "]" in host: return host[1:host.index("]")] if ":" in host: host = host.split(":", 1)[0] return host def site_group(host, cfg): host = normalize_host(host) if not host: return cfg.get("default_site", "other") for regex, site in cfg["site_rules_compiled"]: if regex.search(host): return site return host def format_metric(name, labels, value): if labels: label_str = ",".join( f'{k}="{prom_escape(v)}"' for k, v in sorted(labels) ) return f"{name}{{{label_str}}} {value}" return f"{name} {value}" def estimate_quantile_from_buckets(buckets, count, q): if count <= 0: return 0.0 finite = sorted(b for b in buckets.keys() if b != float("inf")) if not finite: return 0.0 target = count * q prev_le = 0.0 prev_count = 0.0 for le in finite: current_count = buckets.get(le, 0.0) if current_count >= target: if current_count <= prev_count: return le fraction = (target - prev_count) / (current_count - prev_count) return prev_le + ((le - prev_le) * fraction) prev_le = le prev_count = current_count return finite[-1] class Metrics: def __init__(self, buckets, max_series, window_seconds, bucket_seconds, profile, modules): self.lock = threading.RLock() self.buckets = buckets self.max_series = max_series self.window_seconds = window_seconds self.bucket_seconds = bucket_seconds self.profile = profile self.modules = modules self.gauges = {} self.counters = {} self.histograms = defaultdict(lambda: { "buckets": defaultdict(float), "sum": 0.0, "count": 0.0, }) self.series_seen = set() self.dropped_series = 0 self.domain_window = defaultdict(lambda: defaultdict(float)) def module_enabled(self, name): return name in self.modules def emit_raw(self): return self.profile == "raw" or self.module_enabled("raw") def emit_minimal(self): return self.profile in ("minimal", "standard", "full", "raw") def emit_standard(self): return self.profile in ("standard", "full", "raw") def emit_full(self): return self.profile in ("full", "raw") def _label_key(self, labels): return tuple(sorted((str(k), str(v)) for k, v in labels.items())) def _allow_series(self, metric, labels): key = (metric, labels) if key in self.series_seen: return True if len(self.series_seen) >= self.max_series: self.dropped_series += 1 return False self.series_seen.add(key) return True def set_gauge(self, name, value, **labels): labels_key = self._label_key(labels) with self.lock: if not self._allow_series(name, labels_key): return self.gauges[(name, labels_key)] = float(value) def set_counter(self, name, value, **labels): if not name.endswith("_total"): name += "_total" labels_key = self._label_key(labels) with self.lock: if not self._allow_series(name, labels_key): return self.counters[(name, labels_key)] = float(value) def inc_counter(self, name, amount=1.0, **labels): if not name.endswith("_total"): name += "_total" labels_key = self._label_key(labels) with self.lock: if not self._allow_series(name, labels_key): return self.counters[(name, labels_key)] = self.counters.get((name, labels_key), 0.0) + float(amount) def observe(self, name, value, weight=1.0, **labels): labels_key = self._label_key(labels) with self.lock: if not self._allow_series(name, labels_key): return h = self.histograms[(name, labels_key)] h["sum"] += float(value) * float(weight) h["count"] += float(weight) for b in self.buckets: if value <= b: h["buckets"][b] += float(weight) h["buckets"][float("inf")] += float(weight) def record_http(self, site, method, status_class_value, cache, latency, weight): raw_labels = { "site": site, "method": method, "status_class": status_class_value, "cache": cache, } if self.emit_raw(): self.inc_counter("varnish_http_requests_total", weight, **raw_labels) if latency is not None: self.observe( "varnish_http_request_duration_seconds", latency, weight=weight, **raw_labels, ) self.observe( "varnish_domain_response_time_seconds", latency, weight=weight, site=site, ) if not self.module_enabled("domain"): return now = int(time.time()) bucket_ts = now - (now % self.bucket_seconds) key = (bucket_ts, site) with self.lock: b = self.domain_window[key] b["total"] += weight b[f"cache_{cache}"] += weight if cache in ("hit", "miss"): b["cacheable"] += weight if cache in ("miss", "pass"): b["backend"] += weight if status_class_value == "2xx": b["2xx"] += weight elif status_class_value == "3xx": b["3xx"] += weight elif status_class_value == "4xx": b["4xx"] += weight b["errors"] += weight elif status_class_value == "5xx": b["5xx"] += weight b["errors"] += weight if latency is not None: b["latency_sum"] += latency * weight b["latency_count"] += weight b[f"cache_{cache}_latency_sum"] += latency * weight b[f"cache_{cache}_latency_count"] += weight if latency > 0.05: b["slow_50ms"] += weight if latency > 0.1: b["slow_100ms"] += weight if latency > 0.25: b["slow_250ms"] += weight if latency > 0.5: b["slow_500ms"] += weight if latency > 1.0: b["slow_1s"] += weight if latency > 2.5: b["slow_2500ms"] += weight if latency > 5.0: b["slow_5s"] += weight for le in self.buckets: if latency <= le: b[f"latency_le_{le}"] += weight b["latency_le_inf"] += weight self.cleanup_window_locked(now) def cleanup_window_locked(self, now): min_ts = now - self.window_seconds - self.bucket_seconds for key in list(self.domain_window.keys()): bucket_ts, _site = key if bucket_ts < min_ts: del self.domain_window[key] def calculate_domain_window_stats(self): if not self.module_enabled("domain"): return {} now = int(time.time()) min_ts = now - self.window_seconds stats = defaultdict(lambda: defaultdict(float)) with self.lock: self.cleanup_window_locked(now) items = list(self.domain_window.items()) for (bucket_ts, site), values in items: if bucket_ts < min_ts: continue s = stats[site] for k, v in values.items(): s[k] += float(v) out = {} for site, s in stats.items(): total = s.get("total", 0.0) if total <= 0: continue labels = (("site", site),) window = float(self.window_seconds) hit = s.get("cache_hit", 0.0) miss = s.get("cache_miss", 0.0) passed = s.get("cache_pass", 0.0) pipe = s.get("cache_pipe", 0.0) synth = s.get("cache_synth", 0.0) unknown_cache = s.get("cache_unknown", 0.0) backend = s.get("backend", 0.0) cacheable = s.get("cacheable", 0.0) e2xx = s.get("2xx", 0.0) e3xx = s.get("3xx", 0.0) e4xx = s.get("4xx", 0.0) e5xx = s.get("5xx", 0.0) errors = s.get("errors", 0.0) out[("varnish_domain_rps", labels)] = total / window out[("varnish_domain_hit_ratio", labels)] = hit / total out[("varnish_domain_backend_ratio", labels)] = backend / total out[("varnish_domain_5xx_ratio", labels)] = e5xx / total latency_count = s.get("latency_count", 0.0) latency_buckets = None if latency_count > 0: latency_buckets = {} for le in self.buckets: latency_buckets[le] = s.get(f"latency_le_{le}", 0.0) latency_buckets[float("inf")] = s.get("latency_le_inf", 0.0) out[("varnish_domain_p95_latency_seconds", labels)] = estimate_quantile_from_buckets( latency_buckets, latency_count, 0.95, ) if self.emit_standard(): out[("varnish_domain_requests_per_second", labels)] = total / window out[("varnish_domain_hit_rps", labels)] = hit / window out[("varnish_domain_miss_rps", labels)] = miss / window out[("varnish_domain_pass_rps", labels)] = passed / window out[("varnish_domain_backend_rps", labels)] = backend / window out[("varnish_domain_saved_backend_rps", labels)] = hit / window out[("varnish_domain_miss_ratio", labels)] = miss / total out[("varnish_domain_pass_ratio", labels)] = passed / total out[("varnish_domain_backend_ratio", labels)] = backend / total out[("varnish_domain_cacheable_ratio", labels)] = cacheable / total out[("varnish_domain_4xx_ratio", labels)] = e4xx / total out[("varnish_domain_error_ratio", labels)] = errors / total if cacheable > 0: out[("varnish_domain_cache_efficiency_ratio", labels)] = hit / cacheable else: out[("varnish_domain_cache_efficiency_ratio", labels)] = 0.0 if latency_count > 0 and latency_buckets is not None: out[("varnish_domain_avg_latency_seconds", labels)] = s.get("latency_sum", 0.0) / latency_count out[("varnish_domain_latency_observed_ratio", labels)] = latency_count / total out[("varnish_domain_p50_latency_seconds", labels)] = estimate_quantile_from_buckets( latency_buckets, latency_count, 0.50, ) out[("varnish_domain_p90_latency_seconds", labels)] = estimate_quantile_from_buckets( latency_buckets, latency_count, 0.90, ) out[("varnish_domain_p99_latency_seconds", labels)] = estimate_quantile_from_buckets( latency_buckets, latency_count, 0.99, ) out[("varnish_domain_slow_100ms_ratio", labels)] = s.get("slow_100ms", 0.0) / latency_count out[("varnish_domain_slow_250ms_ratio", labels)] = s.get("slow_250ms", 0.0) / latency_count out[("varnish_domain_slow_500ms_ratio", labels)] = s.get("slow_500ms", 0.0) / latency_count out[("varnish_domain_slow_1s_ratio", labels)] = s.get("slow_1s", 0.0) / latency_count if self.emit_full(): out[("varnish_domain_pipe_rps", labels)] = pipe / window out[("varnish_domain_synth_rps", labels)] = synth / window out[("varnish_domain_unknown_cache_rps", labels)] = unknown_cache / window out[("varnish_domain_pipe_ratio", labels)] = pipe / total out[("varnish_domain_synth_ratio", labels)] = synth / total out[("varnish_domain_unknown_cache_ratio", labels)] = unknown_cache / total out[("varnish_domain_2xx_ratio", labels)] = e2xx / total out[("varnish_domain_3xx_ratio", labels)] = e3xx / total out[("varnish_domain_2xx_rps", labels)] = e2xx / window out[("varnish_domain_3xx_rps", labels)] = e3xx / window out[("varnish_domain_4xx_rps", labels)] = e4xx / window out[("varnish_domain_5xx_rps", labels)] = e5xx / window out[("varnish_domain_error_rps", labels)] = errors / window if latency_count > 0: out[("varnish_domain_slow_50ms_ratio", labels)] = s.get("slow_50ms", 0.0) / latency_count out[("varnish_domain_slow_2500ms_ratio", labels)] = s.get("slow_2500ms", 0.0) / latency_count out[("varnish_domain_slow_5s_ratio", labels)] = s.get("slow_5s", 0.0) / latency_count for cache_name in ("hit", "miss", "pass", "pipe", "synth", "unknown"): cache_latency_count = s.get(f"cache_{cache_name}_latency_count", 0.0) cache_latency_sum = s.get(f"cache_{cache_name}_latency_sum", 0.0) if cache_latency_count > 0: metric = f"varnish_domain_{cache_name}_avg_latency_seconds" out[(metric, labels)] = cache_latency_sum / cache_latency_count return out def render(self): with self.lock: gauges = dict(self.gauges) counters = dict(self.counters) histograms = { k: { "buckets": dict(v["buckets"]), "sum": v["sum"], "count": v["count"], } for k, v in self.histograms.items() } series_count = len(self.series_seen) dropped_series = self.dropped_series derived = self.calculate_domain_window_stats() for key, value in derived.items(): gauges[key] = value out = [] if self.module_enabled("core"): out.append("# TYPE varnish_exporter_series gauge") out.append(f"varnish_exporter_series {series_count}") out.append("# TYPE varnish_exporter_dropped_series_total counter") out.append(f"varnish_exporter_dropped_series_total {dropped_series}") out.append("# TYPE varnish_exporter_window_seconds gauge") out.append(f"varnish_exporter_window_seconds {self.window_seconds}") out.append("# TYPE varnish_exporter_profile gauge") out.append(f'varnish_exporter_profile{{profile="{self.profile}"}} 1') for module in sorted(self.modules): out.append(f'varnish_exporter_module_enabled{{module="{module}"}} 1') typed = set() for (name, labels), value in sorted(gauges.items()): if name not in typed: out.append(f"# TYPE {name} gauge") typed.add(name) out.append(format_metric(name, labels, value)) for (name, labels), value in sorted(counters.items()): if name not in typed: out.append(f"# TYPE {name} counter") typed.add(name) out.append(format_metric(name, labels, value)) for (name, labels), h in sorted(histograms.items()): if name not in typed: out.append(f"# TYPE {name} histogram") typed.add(name) base_labels = dict(labels) for b in self.buckets: lb = dict(base_labels) lb["le"] = str(b) out.append(format_metric(name + "_bucket", tuple(sorted(lb.items())), h["buckets"].get(b, 0.0))) lb = dict(base_labels) lb["le"] = "+Inf" out.append(format_metric(name + "_bucket", tuple(sorted(lb.items())), h["buckets"].get(float("inf"), 0.0))) out.append(format_metric(name + "_sum", labels, h["sum"])) out.append(format_metric(name + "_count", labels, h["count"])) return "\n".join(out) + "\n" class VarnishStatCollector(threading.Thread): def __init__(self, metrics, interval, instance): super().__init__(daemon=True) self.metrics = metrics self.interval = interval self.instance = instance def run(self): while True: started = time.time() try: self.collect() self.metrics.set_gauge("varnish_exporter_collector_up", 1, collector="varnishstat") except Exception: self.metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishstat") self.metrics.set_gauge( "varnish_exporter_collector_duration_seconds", time.time() - started, collector="varnishstat", ) time.sleep(self.interval) def collect(self): cmd = ["varnishstat", "-1", "-j"] if self.instance: cmd.extend(["-n", self.instance]) raw = subprocess.check_output(cmd, text=True, timeout=10) data = json.loads(raw) for key, item in data.items(): if not isinstance(item, dict) or "value" not in item: continue value = item.get("value", 0) flag = item.get("flag", "g") metric, labels = self.metric_from_key(key) if flag == "c": self.metrics.set_counter(metric, value, **labels) else: self.metrics.set_gauge(metric, value, **labels) def metric_from_key(self, key): parts = key.split(".") section = prom_name(parts[0]) if len(parts) == 2: return f"varnish_{section}_{prom_name(parts[1])}", {} labeled_sections = { "VBE": "backend", "SMA": "storage", "SMF": "storage", "MSE": "storage", "LCK": "lock", } if parts[0] in labeled_sections and len(parts) >= 3: label_name = labeled_sections[parts[0]] object_name = ".".join(parts[1:-1]) field = parts[-1] return f"varnish_{section}_{prom_name(field)}", {label_name: object_name} return f"varnish_{section}_{prom_name('_'.join(parts[1:]))}", {} class VarnishLogCollector(threading.Thread): def __init__(self, metrics, cfg, instance, sample_rate): super().__init__(daemon=True) self.metrics = metrics self.cfg = cfg self.instance = instance self.sample_rate = sample_rate self.sample_weight = 1.0 / sample_rate if sample_rate > 0 else 0.0 def run(self): while True: try: self.stream() except Exception: self.metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishlog") time.sleep(2) def stream(self): cmd = [ "varnishlog", "-g", "request", "-i", "ReqMethod,ReqHeader,RespStatus,VCL_call,Timestamp,End", ] if self.instance: cmd.extend(["-n", self.instance]) proc = subprocess.Popen( cmd, stdout=subprocess.PIPE, stderr=subprocess.DEVNULL, text=True, bufsize=1, ) self.metrics.set_gauge("varnish_exporter_collector_up", 1, collector="varnishlog") tx = None sampled = False for line in proc.stdout: line = line.rstrip("\n") if "<< Request" in line: if tx: self.finish_tx(tx) sampled = random.random() < self.sample_rate tx = {} if sampled else None continue if not sampled or tx is None: continue parsed = self.parse_line(line) if not parsed: continue tag, value = parsed if tag == "End": self.finish_tx(tx) tx = None sampled = False continue if tag == "ReqMethod": tx["method"] = value.split()[0] if value else "UNKNOWN" elif tag == "RespStatus": tx["status"] = value.split()[0] if value else "0" elif tag == "ReqHeader": low = value.lower() if low.startswith("host:"): tx["host"] = value.split(":", 1)[1].strip() elif tag == "VCL_call": cache = self.cache_state(value) if cache: tx["cache"] = cache elif tag == "Timestamp": name, latency = self.parse_timestamp(value) if latency is not None: if name == "Resp": tx["response_time"] = latency elif name == "Fetch": tx["backend_time"] = latency else: tx["fallback_time"] = max(tx.get("fallback_time", 0.0), latency) if tx: self.finish_tx(tx) def parse_line(self, line): m = re.match(r"^\s*-\s+([A-Za-z0-9_]+)(?:\s+(.*))?$", line) if not m: return None tag = m.group(1) value = (m.group(2) or "").strip() return tag, value def cache_state(self, value): value = value.upper().strip() if value in {"HIT", "MISS", "PASS", "PIPE", "SYNTH"}: return value.lower() return None def parse_timestamp(self, value): m = re.match(r"^([A-Za-z_]+):\s+\d+\.\d+\s+([0-9.]+)", value) if not m: return None, None try: return m.group(1), float(m.group(2)) except ValueError: return None, None def finish_tx(self, tx): if not tx: return method = normalize_method(tx.get("method", "UNKNOWN"), self.cfg) status = status_class(tx.get("status", "0")) cache = tx.get("cache", "unknown") site = site_group(tx.get("host", ""), self.cfg) latency = tx.get("response_time", tx.get("fallback_time")) self.metrics.record_http( site=site, method=method, status_class_value=status, cache=cache, latency=latency, weight=self.sample_weight, ) class Handler(BaseHTTPRequestHandler): metrics = None def do_GET(self): if self.path != "/metrics": self.send_response(404) self.end_headers() return started = time.time() body_text = self.metrics.render() duration = time.time() - started self.metrics.set_gauge("varnish_exporter_render_duration_seconds", duration) body = body_text.encode("utf-8") self.send_response(200) self.send_header("Content-Type", "text/plain; version=0.0.4; charset=utf-8") self.send_header("Content-Length", str(len(body))) self.end_headers() self.wfile.write(body) def log_message(self, fmt, *args): return def build_parser(): epilog = """ Examples: Only varnishstat: python3 varnish_exporter.py --modules core,stat Test VSL without sampling: sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 1 --profile full Recommended production mode: sudo python3 varnish_exporter.py --enable-vsl --vsl-sample 0.001 --profile standard Debug with raw metrics: sudo python3 varnish_exporter.py --modules core,stat,vsl,domain,raw --profile raw --vsl-sample 1 With domain config: sudo python3 varnish_exporter.py --config /etc/varnish-exporter/config.json --enable-vsl Modules: core exporter self metrics stat varnishstat -1 -j vsl varnishlog -g request domain derived per-domain statistics raw raw request counters/histograms Profiles: minimal small set: rps, hit ratio, backend ratio, 5xx, p95 standard recommended: business-oriented domain statistics full more detailed statistics raw full + raw HTTP metrics """ parser = argparse.ArgumentParser( description="Varnish Prometheus Business Exporter - per-domain statistics from varnishstat and varnishlog.", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=epilog, ) parser.add_argument("--listen", default="0.0.0.0", help="HTTP listen address. Default: 0.0.0.0") parser.add_argument("--port", type=int, default=9131, help="HTTP /metrics port. Default: 9131") parser.add_argument("--instance", default="", help="Varnish instance for -n. Usually empty.") parser.add_argument("--config", default="", help="Path to config.json with domain/site rules.") parser.add_argument("--stat-interval", type=int, default=5, help="varnishstat interval in seconds. Default: 5") parser.add_argument( "--modules", type=parse_modules, default=parse_modules("core,stat,vsl,domain"), help="Modules to enable: core,stat,vsl,domain,raw or all. Default: core,stat,vsl,domain", ) parser.add_argument( "--profile", choices=["minimal", "standard", "full", "raw"], default="standard", help="Domain metric detail level. Default: standard", ) parser.add_argument("--enable-vsl", action="store_true", help="Enable varnishlog/VSL collector.") parser.add_argument("--enable-varnishlog", action="store_true", help="Alias for --enable-vsl.") parser.add_argument("--vsl-sample", type=float, default=0.001, help="VSL sampling: 1=100%%, 0.001=0.1%%. Default: 0.001") parser.add_argument("--max-series", type=int, default=10000, help="Maximum number of series in the exporter. Default: 10000") parser.add_argument("--window-seconds", type=int, default=60, help="Window for domain statistics. Default: 60") parser.add_argument("--bucket-seconds", type=int, default=5, help="Internal bucket size for the domain window. Default: 5") return parser def main(): parser = build_parser() args = parser.parse_args() modules = set(args.modules) if args.enable_vsl or args.enable_varnishlog: modules.add("vsl") if "raw" in modules and args.profile != "raw": args.profile = "raw" if args.vsl_sample <= 0 or args.vsl_sample > 1: raise SystemExit("--vsl-sample must be in range 0 < x <= 1") if args.window_seconds < 10: raise SystemExit("--window-seconds must be >= 10") if args.bucket_seconds < 1: raise SystemExit("--bucket-seconds must be >= 1") cfg = load_config(args.config) metrics = Metrics( buckets=cfg["histogram_buckets"], max_series=args.max_series, window_seconds=args.window_seconds, bucket_seconds=args.bucket_seconds, profile=args.profile, modules=modules, ) if "stat" in modules: VarnishStatCollector( metrics=metrics, interval=args.stat_interval, instance=args.instance, ).start() else: metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishstat") if "vsl" in modules: VarnishLogCollector( metrics=metrics, cfg=cfg, instance=args.instance, sample_rate=args.vsl_sample, ).start() else: metrics.set_gauge("varnish_exporter_collector_up", 0, collector="varnishlog") Handler.metrics = metrics server = ThreadingHTTPServer((args.listen, args.port), Handler) print(f"listening on http://{args.listen}:{args.port}/metrics") print(f"profile={args.profile}") print(f"modules={','.join(sorted(modules))}") server.serve_forever() if __name__ == "__main__": main()