#!/usr/bin/env python3
# -*- coding: utf-8 -*-

# example: python3 zfs_probe.py --duration 120 --interval 5 --track-files --path-prefix /home

import argparse
import ast
import json
import math
import os
import re
import shutil
import signal
import statistics
import subprocess
import sys
import time
from collections import defaultdict
from datetime import datetime


def eprint(msg):
    sys.stderr.write(str(msg) + "\n")
    sys.stderr.flush()


def oprint(msg=""):
    sys.stdout.write(str(msg) + "\n")
    sys.stdout.flush()


def human_bytes(n):
    if n is None:
        return "-"
    neg = n < 0
    n = abs(float(n))
    units = ["B", "KiB", "MiB", "GiB", "TiB", "PiB"]
    for unit in units:
        if n < 1024.0 or unit == units[-1]:
            s = "{0:.1f} {1}".format(n, unit) if unit != "B" else "{0} B".format(int(n))
            return "-" + s if neg else s
        n /= 1024.0


def human_ns_to_ms(n):
    if n is None:
        return "-"
    try:
        return "{0:.2f} ms".format(float(n) / 1000000.0)
    except Exception:
        return "-"


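# A minimal usage sketch of the two formatting helpers above (illustrative
# only; nothing in the script calls it). The expected strings follow directly
# from the definitions.
def _example_formatting():
    assert human_bytes(512) == "512 B"
    assert human_bytes(1536) == "1.5 KiB"
    assert human_ns_to_ms(2500000) == "2.50 ms"

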
def mean_or_zero(values):
    return statistics.mean(values) if values else 0.0


def percentile(values, q):
    if not values:
        return 0.0
    if len(values) == 1:
        return float(values[0])
    values = sorted(values)
    pos = (len(values) - 1) * q
    lo = int(math.floor(pos))
    hi = int(math.ceil(pos))
    if lo == hi:
        return float(values[lo])
    frac = pos - lo
    return values[lo] * (1.0 - frac) + values[hi] * frac


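# A worked example of the linear-interpolation percentile above (illustrative
# only; not called anywhere): for [10, 20, 30, 40] and q=0.95 the position is
# (4 - 1) * 0.95 = 2.85, so the result is 30 * 0.15 + 40 * 0.85 = 38.5.
def _example_percentile():
    assert abs(percentile([10, 20, 30, 40], 0.95) - 38.5) < 1e-9
    assert percentile([], 0.5) == 0.0

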
def run_cmd(cmd, timeout=None):
    proc = subprocess.run(
        cmd,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        universal_newlines=True,
        timeout=timeout,
        check=False,
    )
    return proc.returncode, proc.stdout, proc.stderr


def command_exists(name):
    return shutil.which(name) is not None


def read_file(path, binary=False):
    mode = "rb" if binary else "r"
    with open(path, mode) as f:
        return f.read()


def safe_read_text(path):
    try:
        return read_file(path, binary=False)
    except Exception:
        return ""


def ensure_dir(path):
    os.makedirs(path, exist_ok=True)


def parse_proc_io_text(text):
    out = {}
    for line in text.splitlines():
        if ":" not in line:
            continue
        k, v = line.split(":", 1)
        try:
            out[k.strip()] = int(v.strip())
        except ValueError:
            continue
    return out


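# A small sketch of the /proc/<pid>/io parser above, fed a fabricated snippet
# in the same "key: value" format (illustrative only; not called anywhere).
def _example_parse_proc_io():
    sample = "rchar: 4292\nwchar: 0\nsyscr: 14\nread_bytes: 8192\n"
    parsed = parse_proc_io_text(sample)
    assert parsed["rchar"] == 4292
    assert parsed["read_bytes"] == 8192

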
def read_proc_snapshot():
    snap = {}
    for pid in os.listdir("/proc"):
        if not pid.isdigit():
            continue
        base = os.path.join("/proc", pid)
        try:
            io_txt = read_file(os.path.join(base, "io"))
            stat_txt = read_file(os.path.join(base, "stat"))
            comm = safe_read_text(os.path.join(base, "comm")).strip() or "?"
            cmdline_raw = read_file(os.path.join(base, "cmdline"), binary=True)
            cmdline = cmdline_raw.replace(b"\x00", b" ").decode("utf-8", "replace").strip()
            # The comm field (2) may contain spaces, so split after the closing
            # paren; starttime is field 22 overall, i.e. index 19 of the rest.
            stat_rest = stat_txt.rsplit(")", 1)[1].split()
            starttime = int(stat_rest[19])
            io = parse_proc_io_text(io_txt)
            key = "{0}:{1}".format(pid, starttime)
            snap[key] = {
                "pid": int(pid),
                "starttime": starttime,
                "comm": comm,
                "cmdline": cmdline,
                "io": io,
            }
        except (FileNotFoundError, ProcessLookupError, PermissionError, IndexError, ValueError):
            continue
        except Exception:
            continue
    return snap


def diff_proc_snapshots(prev, curr, accum):
    tracked = ["rchar", "wchar", "syscr", "syscw", "read_bytes", "write_bytes", "cancelled_write_bytes"]
    for key, cur in curr.items():
        if key not in prev:
            continue
        old = prev[key]
        entry = accum.setdefault(key, {
            "pid": cur["pid"],
            "comm": cur["comm"],
            "cmdline": cur["cmdline"],
            "samples": 0,
            "rchar": 0,
            "wchar": 0,
            "syscr": 0,
            "syscw": 0,
            "read_bytes": 0,
            "write_bytes": 0,
            "cancelled_write_bytes": 0,
            "max_interval_read_bytes": 0,
            "max_interval_write_bytes": 0,
        })
        entry["samples"] += 1
        if cur.get("cmdline"):
            entry["cmdline"] = cur["cmdline"]
        for k in tracked:
            delta = cur["io"].get(k, 0) - old["io"].get(k, 0)
            if delta < 0:
                # counters should be monotonic; clamp negative deltas
                delta = 0
            entry[k] += delta
            if k == "read_bytes" and delta > entry["max_interval_read_bytes"]:
                entry["max_interval_read_bytes"] = delta
            if k == "write_bytes" and delta > entry["max_interval_write_bytes"]:
                entry["max_interval_write_bytes"] = delta


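# A sketch of the interval accounting above on two fabricated snapshots of the
# same process (illustrative only; not called anywhere): only the delta
# between snapshots is accumulated, and the largest per-interval delta is kept.
def _example_diff_accounting():
    base = {"pid": 1, "comm": "demo", "cmdline": "demo", "io": {"read_bytes": 1000}}
    prev = {"1:100": dict(base)}
    curr = {"1:100": dict(base, io={"read_bytes": 4000})}
    accum = {}
    diff_proc_snapshots(prev, curr, accum)
    assert accum["1:100"]["read_bytes"] == 3000
    assert accum["1:100"]["max_interval_read_bytes"] == 3000

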
def get_pools(pool_arg=None):
    cmd = ["zpool", "list", "-H", "-o", "name"]
    rc, out, err = run_cmd(cmd, timeout=20)
    if rc != 0:
        raise RuntimeError("Cannot get the pool list: {0}".format(err.strip()))
    pools = [x.strip() for x in out.splitlines() if x.strip()]
    if pool_arg:
        wanted = [x.strip() for x in pool_arg.split(",") if x.strip()]
        pools = [p for p in pools if p in wanted]
    return pools


def zpool_status_text(pools):
    cmd = ["zpool", "status"] + pools
    return run_cmd(cmd, timeout=30)[1]


def zpool_history_text(pools):
    if not pools:
        return ""
    cmd = ["zpool", "history", "-il"] + pools
    rc, out, err = run_cmd(cmd, timeout=60)
    if rc != 0:
        return "zpool history unavailable: {0}\n".format(err.strip())
    return out


def zfs_get_properties(pools):
    props = "atime,relatime,primarycache,secondarycache,prefetch,recordsize,mountpoint"
    cmd = ["zfs", "get", "-H", "-r", "-o", "name,property,value", props] + pools
    rc, out, err = run_cmd(cmd, timeout=60)
    if rc != 0:
        return [{"error": err.strip()}]
    rows = []
    for line in out.splitlines():
        parts = line.split("\t")
        if len(parts) >= 3:
            rows.append({"name": parts[0], "property": parts[1], "value": parts[2]})
    return rows


def get_zfs_mountpoints():
    """
    Returns the mountpoints of zfs-type filesystems visible in this process's
    namespace. /proc/self/mountinfo describes mounts in the current mount
    namespace.
    """
    mountinfo = "/proc/self/mountinfo"
    mounts = []
    text = safe_read_text(mountinfo)
    for line in text.splitlines():
        line = line.strip()
        if not line or " - " not in line:
            continue
        left, right = line.split(" - ", 1)
        lparts = left.split()
        rparts = right.split()
        if len(lparts) < 5 or len(rparts) < 1:
            continue
        fstype = rparts[0]
        # mountinfo octal-escapes spaces in paths as \040
        mountpoint = lparts[4].replace("\\040", " ")
        if fstype == "zfs":
            mounts.append(mountpoint)

    # longest prefixes first, without duplicates
    uniq = sorted(set(mounts), key=lambda x: (-len(x), x))
    return uniq


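# A sketch of the mountinfo field layout relied on above, using a fabricated
# line (illustrative only; not called anywhere): the mountpoint is the fifth
# field before the " - " separator, the fstype the first field after it.
def _example_mountinfo_parsing():
    line = "36 25 0:33 / /tank rw,noatime shared:15 - zfs tank rw,xattr"
    left, right = line.split(" - ", 1)
    assert left.split()[4] == "/tank"
    assert right.split()[0] == "zfs"

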
def path_is_under_mountpoints(path, mountpoints):
    if not path or not path.startswith("/"):
        return False
    norm = os.path.normpath(path)
    for mp in mountpoints:
        if mp == "/":
            return True
        if norm == mp or norm.startswith(mp.rstrip("/") + "/"):
            return True
    return False


def filter_bpf_opens_to_zfs(opens_map, mountpoints):
    kept = {}
    dropped_relative = 0
    dropped_non_zfs = 0

    for path, value in opens_map.items():
        if not isinstance(path, str):
            continue
        if not path.startswith("/"):
            dropped_relative += value
            continue
        if path_is_under_mountpoints(path, mountpoints):
            kept[path] = kept.get(path, 0) + value
        else:
            dropped_non_zfs += value

    return kept, dropped_relative, dropped_non_zfs


def normalize_prefixes(prefix_arg):
    prefixes = []
    for item in (prefix_arg or "").split(","):
        item = item.strip()
        if not item:
            continue
        norm = os.path.normpath(item)
        if not norm.startswith("/"):
            continue
        prefixes.append(norm)
    return sorted(set(prefixes), key=lambda x: (-len(x), x))


def path_matches_prefixes(path, prefixes):
    if not path or not path.startswith("/"):
        return False
    norm = os.path.normpath(path)
    for prefix in prefixes:
        if prefix == "/":
            return True
        if norm == prefix or norm.startswith(prefix.rstrip("/") + "/"):
            return True
    return False


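# A sketch of the prefix matching above (illustrative only; not called
# anywhere). Note that "/home/tanker" does not match the "/home/tank" prefix,
# because matching is per path component, not per raw string prefix.
def _example_prefix_matching():
    prefixes = normalize_prefixes("/home/tank")
    assert path_matches_prefixes("/home/tank/file.db", prefixes)
    assert path_matches_prefixes("/home/tank", prefixes)
    assert not path_matches_prefixes("/home/tanker/file.db", prefixes)
    assert not path_matches_prefixes("relative/path", prefixes)

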
def filter_bpf_opens_to_prefixes(opens_map, prefixes):
    kept = {}
    dropped_relative = 0
    dropped_outside = 0

    for path, value in opens_map.items():
        if not isinstance(path, str):
            continue
        if not path.startswith("/"):
            dropped_relative += value
            continue
        if path_matches_prefixes(path, prefixes):
            kept[path] = kept.get(path, 0) + value
        else:
            dropped_outside += value

    return kept, dropped_relative, dropped_outside


def parse_arcstats():
    path = "/proc/spl/kstat/zfs/arcstats"
    if not os.path.exists(path):
        return None
    raw = safe_read_text(path)
    data = {}
    for line in raw.splitlines():
        parts = line.split()
        if len(parts) >= 3 and parts[0] not in ("name", "class"):
            try:
                data[parts[0]] = int(parts[2])
            except Exception:
                continue
    return data or None


def arc_delta(prev, curr):
    if not prev or not curr:
        return None
    keys = [
        "hits", "misses",
        "demand_data_hits", "demand_data_misses",
        "demand_metadata_hits", "demand_metadata_misses",
        "prefetch_data_hits", "prefetch_data_misses",
        "prefetch_metadata_hits", "prefetch_metadata_misses",
        "l2_hits", "l2_misses",
    ]
    out = {}
    for k in keys:
        out[k] = max(0, curr.get(k, 0) - prev.get(k, 0))
    return out


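# A sketch of the ARC delta logic above on fabricated counter snapshots
# (illustrative only; not called anywhere): arcstats counters are cumulative,
# so each sample is the difference between two consecutive reads.
def _example_arc_delta():
    prev = {"hits": 100, "misses": 10}
    curr = {"hits": 160, "misses": 12}
    delta = arc_delta(prev, curr)
    assert delta["hits"] == 60 and delta["misses"] == 2
    assert delta["l2_hits"] == 0  # keys absent from both snapshots stay zero

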
def parse_zpool_iostat_once(pool_list, interval):
    # Prefer the variant with latency columns (-l); fall back to plain
    # throughput output on older zpool versions.
    candidate_cmds = [
        ["zpool", "iostat", "-H", "-p", "-y", "-l"] + pool_list + [str(interval), "1"],
        ["zpool", "iostat", "-H", "-p", "-y"] + pool_list + [str(interval), "1"],
    ]

    last_err = ""
    out = ""
    for cmd in candidate_cmds:
        rc, out, err = run_cmd(cmd, timeout=interval + 20)
        if rc == 0 and out.strip():
            break
        last_err = (err or out or "").strip()
        out = ""
    if not out.strip():
        raise RuntimeError("zpool iostat failed: {0}".format(last_err))

    now_ts = int(time.time())
    rows = []

    for line in out.splitlines():
        line = line.strip()
        if not line:
            continue
        parts = line.split("\t") if "\t" in line else line.split()
        if len(parts) < 7:
            continue

        if parts[0].isdigit():
            ts = int(parts[0])
            data = parts[1:]
        else:
            ts = now_ts
            data = parts

        if len(data) < 7:
            continue

        try:
            row = {
                "ts": ts,
                "name": data[0],
                "alloc": int(data[1]),
                "free": int(data[2]),
                "rops": int(data[3]),
                "wops": int(data[4]),
                "rbytes": int(data[5]),
                "wbytes": int(data[6]),
            }
        except ValueError:
            continue

        # Best-effort mapping of any trailing latency columns; their exact
        # number and order depends on the zpool version.
        latency_names = [
            "total_wait_ns", "disk_wait_ns", "syncq_wait_ns",
            "asyncq_wait_ns", "scrub_wait_ns", "trim_wait_ns", "rebuild_wait_ns",
        ]
        for i, key in enumerate(latency_names, start=7):
            if i < len(data):
                try:
                    row[key] = int(data[i])
                except ValueError:
                    row[key] = 0

        rows.append(row)

    return rows


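# A sketch of the column layout expected from `zpool iostat -H -p`, using a
# fabricated tab-separated line (illustrative only; not called anywhere):
# name, alloc, free, read ops, write ops, read bytes, write bytes, then any
# latency columns.
def _example_iostat_columns():
    line = "tank\t1099511627776\t2199023255552\t12\t34\t524288\t1048576"
    data = line.split("\t")
    assert data[0] == "tank"
    assert int(data[5]) == 524288 and int(data[6]) == 1048576

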
def summarize_samples(samples_by_pool):
    summary = {}
    for pool, samples in samples_by_pool.items():
        rb = [s["rbytes"] for s in samples]
        wb = [s["wbytes"] for s in samples]
        ro = [s["rops"] for s in samples]
        wo = [s["wops"] for s in samples]
        tw = [s.get("total_wait_ns", 0) for s in samples]
        dw = [s.get("disk_wait_ns", 0) for s in samples]
        summary[pool] = {
            "samples": len(samples),
            "read_avg": mean_or_zero(rb),
            "write_avg": mean_or_zero(wb),
            "read_p95": percentile(rb, 0.95),
            "write_p95": percentile(wb, 0.95),
            "read_max": max(rb) if rb else 0,
            "write_max": max(wb) if wb else 0,
            "rops_avg": mean_or_zero(ro),
            "wops_avg": mean_or_zero(wo),
            "total_wait_avg_ns": mean_or_zero(tw),
            "disk_wait_avg_ns": mean_or_zero(dw),
            "total_wait_p95_ns": percentile(tw, 0.95),
            "disk_wait_p95_ns": percentile(dw, 0.95),
        }
    return summary


def top_entries(entries, key, topn):
    vals = [v for v in entries if v.get(key, 0) > 0]
    vals.sort(key=lambda x: x.get(key, 0), reverse=True)
    return vals[:topn]


def parse_bpftrace_version(text):
    m = re.search(r'v?(\d+)\.(\d+)(?:\.(\d+))?', text)
    if not m:
        return None
    major = int(m.group(1))
    minor = int(m.group(2))
    patch = int(m.group(3) or 0)
    return (major, minor, patch)


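# A sketch of the version parsing above (illustrative only; not called
# anywhere): a missing patch component defaults to 0, and text without a
# dotted version yields None.
def _example_version_parsing():
    assert parse_bpftrace_version("bpftrace v0.16.0") == (0, 16, 0)
    assert parse_bpftrace_version("v0.19") == (0, 19, 0)
    assert parse_bpftrace_version("no version here") is None

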
def choose_bpftrace_field_style():
    """
    bpftrace 0.19 changed the syntax from args->field to args.field.
    For 0.16 we use the old syntax; for newer versions, the new one.
    """
    if not command_exists("bpftrace"):
        return None, None, "bpftrace skipped: the bpftrace command is missing."

    rc, out, err = run_cmd(["bpftrace", "--version"], timeout=10)
    text = (out or err or "").strip()
    version = parse_bpftrace_version(text)
    if version is None:
        return None, text, "bpftrace detected, but could not read the version from: {0}".format(text)

    if version < (0, 19, 0):
        return "arrow", text, None
    return "dot", text, None


def build_bpftrace_program(track_files=False, field_style="dot"):
    if field_style == "arrow":
        ret_expr = "args->ret"
        bytes_expr = "args->bytes"
        filename_expr = "str(args->filename)"
    else:
        ret_expr = "args.ret"
        bytes_expr = "args.bytes"
        filename_expr = "str(args.filename)"

    lines = []
    lines.append("tracepoint:syscalls:sys_exit_read /{0} > 0/ {{ @read_bytes_by_comm[comm] = sum({0}); @read_calls_by_comm[comm] = count(); }}".format(ret_expr))
    lines.append("tracepoint:block:block_rq_issue {{ @block_bytes_by_comm[comm] = sum({0}); @block_ios_by_comm[comm] = count(); }}".format(bytes_expr))
    if track_files:
        lines.append("tracepoint:syscalls:sys_enter_openat {{ @opens[{0}] = count(); @opens_by_comm[comm, {0}] = count(); }}".format(filename_expr))
    lines.append("END {")
    lines.append('    printf("===READ_BYTES_BY_COMM===\\n"); print(@read_bytes_by_comm);')
    lines.append('    printf("===READ_CALLS_BY_COMM===\\n"); print(@read_calls_by_comm);')
    lines.append('    printf("===BLOCK_BYTES_BY_COMM===\\n"); print(@block_bytes_by_comm);')
    lines.append('    printf("===BLOCK_IOS_BY_COMM===\\n"); print(@block_ios_by_comm);')
    if track_files:
        lines.append('    printf("===OPENS===\\n"); print(@opens);')
        lines.append('    printf("===OPENS_BY_COMM===\\n"); print(@opens_by_comm);')
    lines.append("}")
    return "\n".join(lines) + "\n"


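# A sketch of what the generated program looks like (illustrative only; not
# called anywhere). With field_style="dot" the read probe renders roughly as:
#   tracepoint:syscalls:sys_exit_read /args.ret > 0/ { @read_bytes_by_comm[comm] = sum(args.ret); ... }
def _example_generated_program():
    prog = build_bpftrace_program(track_files=False, field_style="dot")
    assert "sum(args.ret)" in prog and "@opens" not in prog
    assert "args->ret" in build_bpftrace_program(field_style="arrow")

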
def start_bpftrace(outdir, track_files=False):
    if os.geteuid() != 0:
        return None, "bpftrace skipped: run as root."

    field_style, version_text, version_note = choose_bpftrace_field_style()
    if version_note:
        return None, version_note

    program = build_bpftrace_program(track_files=track_files, field_style=field_style)

    bt_path = os.path.join(outdir, "trace.bt")
    with open(bt_path, "w") as f:
        f.write(program)

    out_path = os.path.join(outdir, "bpftrace.txt")
    out_f = open(out_path, "w")
    proc = subprocess.Popen(
        ["bpftrace", bt_path],
        stdout=out_f,
        stderr=subprocess.STDOUT,
        universal_newlines=True,
    )

    note = "bpftrace: {0}, syntax: {1}".format(
        version_text if version_text else "unknown",
        "args->field" if field_style == "arrow" else "args.field",
    )
    return {
        "proc": proc,
        "out_f": out_f,
        "out_path": out_path,
        "bt_path": bt_path,
        "version_text": version_text,
        "field_style": field_style,
    }, note


SECTION_RE = re.compile(r"^===([A-Z0-9_]+)===$")
MAP_LINE_RE = re.compile(r'^@[^[]+\[(.*)\]:\s+(-?\d+)$')


def parse_bpftrace_output(path):
    result = defaultdict(dict)
    if not path or not os.path.exists(path):
        return result
    section = None
    for raw in safe_read_text(path).splitlines():
        line = raw.strip()
        m = SECTION_RE.match(line)
        if m:
            section = m.group(1)
            continue
        m = MAP_LINE_RE.match(line)
        if not m or not section:
            continue
        key_raw = m.group(1).strip()
        value = int(m.group(2))
        try:
            parsed_key = ast.literal_eval(key_raw)
        except Exception:
            try:
                parsed_key = ast.literal_eval("({0},)".format(key_raw))
            except Exception:
                parsed_key = key_raw.strip('"')
        result[section][parsed_key] = value
    return result


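# A sketch of the bpftrace map-line format matched above (illustrative only;
# not called anywhere): bpftrace prints entries like `@map[key]: value`, and
# unquoted string keys fall through ast.literal_eval to the raw text.
def _example_map_line():
    m = MAP_LINE_RE.match("@opens[/etc/hosts]: 7")
    assert m is not None
    assert m.group(1) == "/etc/hosts" and int(m.group(2)) == 7

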
def format_proc_entry(e):
    cmd = e.get("cmdline") or e.get("comm") or "?"
    if len(cmd) > 120:
        cmd = cmd[:117] + "..."
    return '{0} [pid {1}] {2}'.format(e.get("comm", "?"), e.get("pid", "?"), cmd)


def print_table(title, rows, cols):
    oprint(title)
    if not rows:
        oprint(" no data")
        oprint()
        return
    widths = []
    for col_name, key in cols:
        width = len(col_name)
        for row in rows:
            width = max(width, len(str(row.get(key, ""))))
        widths.append(width)
    header = " " + " ".join(col_name.ljust(widths[i]) for i, (col_name, _) in enumerate(cols))
    oprint(header)
    oprint(" " + " ".join("-" * w for w in widths))
    for row in rows:
        oprint(" " + " ".join(str(row.get(key, "")).ljust(widths[i]) for i, (_, key) in enumerate(cols)))
    oprint()


def stop_bpftrace(handle):
    if not handle:
        return
    proc = handle["proc"]
    if proc.poll() is not None:
        try:
            handle["out_f"].close()
        except Exception:
            pass
        return

    try:
        proc.send_signal(signal.SIGINT)
        proc.wait(timeout=3)
    except Exception:
        try:
            proc.terminate()
            proc.wait(timeout=2)
        except Exception:
            try:
                proc.kill()
                proc.wait(timeout=2)
            except Exception:
                pass
    try:
        handle["out_f"].close()
    except Exception:
        pass


def main():
    ap = argparse.ArgumentParser(description="Collects ZFS/ARC/process samples over a period of time and shows top statistics.")
    ap.add_argument("--duration", type=int, default=3600, help="Collection time in seconds, e.g. 7200")
    ap.add_argument("--interval", type=int, default=5, help="Sample interval in seconds")
    ap.add_argument("--pool", default="", help="Optional: one or more pools, comma-separated")
    ap.add_argument("--top", type=int, default=15, help="How many entries to show in the top lists")
    ap.add_argument("--outdir", default="", help="Directory for logs and JSON")
    ap.add_argument("--track-files", action="store_true", help="Track the top opened files via bpftrace")
    ap.add_argument("--zfs-files-only", action="store_true", help="In the top files list, show only absolute paths located on ZFS mountpoints")
    ap.add_argument("--path-prefix", default="", help="Show only files under this prefix, or several comma-separated prefixes, e.g. /home/tank,/mnt/data")
    ap.add_argument("--no-bpf", action="store_true", help="Do not run bpftrace even when it is available")
    args = ap.parse_args()

    if args.interval <= 0 or args.duration <= 0:
        eprint("duration and interval must be > 0")
        sys.exit(2)

    if os.geteuid() != 0:
        eprint("Warning: without root, parts of /proc and bpftrace may be incomplete.")

    ts = datetime.now().strftime("%Y%m%d_%H%M%S")
    outdir = args.outdir or os.path.abspath("./zfs_probe_{0}".format(ts))
    ensure_dir(outdir)

    oprint("Starting. Collecting data into: {0}".format(outdir))
    pools = get_pools(args.pool)
    if not pools:
        eprint("No ZFS pools found.")
        sys.exit(1)

    oprint("Pools: {0}".format(", ".join(pools)))
    zfs_mountpoints = get_zfs_mountpoints()
    if zfs_mountpoints:
        oprint("ZFS mountpoints: {0}".format(", ".join(zfs_mountpoints)))
    path_prefixes = normalize_prefixes(args.path_prefix)
    if path_prefixes:
        oprint("Path filter: {0}".format(", ".join(path_prefixes)))
    meta = {
        "started_at": datetime.now().isoformat(),
        "duration": args.duration,
        "interval": args.interval,
        "pools": pools,
        "hostname": os.uname().nodename,
        "uid": os.geteuid(),
        "zfs_mountpoints": zfs_mountpoints,
        "path_prefixes": path_prefixes,
    }
    with open(os.path.join(outdir, "meta.json"), "w") as f:
        json.dump(meta, f, indent=2)

    with open(os.path.join(outdir, "zpool_status_start.txt"), "w") as f:
        f.write(zpool_status_text(pools))
    with open(os.path.join(outdir, "zpool_history.txt"), "w") as f:
        f.write(zpool_history_text(pools))
    with open(os.path.join(outdir, "zfs_properties.json"), "w") as f:
        json.dump(zfs_get_properties(pools), f, indent=2)
    with open(os.path.join(outdir, "zfs_mountpoints.json"), "w") as f:
        json.dump(zfs_mountpoints, f, indent=2)

    bpf_handle = None
    bpf_note = None
    if not args.no_bpf:
        bpf_handle, bpf_note = start_bpftrace(outdir, track_files=args.track_files)
    else:
        bpf_note = "bpftrace disabled via --no-bpf"

    if bpf_note:
        oprint(bpf_note)

    proc_prev = read_proc_snapshot()
    arc_prev = parse_arcstats()

    samples_by_pool = defaultdict(list)
    arc_samples = []
    deadline = time.time() + args.duration
    rounds = max(1, int(math.ceil(float(args.duration) / float(args.interval))))
    proc_accum = {}

    for n in range(rounds):
        remaining = deadline - time.time()
        if remaining <= 0:
            break
        interval = min(args.interval, max(1, int(round(remaining))))
        oprint("[{0}/{1}] sampling, interval {2}s...".format(n + 1, rounds, interval))
        try:
            rows = parse_zpool_iostat_once(pools, interval)
        except Exception as ex:
            eprint("zpool iostat error: {0}".format(ex))
            break

        for row in rows:
            samples_by_pool[row["name"]].append(row)

        proc_cur = read_proc_snapshot()
        diff_proc_snapshots(proc_prev, proc_cur, proc_accum)
        proc_prev = proc_cur

        arc_cur = parse_arcstats()
        delta = arc_delta(arc_prev, arc_cur)
        if delta is not None:
            arc_samples.append(delta)
        arc_prev = arc_cur

        oprint("[{0}/{1}] done".format(n + 1, rounds))

    if bpf_handle:
        oprint("Stopping bpftrace...")
        stop_bpftrace(bpf_handle)

    with open(os.path.join(outdir, "zpool_status_end.txt"), "w") as f:
        f.write(zpool_status_text(pools))

    with open(os.path.join(outdir, "samples_zpool.json"), "w") as f:
        json.dump(samples_by_pool, f, indent=2)
    with open(os.path.join(outdir, "samples_arc.json"), "w") as f:
        json.dump(arc_samples, f, indent=2)
    with open(os.path.join(outdir, "proc_totals.json"), "w") as f:
        json.dump(proc_accum, f, indent=2)

    sample_summary = summarize_samples(samples_by_pool)
    bpf = parse_bpftrace_output(bpf_handle["out_path"] if bpf_handle else "")

    proc_rows = list(proc_accum.values())

    top_proc_read = []
    for e in top_entries(proc_rows, "read_bytes", args.top):
        top_proc_read.append({
            "proc": format_proc_entry(e),
            "read": human_bytes(e["read_bytes"]),
            "write": human_bytes(e["write_bytes"]),
            "max_read_interval": human_bytes(e["max_interval_read_bytes"]),
            "syscr": e["syscr"],
        })

    top_proc_write = []
    for e in top_entries(proc_rows, "write_bytes", args.top):
        top_proc_write.append({
            "proc": format_proc_entry(e),
            "write": human_bytes(e["write_bytes"]),
            "read": human_bytes(e["read_bytes"]),
            "max_write_interval": human_bytes(e["max_interval_write_bytes"]),
            "syscw": e["syscw"],
        })

    oprint()
    oprint("Results saved in: {0}".format(outdir))
    oprint("Pools: {0}".format(", ".join(pools)))
    zfs_mountpoints = get_zfs_mountpoints()
    if zfs_mountpoints:
        oprint("ZFS mountpoints: {0}".format(", ".join(zfs_mountpoints)))
    oprint("Collection time: {0}s, interval: {1}s, samples: {2}".format(
        args.duration, args.interval, sum(len(v) for v in samples_by_pool.values())
    ))
    oprint()

    pool_rows = []
    for pool in pools:
        s = sample_summary.get(pool, {})
        pool_rows.append({
            "pool": pool,
            "avg_read/s": human_bytes(s.get("read_avg", 0)),
            "avg_write/s": human_bytes(s.get("write_avg", 0)),
            "p95_read/s": human_bytes(s.get("read_p95", 0)),
            "p95_write/s": human_bytes(s.get("write_p95", 0)),
            "max_read/s": human_bytes(s.get("read_max", 0)),
            "max_write/s": human_bytes(s.get("write_max", 0)),
            "avg_total_wait": human_ns_to_ms(s.get("total_wait_avg_ns", 0)),
            "avg_disk_wait": human_ns_to_ms(s.get("disk_wait_avg_ns", 0)),
        })

    print_table("Pool summary", pool_rows, [
        ("pool", "pool"),
        ("avg_read/s", "avg_read/s"),
        ("avg_write/s", "avg_write/s"),
        ("p95_read/s", "p95_read/s"),
        ("p95_write/s", "p95_write/s"),
        ("avg_total_wait", "avg_total_wait"),
        ("avg_disk_wait", "avg_disk_wait"),
    ])

    print_table("Top processes by read_bytes from /proc/<pid>/io", top_proc_read, [
        ("proc", "proc"),
        ("read", "read"),
        ("write", "write"),
        ("max_read_interval", "max_read_interval"),
        ("syscr", "syscr"),
    ])

    print_table("Top processes by write_bytes from /proc/<pid>/io", top_proc_write, [
        ("proc", "proc"),
        ("write", "write"),
        ("read", "read"),
        ("max_write_interval", "max_write_interval"),
        ("syscw", "syscw"),
    ])

    if arc_samples:
        hits = sum(x.get("hits", 0) for x in arc_samples)
        misses = sum(x.get("misses", 0) for x in arc_samples)
        total = hits + misses
        hitp = (100.0 * hits / total) if total else 0.0
        l2_hits = sum(x.get("l2_hits", 0) for x in arc_samples)
        l2_misses = sum(x.get("l2_misses", 0) for x in arc_samples)
        l2_total = l2_hits + l2_misses
        l2p = (100.0 * l2_hits / l2_total) if l2_total else 0.0
        oprint("ARC/L2ARC")
        oprint(" ARC hit rate: {0:.2f}% (hits={1}, misses={2})".format(hitp, hits, misses))
        oprint(" L2ARC hit rate: {0:.2f}% (hits={1}, misses={2})".format(l2p, l2_hits, l2_misses))
        oprint()

    if bpf:
        rb = bpf.get("READ_BYTES_BY_COMM", {})
        if rb:
            rows = []
            for comm, value in sorted(rb.items(), key=lambda kv: kv[1], reverse=True)[:args.top]:
                rows.append({"comm": comm, "read_bytes": human_bytes(value)})
            print_table("Top comm by read() bytes from bpftrace", rows, [
                ("comm", "comm"),
                ("read_bytes", "read_bytes"),
            ])

        bb = bpf.get("BLOCK_BYTES_BY_COMM", {})
        if bb:
            rows = []
            for comm, value in sorted(bb.items(), key=lambda kv: kv[1], reverse=True)[:args.top]:
                rows.append({"comm": comm, "block_bytes": human_bytes(value)})
            print_table("Top comm by block_rq_issue bytes from bpftrace", rows, [
                ("comm", "comm"),
                ("block_bytes", "block_bytes"),
            ])

        opens = bpf.get("OPENS", {})
        if opens:
            report_opens = opens
            title = "Top opened files from bpftrace"

            if path_prefixes:
                report_opens, dropped_relative, dropped_outside = filter_bpf_opens_to_prefixes(opens, path_prefixes)
                title = "Top opened files from bpftrace under the given paths"
                oprint("Filtering files by prefix:")
                oprint(" kept entries: {0}".format(sum(report_opens.values()) if report_opens else 0))
                oprint(" dropped relative paths: {0}".format(dropped_relative))
                oprint(" dropped outside the prefixes: {0}".format(dropped_outside))
                oprint()
            elif args.zfs_files_only:
                report_opens, dropped_relative, dropped_non_zfs = filter_bpf_opens_to_zfs(opens, zfs_mountpoints)
                title = "Top opened files on ZFS from bpftrace"
                oprint("Filtering files:")
                oprint(" kept entries on ZFS: {0}".format(sum(report_opens.values()) if report_opens else 0))
                oprint(" dropped relative paths: {0}".format(dropped_relative))
                oprint(" dropped paths outside ZFS: {0}".format(dropped_non_zfs))
                oprint()

            rows = []
            for path, value in sorted(report_opens.items(), key=lambda kv: kv[1], reverse=True)[:args.top]:
                rows.append({"path": path, "opens": value})
            print_table(title, rows, [
                ("path", "path"),
                ("opens", "opens"),
            ])

    oprint("Key output files:")
    for name in [
        "zpool_status_start.txt",
        "zpool_status_end.txt",
        "zpool_history.txt",
        "zfs_properties.json",
        "zfs_mountpoints.json",
        "samples_zpool.json",
        "samples_arc.json",
        "proc_totals.json",
        "bpftrace.txt",
        "trace.bt",
    ]:
        path = os.path.join(outdir, name)
        if os.path.exists(path):
            oprint(" {0}".format(path))

if __name__ == "__main__":
    main()