diff --git a/pytorrent/db.py b/pytorrent/db.py index a68444f..d673a5e 100644 --- a/pytorrent/db.py +++ b/pytorrent/db.py @@ -275,6 +275,26 @@ CREATE TABLE IF NOT EXISTS torrent_stats_cache ( updated_at TEXT NOT NULL, updated_epoch REAL DEFAULT 0 ); + +CREATE TABLE IF NOT EXISTS tracker_summary_cache ( + profile_id INTEGER NOT NULL, + torrent_hash TEXT NOT NULL, + trackers_json TEXT NOT NULL, + updated_at TEXT NOT NULL, + updated_epoch REAL DEFAULT 0, + PRIMARY KEY(profile_id, torrent_hash) +); +CREATE INDEX IF NOT EXISTS idx_tracker_summary_cache_profile ON tracker_summary_cache(profile_id, updated_epoch); + +CREATE TABLE IF NOT EXISTS tracker_favicon_cache ( + domain TEXT PRIMARY KEY, + source_url TEXT, + file_path TEXT, + mime_type TEXT, + updated_at TEXT NOT NULL, + updated_epoch REAL DEFAULT 0, + error TEXT +); """ MIGRATIONS = [ @@ -302,6 +322,9 @@ MIGRATIONS = [ "ALTER TABLE torrent_stats_cache ADD COLUMN updated_epoch REAL DEFAULT 0", "ALTER TABLE smart_queue_settings ADD COLUMN manage_stopped INTEGER DEFAULT 0", "ALTER TABLE smart_queue_settings ADD COLUMN min_peers INTEGER DEFAULT 0", + "CREATE TABLE IF NOT EXISTS tracker_summary_cache (profile_id INTEGER NOT NULL, torrent_hash TEXT NOT NULL, trackers_json TEXT NOT NULL, updated_at TEXT NOT NULL, updated_epoch REAL DEFAULT 0, PRIMARY KEY(profile_id, torrent_hash))", + "CREATE INDEX IF NOT EXISTS idx_tracker_summary_cache_profile ON tracker_summary_cache(profile_id, updated_epoch)", + "CREATE TABLE IF NOT EXISTS tracker_favicon_cache (domain TEXT PRIMARY KEY, source_url TEXT, file_path TEXT, mime_type TEXT, updated_at TEXT NOT NULL, updated_epoch REAL DEFAULT 0, error TEXT)", ] diff --git a/pytorrent/routes/api.py b/pytorrent/routes/api.py index 77cec66..26e5751 100644 --- a/pytorrent/routes/api.py +++ b/pytorrent/routes/api.py @@ -13,11 +13,11 @@ import socket import json import psutil import xml.etree.ElementTree as ET -from flask import Blueprint, jsonify, request, abort +from flask import Blueprint, 
jsonify, request, abort, send_file from ..config import DB_PATH, JOBS_RETENTION_DAYS, SMART_QUEUE_HISTORY_RETENTION_DAYS, WORKERS from ..db import connect, utcnow from ..services.auth import current_user_id as default_user_id, current_user, list_users, save_user, delete_user, login_user, logout_user, enabled as auth_enabled, require_profile_write -from ..services import preferences, rtorrent, torrent_stats, speed_peaks +from ..services import preferences, rtorrent, torrent_stats, speed_peaks, tracker_cache from ..services.torrent_cache import torrent_cache from ..services.torrent_summary import cached_summary from ..services.workers import enqueue, list_jobs, cancel_job, retry_job, clear_jobs, emergency_clear_jobs @@ -489,16 +489,25 @@ def torrents(): def trackers_summary(): profile = preferences.active_profile() if not profile: - return ok({"summary": {"hashes": {}, "trackers": [], "errors": [], "scanned": 0}, "error": "No profile"}) - limit = min(2000, max(1, int(request.args.get("limit") or 1000))) - hashes = request.args.getlist("hash") + return ok({"summary": {"hashes": {}, "trackers": [], "errors": [], "scanned": 0, "pending": 0}, "error": "No profile"}) try: - # Note: This endpoint powers only the sidebar tracker filter and never mutates torrents. - if not hashes: - hashes = [t.get("hash") for t in torrent_cache.snapshot(profile["id"]) if t.get("hash")] - return ok({"summary": rtorrent.tracker_summary(profile, hashes, limit=limit)}) + # Note: Tracker summary uses the local torrent snapshot and refreshes only a small cache batch per request. 
+        scan_limit = min(250, max(0, int(request.args.get("scan_limit") or 80)))
+        hashes = [t.get("hash") for t in torrent_cache.snapshot(profile["id"]) if t.get("hash")]
+        summary = tracker_cache.summary(profile, hashes, lambda h: rtorrent.torrent_trackers(profile, h), scan_limit=scan_limit)
+        return ok({"summary": summary})
     except Exception as exc:
-        return jsonify({"ok": False, "error": str(exc)}), 500
+        return ok({"summary": {"hashes": {}, "trackers": [], "errors": [{"error": str(exc)}], "scanned": 0, "pending": 0}, "error": str(exc)})
+
+
+@bp.get("/trackers/favicon/<domain>")
+def tracker_favicon(domain: str):
+    prefs = preferences.get_preferences()
+    enabled = bool(prefs and prefs.get("tracker_favicons_enabled"))
+    path, mime = tracker_cache.favicon_path(domain, enabled=enabled)
+    if not path:
+        abort(404)
+    return send_file(path, mimetype=mime or "image/x-icon", max_age=7 * 24 * 60 * 60)


 @bp.get("/torrent-stats")
 def torrent_stats_get():
diff --git a/pytorrent/services/tracker_cache.py b/pytorrent/services/tracker_cache.py
new file mode 100644
index 0000000..75694f8
--- /dev/null
+++ b/pytorrent/services/tracker_cache.py
@@ -0,0 +1,283 @@
+from __future__ import annotations
+
+import json
+import mimetypes
+import re
+import time
+import urllib.error
+import urllib.parse
+import urllib.request
+from html.parser import HTMLParser
+from pathlib import Path
+
+from ..config import BASE_DIR
+from ..db import connect, utcnow
+
+TRACKER_CACHE_TTL_SECONDS = 7 * 24 * 60 * 60
+FAVICON_CACHE_TTL_SECONDS = 7 * 24 * 60 * 60
+TRACKER_SCAN_LIMIT = 80
+FAVICON_DIR = BASE_DIR / "data" / "tracker_favicons"
+
+
+class _IconParser(HTMLParser):
+    def __init__(self):
+        super().__init__()
+        self.icons: list[str] = []
+
+    def handle_starttag(self, tag: str, attrs):
+        if tag.lower() != "link":
+            return
+        data = {str(k).lower(): str(v or "") for k, v in attrs}
+        rel = data.get("rel", "").lower()
+        href = data.get("href", "").strip()
+        if href and any(part in rel.split() for part in ("icon",
"shortcut", "apple-touch-icon")): + self.icons.append(href) + + +def _now_epoch() -> float: + return time.time() + + +def tracker_domain(url: str) -> str: + raw = str(url or "").strip() + if not raw: + return "" + parsed = urllib.parse.urlparse(raw if "://" in raw else f"http://{raw}") + host = (parsed.hostname or "").lower().strip(".") + if host.startswith("www."): + host = host[4:] + return host + + +def _root_domain(domain: str) -> str: + parts = [p for p in str(domain or "").lower().strip(".").split(".") if p] + if len(parts) <= 2: + return ".".join(parts) + if len(parts[-1]) == 2 and len(parts[-2]) <= 3 and len(parts) >= 3: + return ".".join(parts[-3:]) + return ".".join(parts[-2:]) + + +def _safe_filename(domain: str) -> str: + return re.sub(r"[^a-z0-9_.-]+", "_", domain.lower()).strip("._") or "tracker" + + +def _read_cached(profile_id: int, hashes: list[str], ttl: int) -> tuple[dict[str, list[dict]], set[str]]: + if not hashes: + return {}, set() + now = _now_epoch() + cached: dict[str, list[dict]] = {} + fresh: set[str] = set() + with connect() as conn: + for start in range(0, len(hashes), 900): + chunk = hashes[start:start + 900] + placeholders = ",".join("?" for _ in chunk) + rows = conn.execute( + f"SELECT torrent_hash, trackers_json, updated_epoch FROM tracker_summary_cache WHERE profile_id=? 
AND torrent_hash IN ({placeholders})", + (profile_id, *chunk), + ).fetchall() + for row in rows: + h = str(row.get("torrent_hash") or "") + try: + items = json.loads(row.get("trackers_json") or "[]") + except Exception: + items = [] + cached[h] = items if isinstance(items, list) else [] + if now - float(row.get("updated_epoch") or 0) < ttl: + fresh.add(h) + return cached, fresh + + +def _store(profile_id: int, torrent_hash: str, trackers: list[dict]) -> None: + now = utcnow() + epoch = _now_epoch() + compact = [] + seen = set() + for item in trackers: + domain = tracker_domain(str(item.get("url") or item.get("domain") or "")) or str(item.get("domain") or "") + if not domain or domain in seen: + continue + seen.add(domain) + compact.append({"domain": domain, "url": str(item.get("url") or "")}) + with connect() as conn: + conn.execute( + """ + INSERT INTO tracker_summary_cache(profile_id, torrent_hash, trackers_json, updated_at, updated_epoch) + VALUES(?, ?, ?, ?, ?) + ON CONFLICT(profile_id, torrent_hash) DO UPDATE SET + trackers_json=excluded.trackers_json, + updated_at=excluded.updated_at, + updated_epoch=excluded.updated_epoch + """, + (profile_id, torrent_hash, json.dumps(compact), now, epoch), + ) + + +def summary(profile: dict, hashes: list[str], loader, scan_limit: int = TRACKER_SCAN_LIMIT) -> dict: + """Build tracker sidebar data from disk cache and refresh a small batch per request.""" + # Note: Tracker data is cached per torrent hash, so huge rTorrent libraries are never scanned in one UI request. 
+ profile_id = int(profile.get("id") or 0) + clean_hashes = [str(h or "").strip() for h in hashes if str(h or "").strip()] + cached, fresh = _read_cached(profile_id, clean_hashes, TRACKER_CACHE_TTL_SECONDS) + missing = [h for h in clean_hashes if h not in fresh] + errors: list[dict] = [] + scanned_now = 0 + for h in missing[:max(0, int(scan_limit or 0))]: + try: + trackers = loader(h) + _store(profile_id, h, trackers) + cached[h] = [{"domain": tracker_domain(t.get("url") or t.get("domain") or ""), "url": str(t.get("url") or "")} for t in trackers] + fresh.add(h) + scanned_now += 1 + except Exception as exc: + errors.append({"hash": h, "error": str(exc)}) + by_hash: dict[str, list[dict]] = {} + counts: dict[str, dict] = {} + for h in clean_hashes: + items = [] + seen = set() + for item in cached.get(h, []): + domain = tracker_domain(str(item.get("url") or item.get("domain") or "")) or str(item.get("domain") or "") + if not domain or domain in seen: + continue + seen.add(domain) + row = {"domain": domain, "url": str(item.get("url") or "")} + items.append(row) + bucket = counts.setdefault(domain, {"domain": domain, "url": row["url"], "count": 0}) + bucket["count"] += 1 + if not bucket.get("url") and row["url"]: + bucket["url"] = row["url"] + by_hash[h] = items + trackers = sorted(counts.values(), key=lambda x: (-int(x.get("count") or 0), str(x.get("domain") or ""))) + pending = max(0, len([h for h in clean_hashes if h not in fresh])) + return {"hashes": by_hash, "trackers": trackers, "errors": errors[:25], "scanned": len(clean_hashes), "scanned_now": scanned_now, "pending": pending, "cached": len(clean_hashes) - pending} + + +def _fetch(url: str, limit: int = 262144) -> tuple[bytes, str, str]: + req = urllib.request.Request(url, headers={"User-Agent": "pyTorrent/1.0 favicon-cache"}) + with urllib.request.urlopen(req, timeout=5) as resp: + data = resp.read(limit + 1) + if len(data) > limit: + data = data[:limit] + content_type = str(resp.headers.get("Content-Type") or 
"").split(";", 1)[0].strip().lower() + final_url = str(resp.geturl() or url) + return data, content_type, final_url + + +def _is_icon(data: bytes, content_type: str, url: str) -> bool: + if not data: + return False + ctype = content_type.lower() + if ctype.startswith("image/") or ctype in {"application/octet-stream", "binary/octet-stream"}: + return True + return urllib.parse.urlparse(url).path.lower().endswith((".ico", ".png", ".jpg", ".jpeg", ".svg", ".webp")) + + +def _favicon_candidates(domain: str) -> list[str]: + host = tracker_domain(domain) + root = _root_domain(host) + candidates = [] + for h in [host, root]: + if h: + candidates.extend([f"https://{h}/favicon.ico", f"http://{h}/favicon.ico"]) + return list(dict.fromkeys(candidates)) + + +def _html_icon_candidates(domain: str) -> list[str]: + host = tracker_domain(domain) + root = _root_domain(host) + urls = [] + for h in [host, root]: + if not h: + continue + for scheme in ("https", "http"): + base = f"{scheme}://{h}/" + try: + data, ctype, final_url = _fetch(base, limit=524288) + except Exception: + continue + if "html" not in ctype and b" tuple[Path | None, str | None]: + clean = tracker_domain(domain) + if not enabled or not clean: + return None, None + cached = _cached_favicon(clean) + now = _now_epoch() + if cached and now - float(cached.get("updated_epoch") or 0) < FAVICON_CACHE_TTL_SECONDS: + path = Path(str(cached.get("file_path") or "")) + if path.exists(): + return path, str(cached.get("mime_type") or mimetypes.guess_type(path.name)[0] or "image/x-icon") + if cached.get("error"): + return None, None + # Note: Favicon lookup tries tracker host, root domain, then HTML and stores the result for a week. 
+ FAVICON_DIR.mkdir(parents=True, exist_ok=True) + errors = [] + candidates = _favicon_candidates(clean) + checked_html = False + idx = 0 + while idx < len(candidates): + url = candidates[idx] + idx += 1 + try: + data, ctype, final_url = _fetch(url, limit=524288) + if not _is_icon(data, ctype, final_url): + continue + ext = Path(urllib.parse.urlparse(final_url).path).suffix.lower() or mimetypes.guess_extension(ctype) or ".ico" + if ext not in {".ico", ".png", ".jpg", ".jpeg", ".svg", ".webp"}: + ext = ".ico" + path = FAVICON_DIR / f"{_safe_filename(clean)}{ext}" + path.write_bytes(data) + mime = ctype if ctype.startswith("image/") else (mimetypes.guess_type(path.name)[0] or "image/x-icon") + with connect() as conn: + conn.execute( + """ + INSERT INTO tracker_favicon_cache(domain, source_url, file_path, mime_type, updated_at, updated_epoch, error) + VALUES(?, ?, ?, ?, ?, ?, NULL) + ON CONFLICT(domain) DO UPDATE SET + source_url=excluded.source_url, + file_path=excluded.file_path, + mime_type=excluded.mime_type, + updated_at=excluded.updated_at, + updated_epoch=excluded.updated_epoch, + error=NULL + """, + (clean, final_url, str(path), mime, utcnow(), now), + ) + return path, mime + except Exception as exc: + errors.append(f"{url}: {exc}") + if idx >= len(candidates) and not checked_html: + checked_html = True + candidates.extend([u for u in _html_icon_candidates(clean) if u not in candidates]) + with connect() as conn: + conn.execute( + """ + INSERT INTO tracker_favicon_cache(domain, source_url, file_path, mime_type, updated_at, updated_epoch, error) + VALUES(?, '', '', '', ?, ?, ?) 
+ ON CONFLICT(domain) DO UPDATE SET + updated_at=excluded.updated_at, + updated_epoch=excluded.updated_epoch, + error=excluded.error + """, + (clean, utcnow(), now, "; ".join(errors[-3:]) or "favicon not found"), + ) + return None, None diff --git a/pytorrent/static/app.js b/pytorrent/static/app.js index 4398030..a122ee0 100644 --- a/pytorrent/static/app.js +++ b/pytorrent/static/app.js @@ -238,12 +238,14 @@ } function trackerFavicon(domain){ if(!trackerFaviconsEnabled || !domain) return ''; - const src=`https://${encodeURIComponent(domain).replace(/%2E/g,'.')}/favicon.ico`; + // Note: Favicony trackerów idą przez lokalny cache backendu, więc przeglądarka nie odpytuje tysięcy domen bezpośrednio. + const src=`/api/trackers/favicon/${encodeURIComponent(domain)}`; return ``; } function trackerFilterPlaceholder(){ if(trackerSummaryStatus==='loading') return '
<div class="tracker-empty">Loading trackers...</div>
'; if(trackerSummaryStatus==='error') return '
<div class="tracker-empty">Tracker list unavailable</div>
'; + if(Number(trackerSummary.pending||0)) return `
<div class="tracker-empty">Scanning cache: ${esc(trackerSummary.cached||0)}/${esc(trackerSummary.scanned||0)}</div>
`; if(hasTorrentSnapshot && torrents.size) return '
<div class="tracker-empty">No trackers found</div>
'; return '
<div class="tracker-empty">Waiting for torrents...</div>
'; } @@ -261,22 +263,24 @@ } async function refreshTrackerSummary(force=false){ const hashes=[...torrents.keys()].sort(); - const sig=`${hashes.length}:${hashes.slice(0,2000).join(',')}:${trackerFaviconsEnabled?1:0}`; - if(!force && sig===trackerSummarySignature) return; + const sig=`${hashes.length}:${hashes[0]||''}:${hashes[hashes.length-1]||''}:${trackerFaviconsEnabled?1:0}`; + if(!force && sig===trackerSummarySignature && !Number(trackerSummary.pending||0)) return; trackerSummarySignature=sig; - if(!hashes.length){ trackerSummary={hashes:{},trackers:[],scanned:0,errors:[]}; trackerSummaryStatus='empty'; renderTrackerFilters(); return; } - trackerSummaryStatus='loading'; + if(!hashes.length){ trackerSummary={hashes:{},trackers:[],scanned:0,errors:[],pending:0,cached:0}; trackerSummaryStatus='empty'; renderTrackerFilters(); return; } + trackerSummaryStatus=(trackerSummary.trackers||[]).length?'ready':'loading'; renderTrackerFilters(); try{ - const qs=new URLSearchParams({limit:'2000'}); - // Note: Browser sends currently visible torrent hashes, avoiding an empty cache race on the backend. - hashes.slice(0,2000).forEach(h=>qs.append('hash',h)); - const j=await (await fetch(`/api/trackers/summary?${qs.toString()}`)).json(); - if(!j.ok) throw new Error(j.error||'Tracker summary failed'); - trackerSummary=j.summary||{hashes:{},trackers:[],scanned:0,errors:[]}; - trackerSummaryStatus=(trackerSummary.trackers||[]).length?'ready':'empty'; + // Note: Nie wysyłamy 13k hashy w URL; backend bierze lokalny snapshot i doczytuje cache małymi porcjami. 
+ const j=await (await fetch('/api/trackers/summary?scan_limit=200')).json(); + if(!j.ok && !j.summary) throw new Error(j.error||'Tracker summary failed'); + trackerSummary=j.summary||{hashes:{},trackers:[],scanned:0,errors:[],pending:0,cached:0}; + trackerSummaryStatus=(trackerSummary.trackers||[]).length?'ready':Number(trackerSummary.pending||0)?'loading':'empty'; renderTrackerFilters(); scheduleRender(true); + if(Number(trackerSummary.pending||0)>0){ + clearTimeout(trackerSummaryTimer); + trackerSummaryTimer=setTimeout(()=>refreshTrackerSummary(true).catch(()=>{}), 3500); + } }catch(e){ trackerSummaryStatus='error'; renderTrackerFilters(); console.warn('Tracker summary failed', e); } } function scheduleTrackerSummary(force=false){ diff --git a/pytorrent/static/styles.css b/pytorrent/static/styles.css index 88c0a21..8a2fd0e 100644 --- a/pytorrent/static/styles.css +++ b/pytorrent/static/styles.css @@ -223,8 +223,9 @@ body { display: grid; grid-template-columns: var(--sidebar) 1fr; } +/* Note: Sidebar filters are denser so large tracker lists fit better on one screen. 
*/ .sidebar { - padding: 0.65rem; + padding: 0.5rem; overflow: auto; background: rgba(var(--bs-secondary-bg-rgb), 0.9); } @@ -232,10 +233,10 @@ body { width: 100%; display: grid; grid-template-columns: minmax(0, 1fr) auto; - gap: 0.15rem 0.55rem; + gap: 0.1rem 0.45rem; align-items: center; - margin-bottom: 0.2rem; - padding: 0.45rem 0.6rem; + margin-bottom: 0.12rem; + padding: 0.34rem 0.5rem; border: 0; border-radius: 0.55rem; background: transparent; @@ -863,9 +864,9 @@ body.mobile-mode .main-grid { } .label-filters .label-filter, .tracker-filters .tracker-filter { - font-size: 0.82rem; - padding: 0.34rem 0.5rem; - margin-bottom: 0.15rem; + font-size: 0.78rem; + margin-bottom: 0.08rem; + padding: 0.26rem 0.44rem; } .label-filters .label-filter i, .tracker-filters .tracker-filter i { @@ -883,9 +884,9 @@ body.mobile-mode .main-grid { .tracker-favicon { border-radius: 0.2rem; flex: 0 0 auto; - height: 16px; + height: 14px; object-fit: contain; - width: 16px; + width: 14px; } .tracker-favicon:not(.d-none) + .tracker-fallback-icon { @@ -896,9 +897,9 @@ body.mobile-mode .main-grid { align-items: center; color: var(--bs-secondary-color); display: flex; - font-size: 0.78rem; - gap: 0.35rem; - padding: 0.25rem 0.5rem; + font-size: 0.76rem; + gap: 0.3rem; + padding: 0.2rem 0.44rem; } /* Note: Empty tracker state uses the same sidebar spacing as regular filter rows. */