This commit is contained in:
Mateusz Gruszczyński
2026-05-08 19:53:06 +02:00
parent 607d1c59c1
commit 3eee5be37a
3 changed files with 49 additions and 29 deletions

View File

@@ -506,7 +506,8 @@ def trackers_summary():
def tracker_favicon(domain: str): def tracker_favicon(domain: str):
prefs = preferences.get_preferences() prefs = preferences.get_preferences()
enabled = bool(prefs and prefs.get("tracker_favicons_enabled")) enabled = bool(prefs and prefs.get("tracker_favicons_enabled"))
static_url = tracker_cache.favicon_public_url(domain, enabled=enabled, create=True) force = str(request.args.get("refresh") or "").lower() in {"1", "true", "yes", "force"}
static_url = tracker_cache.favicon_public_url(domain, enabled=enabled, create=True, force=force)
if static_url: if static_url:
# Note: The API only discovers/cache-warms the icon; the browser receives the file from /static/tracker_favicons/. # Note: The API only discovers/cache-warms the icon; the browser receives the file from /static/tracker_favicons/.
return redirect(static_url, code=302) return redirect(static_url, code=302)

View File

@@ -159,27 +159,20 @@ def summary(profile: dict, hashes: list[str], loader, scan_limit: int = TRACKER_
def favicon_public_url(domain: str, enabled: bool = True, create: bool = False) -> str: def favicon_public_url(domain: str, enabled: bool = True, create: bool = False, force: bool = False) -> str:
"""Return the static URL for a cached tracker favicon, optionally creating it first.""" """Return the static URL for a cached tracker favicon, optionally creating or refreshing it first."""
# Note: Favicon files stay in data/tracker_favicons, but the browser loads them via the static/tracker_favicons symlink. # Note: Favicon files stay in data/tracker_favicons, but the browser loads them via the static/tracker_favicons symlink.
clean = tracker_domain(domain) clean = tracker_domain(domain)
if not enabled or not clean: if not enabled or not clean:
return "" return ""
if create: if create:
favicon_path(clean, enabled=True) favicon_path(clean, enabled=True, force=force)
cached = _cached_favicon(clean) cached = _cached_favicon(clean)
now = _now_epoch() now = _now_epoch()
path = None if not cached or now - float(cached.get("updated_epoch") or 0) >= FAVICON_CACHE_TTL_SECONDS:
if cached and now - float(cached.get("updated_epoch") or 0) < FAVICON_CACHE_TTL_SECONDS: return ""
cached_path = Path(str(cached.get("file_path") or "")) path = Path(str(cached.get("file_path") or ""))
if cached_path.exists() and cached_path.is_file(): if not path.exists() or not path.is_file():
path = cached_path
if path is None:
# Note: Existing symlinked .ico files are still linked directly even when the DB favicon row is missing or stale.
direct_path = FAVICON_DIR / f"{_safe_filename(clean)}.ico"
if direct_path.exists() and direct_path.is_file():
path = direct_path
if path is None:
return "" return ""
try: try:
rel = path.resolve().relative_to(FAVICON_DIR.resolve()) rel = path.resolve().relative_to(FAVICON_DIR.resolve())
@@ -199,12 +192,32 @@ def _fetch(url: str, limit: int = 262144) -> tuple[bytes, str, str]:
def _is_icon(data: bytes, content_type: str, url: str) -> bool: def _is_icon(data: bytes, content_type: str, url: str) -> bool:
if not data: """Validate that downloaded bytes are a browser-readable image, not only an image-like HTTP header."""
# Note: Some trackers serve a broken /favicon.ico with image/vnd.microsoft.icon; pyTorrent now validates bytes before caching it.
if not data or len(data) < 16:
return False return False
ctype = content_type.lower() head = data[:32]
if ctype.startswith("image/") or ctype in {"application/octet-stream", "binary/octet-stream"}: lower = data[:512].lstrip().lower()
if head.startswith(b"\x00\x00\x01\x00") or head.startswith(b"\x00\x00\x02\x00"):
try:
count = int.from_bytes(data[4:6], "little")
except Exception:
count = 0
return 0 < count <= 256 and len(data) >= 6 + (16 * count)
if head.startswith(b"\x89PNG\r\n\x1a\n"):
return True return True
return urllib.parse.urlparse(url).path.lower().endswith((".ico", ".png", ".jpg", ".jpeg", ".svg", ".webp")) if head.startswith(b"\xff\xd8\xff"):
return True
if head.startswith((b"GIF87a", b"GIF89a")):
return True
if head.startswith(b"RIFF") and data[8:12] == b"WEBP":
return True
if lower.startswith(b"<svg") or b"<svg" in lower[:256]:
return True
ctype = content_type.lower()
if ctype in {"image/svg+xml"}:
return b"<svg" in lower[:512]
return False
def _favicon_candidates(domain: str) -> list[str]: def _favicon_candidates(domain: str) -> list[str]:
@@ -250,22 +263,28 @@ def _cached_favicon(domain: str):
return conn.execute("SELECT * FROM tracker_favicon_cache WHERE domain=?", (clean,)).fetchone() return conn.execute("SELECT * FROM tracker_favicon_cache WHERE domain=?", (clean,)).fetchone()
def favicon_path(domain: str, enabled: bool = True) -> tuple[Path | None, str | None]: def favicon_path(domain: str, enabled: bool = True, force: bool = False) -> tuple[Path | None, str | None]:
clean = tracker_domain(domain) clean = tracker_domain(domain)
if not enabled or not clean: if not enabled or not clean:
return None, None return None, None
cached = _cached_favicon(clean) cached = _cached_favicon(clean)
now = _now_epoch() now = _now_epoch()
if cached and now - float(cached.get("updated_epoch") or 0) < FAVICON_CACHE_TTL_SECONDS: if cached and not force and now - float(cached.get("updated_epoch") or 0) < FAVICON_CACHE_TTL_SECONDS:
path = Path(str(cached.get("file_path") or "")) path = Path(str(cached.get("file_path") or ""))
if path.exists(): mime = str(cached.get("mime_type") or mimetypes.guess_type(path.name)[0] or "image/x-icon")
return path, str(cached.get("mime_type") or mimetypes.guess_type(path.name)[0] or "image/x-icon") if path.exists() and path.is_file():
try:
if _is_icon(path.read_bytes()[:524288], mime, str(cached.get("source_url") or path.name)):
return path, mime
except Exception:
pass
if cached.get("error"): if cached.get("error"):
return None, None return None, None
# Note: Favicon lookup tries tracker host, root domain, then HTML <link rel="icon"> and stores the result for a week. # Note: Favicon lookup prefers HTML <link rel="icon"> over generic /favicon.ico, because some trackers serve a broken default icon there.
FAVICON_DIR.mkdir(parents=True, exist_ok=True) FAVICON_DIR.mkdir(parents=True, exist_ok=True)
errors = [] errors = []
candidates = _favicon_candidates(clean) candidates = _html_icon_candidates(clean) + _favicon_candidates(clean)
candidates = list(dict.fromkeys(candidates))
checked_html = False checked_html = False
idx = 0 idx = 0
while idx < len(candidates): while idx < len(candidates):

View File

@@ -239,10 +239,10 @@
// Build the favicon markup for a tracker, given either a domain string or a tracker object
// with `domain` and optional `favicon_url`. Returns an <img> followed by a generic fallback icon,
// or only the generic icon when favicons are disabled or no domain is known.
function trackerFavicon(tracker){
  const domain=typeof tracker==='string'?tracker:(tracker?.domain||'');
  if(!trackerFaviconsEnabled || !domain) return '<i class="fa-solid fa-bullseye"></i>';
  // Note: Cached favicons are served from the static/tracker_favicons symlink; the API path is only a one-time cache warmer fallback.
  const fallback=`/api/trackers/favicon/${encodeURIComponent(domain)}?refresh=1`;
  const src=(typeof tracker==='object' && tracker?.favicon_url) ? tracker.favicon_url : fallback;
  // Note: When the first request already is the API fallback, the retry is marked as used so a failure
  // does not re-request the identical URL and force a second refresh.
  const retryAttr=src===fallback ? ' data-retry="1"' : '';
  return `<img class="tracker-favicon" src="${esc(src)}" alt="" loading="lazy" data-fallback-src="${esc(fallback)}"${retryAttr} onerror="if(this.dataset.retry!=='1'){this.dataset.retry='1';this.src=this.dataset.fallbackSrc;}else{this.classList.add('d-none')}"><i class="fa-solid fa-bullseye tracker-fallback-icon"></i>`;
}
function trackerFilterPlaceholder(){ function trackerFilterPlaceholder(){
if(trackerSummaryStatus==='loading') return '<div class="tracker-filter-empty"><span class="spinner-border spinner-border-xs"></span> Loading trackers...</div>'; if(trackerSummaryStatus==='loading') return '<div class="tracker-filter-empty"><span class="spinner-border spinner-border-xs"></span> Loading trackers...</div>';