favicons
This commit is contained in:
@@ -159,27 +159,20 @@ def summary(profile: dict, hashes: list[str], loader, scan_limit: int = TRACKER_
|
||||
|
||||
|
||||
|
||||
def favicon_public_url(domain: str, enabled: bool = True, create: bool = False) -> str:
|
||||
"""Return the static URL for a cached tracker favicon, optionally creating it first."""
|
||||
def favicon_public_url(domain: str, enabled: bool = True, create: bool = False, force: bool = False) -> str:
|
||||
"""Return the static URL for a cached tracker favicon, optionally creating or refreshing it first."""
|
||||
# Note: Favicon files stay in data/tracker_favicons, but the browser loads them via the static/tracker_favicons symlink.
|
||||
clean = tracker_domain(domain)
|
||||
if not enabled or not clean:
|
||||
return ""
|
||||
if create:
|
||||
favicon_path(clean, enabled=True)
|
||||
favicon_path(clean, enabled=True, force=force)
|
||||
cached = _cached_favicon(clean)
|
||||
now = _now_epoch()
|
||||
path = None
|
||||
if cached and now - float(cached.get("updated_epoch") or 0) < FAVICON_CACHE_TTL_SECONDS:
|
||||
cached_path = Path(str(cached.get("file_path") or ""))
|
||||
if cached_path.exists() and cached_path.is_file():
|
||||
path = cached_path
|
||||
if path is None:
|
||||
# Note: Existing symlinked .ico files are still linked directly even when the DB favicon row is missing or stale.
|
||||
direct_path = FAVICON_DIR / f"{_safe_filename(clean)}.ico"
|
||||
if direct_path.exists() and direct_path.is_file():
|
||||
path = direct_path
|
||||
if path is None:
|
||||
if not cached or now - float(cached.get("updated_epoch") or 0) >= FAVICON_CACHE_TTL_SECONDS:
|
||||
return ""
|
||||
path = Path(str(cached.get("file_path") or ""))
|
||||
if not path.exists() or not path.is_file():
|
||||
return ""
|
||||
try:
|
||||
rel = path.resolve().relative_to(FAVICON_DIR.resolve())
|
||||
@@ -199,12 +192,32 @@ def _fetch(url: str, limit: int = 262144) -> tuple[bytes, str, str]:
|
||||
|
||||
|
||||
def _is_icon(data: bytes, content_type: str, url: str) -> bool:
|
||||
if not data:
|
||||
"""Validate that downloaded bytes are a browser-readable image, not only an image-like HTTP header."""
|
||||
# Note: Some trackers serve a broken /favicon.ico with image/vnd.microsoft.icon; pyTorrent now validates bytes before caching it.
|
||||
if not data or len(data) < 16:
|
||||
return False
|
||||
ctype = content_type.lower()
|
||||
if ctype.startswith("image/") or ctype in {"application/octet-stream", "binary/octet-stream"}:
|
||||
head = data[:32]
|
||||
lower = data[:512].lstrip().lower()
|
||||
if head.startswith(b"\x00\x00\x01\x00") or head.startswith(b"\x00\x00\x02\x00"):
|
||||
try:
|
||||
count = int.from_bytes(data[4:6], "little")
|
||||
except Exception:
|
||||
count = 0
|
||||
return 0 < count <= 256 and len(data) >= 6 + (16 * count)
|
||||
if head.startswith(b"\x89PNG\r\n\x1a\n"):
|
||||
return True
|
||||
return urllib.parse.urlparse(url).path.lower().endswith((".ico", ".png", ".jpg", ".jpeg", ".svg", ".webp"))
|
||||
if head.startswith(b"\xff\xd8\xff"):
|
||||
return True
|
||||
if head.startswith((b"GIF87a", b"GIF89a")):
|
||||
return True
|
||||
if head.startswith(b"RIFF") and data[8:12] == b"WEBP":
|
||||
return True
|
||||
if lower.startswith(b"<svg") or b"<svg" in lower[:256]:
|
||||
return True
|
||||
ctype = content_type.lower()
|
||||
if ctype in {"image/svg+xml"}:
|
||||
return b"<svg" in lower[:512]
|
||||
return False
|
||||
|
||||
|
||||
def _favicon_candidates(domain: str) -> list[str]:
|
||||
@@ -250,22 +263,28 @@ def _cached_favicon(domain: str):
|
||||
return conn.execute("SELECT * FROM tracker_favicon_cache WHERE domain=?", (clean,)).fetchone()
|
||||
|
||||
|
||||
def favicon_path(domain: str, enabled: bool = True) -> tuple[Path | None, str | None]:
|
||||
def favicon_path(domain: str, enabled: bool = True, force: bool = False) -> tuple[Path | None, str | None]:
|
||||
clean = tracker_domain(domain)
|
||||
if not enabled or not clean:
|
||||
return None, None
|
||||
cached = _cached_favicon(clean)
|
||||
now = _now_epoch()
|
||||
if cached and now - float(cached.get("updated_epoch") or 0) < FAVICON_CACHE_TTL_SECONDS:
|
||||
if cached and not force and now - float(cached.get("updated_epoch") or 0) < FAVICON_CACHE_TTL_SECONDS:
|
||||
path = Path(str(cached.get("file_path") or ""))
|
||||
if path.exists():
|
||||
return path, str(cached.get("mime_type") or mimetypes.guess_type(path.name)[0] or "image/x-icon")
|
||||
mime = str(cached.get("mime_type") or mimetypes.guess_type(path.name)[0] or "image/x-icon")
|
||||
if path.exists() and path.is_file():
|
||||
try:
|
||||
if _is_icon(path.read_bytes()[:524288], mime, str(cached.get("source_url") or path.name)):
|
||||
return path, mime
|
||||
except Exception:
|
||||
pass
|
||||
if cached.get("error"):
|
||||
return None, None
|
||||
# Note: Favicon lookup tries tracker host, root domain, then HTML <link rel="icon"> and stores the result for a week.
|
||||
# Note: Favicon lookup prefers HTML <link rel="icon"> over generic /favicon.ico, because some trackers serve a broken default icon there.
|
||||
FAVICON_DIR.mkdir(parents=True, exist_ok=True)
|
||||
errors = []
|
||||
candidates = _favicon_candidates(clean)
|
||||
candidates = _html_icon_candidates(clean) + _favicon_candidates(clean)
|
||||
candidates = list(dict.fromkeys(candidates))
|
||||
checked_html = False
|
||||
idx = 0
|
||||
while idx < len(candidates):
|
||||
|
||||
Reference in New Issue
Block a user