diff --git a/pytorrent/services/tracker_cache.py b/pytorrent/services/tracker_cache.py index 2c61380..deaca74 100644 --- a/pytorrent/services/tracker_cache.py +++ b/pytorrent/services/tracker_cache.py @@ -58,7 +58,9 @@ def _root_domain(domain: str) -> str: parts = [p for p in str(domain or "").lower().strip(".").split(".") if p] if len(parts) <= 2: return ".".join(parts) - if len(parts[-1]) == 2 and len(parts[-2]) <= 3 and len(parts) >= 3: + # Note: Tracker favicon discovery needs the real main site first; for t.pte.nu that is pte.nu, not t.pte.nu. + known_second_level_suffixes = {"co", "com", "net", "org", "gov", "edu", "ac"} + if len(parts[-1]) == 2 and parts[-2] in known_second_level_suffixes and len(parts) >= 3: return ".".join(parts[-3:]) return ".".join(parts[-2:]) @@ -314,10 +316,17 @@ def _extract_icon_hrefs(html: str) -> list[str]: def _tracker_icon_hosts(domain: str) -> list[str]: host = tracker_domain(domain) root = _root_domain(host) - # Note: Only probe the exact tracker host and the registrable root domain; CDN/static hosts are used only when HTML explicitly points to them. + # Note: Direct favicon fallback checks the tracker host first, then the main domain. return [h for h in dict.fromkeys([host, root]) if h] +def _tracker_html_hosts(domain: str) -> list[str]: + host = tracker_domain(domain) + root = _root_domain(host) + # Note: HTML discovery checks the main site first, because tracker announce hosts often return text/plain. + return [h for h in dict.fromkeys([root, host]) if h] + + def _favicon_candidates(domain: str) -> list[str]: candidates = [] for h in _tracker_icon_hosts(domain): @@ -327,7 +336,7 @@ def _favicon_candidates(domain: str) -> list[str]: def _html_icon_candidates(domain: str, errors: list[str] | None = None) -> list[str]: urls = [] - for h in _tracker_icon_hosts(domain): + for h in _tracker_html_hosts(domain): for scheme in ("https", "http"): base = f"{scheme}://{h}/" try: @@ -377,7 +386,7 @@ def favicon_path(domain: str, enabled: bool = True, force: bool = False) -> tupl pass if cached.get("error"): return None, None - # Note: Favicon lookup prefers HTML over generic /favicon.ico, because some trackers serve a broken default icon there. + # Note: Favicon lookup checks the main-domain HTML first, then tracker HTML, then direct /favicon.ico fallbacks. FAVICON_DIR.mkdir(parents=True, exist_ok=True) errors = [] candidates = _html_icon_candidates(clean, errors) + _favicon_candidates(clean)