favicons
This commit is contained in:
@@ -58,7 +58,9 @@ def _root_domain(domain: str) -> str:
|
||||
parts = [p for p in str(domain or "").lower().strip(".").split(".") if p]
|
||||
if len(parts) <= 2:
|
||||
return ".".join(parts)
|
||||
if len(parts[-1]) == 2 and len(parts[-2]) <= 3 and len(parts) >= 3:
|
||||
# Note: Tracker favicon discovery needs the real main site first; for t.pte.nu that is pte.nu, not t.pte.nu.
|
||||
known_second_level_suffixes = {"co", "com", "net", "org", "gov", "edu", "ac"}
|
||||
if len(parts[-1]) == 2 and parts[-2] in known_second_level_suffixes and len(parts) >= 3:
|
||||
return ".".join(parts[-3:])
|
||||
return ".".join(parts[-2:])
|
||||
|
||||
@@ -314,10 +316,17 @@ def _extract_icon_hrefs(html: str) -> list[str]:
|
||||
def _tracker_icon_hosts(domain: str) -> list[str]:
    """Return the hosts to probe directly for a favicon, in priority order.

    The tracker host derived from *domain* comes first, followed by its
    root domain. Empty values are dropped and a host is listed only once,
    so the result has at most two entries.
    """
    tracker_host = tracker_domain(domain)
    main_host = _root_domain(tracker_host)
    # Note: Direct favicon fallback checks the tracker host first, then the main domain.
    ordered: list[str] = []
    for name in (tracker_host, main_host):
        # Skip empty hosts and avoid probing the same host twice.
        if name and name not in ordered:
            ordered.append(name)
    return ordered
|
||||
|
||||
|
||||
def _tracker_html_hosts(domain: str) -> list[str]:
    """Return the hosts whose HTML should be scanned for icon links.

    The root domain of the tracker host comes first, followed by the
    tracker host itself. Empty values are dropped and a host is listed
    only once, so the result has at most two entries.
    """
    tracker_host = tracker_domain(domain)
    main_host = _root_domain(tracker_host)
    # Note: HTML discovery checks the main site first, because tracker announce hosts often return text/plain.
    ordered: list[str] = []
    for name in (main_host, tracker_host):
        # Skip empty hosts and avoid fetching the same host twice.
        if name and name not in ordered:
            ordered.append(name)
    return ordered
|
||||
|
||||
|
||||
def _favicon_candidates(domain: str) -> list[str]:
|
||||
candidates = []
|
||||
for h in _tracker_icon_hosts(domain):
|
||||
@@ -327,7 +336,7 @@ def _favicon_candidates(domain: str) -> list[str]:
|
||||
|
||||
def _html_icon_candidates(domain: str, errors: list[str] | None = None) -> list[str]:
|
||||
urls = []
|
||||
for h in _tracker_icon_hosts(domain):
|
||||
for h in _tracker_html_hosts(domain):
|
||||
for scheme in ("https", "http"):
|
||||
base = f"{scheme}://{h}/"
|
||||
try:
|
||||
@@ -377,7 +386,7 @@ def favicon_path(domain: str, enabled: bool = True, force: bool = False) -> tupl
|
||||
pass
|
||||
if cached.get("error"):
|
||||
return None, None
|
||||
# Note: Favicon lookup prefers HTML <link rel="icon"> over generic /favicon.ico, because some trackers serve a broken default icon there.
|
||||
# Note: Favicon lookup checks the main-domain HTML first, then tracker HTML, then direct /favicon.ico fallbacks.
|
||||
FAVICON_DIR.mkdir(parents=True, exist_ok=True)
|
||||
errors = []
|
||||
candidates = _html_icon_candidates(clean, errors) + _favicon_candidates(clean)
|
||||
|
||||
Reference in New Issue
Block a user