queue_stopped #3
@@ -58,7 +58,9 @@ def _root_domain(domain: str) -> str:
|
|||||||
parts = [p for p in str(domain or "").lower().strip(".").split(".") if p]
|
parts = [p for p in str(domain or "").lower().strip(".").split(".") if p]
|
||||||
if len(parts) <= 2:
|
if len(parts) <= 2:
|
||||||
return ".".join(parts)
|
return ".".join(parts)
|
||||||
if len(parts[-1]) == 2 and len(parts[-2]) <= 3 and len(parts) >= 3:
|
# Note: Tracker favicon discovery needs the real main site first; for t.pte.nu that is pte.nu, not t.pte.nu.
|
||||||
|
known_second_level_suffixes = {"co", "com", "net", "org", "gov", "edu", "ac"}
|
||||||
|
if len(parts[-1]) == 2 and parts[-2] in known_second_level_suffixes and len(parts) >= 3:
|
||||||
return ".".join(parts[-3:])
|
return ".".join(parts[-3:])
|
||||||
return ".".join(parts[-2:])
|
return ".".join(parts[-2:])
|
||||||
|
|
||||||
@@ -314,10 +316,17 @@ def _extract_icon_hrefs(html: str) -> list[str]:
|
|||||||
def _tracker_icon_hosts(domain: str) -> list[str]:
    """Return the hosts to probe directly for a favicon, in lookup order.

    The list holds the normalized tracker host followed by its root domain
    (as computed by ``_root_domain``). Duplicates are collapsed and empty
    values are dropped; no other hosts are added here.

    Args:
        domain: Tracker domain as supplied by the caller; it is normalized
            through ``tracker_domain`` before use.

    Returns:
        Zero, one, or two distinct host names, tracker host first.
    """
    tracker_host = tracker_domain(domain)
    # Note: Direct favicon fallback checks the tracker host first, then the main domain.
    probe_order = (tracker_host, _root_domain(tracker_host))
    unique_hosts: list[str] = []
    for name in probe_order:
        # Skip empty values and anything already queued (host == root case).
        if name and name not in unique_hosts:
            unique_hosts.append(name)
    return unique_hosts
|
||||||
|
|
||||||
|
|
||||||
|
def _tracker_html_hosts(domain: str) -> list[str]:
    """Return the hosts whose HTML should be scanned for icon links, in order.

    The order is the reverse of the direct-favicon probe: the root domain
    (as computed by ``_root_domain``) comes first, then the normalized
    tracker host. Duplicates are collapsed and empty values are dropped.

    Args:
        domain: Tracker domain as supplied by the caller; it is normalized
            through ``tracker_domain`` before use.

    Returns:
        Zero, one, or two distinct host names, root domain first.
    """
    tracker_host = tracker_domain(domain)
    main_site = _root_domain(tracker_host)
    # Note: HTML discovery checks the main site first, because tracker announce hosts often return text/plain.
    ordered: list[str] = []
    for candidate in (main_site, tracker_host):
        # Skip empty values and anything already queued (host == root case).
        if candidate and candidate not in ordered:
            ordered.append(candidate)
    return ordered
|
||||||
|
|
||||||
|
|
||||||
def _favicon_candidates(domain: str) -> list[str]:
|
def _favicon_candidates(domain: str) -> list[str]:
|
||||||
candidates = []
|
candidates = []
|
||||||
for h in _tracker_icon_hosts(domain):
|
for h in _tracker_icon_hosts(domain):
|
||||||
@@ -327,7 +336,7 @@ def _favicon_candidates(domain: str) -> list[str]:
|
|||||||
|
|
||||||
def _html_icon_candidates(domain: str, errors: list[str] | None = None) -> list[str]:
|
def _html_icon_candidates(domain: str, errors: list[str] | None = None) -> list[str]:
|
||||||
urls = []
|
urls = []
|
||||||
for h in _tracker_icon_hosts(domain):
|
for h in _tracker_html_hosts(domain):
|
||||||
for scheme in ("https", "http"):
|
for scheme in ("https", "http"):
|
||||||
base = f"{scheme}://{h}/"
|
base = f"{scheme}://{h}/"
|
||||||
try:
|
try:
|
||||||
@@ -377,7 +386,7 @@ def favicon_path(domain: str, enabled: bool = True, force: bool = False) -> tupl
|
|||||||
pass
|
pass
|
||||||
if cached.get("error"):
|
if cached.get("error"):
|
||||||
return None, None
|
return None, None
|
||||||
# Note: Favicon lookup prefers HTML <link rel="icon"> over generic /favicon.ico, because some trackers serve a broken default icon there.
|
# Note: Favicon lookup checks the main-domain HTML first, then tracker HTML, then direct /favicon.ico fallbacks.
|
||||||
FAVICON_DIR.mkdir(parents=True, exist_ok=True)
|
FAVICON_DIR.mkdir(parents=True, exist_ok=True)
|
||||||
errors = []
|
errors = []
|
||||||
candidates = _html_icon_candidates(clean, errors) + _favicon_candidates(clean)
|
candidates = _html_icon_candidates(clean, errors) + _favicon_candidates(clean)
|
||||||
|
|||||||
Reference in New Issue
Block a user