scan performance

This commit is contained in:
Mateusz Gruszczyński
2026-02-24 10:06:02 +01:00
parent 98acbc0119
commit 9ccb1651b6
4 changed files with 151 additions and 66 deletions

View File

@@ -14,7 +14,7 @@ from pathlib import Path
from concurrent.futures import ThreadPoolExecutor, as_completed
from multiprocessing import cpu_count
import threading
import traceback
sys.path.insert(0, str(Path(__file__).parent))
@@ -29,6 +29,21 @@ write_lock = threading.Lock()
active_scans = {}
active_scans_lock = threading.Lock()
def heartbeat():
    """Emit a one-line liveness message through ``log_safe``.

    The line carries the current timestamp, the value reported by
    ``schedule.next_run()`` and the number of entries in ``schedule.jobs``.
    The ``running=True`` part is a fixed literal; it is not read from the
    module's ``running`` flag.
    """
    # Gather the values in the same order the message lists them.
    now = datetime.now()
    upcoming = schedule.next_run()
    job_count = len(schedule.jobs)
    log_safe(f"[{now}] HEARTBEAT running=True next_run={upcoming} jobs={job_count}")
def compute_maxmind_workers():
    """Return how many MaxMind workers a single country scan should use.

    A shared budget of ``max(32, cpu_count() * 6)`` workers is split evenly
    between the scans currently registered in ``active_scans``. The share is
    then clamped to the range configured through the environment variables
    ``MAXMIND_WORKERS_MIN`` (default 6) and ``MAXMIND_WORKERS_MAX``
    (default 48). If the two bounds conflict, the minimum wins.

    Malformed or empty values in those variables fall back to the defaults
    instead of raising ``ValueError``, and the result is never below 1.

    Returns:
        int: the worker count, always >= 1.
    """

    def _env_int(name, default):
        # A typo in the environment should not abort the whole scan.
        try:
            return int(os.getenv(name, str(default)))
        except ValueError:
            return default

    with active_scans_lock:
        # Count at least one scan so the division below can never hit zero.
        active = max(1, len(active_scans))

    # Oversubscribe the cores (6 workers per core, at least 32 in total).
    total_budget = max(32, cpu_count() * 6)
    per_country = max(4, total_budget // active)

    min_w = _env_int('MAXMIND_WORKERS_MIN', 6)
    max_w = _env_int('MAXMIND_WORKERS_MAX', 48)

    # The extra floor of 1 guards against zero or negative configured bounds.
    return max(1, min_w, min(max_w, per_country))
def signal_handler(signum, frame):
global running
@@ -96,7 +111,14 @@ def scan_single_country(country_code, is_update=False):
print(f"[{country_code}] Scanning MaxMind + GitHub...", flush=True)
maxmind_networks = handler._scan_maxmind_for_country(country_code, progress_callback=progress_cb)
maxmind_workers = compute_maxmind_workers()
print(f"[{country_code}] MaxMind workers: {maxmind_workers} (active scans: {len(active_scans)})", flush=True)
maxmind_networks = handler._scan_maxmind_for_country(
country_code,
progress_callback=progress_cb,
workers=maxmind_workers
)
if maxmind_networks:
print(f"[{country_code}] MaxMind: {len(maxmind_networks):,} networks, checking GitHub...", flush=True)
@@ -386,8 +408,16 @@ if __name__ == '__main__':
print("\nScheduler running. Press Ctrl+C to stop.\n", flush=True)
sys.stdout.flush()
# heartbeat
schedule.every(1).minutes.do(heartbeat)
while running:
schedule.run_pending()
try:
schedule.run_pending()
except Exception as e:
log_safe(f"[{datetime.now()}] ERROR in run_pending: {e}")
traceback.print_exc()
sys.stdout.flush()
time.sleep(60)
print("\n[SHUTDOWN] Stopped gracefully.", flush=True)