first commit
This commit is contained in:
395
scheduler.py
Normal file
395
scheduler.py
Normal file
@@ -0,0 +1,395 @@
|
||||
#!/usr/bin/env python3
|
||||
"""
|
||||
GeoIP Country Scanner Daemon - Incremental Update Mode
|
||||
"""
|
||||
import schedule
|
||||
import time
|
||||
import sys
|
||||
import signal
|
||||
import os
|
||||
import sqlite3
|
||||
import concurrent.futures
|
||||
from datetime import datetime
|
||||
from pathlib import Path
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
from multiprocessing import cpu_count
|
||||
import threading
|
||||
|
||||
|
||||
sys.path.insert(0, str(Path(__file__).parent))
|
||||
|
||||
|
||||
from geoip_handler import GeoIPHandler
|
||||
import config
|
||||
|
||||
|
||||
# Global shutdown flag; flipped to False by signal_handler so the
# main scheduler loop (bottom of file) exits gracefully.
running = True
# Serializes stdout writes from worker threads (see log_safe).
log_lock = threading.Lock()
# Serializes cache/database writes across scan threads
# (held around the save calls in scan_single_country).
write_lock = threading.Lock()
# country_code -> {'start_time', 'progress', 'last_update', 'is_update'}
# for every scan currently in flight; reported by print_active_scans.
active_scans = {}
# Guards all reads and writes of active_scans.
active_scans_lock = threading.Lock()
|
||||
|
||||
|
||||
def signal_handler(signum, frame):
    """Handle SIGTERM/SIGINT: request a graceful shutdown.

    Clears the module-level ``running`` flag so the scheduler loop at
    the bottom of the file terminates on its next iteration.
    """
    global running
    running = False
    print(f"\n[{datetime.now()}] Received signal {signum}, shutting down...", flush=True)
    sys.stdout.flush()
|
||||
|
||||
|
||||
def log_safe(message):
    """Print *message* to stdout, serialized across threads via log_lock.

    Fix: ``print(..., flush=True)`` already flushes the stream, so the
    original's extra ``sys.stdout.flush()`` call was redundant and has
    been removed.
    """
    with log_lock:
        print(message, flush=True)
|
||||
|
||||
|
||||
def update_scan_progress(country_code, progress_msg):
    """Record the latest progress message for a country's active scan.

    Silently does nothing when the country is not currently tracked in
    ``active_scans`` (e.g. the scan already finished).
    """
    with active_scans_lock:
        entry = active_scans.get(country_code)
        if entry is not None:
            entry['progress'] = progress_msg
            entry['last_update'] = datetime.now()
|
||||
|
||||
|
||||
def progress_callback_factory(country_code):
    """Return a one-argument callback bound to *country_code*.

    The callback forwards each progress message to
    update_scan_progress for that country.
    """
    return lambda msg: update_scan_progress(country_code, msg)
|
||||
|
||||
|
||||
def print_active_scans():
    """Dump a status table of every in-flight scan to stdout.

    Returns immediately when no scans are running. Holds
    ``active_scans_lock`` for the whole dump so the snapshot is
    consistent with concurrent scan threads.
    """
    with active_scans_lock:
        if not active_scans:
            return

        rule = "=" * 70
        print("\n" + rule, flush=True)
        print("ACTIVE SCANS STATUS:", flush=True)
        print(rule, flush=True)

        for country, info in sorted(active_scans.items()):
            elapsed = (datetime.now() - info['start_time']).total_seconds()
            progress = info.get('progress', 'Unknown')
            mode = "UPDATE" if info.get('is_update', False) else "SCAN"
            print(f" {country} [{mode}]: {progress} | {elapsed:.0f}s", flush=True)

        print(rule + "\n", flush=True)
        sys.stdout.flush()
|
||||
|
||||
|
||||
def scan_single_country(country_code, is_update=False):
    """Scan one country's IP networks and persist them to the local cache.

    Strategy: query MaxMind first; when it yields networks, merge in any
    GitHub-only networks on top. When MaxMind yields nothing, fall back
    to GitHub alone. All cache writes are serialized through the
    module-level write_lock.

    Args:
        country_code: country code passed through to the GeoIP handler.
        is_update: True -> incremental cache update; False -> full save.

    Returns:
        dict with keys 'country', 'success', 'networks' (count saved),
        'error' (str or None), and 'mode'.
    """
    try:
        # Register this scan so print_active_scans() can report on it.
        with active_scans_lock:
            active_scans[country_code] = {
                'start_time': datetime.now(),
                'progress': 'Starting...',
                'last_update': datetime.now(),
                'is_update': is_update
            }

        start_time = time.time()
        mode = "INCREMENTAL UPDATE" if is_update else "FULL SCAN"
        print(f"[START] {country_code} - {mode}...", flush=True)
        sys.stdout.flush()

        progress_cb = progress_callback_factory(country_code)
        handler = GeoIPHandler()

        print(f"[{country_code}] Scanning MaxMind + GitHub...", flush=True)

        maxmind_networks = handler._scan_maxmind_for_country(country_code, progress_callback=progress_cb)

        if maxmind_networks:
            print(f"[{country_code}] MaxMind: {len(maxmind_networks):,} networks, checking GitHub...", flush=True)

            # Merge in any networks GitHub knows about that MaxMind missed.
            github_networks = handler._fetch_from_github(country_code)
            if github_networks:
                maxmind_set = set(maxmind_networks)
                github_set = set(github_networks)
                missing = github_set - maxmind_set

                if missing:
                    # NOTE(review): extending from a set appends in
                    # arbitrary order — presumably the cache layer does
                    # not care about ordering; confirm if it does.
                    maxmind_networks.extend(missing)
                    print(f"[{country_code}] GitHub added {len(missing):,} new networks", flush=True)
                else:
                    print(f"[{country_code}] GitHub: {len(github_networks):,} networks (no new)", flush=True)

                source = 'maxmind+github'
            else:
                print(f"[{country_code}] GitHub: no data", flush=True)
                source = 'maxmind'

            networks = maxmind_networks
        else:
            # MaxMind came back empty: GitHub is the only remaining
            # source (and may itself return nothing).
            print(f"[{country_code}] MaxMind found nothing, trying GitHub...", flush=True)
            networks = handler._fetch_from_github(country_code)
            source = 'github' if networks else None

        if networks:
            # Only one thread writes to the cache at a time.
            with write_lock:
                print(f"[{country_code}] Acquired write lock, saving to database...", flush=True)

                if is_update:
                    saved = handler._update_cache_incremental(country_code, networks, source)
                else:
                    saved = handler._save_to_cache(country_code, networks, source)

                print(f"[{country_code}] Released write lock", flush=True)

            elapsed = time.time() - start_time

            # Scan finished (saved or not): stop tracking it.
            with active_scans_lock:
                active_scans.pop(country_code, None)

            if saved:
                print(f"[DONE] {country_code}: {len(networks)} networks in {elapsed:.1f}s ({mode})", flush=True)
                sys.stdout.flush()
                return {'country': country_code, 'success': True, 'networks': len(networks), 'error': None, 'mode': mode}
            else:
                print(f"[ERROR] {country_code}: Failed to save to cache", flush=True)
                sys.stdout.flush()
                return {'country': country_code, 'success': False, 'networks': 0, 'error': 'Failed to save', 'mode': mode}
        else:
            # No data from either source: deregister and report failure.
            with active_scans_lock:
                active_scans.pop(country_code, None)

            print(f"[ERROR] {country_code}: No data found", flush=True)
            sys.stdout.flush()
            return {'country': country_code, 'success': False, 'networks': 0, 'error': 'No data found', 'mode': mode}

    except Exception as e:
        # Always deregister on failure so stale entries don't linger
        # in the active-scans status display.
        with active_scans_lock:
            active_scans.pop(country_code, None)

        print(f"[ERROR] {country_code}: {e}", flush=True)
        sys.stdout.flush()
        import traceback
        traceback.print_exc()
        return {'country': country_code, 'success': False, 'networks': 0, 'error': str(e), 'mode': 'UNKNOWN'}
|
||||
|
||||
|
||||
def scan_all_countries_incremental(parallel_workers=None, max_age_hours=168):
    """Scan every missing or stale country in parallel, then summarize.

    Workflow: refresh the MaxMind database if the handler says it needs
    an update, ask the handler which countries were never scanned
    (full scan) or are older than *max_age_hours* (incremental update),
    then fan the work out to a ThreadPoolExecutor of
    scan_single_country tasks, printing progress every ~30 seconds.

    Args:
        parallel_workers: thread count; None -> min(cpu_count(), 16).
        max_age_hours: cache age threshold (hours) before a country
            counts as stale. Default 168 (7 days).

    Returns:
        True when the run completed (even with per-country failures);
        False when an unexpected exception aborted the whole run.
    """
    log_safe(f"[{datetime.now()}] Starting INCREMENTAL country scan...")

    try:
        handler = GeoIPHandler()

        # Best-effort MaxMind DB refresh: a failed download is only a
        # warning; we continue with whatever data is already present.
        if handler.needs_update():
            log_safe("Updating MaxMind database...")
            result = handler.download_database()
            if not result.get('success'):
                log_safe(f"Warning: Database update failed - {result.get('error')}")

        log_safe("\nChecking cache status...")
        missing, stale = handler.get_countries_needing_scan(max_age_hours)

        log_safe(f"Missing countries (never scanned): {len(missing)}")
        log_safe(f"Stale countries (needs update): {len(stale)}")

        if missing:
            log_safe(f"Missing: {', '.join(sorted(missing))}")
        if stale:
            log_safe(f"Stale: {', '.join(sorted(stale))}")

        total = len(missing) + len(stale)

        if total == 0:
            log_safe("\n✓ All countries are up to date!")
            return True

        if parallel_workers is None:
            parallel_workers = min(cpu_count(), 16)

        log_safe(f"\nProcessing {total} countries using {parallel_workers} parallel workers...")
        log_safe(f" - {len(missing)} new countries (full scan)")
        log_safe(f" - {len(stale)} stale countries (incremental update)")
        log_safe(f"Note: Database writes are serialized with write lock")
        # NOTE(review): the 3-minutes-per-country estimate is a
        # hard-coded guess, not measured.
        log_safe(f"Estimated time: {total / parallel_workers * 3:.1f} minutes\n")

        start_time = datetime.now()
        completed = 0
        success_count = 0
        failed_countries = []
        results_list = []
        last_progress_time = time.time()
        last_status_print = time.time()

        def print_progress(force=False):
            # Print a progress bar with ETA; throttled to once per 30s
            # unless force=True (i.e. a task just completed).
            nonlocal last_progress_time
            current_time = time.time()

            if not force and (current_time - last_progress_time) < 30:
                return

            last_progress_time = current_time
            elapsed = (datetime.now() - start_time).total_seconds()
            avg_time = elapsed / completed if completed > 0 else 0
            remaining = (total - completed) * avg_time if completed > 0 else 0
            progress_bar = "█" * int(completed / total * 40)
            progress_bar += "░" * (40 - int(completed / total * 40))

            msg = (f"[{progress_bar}] {completed}/{total} ({100*completed/total:.1f}%) | "
                   f"Elapsed: {elapsed:.0f}s | ETA: {remaining:.0f}s")
            print(msg, flush=True)
            sys.stdout.flush()

        log_safe("Starting parallel execution...")
        sys.stdout.flush()

        # missing -> full scan (is_update=False); stale -> incremental.
        tasks = [(country, False) for country in missing] + [(country, True) for country in stale]

        with ThreadPoolExecutor(max_workers=parallel_workers) as executor:
            future_to_country = {
                executor.submit(scan_single_country, country, is_update): country
                for country, is_update in tasks
            }

            log_safe(f"Submitted {len(future_to_country)} tasks\n")
            sys.stdout.flush()

            pending = set(future_to_country.keys())

            # Poll with a 10s timeout so we can emit periodic status
            # output even while no task has finished.
            while pending:
                done, pending = concurrent.futures.wait(
                    pending,
                    timeout=10,
                    return_when=concurrent.futures.FIRST_COMPLETED
                )

                for future in done:
                    # scan_single_country catches its own exceptions and
                    # returns a result dict, so .result() should not raise.
                    result = future.result()
                    results_list.append(result)
                    completed += 1

                    if result['success']:
                        success_count += 1
                    else:
                        failed_countries.append(result['country'])

                # Force a progress line whenever something completed.
                print_progress(force=bool(done))

                current_time = time.time()
                if current_time - last_status_print >= 30:
                    print_active_scans()
                    last_status_print = current_time

        print("\n", flush=True)
        sys.stdout.flush()

        elapsed = (datetime.now() - start_time).total_seconds()

        log_safe("=" * 70)
        log_safe("SCAN RESULTS (sorted by country):")
        log_safe("=" * 70)

        for result in sorted(results_list, key=lambda x: x['country']):
            mode_str = f"[{result.get('mode', 'UNKNOWN')}]"
            if result['success']:
                log_safe(f" {result['country']}: ✓ {result['networks']:,} networks {mode_str}")
            else:
                log_safe(f" {result['country']}: ✗ {result['error']} {mode_str}")

        log_safe("=" * 70)
        log_safe(f"\n[{datetime.now()}] Incremental scan complete!")
        log_safe(f"✓ Success: {success_count}/{total} countries")
        log_safe(f" - New countries: {len([r for r in results_list if r.get('mode') == 'FULL SCAN' and r['success']])}")
        log_safe(f" - Updated countries: {len([r for r in results_list if r.get('mode') == 'INCREMENTAL UPDATE' and r['success']])}")
        log_safe(f" Time: {elapsed:.1f}s ({elapsed/60:.1f} minutes)")
        log_safe(f" Average: {elapsed/total:.1f}s per country\n")

        if failed_countries:
            log_safe(f"✗ Failed: {', '.join(failed_countries)}\n")

        return True
    except Exception as e:
        # Catch-all boundary: log the traceback and signal failure to
        # the caller instead of killing the scheduler thread.
        log_safe(f"[{datetime.now()}] ERROR: {e}")
        import traceback
        traceback.print_exc()
        sys.stdout.flush()
        return False
|
||||
|
||||
|
||||
if __name__ == '__main__':
    # Daemon entry point: print banner/config, optionally run a startup
    # scan, register a recurring job with `schedule`, then loop until a
    # signal clears the `running` flag.
    print("=" * 70, flush=True)
    print("GeoIP Country Scanner Daemon", flush=True)
    print("=" * 70, flush=True)
    print(f"Started: {datetime.now()}", flush=True)
    print(f"Data dir: {config.GEOIP_DB_DIR}", flush=True)
    print(f"CPU cores: {cpu_count()}", flush=True)
    sys.stdout.flush()

    # Master on/off switch; anything other than "true" disables the daemon.
    scheduler_enabled = os.getenv('SCHEDULER_ENABLED', 'true').lower() == 'true'

    if not scheduler_enabled:
        print("\n[DISABLED] SCHEDULER_ENABLED=false - exiting", flush=True)
        print("=" * 70, flush=True)
        sys.stdout.flush()
        sys.exit(0)

    print("=" * 70, flush=True)
    sys.stdout.flush()

    # Graceful shutdown on docker stop / Ctrl+C.
    signal.signal(signal.SIGTERM, signal_handler)
    signal.signal(signal.SIGINT, signal_handler)

    # Runtime configuration from environment variables.
    scan_time = os.getenv('SCAN_TIME', '02:00')
    scan_interval = os.getenv('SCAN_INTERVAL', '7d')
    scan_on_startup = os.getenv('SCAN_ON_STARTUP', 'true').lower() == 'true'
    cache_max_age_hours = int(os.getenv('CACHE_MAX_AGE_HOURS', '168'))
    parallel_workers = int(os.getenv('PARALLEL_WORKERS', '16'))

    # PARALLEL_WORKERS=0 means "auto-size from CPU count".
    if parallel_workers == 0:
        parallel_workers = min(cpu_count(), 16)

    print(f"\n[CONFIG] Scheduler: enabled", flush=True)
    print(f"[CONFIG] Parallel: {parallel_workers} workers", flush=True)
    print(f"[CONFIG] Interval: {scan_interval}", flush=True)
    print(f"[CONFIG] Time: {scan_time}", flush=True)
    print(f"[CONFIG] Startup scan: {scan_on_startup}", flush=True)
    print(f"[CONFIG] Cache max age: {cache_max_age_hours}h ({cache_max_age_hours/24:.1f} days)", flush=True)
    sys.stdout.flush()

    # Zero-arg wrapper so `schedule` can call the scan with our config.
    scan_function = lambda: scan_all_countries_incremental(parallel_workers, cache_max_age_hours)

    if scan_on_startup:
        print("\n[STARTUP] Running incremental scan...\n", flush=True)
        sys.stdout.flush()
        scan_function()
    else:
        print("\n[STARTUP] Skipping (SCAN_ON_STARTUP=false)", flush=True)
        sys.stdout.flush()

    # Register the recurring job. Supported SCAN_INTERVAL values:
    # 'daily', 'weekly', 'monthly', '<N>h', '<N>d'.
    if scan_interval == 'daily':
        schedule.every().day.at(scan_time).do(scan_function)
        print(f"\n[SCHEDULER] Daily at {scan_time}", flush=True)
    elif scan_interval == 'weekly':
        schedule.every().monday.at(scan_time).do(scan_function)
        print(f"\n[SCHEDULER] Weekly (Monday at {scan_time})", flush=True)
    elif scan_interval == 'monthly':
        # 'monthly' is approximated as a fixed 30-day period.
        schedule.every(30).days.do(scan_function)
        print(f"\n[SCHEDULER] Monthly (every 30 days)", flush=True)
    elif scan_interval.endswith('h'):
        # NOTE(review): int() raises ValueError on e.g. 'xh' — a
        # malformed interval crashes here instead of hitting the
        # invalid-interval branch below.
        hours = int(scan_interval[:-1])
        schedule.every(hours).hours.do(scan_function)
        print(f"\n[SCHEDULER] Every {hours} hours", flush=True)
    elif scan_interval.endswith('d'):
        days = int(scan_interval[:-1])
        schedule.every(days).days.do(scan_function)
        print(f"\n[SCHEDULER] Every {days} days", flush=True)
    else:
        print(f"\n[ERROR] Invalid SCAN_INTERVAL: {scan_interval}", flush=True)
        sys.stdout.flush()
        sys.exit(1)

    next_run = schedule.next_run()
    if next_run:
        print(f"[SCHEDULER] Next run: {next_run}", flush=True)
    print("\nScheduler running. Press Ctrl+C to stop.\n", flush=True)
    sys.stdout.flush()

    # Main loop: `running` is cleared by signal_handler; at most one
    # minute of latency between a signal and shutdown.
    while running:
        schedule.run_pending()
        time.sleep(60)

    print("\n[SHUTDOWN] Stopped gracefully.", flush=True)
    sys.stdout.flush()
    sys.exit(0)
|
||||
Reference in New Issue
Block a user