Files
geoip_block_generator/precache_daemon.py
Mateusz Gruszczyński b3a16303d2 fix raw scan
2026-03-02 14:01:20 +01:00

346 lines
13 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Pre-cache individual countries in ALL config variants to Redis
Smart caching: only regenerates expired or missing entries
"""
import sys
import os
import sqlite3
import json
from datetime import datetime
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPT_DIR)
os.chdir(SCRIPT_DIR)
print(f"[PRE-CACHE] Working from: {SCRIPT_DIR}", flush=True)
from redis_cache import RedisCache
from geoip_handler import ConfigGenerator
import config
# Locate the SQLite networks cache produced by the generator; without it
# there is nothing to pre-cache, so abort early with a clear message.
DB_PATH = config.GEOIP_DB_DIR / 'networks_cache.db'
if not DB_PATH.exists():
    print(f"[ERROR] SQLite database not found: {DB_PATH}", flush=True)
    sys.exit(1)

# One shared Redis connection for the whole run; report broker health up front.
redis_cache = RedisCache()
health = redis_cache.health_check()
print(f"[PRE-CACHE] Redis: {health['status']} ({health.get('memory_used_mb', 0):.1f} MB used)", flush=True)
# Every config variant generated per country. Each entry is cached twice in
# Redis (aggregated and non-aggregated), i.e. len(APP_TYPES) * 2 keys/country.
# 'raw-*' entries are rendered inline; the rest map to ConfigGenerator methods.
APP_TYPES = [
    'nginx_geo',
    'nginx_map',
    'nginx_deny',
    'apache_24',
    'apache_22',
    'haproxy_acl',
    'haproxy_lua',
    'haproxy_map',
    'raw-cidr_txt',
    'raw-newline_txt',
    'raw-json',
    'raw-csv',
]
# Lifetime (seconds) of cached entries in Redis, and the minimum remaining TTL
# below which an entry is considered "expiring soon" and regenerated.
# Defaults: 168 h (7 days) cache life, 7 h skip threshold; both overridable
# via config.PRECACHE_INTERVAL_HOURS / config.PRECACHE_MIN_TTL_HOURS.
CACHE_TTL_SECONDS = getattr(config, 'PRECACHE_INTERVAL_HOURS', 168) * 3600
MIN_TTL_THRESHOLD = getattr(config, 'PRECACHE_MIN_TTL_HOURS', 7) * 3600
def get_available_countries():
    """Return {country_code: network_count} for every country in the cache.

    Reads the cache_metadata table of the SQLite networks cache, ordered by
    country code. The connection is always closed, even if the query raises
    (the original leaked the connection on error).
    """
    conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT country_code, network_count FROM cache_metadata ORDER BY country_code'
        )
        # fetchall() yields (country_code, network_count) pairs -> dict directly
        return dict(cursor.fetchall())
    finally:
        conn.close()
def fetch_country_networks(country_code):
    """Fetch all network CIDR strings for *country_code* from SQLite.

    Reads in 100k-row chunks to bound per-query memory. Returns a list of
    network strings; [] when the country is not in cache_metadata. The
    connection is always closed, even on error (previously leaked).
    """
    cc = country_code.upper()  # table stores uppercase codes; normalize once
    conn = sqlite3.connect(str(DB_PATH), timeout=600.0)
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT network_count FROM cache_metadata WHERE country_code = ?', (cc,))
        row = cursor.fetchone()
        if not row:
            return []
        total_count = row[0]
        chunk_size = 100000
        all_networks = []
        offset = 0
        # NOTE(review): OFFSET pagination without ORDER BY relies on SQLite
        # returning a stable row order across queries. That holds in practice
        # for a read-only snapshot, but confirm no concurrent writers modify
        # networks_cache during a run.
        while offset < total_count:
            cursor.execute(
                'SELECT network FROM networks_cache WHERE country_code = ? LIMIT ? OFFSET ?',
                (cc, chunk_size, offset))
            chunk = [r[0] for r in cursor.fetchall()]
            if not chunk:
                break
            all_networks.extend(chunk)
            offset += chunk_size
        return all_networks
    finally:
        conn.close()
def check_cache_validity(country):
    """Check if country data and configs are valid in Redis.

    Returns (is_valid, reason): valid only when the raw network data exists
    with enough TTL left AND every app_type/aggregate config variant is
    present and not expiring soon.
    """
    data_key = f"geoban:country:{country}"
    if not redis_cache.redis_client.exists(data_key):
        return False, "Raw data missing"
    data_ttl = redis_cache.redis_client.ttl(data_key)
    if data_ttl < MIN_TTL_THRESHOLD:
        return False, f"Raw data expiring soon (TTL: {data_ttl}s)"
    # Count problems per variant; only the counts feed the result message.
    missing = 0
    expiring = 0
    for app_type in APP_TYPES:
        for aggregate in (True, False):
            if not redis_cache.get_cached_config([country], app_type, aggregate):
                missing += 1
                continue
            variant_key = redis_cache._generate_key([country], app_type, aggregate)
            if redis_cache.redis_client.ttl(variant_key) < MIN_TTL_THRESHOLD:
                expiring += 1
    if missing:
        return False, f"Missing {missing} configs"
    if expiring:
        return False, f"Expiring soon: {expiring} configs"
    return True, f"Valid (TTL: {data_ttl}s)"
def _load_country_networks(country, redis_key_data):
    """Return the network list for *country*, preferring the Redis copy.

    Falls back to the SQLite cache when the Redis entry is missing or its
    JSON payload cannot be parsed; after a SQLite fetch the Redis entry is
    (re)written with a fresh TTL. Returns [] when no networks exist.
    """
    if redis_cache.redis_client.exists(redis_key_data):
        try:
            data = redis_cache.redis_client.get(redis_key_data)
            if isinstance(data, bytes):
                data = data.decode('utf-8')
            return json.loads(data)
        except Exception as e:
            # Corrupt/unreadable cache entry: log and rebuild from SQLite.
            print(f" ✗ Error loading cached data: {e}", flush=True)
    networks = fetch_country_networks(country)
    if networks:
        redis_cache.redis_client.setex(redis_key_data, CACHE_TTL_SECONDS, json.dumps(networks))
    return networks


def _config_is_fresh(country, app_type, aggregate):
    """True when this config variant is cached with enough TTL left to skip."""
    if not redis_cache.get_cached_config([country], app_type, aggregate):
        return False
    cache_key = redis_cache._generate_key([country], app_type, aggregate)
    return redis_cache.redis_client.ttl(cache_key) > MIN_TTL_THRESHOLD


def _render_config(country, networks, country_networks, app_type, aggregate):
    """Render one config variant; returns None for an unrecognized app_type."""
    if app_type.startswith('raw-'):
        format_type = app_type.split('-', 1)[1]
        # Aggregation collapses CIDRs via ConfigGenerator; otherwise dedupe+sort.
        if aggregate:
            nets_out = ConfigGenerator._aggregate_networks(networks)
        else:
            nets_out = sorted(set(networks))
        if format_type in ('cidr_txt', 'newline_txt'):
            return '\n'.join(nets_out)
        if format_type == 'json':
            return json.dumps({
                'country': country,
                'networks': nets_out,
                'count': len(nets_out)
            }, indent=2)
        if format_type == 'csv':
            return 'network\n' + '\n'.join(nets_out)
        return None
    generators = {
        'nginx_geo': ConfigGenerator.generate_nginx_geo,
        'nginx_map': ConfigGenerator.generate_nginx_map,
        'nginx_deny': ConfigGenerator.generate_nginx_deny,
        'apache_22': ConfigGenerator.generate_apache_22,
        'apache_24': ConfigGenerator.generate_apache_24,
        'haproxy_acl': ConfigGenerator.generate_haproxy_acl,
        'haproxy_lua': ConfigGenerator.generate_haproxy_lua,
        'haproxy_map': ConfigGenerator.generate_haproxy_map,
    }
    generator = generators.get(app_type)
    if generator is None:
        return None
    return generator(country_networks, aggregate=aggregate, redis_ips=None)


def process_country(country, networks_count, force=False):
    """Process single country - fetch data and generate configs.

    Args:
        country: country code (key of the networks cache).
        networks_count: count reported by cache_metadata (informational only;
            the authoritative network list is loaded from Redis/SQLite here).
        force: when True, skip freshness checks and regenerate every variant.

    Returns:
        Result dict with 'country', 'status' ('skipped'/'processed'/'error'),
        'generated', 'cached' and, when processed, 'errors'.
    """
    redis_key_data = f"geoban:country:{country}"
    if not force:
        is_valid, reason = check_cache_validity(country)
        if is_valid:
            return {
                'country': country,
                'status': 'skipped',
                'reason': reason,
                'generated': 0,
                'cached': len(APP_TYPES) * 2
            }
    # Single load path replaces the two duplicated Redis/SQLite branches.
    # An empty list is now an error in both paths (previously an empty but
    # parseable Redis entry fell through and produced empty configs).
    networks = _load_country_networks(country, redis_key_data)
    if not networks:
        return {'country': country, 'status': 'error', 'reason': 'No networks', 'generated': 0, 'cached': 0}
    country_networks = {country: networks}
    configs_generated = 0
    configs_cached = 0
    errors = 0
    for app_type in APP_TYPES:
        for aggregate in [True, False]:
            try:
                if not force and _config_is_fresh(country, app_type, aggregate):
                    configs_cached += 1
                    continue
                config_text = _render_config(country, networks, country_networks, app_type, aggregate)
                if config_text is None:
                    # Unknown app_type/format: counted as an error (never hit
                    # with the current APP_TYPES list).
                    errors += 1
                    continue
                stats = {
                    'countries': 1,
                    'total_networks': len(networks),
                    'per_country': {country: len(networks)}
                }
                if redis_cache.save_config([country], app_type, aggregate, config_text, stats):
                    configs_generated += 1
                else:
                    errors += 1
            except Exception as e:
                # Previously swallowed silently; log so failures are visible.
                print(f" ✗ Error on {app_type} (aggregate={aggregate}): {e}", flush=True)
                errors += 1
    return {
        'country': country,
        'status': 'processed',
        'generated': configs_generated,
        'cached': configs_cached,
        'errors': errors
    }
def main(force=False):
    """Run one smart pre-cache pass over every country and print a report.

    Args:
        force: regenerate all configs even when the Redis cache is fresh.
    """
    start_time = datetime.now()
    total_variants = len(APP_TYPES) * 2
    print(f"\n{'='*70}", flush=True)
    print(f"[STRATEGY] Smart per-country cache", flush=True)
    print(f" Mode: {'FORCE (regenerate all)' if force else 'SMART (skip valid cache)'}", flush=True)
    print(f" Cache TTL: {CACHE_TTL_SECONDS}s ({CACHE_TTL_SECONDS/3600:.1f}h)", flush=True)
    print(f" Min TTL to skip: {MIN_TTL_THRESHOLD}s ({MIN_TTL_THRESHOLD/3600:.1f}h)", flush=True)
    print(f" Config types: {len(APP_TYPES)} × 2 = {total_variants} per country", flush=True)
    print(f"{'='*70}\n", flush=True)

    available_countries = get_available_countries()
    total_countries = len(available_countries)
    print(f"Found {total_countries} countries\n", flush=True)

    results = {
        'skipped': 0,
        'processed': 0,
        'errors': 0,
        'configs_generated': 0,
        'configs_cached': 0
    }
    for idx, (country, count) in enumerate(available_countries.items(), 1):
        print(f"[{idx}/{total_countries}] {country}: {count:,} networks", flush=True)
        result = process_country(country, count, force=force)
        status = result['status']
        if status == 'skipped':
            results['skipped'] += 1
            print(f" ⊘ SKIPPED: {result['reason']}", flush=True)
        elif status == 'processed':
            results['processed'] += 1
            results['configs_generated'] += result['generated']
            results['configs_cached'] += result['cached']
            results['errors'] += result.get('errors', 0)
            if result['generated']:
                print(f" ✓ Generated: {result['generated']}, Cached: {result['cached']}", flush=True)
            else:
                print(f" ✓ All valid: {result['cached']} configs", flush=True)
        else:
            results['errors'] += 1
            print(f" ✗ ERROR: {result.get('reason', 'Unknown')}", flush=True)
        print(f" → Progress: {idx / total_countries * 100:.1f}%\n", flush=True)

    duration = (datetime.now() - start_time).total_seconds()
    print(f"{'='*70}", flush=True)
    print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
    print(f"\n[Countries]", flush=True)
    print(f" Skipped (valid cache): {results['skipped']}", flush=True)
    print(f" Processed: {results['processed']}", flush=True)
    print(f" Errors: {results['errors']}", flush=True)
    print(f"\n[Configs]", flush=True)
    print(f" Generated: {results['configs_generated']}", flush=True)
    print(f" Already cached: {results['configs_cached']}", flush=True)
    print(f" Total valid: {results['configs_generated'] + results['configs_cached']}", flush=True)

    try:
        total_keys = redis_cache.redis_client.dbsize()
        # scan_iter drives the same SCAN cursor loop the original wrote by hand
        country_keys = sum(1 for _ in redis_cache.redis_client.scan_iter(
            match="geoban:country:*", count=1000))
        config_keys = sum(1 for _ in redis_cache.redis_client.scan_iter(
            match="geoip:config:*", count=1000))
        health = redis_cache.health_check()
        print(f"\n[REDIS]", flush=True)
        print(f" Total keys: {total_keys}", flush=True)
        print(f" Country keys: {country_keys}", flush=True)
        print(f" Config keys: {config_keys}", flush=True)
        print(f" Memory: {health.get('memory_used_mb', 0):.2f} MB", flush=True)
        expected_configs = total_countries * total_variants
        coverage = (config_keys / expected_configs * 100) if expected_configs > 0 else 0
        print(f" Coverage: {config_keys}/{expected_configs} ({coverage:.1f}%)", flush=True)
    except Exception as e:
        # Reporting is best-effort; a Redis hiccup must not fail the run.
        print(f"\n[REDIS] Error: {e}", flush=True)
    print(f"{'='*70}\n", flush=True)
if __name__ == '__main__':
    import argparse

    # CLI: a single --force flag that bypasses all TTL-based skipping.
    cli = argparse.ArgumentParser(description='Pre-cache GeoIP configs to Redis')
    cli.add_argument('--force', action='store_true',
                     help='Force regenerate all configs (ignore TTL)')
    main(force=cli.parse_args().force)