#!/usr/bin/env python3
"""
Pre-cache individual countries in ALL config variants to Redis
Smart caching: only regenerates expired or missing entries
"""
import sys
import os
import sqlite3
import json
from datetime import datetime

SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPT_DIR)
os.chdir(SCRIPT_DIR)
print(f"[PRE-CACHE] Working from: {SCRIPT_DIR}", flush=True)

# Project-local imports must come after sys.path is extended above.
from redis_cache import RedisCache
from geoip_handler import ConfigGenerator
import config

DB_PATH = config.GEOIP_DB_DIR / 'networks_cache.db'
if not DB_PATH.exists():
    print(f"[ERROR] SQLite database not found: {DB_PATH}", flush=True)
    sys.exit(1)

redis_cache = RedisCache()
health = redis_cache.health_check()
print(f"[PRE-CACHE] Redis: {health['status']} ({health.get('memory_used_mb', 0):.1f} MB used)", flush=True)

# Every config variant generated per country. Each app type is produced in
# both aggregated and non-aggregated form, hence the "x 2" in coverage math.
APP_TYPES = [
    'nginx_geo',
    'nginx_map',
    'nginx_deny',
    'apache_24',
    'haproxy_acl',
    'raw-cidr_txt',
    'raw-newline_txt',
    'raw-json',
    'raw-csv',
]

# TTL written on fresh cache entries, and the minimum remaining TTL below
# which an existing entry counts as "expiring soon" and gets regenerated.
CACHE_TTL_SECONDS = getattr(config, 'PRECACHE_INTERVAL_HOURS', 168) * 3600
MIN_TTL_THRESHOLD = getattr(config, 'PRECACHE_MIN_TTL_HOURS', 7) * 3600


def get_available_countries():
    """Return {country_code: network_count} for every country in the SQLite cache."""
    conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT country_code, network_count FROM cache_metadata ORDER BY country_code')
        return {code: count for code, count in cursor.fetchall()}
    finally:
        # Close even if the query raises, so the DB handle is never leaked.
        conn.close()


def fetch_country_networks(country_code):
    """Fetch all CIDR networks for one country from SQLite, in chunks.

    Returns a list of network strings; empty list if the country is unknown.
    """
    conn = sqlite3.connect(str(DB_PATH), timeout=600.0)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT network_count FROM cache_metadata WHERE country_code = ?',
                       (country_code.upper(),))
        row = cursor.fetchone()
        if not row:
            return []
        total_count = row[0]

        chunk_size = 100000
        all_networks = []
        offset = 0
        while offset < total_count:
            # ORDER BY rowid is required: without an ORDER BY, SQLite row order
            # is unspecified, so LIMIT/OFFSET pagination could skip or
            # duplicate networks across chunks.
            cursor.execute(
                'SELECT network FROM networks_cache WHERE country_code = ? '
                'ORDER BY rowid LIMIT ? OFFSET ?',
                (country_code.upper(), chunk_size, offset))
            chunk = [r[0] for r in cursor.fetchall()]
            if not chunk:
                break
            all_networks.extend(chunk)
            offset += chunk_size
        return all_networks
    finally:
        conn.close()


def check_cache_validity(country):
    """Check if country data and configs are valid in Redis

    Returns (is_valid, reason). Valid means the raw-network key exists with
    enough TTL left AND every app_type/aggregate config is present with
    enough TTL left.
    """
    redis_key_data = f"geoban:country:{country}"
    data_exists = redis_cache.redis_client.exists(redis_key_data)
    if not data_exists:
        return False, "Raw data missing"

    data_ttl = redis_cache.redis_client.ttl(redis_key_data)
    if data_ttl < MIN_TTL_THRESHOLD:
        return False, f"Raw data expiring soon (TTL: {data_ttl}s)"

    missing_configs = []
    expiring_configs = []
    for app_type in APP_TYPES:
        for aggregate in [True, False]:
            cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
            if not cached_config:
                missing_configs.append(f"{app_type}:{aggregate}")
            else:
                cache_key = redis_cache._generate_key([country], app_type, aggregate)
                config_ttl = redis_cache.redis_client.ttl(cache_key)
                if config_ttl < MIN_TTL_THRESHOLD:
                    expiring_configs.append(f"{app_type}:{aggregate}")

    if missing_configs:
        return False, f"Missing {len(missing_configs)} configs"
    if expiring_configs:
        return False, f"Expiring soon: {len(expiring_configs)} configs"
    return True, f"Valid (TTL: {data_ttl}s)"


def _load_country_networks(country, redis_key_data):
    """Load a country's network list from Redis, falling back to SQLite.

    On a SQLite fallback the fresh list is written back to Redis with
    CACHE_TTL_SECONDS. Returns [] when no networks are available.
    """
    if redis_cache.redis_client.exists(redis_key_data):
        try:
            data = redis_cache.redis_client.get(redis_key_data)
            if isinstance(data, bytes):
                return json.loads(data.decode('utf-8'))
            return json.loads(data)
        except Exception as e:
            # Corrupt/vanished Redis payload -> rebuild from SQLite below.
            print(f" ✗ Error loading cached data: {e}", flush=True)

    networks = fetch_country_networks(country)
    if networks:
        redis_cache.redis_client.setex(redis_key_data, CACHE_TTL_SECONDS, json.dumps(networks))
    return networks


def _generate_raw_config(country, networks, format_type):
    """Build the text for a 'raw-*' export format, or None if unknown."""
    if format_type == 'cidr_txt':
        return '\n'.join(networks)
    elif format_type == 'newline_txt':
        return '\n'.join(networks)
    elif format_type == 'json':
        return json.dumps({
            'country': country,
            'networks': networks,
            'count': len(networks)
        }, indent=2)
    elif format_type == 'csv':
        return 'network\n' + '\n'.join(networks)
    return None


def process_country(country, networks_count, force=False):
    """Process single country - fetch data and generate configs

    Args:
        country: two-letter country code.
        networks_count: expected network count (informational; kept for
            caller compatibility, not used directly here).
        force: when True, skip TTL checks and regenerate everything.

    Returns a result dict with 'status' of 'skipped', 'processed' or 'error'.
    """
    redis_key_data = f"geoban:country:{country}"

    if not force:
        is_valid, reason = check_cache_validity(country)
        if is_valid:
            return {
                'country': country,
                'status': 'skipped',
                'reason': reason,
                'generated': 0,
                'cached': len(APP_TYPES) * 2
            }

    networks = _load_country_networks(country, redis_key_data)
    if not networks:
        return {'country': country, 'status': 'error', 'reason': 'No networks',
                'generated': 0, 'cached': 0}

    country_networks = {country: networks}
    configs_generated = 0
    configs_cached = 0
    errors = 0

    for app_type in APP_TYPES:
        for aggregate in [True, False]:
            try:
                # Skip configs that are still fresh unless forced.
                if not force:
                    cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
                    if cached_config:
                        cache_key = redis_cache._generate_key([country], app_type, aggregate)
                        config_ttl = redis_cache.redis_client.ttl(cache_key)
                        if config_ttl > MIN_TTL_THRESHOLD:
                            configs_cached += 1
                            continue

                if app_type.startswith('raw-'):
                    format_type = app_type.split('-')[1]
                    config_text = _generate_raw_config(country, networks, format_type)
                    if config_text is None:
                        errors += 1
                        continue
                else:
                    # Dispatch table includes generators for app types not in
                    # APP_TYPES (apache_22, haproxy_lua) for forward compat.
                    generators = {
                        'nginx_geo': ConfigGenerator.generate_nginx_geo,
                        'nginx_map': ConfigGenerator.generate_nginx_map,
                        'nginx_deny': ConfigGenerator.generate_nginx_deny,
                        'apache_22': ConfigGenerator.generate_apache_22,
                        'apache_24': ConfigGenerator.generate_apache_24,
                        'haproxy_acl': ConfigGenerator.generate_haproxy_acl,
                        'haproxy_lua': ConfigGenerator.generate_haproxy_lua,
                    }
                    generator = generators.get(app_type)
                    if not generator:
                        continue
                    config_text = generator(country_networks, aggregate=aggregate, redis_ips=None)

                stats = {
                    'countries': 1,
                    'total_networks': len(networks),
                    'per_country': {country: len(networks)}
                }
                success = redis_cache.save_config([country], app_type, aggregate, config_text, stats)
                if success:
                    configs_generated += 1
                else:
                    errors += 1
            except Exception as e:
                # Log instead of silently swallowing, so failures are
                # diagnosable from the run output.
                print(f" ✗ Error generating {app_type} (aggregate={aggregate}): {e}", flush=True)
                errors += 1

    return {
        'country': country,
        'status': 'processed',
        'generated': configs_generated,
        'cached': configs_cached,
        'errors': errors
    }


def _count_keys(pattern):
    """Count Redis keys matching *pattern* via non-blocking SCAN."""
    cursor = 0
    total = 0
    while True:
        cursor, keys = redis_cache.redis_client.scan(cursor, match=pattern, count=1000)
        total += len(keys)
        if cursor == 0:
            return total


def main(force=False):
    """Pre-cache every available country; print per-country progress and a summary.

    Args:
        force: regenerate all configs regardless of remaining TTL.
    """
    start_time = datetime.now()
    print(f"\n{'='*70}", flush=True)
    print(f"[STRATEGY] Smart per-country cache", flush=True)
    print(f"  Mode: {'FORCE (regenerate all)' if force else 'SMART (skip valid cache)'}", flush=True)
    print(f"  Cache TTL: {CACHE_TTL_SECONDS}s ({CACHE_TTL_SECONDS/3600:.1f}h)", flush=True)
    print(f"  Min TTL to skip: {MIN_TTL_THRESHOLD}s ({MIN_TTL_THRESHOLD/3600:.1f}h)", flush=True)
    print(f"  Config types: {len(APP_TYPES)} × 2 = {len(APP_TYPES)*2} per country", flush=True)
    print(f"{'='*70}\n", flush=True)

    available_countries = get_available_countries()
    print(f"Found {len(available_countries)} countries\n", flush=True)

    results = {
        'skipped': 0,
        'processed': 0,
        'errors': 0,
        'configs_generated': 0,
        'configs_cached': 0
    }

    for idx, (country, count) in enumerate(available_countries.items(), 1):
        print(f"[{idx}/{len(available_countries)}] {country}: {count:,} networks", flush=True)
        result = process_country(country, count, force=force)

        if result['status'] == 'skipped':
            results['skipped'] += 1
            print(f"  ⊘ SKIPPED: {result['reason']}", flush=True)
        elif result['status'] == 'processed':
            results['processed'] += 1
            results['configs_generated'] += result['generated']
            results['configs_cached'] += result['cached']
            results['errors'] += result.get('errors', 0)
            if result['generated'] > 0:
                print(f"  ✓ Generated: {result['generated']}, Cached: {result['cached']}", flush=True)
            else:
                print(f"  ✓ All valid: {result['cached']} configs", flush=True)
        else:
            results['errors'] += 1
            print(f"  ✗ ERROR: {result.get('reason', 'Unknown')}", flush=True)

        progress_pct = (idx / len(available_countries)) * 100
        print(f"  → Progress: {progress_pct:.1f}%\n", flush=True)

    duration = (datetime.now() - start_time).total_seconds()
    print(f"{'='*70}", flush=True)
    print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
    print(f"\n[Countries]", flush=True)
    print(f"  Skipped (valid cache): {results['skipped']}", flush=True)
    print(f"  Processed: {results['processed']}", flush=True)
    print(f"  Errors: {results['errors']}", flush=True)
    print(f"\n[Configs]", flush=True)
    print(f"  Generated: {results['configs_generated']}", flush=True)
    print(f"  Already cached: {results['configs_cached']}", flush=True)
    print(f"  Total valid: {results['configs_generated'] + results['configs_cached']}", flush=True)

    # Best-effort Redis statistics; failures here must not fail the run.
    try:
        total_keys = redis_cache.redis_client.dbsize()
        country_keys = _count_keys("geoban:country:*")
        config_keys = _count_keys("geoip:config:*")
        health = redis_cache.health_check()

        print(f"\n[REDIS]", flush=True)
        print(f"  Total keys: {total_keys}", flush=True)
        print(f"  Country keys: {country_keys}", flush=True)
        print(f"  Config keys: {config_keys}", flush=True)
        print(f"  Memory: {health.get('memory_used_mb', 0):.2f} MB", flush=True)

        expected_configs = len(available_countries) * len(APP_TYPES) * 2
        coverage = (config_keys / expected_configs * 100) if expected_configs > 0 else 0
        print(f"  Coverage: {config_keys}/{expected_configs} ({coverage:.1f}%)", flush=True)
    except Exception as e:
        print(f"\n[REDIS] Error: {e}", flush=True)

    print(f"{'='*70}\n", flush=True)


if __name__ == '__main__':
    import argparse
    parser = argparse.ArgumentParser(description='Pre-cache GeoIP configs to Redis')
    parser.add_argument('--force', action='store_true',
                        help='Force regenerate all configs (ignore TTL)')
    args = parser.parse_args()
    main(force=args.force)