#!/usr/bin/env python3
"""
Pre-cache individual countries in ALL config variants to Redis
Smart caching: only regenerates expired or missing entries
"""

import sys
import os
import sqlite3
import json
from datetime import datetime

# Anchor everything to the script's own directory so the job behaves the
# same regardless of the caller's CWD (e.g. when launched from cron).
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPT_DIR)
os.chdir(SCRIPT_DIR)

print(f"[PRE-CACHE] Working from: {SCRIPT_DIR}", flush=True)

# Project-local modules — importable thanks to the sys.path insert above.
from redis_cache import RedisCache
from geoip_handler import ConfigGenerator
import config

# SQLite cache of network -> country data produced by an upstream job.
DB_PATH = config.GEOIP_DB_DIR / 'networks_cache.db'

if not DB_PATH.exists():
    print(f"[ERROR] SQLite database not found: {DB_PATH}", flush=True)
    sys.exit(1)

redis_cache = RedisCache()
health = redis_cache.health_check()
print(f"[PRE-CACHE] Redis: {health['status']} ({health.get('memory_used_mb', 0):.1f} MB used)", flush=True)

# Every output format we pre-generate. Each is rendered twice per country
# (aggregated and non-aggregated), i.e. len(APP_TYPES) * 2 configs/country.
APP_TYPES = [
    'nginx_geo',
    'nginx_map',
    'nginx_deny',
    'apache_24',
    'apache_22',
    'haproxy_acl',
    'haproxy_lua',
    'haproxy_map',
    'raw-cidr_txt',
    'raw-newline_txt',
    'raw-json',
    'raw-csv',
]

# TTL applied to generated Redis entries (default 168h = one week).
CACHE_TTL_SECONDS = getattr(config, 'PRECACHE_INTERVAL_HOURS', 168) * 3600
# Entries with less remaining TTL than this are regenerated early (default 7h).
MIN_TTL_THRESHOLD = getattr(config, 'PRECACHE_MIN_TTL_HOURS', 7) * 3600
def get_available_countries():
    """Return {country_code: network_count} from the SQLite metadata table.

    The connection is always closed, even if the query raises — the
    original leaked the handle on any SQLite error.
    """
    conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
    try:
        cursor = conn.cursor()
        cursor.execute('SELECT country_code, network_count FROM cache_metadata ORDER BY country_code')
        return {code: count for code, count in cursor.fetchall()}
    finally:
        conn.close()
def fetch_country_networks(country_code):
    """Return every network CIDR string for *country_code* (case-insensitive).

    Reads in 100k-row chunks to bound per-query memory; returns [] when the
    country is absent from cache_metadata. The connection is closed in all
    cases (the original leaked it if any query raised).
    """
    conn = sqlite3.connect(str(DB_PATH), timeout=600.0)
    try:
        cursor = conn.cursor()
        code = country_code.upper()

        cursor.execute('SELECT network_count FROM cache_metadata WHERE country_code = ?', (code,))
        row = cursor.fetchone()
        if not row:
            return []

        total_count = row[0]
        chunk_size = 100000
        all_networks = []
        offset = 0

        # NOTE(review): no ORDER BY — OFFSET paging assumes a stable scan
        # order, which holds for a table not modified mid-run; confirm the
        # upstream job never writes while this script reads.
        while offset < total_count:
            cursor.execute('SELECT network FROM networks_cache WHERE country_code = ? LIMIT ? OFFSET ?',
                           (code, chunk_size, offset))
            chunk = [r[0] for r in cursor.fetchall()]
            if not chunk:
                break
            all_networks.extend(chunk)
            offset += chunk_size

        return all_networks
    finally:
        conn.close()
def check_cache_validity(country):
    """Check if country data and configs are valid in Redis"""
    redis_key_data = f"geoban:country:{country}"

    # Guard: without the raw data blob there is nothing to validate.
    if not redis_cache.redis_client.exists(redis_key_data):
        return False, "Raw data missing"

    data_ttl = redis_cache.redis_client.ttl(redis_key_data)
    if data_ttl < MIN_TTL_THRESHOLD:
        return False, f"Raw data expiring soon (TTL: {data_ttl}s)"

    # Tally missing/expiring configs across every (app_type, aggregate) pair.
    missing = 0
    expiring = 0
    for app_type in APP_TYPES:
        for aggregate in (True, False):
            if not redis_cache.get_cached_config([country], app_type, aggregate):
                missing += 1
                continue
            cache_key = redis_cache._generate_key([country], app_type, aggregate)
            if redis_cache.redis_client.ttl(cache_key) < MIN_TTL_THRESHOLD:
                expiring += 1

    if missing:
        return False, f"Missing {missing} configs"
    if expiring:
        return False, f"Expiring soon: {expiring} configs"
    return True, f"Valid (TTL: {data_ttl}s)"
def _load_country_networks(country, redis_key_data):
    """Return the network list for *country*, preferring the Redis blob.

    Falls back to SQLite (re-caching the result with CACHE_TTL_SECONDS)
    when the Redis entry is absent or unreadable. Returns [] when the
    country has no networks at all.
    """
    if redis_cache.redis_client.exists(redis_key_data):
        try:
            data = redis_cache.redis_client.get(redis_key_data)
            if isinstance(data, bytes):
                data = data.decode('utf-8')
            return json.loads(data)
        except Exception as e:
            print(f" ✗ Error loading cached data: {e}", flush=True)

    networks = fetch_country_networks(country)
    if networks:
        redis_cache.redis_client.setex(redis_key_data, CACHE_TTL_SECONDS, json.dumps(networks))
    return networks


def _render_raw_config(country, networks, format_type, aggregate):
    """Render one of the 'raw-*' output formats; None for unknown formats."""
    if aggregate:
        nets_out = ConfigGenerator._aggregate_networks(networks)
    else:
        nets_out = sorted(set(networks))

    # cidr_txt and newline_txt are currently identical renderings.
    if format_type in ('cidr_txt', 'newline_txt'):
        return '\n'.join(nets_out)
    if format_type == 'json':
        return json.dumps({
            'country': country,
            'networks': nets_out,
            'count': len(nets_out)
        }, indent=2)
    if format_type == 'csv':
        return 'network\n' + '\n'.join(nets_out)
    return None


def process_country(country, networks_count, force=False):
    """Process single country - fetch data and generate configs.

    Returns a result dict: status is 'skipped' (cache still valid),
    'error' (no networks), or 'processed' with generated/cached/errors
    counters. *networks_count* is kept for interface compatibility.
    When force=True, TTL checks are bypassed and everything regenerates.
    """
    redis_key_data = f"geoban:country:{country}"

    if not force:
        is_valid, reason = check_cache_validity(country)
        if is_valid:
            return {
                'country': country,
                'status': 'skipped',
                'reason': reason,
                'generated': 0,
                'cached': len(APP_TYPES) * 2
            }

    # Single load path — the original duplicated the fetch+setex logic in
    # both branches of a data_exists check.
    networks = _load_country_networks(country, redis_key_data)
    if not networks:
        return {'country': country, 'status': 'error', 'reason': 'No networks', 'generated': 0, 'cached': 0}

    country_networks = {country: networks}

    # Dispatch table for the non-raw formats (hoisted out of the loop; the
    # original rebuilt it on every iteration).
    generators = {
        'nginx_geo': ConfigGenerator.generate_nginx_geo,
        'nginx_map': ConfigGenerator.generate_nginx_map,
        'nginx_deny': ConfigGenerator.generate_nginx_deny,
        'apache_22': ConfigGenerator.generate_apache_22,
        'apache_24': ConfigGenerator.generate_apache_24,
        'haproxy_acl': ConfigGenerator.generate_haproxy_acl,
        'haproxy_lua': ConfigGenerator.generate_haproxy_lua,
        'haproxy_map': ConfigGenerator.generate_haproxy_map,
    }

    configs_generated = 0
    configs_cached = 0
    errors = 0

    for app_type in APP_TYPES:
        for aggregate in [True, False]:
            try:
                if not force:
                    # Skip configs that are present and not close to expiry.
                    cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
                    if cached_config:
                        cache_key = redis_cache._generate_key([country], app_type, aggregate)
                        if redis_cache.redis_client.ttl(cache_key) > MIN_TTL_THRESHOLD:
                            configs_cached += 1
                            continue

                if app_type.startswith('raw-'):
                    config_text = _render_raw_config(country, networks, app_type.split('-')[1], aggregate)
                    if config_text is None:
                        errors += 1
                        continue
                else:
                    generator = generators.get(app_type)
                    if not generator:
                        continue
                    config_text = generator(country_networks, aggregate=aggregate, redis_ips=None)

                stats = {
                    'countries': 1,
                    'total_networks': len(networks),
                    'per_country': {country: len(networks)}
                }

                if redis_cache.save_config([country], app_type, aggregate, config_text, stats):
                    configs_generated += 1
                else:
                    errors += 1

            except Exception as e:
                # Was silently swallowed (bound `e` unused); record what failed.
                print(f" ✗ {country} {app_type}:{aggregate} failed: {e}", flush=True)
                errors += 1

    return {
        'country': country,
        'status': 'processed',
        'generated': configs_generated,
        'cached': configs_cached,
        'errors': errors
    }
def _count_redis_keys(pattern):
    """Count keys matching *pattern* via cursor-based SCAN (safe on large DBs)."""
    cursor = 0
    total = 0
    while True:
        cursor, keys = redis_cache.redis_client.scan(cursor, match=pattern, count=1000)
        total += len(keys)
        if cursor == 0:
            return total


def main(force=False):
    """Walk every country in the SQLite cache and ensure its Redis entries exist.

    force=True regenerates everything regardless of remaining TTLs;
    otherwise countries whose raw data and configs are still fresh are
    skipped. Prints a per-country progress log and a final summary.
    """
    start_time = datetime.now()

    print(f"\n{'='*70}", flush=True)
    print(f"[STRATEGY] Smart per-country cache", flush=True)
    print(f" Mode: {'FORCE (regenerate all)' if force else 'SMART (skip valid cache)'}", flush=True)
    print(f" Cache TTL: {CACHE_TTL_SECONDS}s ({CACHE_TTL_SECONDS/3600:.1f}h)", flush=True)
    print(f" Min TTL to skip: {MIN_TTL_THRESHOLD}s ({MIN_TTL_THRESHOLD/3600:.1f}h)", flush=True)
    print(f" Config types: {len(APP_TYPES)} × 2 = {len(APP_TYPES)*2} per country", flush=True)
    print(f"{'='*70}\n", flush=True)

    available_countries = get_available_countries()
    print(f"Found {len(available_countries)} countries\n", flush=True)

    results = {
        'skipped': 0,
        'processed': 0,
        'errors': 0,
        'configs_generated': 0,
        'configs_cached': 0
    }

    for idx, (country, count) in enumerate(available_countries.items(), 1):
        print(f"[{idx}/{len(available_countries)}] {country}: {count:,} networks", flush=True)

        result = process_country(country, count, force=force)

        if result['status'] == 'skipped':
            results['skipped'] += 1
            print(f" ⊘ SKIPPED: {result['reason']}", flush=True)
        elif result['status'] == 'processed':
            results['processed'] += 1
            results['configs_generated'] += result['generated']
            results['configs_cached'] += result['cached']
            results['errors'] += result.get('errors', 0)
            if result['generated'] > 0:
                print(f" ✓ Generated: {result['generated']}, Cached: {result['cached']}", flush=True)
            else:
                print(f" ✓ All valid: {result['cached']} configs", flush=True)
        else:
            results['errors'] += 1
            print(f" ✗ ERROR: {result.get('reason', 'Unknown')}", flush=True)

        progress_pct = (idx / len(available_countries)) * 100
        print(f" → Progress: {progress_pct:.1f}%\n", flush=True)

    duration = (datetime.now() - start_time).total_seconds()

    print(f"{'='*70}", flush=True)
    print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
    print(f"\n[Countries]", flush=True)
    print(f" Skipped (valid cache): {results['skipped']}", flush=True)
    print(f" Processed: {results['processed']}", flush=True)
    print(f" Errors: {results['errors']}", flush=True)
    print(f"\n[Configs]", flush=True)
    print(f" Generated: {results['configs_generated']}", flush=True)
    print(f" Already cached: {results['configs_cached']}", flush=True)
    print(f" Total valid: {results['configs_generated'] + results['configs_cached']}", flush=True)

    # Best-effort Redis health/coverage report — failures here must not mask
    # a successful run.
    try:
        total_keys = redis_cache.redis_client.dbsize()
        # The original had two copy-pasted SCAN loops; count each namespace
        # via the shared helper instead.
        country_keys = _count_redis_keys("geoban:country:*")
        config_keys = _count_redis_keys("geoip:config:*")

        health = redis_cache.health_check()

        print(f"\n[REDIS]", flush=True)
        print(f" Total keys: {total_keys}", flush=True)
        print(f" Country keys: {country_keys}", flush=True)
        print(f" Config keys: {config_keys}", flush=True)
        print(f" Memory: {health.get('memory_used_mb', 0):.2f} MB", flush=True)

        expected_configs = len(available_countries) * len(APP_TYPES) * 2
        coverage = (config_keys / expected_configs * 100) if expected_configs > 0 else 0
        print(f" Coverage: {config_keys}/{expected_configs} ({coverage:.1f}%)", flush=True)

    except Exception as e:
        print(f"\n[REDIS] Error: {e}", flush=True)

    print(f"{'='*70}\n", flush=True)
if __name__ == '__main__':
    # Local import: argparse is only needed when run as a script.
    import argparse

    parser = argparse.ArgumentParser(description='Pre-cache GeoIP configs to Redis')
    # --force bypasses all TTL checks and regenerates every config variant.
    parser.add_argument('--force', action='store_true', help='Force regenerate all configs (ignore TTL)')

    args = parser.parse_args()

    main(force=args.force)