Add pre-cache configuration options and a performance test

This commit is contained in:
Mateusz Gruszczyński
2026-02-17 09:27:09 +01:00
parent 7b805cd177
commit 5d2c5aa451
4 changed files with 586 additions and 93 deletions

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Pre-cache individual countries in ALL config variants to Redis
Smart caching: only regenerates expired or missing entries
"""
import sys
@@ -41,6 +42,9 @@ APP_TYPES = [
'raw-csv',
]
CACHE_TTL_SECONDS = getattr(config, 'PRECACHE_INTERVAL_HOURS', 168) * 3600
MIN_TTL_THRESHOLD = getattr(config, 'PRECACHE_MIN_TTL_HOURS', 7) * 3600
def get_available_countries():
conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
cursor = conn.cursor()
@@ -78,66 +82,93 @@ def fetch_country_networks(country_code):
conn.close()
return all_networks
start_time = datetime.now()
print(f"\n{'='*70}", flush=True)
print(f"[STRATEGY] Per-country cache (all config variants)", flush=True)
print(f" Each country: raw data + {len(APP_TYPES)} types × 2 aggregation = {len(APP_TYPES)*2} configs", flush=True)
print(f" Multi-country combos: generated on-demand", flush=True)
print(f"{'='*70}\n", flush=True)
available_countries = get_available_countries()
print(f"Found {len(available_countries)} countries\n", flush=True)
country_data_generated = 0
country_data_cached = 0
config_generated = 0
config_cached = 0
errors = 0
for idx, (country, count) in enumerate(available_countries.items(), 1):
print(f"[{idx}/{len(available_countries)}] {country}: {count:,} networks", flush=True)
def check_cache_validity(country):
    """Check whether a country's raw data and every config variant are still valid in Redis.

    Args:
        country: ISO country code used in the cache key namespace.

    Returns:
        (is_valid, reason) tuple. The entry is valid only when the raw data key
        exists, all len(APP_TYPES) * 2 config variants are cached, and every
        remaining TTL is above MIN_TTL_THRESHOLD.

    Note: redis-py's ttl() returns -1 for a key that exists without an expiry
    and -2 for a missing key. The original `ttl < MIN_TTL_THRESHOLD` test
    misclassified persistent (-1) keys as "expiring soon" and forced a
    pointless regeneration; a persistent key is now treated as valid, while
    -2 (vanished between exists() and ttl()) still triggers a refresh.
    """
    redis_key_data = f"geoban:country:{country}"

    # Raw per-country network data must exist before any config can be trusted.
    data_exists = redis_cache.redis_client.exists(redis_key_data)
    if not data_exists:
        return False, "Raw data missing"

    data_ttl = redis_cache.redis_client.ttl(redis_key_data)
    # -1 == no expiry set -> never "expiring soon"; anything else below the
    # threshold (including -2, key gone) means the entry needs a refresh.
    if data_ttl != -1 and data_ttl < MIN_TTL_THRESHOLD:
        return False, f"Raw data expiring soon (TTL: {data_ttl}s)"

    missing_configs = []
    expiring_configs = []
    for app_type in APP_TYPES:
        for aggregate in [True, False]:
            cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
            if not cached_config:
                missing_configs.append(f"{app_type}:{aggregate}")
            else:
                cache_key = redis_cache._generate_key([country], app_type, aggregate)
                config_ttl = redis_cache.redis_client.ttl(cache_key)
                if config_ttl != -1 and config_ttl < MIN_TTL_THRESHOLD:
                    expiring_configs.append(f"{app_type}:{aggregate}")

    if missing_configs:
        return False, f"Missing {len(missing_configs)} configs"
    if expiring_configs:
        return False, f"Expiring soon: {len(expiring_configs)} configs"
    return True, f"Valid (TTL: {data_ttl}s)"
def process_country(country, networks_count, force=False):
"""Process single country - fetch data and generate configs"""
redis_key_data = f"geoban:country:{country}"
if not force:
is_valid, reason = check_cache_validity(country)
if is_valid:
return {
'country': country,
'status': 'skipped',
'reason': reason,
'generated': 0,
'cached': len(APP_TYPES) * 2
}
data_exists = redis_cache.redis_client.exists(redis_key_data)
if data_exists:
country_data_cached += 1
print(f" ✓ Raw data: cached", flush=True)
try:
data = redis_cache.redis_client.get(redis_key_data)
if isinstance(data, bytes):
networks = json.loads(data.decode('utf-8'))
else:
networks = json.loads(data)
country_networks = {country: networks}
except Exception as e:
print(f" ✗ Error loading: {e}", flush=True)
errors += 1
continue
print(f" ✗ Error loading cached data: {e}", flush=True)
networks = fetch_country_networks(country)
if not networks:
return {'country': country, 'status': 'error', 'reason': 'No networks', 'generated': 0, 'cached': 0}
redis_cache.redis_client.setex(redis_key_data, CACHE_TTL_SECONDS, json.dumps(networks))
else:
networks = fetch_country_networks(country)
if not networks:
print(f" ✗ No data", flush=True)
errors += 1
continue
redis_cache.redis_client.setex(redis_key_data, 86400, json.dumps(networks))
country_data_generated += 1
print(f" ✓ Raw data: generated", flush=True)
country_networks = {country: networks}
return {'country': country, 'status': 'error', 'reason': 'No networks', 'generated': 0, 'cached': 0}
redis_cache.redis_client.setex(redis_key_data, CACHE_TTL_SECONDS, json.dumps(networks))
configs_generated_this_country = 0
configs_cached_this_country = 0
country_networks = {country: networks}
configs_generated = 0
configs_cached = 0
errors = 0
for app_type in APP_TYPES:
for aggregate in [True, False]:
try:
cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
if cached_config:
config_cached += 1
configs_cached_this_country += 1
continue
if not force:
cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
if cached_config:
cache_key = redis_cache._generate_key([country], app_type, aggregate)
config_ttl = redis_cache.redis_client.ttl(cache_key)
if config_ttl > MIN_TTL_THRESHOLD:
configs_cached += 1
continue
if app_type.startswith('raw-'):
format_type = app_type.split('-')[1]
@@ -155,7 +186,7 @@ for idx, (country, count) in enumerate(available_countries.items(), 1):
elif format_type == 'csv':
config_text = 'network\n' + '\n'.join(networks)
else:
print(f" ✗ Unknown raw format: {format_type}", flush=True)
errors += 1
continue
else:
@@ -184,63 +215,122 @@ for idx, (country, count) in enumerate(available_countries.items(), 1):
success = redis_cache.save_config([country], app_type, aggregate, config_text, stats)
if success:
config_generated += 1
configs_generated_this_country += 1
configs_generated += 1
else:
errors += 1
except Exception as e:
print(f"{app_type} ({aggregate}): {e}", flush=True)
errors += 1
if configs_generated_this_country > 0:
print(f" → New configs: {configs_generated_this_country}", flush=True)
if configs_cached_this_country > 0:
print(f" → Cached configs: {configs_cached_this_country}", flush=True)
progress_pct = (idx / len(available_countries)) * 100
print(f" → Progress: {progress_pct:.1f}%\n", flush=True)
return {
'country': country,
'status': 'processed',
'generated': configs_generated,
'cached': configs_cached,
'errors': errors
}
duration = (datetime.now() - start_time).total_seconds()
def main(force=False):
    """Drive the smart pre-cache run over every available country.

    With force=True every country is regenerated; otherwise countries whose
    cache check_cache_validity() reports as valid are skipped. Prints a
    per-country progress log, a final summary, and Redis key statistics.
    """
    start_time = datetime.now()
    banner = '=' * 70

    # Header: announce mode and the TTL parameters governing skip decisions.
    print(f"\n{banner}", flush=True)
    print(f"[STRATEGY] Smart per-country cache", flush=True)
    print(f" Mode: {'FORCE (regenerate all)' if force else 'SMART (skip valid cache)'}", flush=True)
    print(f" Cache TTL: {CACHE_TTL_SECONDS}s ({CACHE_TTL_SECONDS/3600:.1f}h)", flush=True)
    print(f" Min TTL to skip: {MIN_TTL_THRESHOLD}s ({MIN_TTL_THRESHOLD/3600:.1f}h)", flush=True)
    print(f" Config types: {len(APP_TYPES)} × 2 = {len(APP_TYPES)*2} per country", flush=True)
    print(f"{banner}\n", flush=True)

    available_countries = get_available_countries()
    total = len(available_countries)
    print(f"Found {total} countries\n", flush=True)

    results = {
        'skipped': 0,
        'processed': 0,
        'errors': 0,
        'configs_generated': 0,
        'configs_cached': 0,
    }

    for idx, (country, count) in enumerate(available_countries.items(), 1):
        print(f"[{idx}/{total}] {country}: {count:,} networks", flush=True)
        result = process_country(country, count, force=force)
        status = result['status']

        if status == 'skipped':
            results['skipped'] += 1
            print(f" ⊘ SKIPPED: {result['reason']}", flush=True)
        elif status == 'processed':
            results['processed'] += 1
            results['configs_generated'] += result['generated']
            results['configs_cached'] += result['cached']
            results['errors'] += result.get('errors', 0)
            if result['generated'] > 0:
                print(f" ✓ Generated: {result['generated']}, Cached: {result['cached']}", flush=True)
            else:
                print(f" ✓ All valid: {result['cached']} configs", flush=True)
        else:
            results['errors'] += 1
            print(f" ✗ ERROR: {result.get('reason', 'Unknown')}", flush=True)

        print(f" → Progress: {(idx / total) * 100:.1f}%\n", flush=True)

    # Final summary of this run.
    duration = (datetime.now() - start_time).total_seconds()
    print(f"{banner}", flush=True)
    print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
    print(f"\n[Countries]", flush=True)
    print(f" Skipped (valid cache): {results['skipped']}", flush=True)
    print(f" Processed: {results['processed']}", flush=True)
    print(f" Errors: {results['errors']}", flush=True)
    print(f"\n[Configs]", flush=True)
    print(f" Generated: {results['configs_generated']}", flush=True)
    print(f" Already cached: {results['configs_cached']}", flush=True)
    print(f" Total valid: {results['configs_generated'] + results['configs_cached']}", flush=True)

    # Best-effort Redis statistics — failures here must not fail the run.
    try:
        total_keys = redis_cache.redis_client.dbsize()

        def _count_keys(pattern):
            # Incremental SCAN keeps Redis responsive (unlike a blocking KEYS).
            cursor, found = 0, 0
            while True:
                cursor, keys = redis_cache.redis_client.scan(cursor, match=pattern, count=1000)
                found += len(keys)
                if cursor == 0:
                    return found

        country_keys = _count_keys("geoban:country:*")
        config_keys = _count_keys("geoip:config:*")

        health = redis_cache.health_check()
        print(f"\n[REDIS]", flush=True)
        print(f" Total keys: {total_keys}", flush=True)
        print(f" Country keys: {country_keys}", flush=True)
        print(f" Config keys: {config_keys}", flush=True)
        print(f" Memory: {health.get('memory_used_mb', 0):.2f} MB", flush=True)

        expected_configs = total * len(APP_TYPES) * 2
        coverage = (config_keys / expected_configs * 100) if expected_configs > 0 else 0
        print(f" Coverage: {config_keys}/{expected_configs} ({coverage:.1f}%)", flush=True)
    except Exception as e:
        print(f"\n[REDIS] Error: {e}", flush=True)

    print(f"{banner}\n", flush=True)
print(f"{'='*70}", flush=True)
print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
print(f"\n[Raw Country Data]", flush=True)
print(f" Generated: {country_data_generated}", flush=True)
print(f" Cached: {country_data_cached}", flush=True)
print(f"\n[Config Files]", flush=True)
print(f" Generated: {config_generated}", flush=True)
print(f" Cached: {config_cached}", flush=True)
print(f" Errors: {errors}", flush=True)
try:
total_keys = redis_cache.redis_client.dbsize()
if __name__ == '__main__':
    # CLI entry point. The diff rendering had spliced removed old summary/Redis
    # code into this guard, leaving it syntactically broken; this is the clean
    # new-side guard: parse --force and delegate to main().
    import argparse

    parser = argparse.ArgumentParser(description='Pre-cache GeoIP configs to Redis')
    parser.add_argument('--force', action='store_true', help='Force regenerate all configs (ignore TTL)')
    args = parser.parse_args()

    main(force=args.force)