Files
geoip_block_generator/precache_daemon.py
Mateusz Gruszczyński c0afc1554d first commit
2026-02-17 09:04:09 +01:00

247 lines
8.5 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Pre-cache individual countries in ALL config variants to Redis
"""
import sys
import os
import sqlite3
import json
from datetime import datetime

# Anchor imports and relative paths to the script's own directory so the
# daemon behaves the same no matter which CWD it was launched from.
SCRIPT_DIR = os.path.dirname(os.path.abspath(__file__))
sys.path.insert(0, SCRIPT_DIR)
os.chdir(SCRIPT_DIR)
print(f"[PRE-CACHE] Working from: {SCRIPT_DIR}", flush=True)

# Project-local modules (resolved via the sys.path insert above).
from redis_cache import RedisCache
from geoip_handler import ConfigGenerator
import config

# SQLite cache produced by a previous GeoIP import step; abort early if absent.
DB_PATH = config.GEOIP_DB_DIR / 'networks_cache.db'
if not DB_PATH.exists():
    print(f"[ERROR] SQLite database not found: {DB_PATH}", flush=True)
    sys.exit(1)

# Single shared Redis connection used for both raw country data and configs.
redis_cache = RedisCache()
health = redis_cache.health_check()
print(f"[PRE-CACHE] Redis: {health['status']} ({health.get('memory_used_mb', 0):.1f} MB used)", flush=True)
# Config flavors pre-rendered for every country; each is generated twice
# below (aggregate=True and aggregate=False). "raw-*" entries are rendered
# inline in the main loop; the rest dispatch to ConfigGenerator methods.
APP_TYPES = [
    'nginx_geo',
    'nginx_map',
    'nginx_deny',
    'apache_24',
    'haproxy_acl',
    'raw-cidr_txt',
    'raw-newline_txt',
    'raw-json',
    'raw-csv',
]
def get_available_countries(db_path=None):
    """Return a mapping of country code -> cached network count.

    Args:
        db_path: Optional path to the SQLite cache database. Defaults to
            the module-level ``DB_PATH`` (kept for backward compatibility).

    Returns:
        dict mapping country_code (str) to network_count (int), in
        country-code order (dict preserves the ORDER BY insertion order).
    """
    if db_path is None:
        db_path = DB_PATH
    conn = sqlite3.connect(str(db_path), timeout=30.0)
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT country_code, network_count FROM cache_metadata ORDER BY country_code'
        )
        return dict(cursor.fetchall())
    finally:
        # Close even when the query raises, so the handle is never leaked.
        conn.close()
def fetch_country_networks(country_code, db_path=None):
    """Return every cached network (CIDR string) for one country.

    Args:
        country_code: ISO country code, case-insensitive.
        db_path: Optional path to the SQLite cache database. Defaults to
            the module-level ``DB_PATH`` (kept for backward compatibility).

    Returns:
        List of network strings, or [] when the country has no metadata row.
    """
    if db_path is None:
        db_path = DB_PATH
    cc = country_code.upper()
    conn = sqlite3.connect(str(db_path), timeout=600.0)
    try:
        cursor = conn.cursor()
        cursor.execute(
            'SELECT network_count FROM cache_metadata WHERE country_code = ?', (cc,)
        )
        if cursor.fetchone() is None:
            return []
        # Stream rows through the cursor instead of LIMIT/OFFSET pages:
        # OFFSET pagination without an ORDER BY has no guaranteed row order
        # in SQLite, so successive pages could overlap or skip rows. The
        # cursor fetches incrementally, so memory stays bounded per batch.
        cursor.execute(
            'SELECT network FROM networks_cache WHERE country_code = ?', (cc,)
        )
        return [row[0] for row in cursor]
    finally:
        # Close even when a query raises, so the handle is never leaked.
        conn.close()
# ---------------------------------------------------------------------------
# Main pre-cache pass: for every country ensure (1) the raw network list and
# (2) each APP_TYPES x {aggregate, non-aggregate} config variant is in Redis.
# ---------------------------------------------------------------------------
start_time = datetime.now()
print(f"\n{'='*70}", flush=True)
print(f"[STRATEGY] Per-country cache (all config variants)", flush=True)
print(f" Each country: raw data + {len(APP_TYPES)} types × 2 aggregation = {len(APP_TYPES)*2} configs", flush=True)
print(f" Multi-country combos: generated on-demand", flush=True)
print(f"{'='*70}\n", flush=True)

available_countries = get_available_countries()
print(f"Found {len(available_countries)} countries\n", flush=True)

# Run-wide counters reported in the summary section at the bottom.
country_data_generated = 0
country_data_cached = 0
config_generated = 0
config_cached = 0
errors = 0

for idx, (country, count) in enumerate(available_countries.items(), 1):
    print(f"[{idx}/{len(available_countries)}] {country}: {count:,} networks", flush=True)

    # Step 1: ensure the raw per-country network list exists in Redis (JSON).
    # NOTE(review): raw data lives under the "geoban:" prefix while the
    # summary scan below counts configs under "geoip:config:" — confirm the
    # two prefixes are intentional.
    redis_key_data = f"geoban:country:{country}"
    data_exists = redis_cache.redis_client.exists(redis_key_data)
    if data_exists:
        country_data_cached += 1
        print(f" ✓ Raw data: cached", flush=True)
        try:
            data = redis_cache.redis_client.get(redis_key_data)
            # The client may return bytes or str depending on its
            # decode_responses setting; handle both.
            if isinstance(data, bytes):
                networks = json.loads(data.decode('utf-8'))
            else:
                networks = json.loads(data)
            country_networks = {country: networks}
        except Exception as e:
            # Corrupt/unreadable cached entry: count it and skip the country.
            print(f" ✗ Error loading: {e}", flush=True)
            errors += 1
            continue
    else:
        networks = fetch_country_networks(country)
        if not networks:
            print(f" ✗ No data", flush=True)
            errors += 1
            continue
        # 86400 s = 24 h TTL on the raw network list.
        redis_cache.redis_client.setex(redis_key_data, 86400, json.dumps(networks))
        country_data_generated += 1
        print(f" ✓ Raw data: generated", flush=True)
        country_networks = {country: networks}

    # Step 2: render and cache every config variant for this country.
    configs_generated_this_country = 0
    configs_cached_this_country = 0
    for app_type in APP_TYPES:
        for aggregate in [True, False]:
            try:
                # Skip variants already present in the config cache.
                cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
                if cached_config:
                    config_cached += 1
                    configs_cached_this_country += 1
                    continue
                if app_type.startswith('raw-'):
                    # "raw-<fmt>" variants are rendered inline, no generator.
                    format_type = app_type.split('-')[1]
                    if format_type == 'cidr_txt':
                        config_text = '\n'.join(networks)
                    elif format_type == 'newline_txt':
                        # NOTE(review): output is identical to cidr_txt —
                        # confirm whether a different separator was intended.
                        config_text = '\n'.join(networks)
                    elif format_type == 'json':
                        config_text = json.dumps({
                            'country': country,
                            'networks': networks,
                            'count': len(networks)
                        }, indent=2)
                    elif format_type == 'csv':
                        config_text = 'network\n' + '\n'.join(networks)
                    else:
                        print(f" ✗ Unknown raw format: {format_type}", flush=True)
                        continue
                else:
                    # Dispatch table for server-specific generators. It lists
                    # more entries (apache_22, haproxy_lua) than APP_TYPES
                    # currently uses; the extras are harmless.
                    generators = {
                        'nginx_geo': ConfigGenerator.generate_nginx_geo,
                        'nginx_map': ConfigGenerator.generate_nginx_map,
                        'nginx_deny': ConfigGenerator.generate_nginx_deny,
                        'apache_22': ConfigGenerator.generate_apache_22,
                        'apache_24': ConfigGenerator.generate_apache_24,
                        'haproxy_acl': ConfigGenerator.generate_haproxy_acl,
                        'haproxy_lua': ConfigGenerator.generate_haproxy_lua,
                    }
                    generator = generators.get(app_type)
                    if not generator:
                        continue
                    config_text = generator(country_networks, aggregate=aggregate, redis_ips=None)
                stats = {
                    'countries': 1,
                    'total_networks': len(networks),
                    'per_country': {country: len(networks)}
                }
                success = redis_cache.save_config([country], app_type, aggregate, config_text, stats)
                if success:
                    config_generated += 1
                    configs_generated_this_country += 1
                else:
                    errors += 1
            except Exception as e:
                # One failing variant must not abort the rest of the pass.
                print(f"{app_type} ({aggregate}): {e}", flush=True)
                errors += 1

    # Per-country progress report.
    if configs_generated_this_country > 0:
        print(f" → New configs: {configs_generated_this_country}", flush=True)
    if configs_cached_this_country > 0:
        print(f" → Cached configs: {configs_cached_this_country}", flush=True)
    progress_pct = (idx / len(available_countries)) * 100
    print(f" → Progress: {progress_pct:.1f}%\n", flush=True)
# ---------------------------------------------------------------------------
# Summary: run duration, hit/miss counters, and a best-effort Redis census.
# ---------------------------------------------------------------------------
duration = (datetime.now() - start_time).total_seconds()
print(f"{'='*70}", flush=True)
print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
print(f"\n[Raw Country Data]", flush=True)
print(f" Generated: {country_data_generated}", flush=True)
print(f" Cached: {country_data_cached}", flush=True)
print(f"\n[Config Files]", flush=True)
print(f" Generated: {config_generated}", flush=True)
print(f" Cached: {config_cached}", flush=True)
print(f" Errors: {errors}", flush=True)

try:
    total_keys = redis_cache.redis_client.dbsize()

    # Count raw-country keys with non-blocking SCAN; a full iteration is
    # complete when the server returns cursor 0.
    cursor = 0
    country_keys = 0
    while True:
        cursor, keys = redis_cache.redis_client.scan(cursor, match="geoban:country:*", count=1000)
        country_keys += len(keys)
        if cursor == 0:
            break

    # Count rendered-config keys the same way.
    cursor = 0
    config_keys = 0
    while True:
        cursor, keys = redis_cache.redis_client.scan(cursor, match="geoip:config:*", count=1000)
        config_keys += len(keys)
        if cursor == 0:
            break

    health = redis_cache.health_check()
    print(f"\n[REDIS]", flush=True)
    print(f" Total keys: {total_keys}", flush=True)
    print(f" Country keys: {country_keys}", flush=True)
    print(f" Config keys: {config_keys}", flush=True)
    print(f" Memory: {health.get('memory_used_mb', 0):.2f} MB", flush=True)
except Exception as e:
    # Stats are informational only; never let a Redis hiccup fail the run.
    print(f"\n[REDIS] Error: {e}", flush=True)
print(f"{'='*70}\n", flush=True)