Files
geoip_block_generator/geoip_handler.py
Mateusz Gruszczyński 9ccb1651b6 scan performance
2026-02-24 10:06:02 +01:00

1482 lines
55 KiB
Python

"""
GeoIP Handler - Database management and IP network fetching
"""
import geoip2.database
import requests
import json
import ipaddress
import sqlite3
from pathlib import Path
from datetime import datetime, timedelta
from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import config
import ipaddress
import math
from multiprocessing import cpu_count
def generate_metadata(countries: list, country_data: dict, redis_stats: dict = None, handler: 'GeoIPHandler' = None) -> dict:
    """
    Generate metadata about the configuration for headers.

    Args:
        countries: List of country codes
        country_data: Dict mapping country codes to their networks
        redis_stats: Optional dict with Redis statistics {'total': int, 'unique': int, 'deduped': int}
        handler: Optional GeoIPHandler instance (will create new if None)
    Returns:
        Dict with metadata fields (timestamps, per-country source info,
        Redis summary, cache settings).
    """
    if handler is None:
        handler = GeoIPHandler()
    now = datetime.now()
    # BUGFIX: datetime.now() is naive, so '%Z' expanded to '' and the
    # timestamp ended with a trailing space. astimezone() attaches the local
    # timezone so '%Z' renders a real zone name.
    timestamp = now.astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')
    total_networks = sum(len(networks) for networks in country_data.values())
    # Build data sources info per country
    sources_info = []
    conn = sqlite3.connect(str(handler.cache_db))
    try:
        cursor = conn.cursor()
        for country in countries:
            count = len(country_data.get(country, []))
            # Get cache metadata for this country (may be absent for fresh scans)
            cursor.execute(
                'SELECT last_scan, source FROM cache_metadata WHERE country_code = ?',
                (country.upper(),)
            )
            row = cursor.fetchone()
            if row:
                last_scan_str, source = row
                try:
                    last_scan = datetime.fromisoformat(last_scan_str)
                    age_hours = (now - last_scan).total_seconds() / 3600
                    age_days = age_hours / 24
                    sources_info.append({
                        'country': country,
                        'count': count,
                        'source_type': 'cache',
                        'source_detail': source,
                        'last_scan': last_scan_str[:19],
                        'age_hours': age_hours,
                        'age_days': age_days,
                        'formatted': f"# [{country}] {count:,} networks - SQLite cache (source: {source}, scanned: {last_scan_str[:19]}, age: {age_days:.1f} days)"
                    })
                except Exception:
                    # Unparseable last_scan timestamp: report the raw value
                    # without age information instead of failing the header.
                    sources_info.append({
                        'country': country,
                        'count': count,
                        'source_type': 'cache',
                        'source_detail': source,
                        'last_scan': last_scan_str[:19] if last_scan_str else 'unknown',
                        'age_hours': None,
                        'age_days': None,
                        'formatted': f"# [{country}] {count:,} networks - SQLite cache (source: {source}, scanned: {last_scan_str[:19]})"
                    })
            else:
                # No cache row: the data came from a live scan this run.
                sources_info.append({
                    'country': country,
                    'count': count,
                    'source_type': 'fresh',
                    'source_detail': 'live_scan',
                    'last_scan': None,
                    'age_hours': 0,
                    'age_days': 0,
                    'formatted': f"# [{country}] {count:,} networks - Fresh scan (no cache)"
                })
    finally:
        # BUGFIX: close even when a query raises; previously an exception
        # between connect() and close() leaked the connection.
        conn.close()
    # Redis statistics
    redis_info = {}
    if redis_stats:
        redis_info = {
            'total': redis_stats.get('total', 0),
            'unique': redis_stats.get('unique', 0),
            'deduped': redis_stats.get('deduped', 0),
            'formatted': f"Redis bad IPs: {redis_stats.get('total', 0)} entries ({redis_stats.get('unique', 0)} unique after deduplication)"
        }
    # Hoist the getattr so both derived fields use the same value.
    cache_max_age_hours = getattr(config, 'CACHE_MAX_AGE_HOURS', 168)
    return {
        'timestamp': timestamp,
        'timestamp_iso': now.isoformat(),
        'countries': countries,
        'countries_string': ', '.join(countries),
        'country_count': len(countries),
        'total_networks': total_networks,
        'sources': sources_info,
        'sources_formatted': '\n'.join([s['formatted'] for s in sources_info]),
        'redis': redis_info,
        'cache_max_age_hours': cache_max_age_hours,
        'cache_max_age_days': cache_max_age_hours / 24,
        'cache_db_path': str(handler.cache_db)
    }
def _generate_range_regex(start: int, end: int) -> str:
"""Generate optimal regex for numeric range 0-255"""
if start == 0 and end == 255:
return "(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])"
if end - start < 10:
return "(" + "|".join(str(i) for i in range(start, end + 1)) + ")"
parts = []
current = start
while current <= end:
first_digit = current // 10
last_digit = current % 10
max_in_decade = min(end, (first_digit + 1) * 10 - 1)
if last_digit == 0 and max_in_decade == (first_digit + 1) * 10 - 1:
if first_digit == 0:
parts.append("[0-9]")
else:
parts.append(f"{first_digit}[0-9]")
current = max_in_decade + 1
elif current == max_in_decade:
parts.append(str(current))
current += 1
else:
if first_digit == 0:
parts.append(f"[{last_digit}-{max_in_decade % 10}]")
else:
parts.append(f"{first_digit}[{last_digit}-{max_in_decade % 10}]")
current = max_in_decade + 1
return "(" + "|".join(parts) + ")"
def cidr_to_nginx_regex(cidr: str) -> str:
    """Convert an IPv4 CIDR into an nginx map-module regex pattern.

    Returns a ``~^...`` regex string, or None when *cidr* cannot be parsed
    as an IPv4 network (callers treat None as a conversion failure).
    """
    try:
        network = ipaddress.IPv4Network(cidr, strict=False)
        prefix = network.prefixlen
        octets = str(network.network_address).split('.')
        if prefix == 32:
            # Single host: anchor both ends.
            return f"~^{octets[0]}\\.{octets[1]}\\.{octets[2]}\\.{octets[3]}$"
        if prefix > 24:
            # BUGFIX: /25-/31 previously returned the 3-octet prefix regex and
            # therefore matched the ENTIRE /24 (over-blocking). Match the exact
            # fourth-octet range instead, anchored with '$'.
            start_fourth = int(octets[3])
            end_fourth = start_fourth + 2 ** (32 - prefix) - 1
            range_regex = _generate_range_regex(start_fourth, end_fourth)
            return f"~^{octets[0]}\\.{octets[1]}\\.{octets[2]}\\.{range_regex}$"
        if prefix == 24:
            # Whole /24: any fourth octet.
            return f"~^{octets[0]}\\.{octets[1]}\\.{octets[2]}\\."
        if prefix >= 16:
            # /16-/23 cover a contiguous run of third octets entirely.
            start_third = int(octets[2])
            num_subnets = 2 ** (24 - prefix)
            end_third = start_third + num_subnets - 1
            if start_third == end_third:
                return f"~^{octets[0]}\\.{octets[1]}\\.{start_third}\\."
            elif end_third - start_third == 1:
                return f"~^{octets[0]}\\.{octets[1]}\\.({start_third}|{end_third})\\."
            else:
                range_regex = _generate_range_regex(start_third, end_third)
                return f"~^{octets[0]}\\.{octets[1]}\\.{range_regex}\\."
        if prefix >= 8:
            # /8-/15 cover a contiguous run of second octets entirely.
            start_second = int(octets[1])
            num_subnets = 2 ** (16 - prefix)
            end_second = start_second + num_subnets - 1
            if start_second == end_second:
                return f"~^{octets[0]}\\.{start_second}\\."
            else:
                range_regex = _generate_range_regex(start_second, end_second)
                return f"~^{octets[0]}\\.{range_regex}\\."
        # Shorter than /8: range over the first octet.
        start_first = int(octets[0])
        num_subnets = 2 ** (8 - prefix)
        end_first = start_first + num_subnets - 1
        range_regex = _generate_range_regex(start_first, end_first)
        return f"~^{range_regex}\\."
    except Exception as e:
        print(f"[ERROR] CIDR conversion failed for {cidr}: {e}", flush=True)
        return None
class GeoIPHandler:
    """Manages the MaxMind GeoLite2 database file and a SQLite cache of
    per-country IPv4 networks.

    Lookup order used by fetch_country_networks():
      1. fresh-enough rows from the SQLite cache,
      2. a parallel scan of the local GeoLite2 .mmdb (merged with GitHub data),
      3. GitHub country list, then ipdeny country list as fallbacks.
    """

    def __init__(self):
        # All artifacts live under config.GEOIP_DB_DIR; create it eagerly so
        # later file writes cannot fail on a missing parent directory.
        self.mmdb_file = config.GEOIP_DB_DIR / 'GeoLite2-Country.mmdb'
        self.config_file = config.GEOIP_DB_DIR / 'config.json'
        self.cache_db = config.GEOIP_DB_DIR / 'networks_cache.db'
        config.GEOIP_DB_DIR.mkdir(parents=True, exist_ok=True)
        self._init_cache_db()

    def _init_cache_db(self):
        """Create the cache schema (idempotent) and tune SQLite pragmas."""
        conn = sqlite3.connect(str(self.cache_db), timeout=30.0)
        cursor = conn.cursor()
        # WAL + NORMAL sync favour concurrent readers and faster bulk writes.
        cursor.execute('PRAGMA journal_mode=WAL;')
        cursor.execute('PRAGMA synchronous=NORMAL;')
        cursor.execute('PRAGMA cache_size=10000;')
        cursor.execute('PRAGMA temp_store=MEMORY;')
        # One row per (country, CIDR); the PK doubles as a dedup constraint.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS networks_cache (
                country_code TEXT NOT NULL,
                network TEXT NOT NULL,
                source TEXT NOT NULL,
                created_at TEXT NOT NULL,
                PRIMARY KEY (country_code, network)
            )
        ''')
        # Per-country scan bookkeeping used for freshness checks.
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS cache_metadata (
                country_code TEXT PRIMARY KEY,
                last_scan TEXT NOT NULL,
                network_count INTEGER NOT NULL,
                source TEXT DEFAULT 'unknown'
            )
        ''')
        cursor.execute('''
            CREATE INDEX IF NOT EXISTS idx_networks_country
            ON networks_cache(country_code)
        ''')
        conn.commit()
        conn.close()

    def _get_cached_networks(self, country_code: str) -> list:
        """Get networks from cache with chunked reading for large datasets.

        Returns the cached CIDR strings for *country_code*, or None when the
        country was never scanned or the entry is older than
        config.CACHE_MAX_AGE_HOURS.
        """
        conn = sqlite3.connect(str(self.cache_db), timeout=600.0)
        cursor = conn.cursor()
        cursor.execute(
            'SELECT last_scan, network_count FROM cache_metadata WHERE country_code = ?',
            (country_code.upper(),)
        )
        row = cursor.fetchone()
        if row:
            last_scan_str, count = row
            last_scan = datetime.fromisoformat(last_scan_str)
            age_hours = (datetime.now() - last_scan).total_seconds() / 3600
            if age_hours < config.CACHE_MAX_AGE_HOURS:
                # Chunked reading for large datasets
                # NOTE(review): LIMIT/OFFSET without ORDER BY relies on SQLite
                # returning a stable row order across successive queries —
                # confirm, or add ORDER BY to make pagination deterministic.
                chunk_size = 100000
                all_networks = []
                offset = 0
                while offset < count:
                    cursor.execute(
                        'SELECT network FROM networks_cache WHERE country_code = ? LIMIT ? OFFSET ?',
                        (country_code.upper(), chunk_size, offset)
                    )
                    chunk = [row[0] for row in cursor.fetchall()]
                    if not chunk:
                        break
                    all_networks.extend(chunk)
                    offset += chunk_size
                conn.close()
                return all_networks
        conn.close()
        return None

    def _save_to_cache(self, country_code: str, networks: list, source: str):
        """Replace the cached network list for a country.

        Deletes the previous rows, bulk-inserts *networks* in chunks and
        rewrites the cache_metadata row. Retries up to 3 times with a growing
        sleep when the database is locked/busy. Returns True on success.
        """
        if not networks:
            print(f"[CACHE] Skipping {country_code} - no networks to save", flush=True)
            return False
        max_retries = 3
        country_code = country_code.upper()
        chunk_size = 50000
        for attempt in range(max_retries):
            conn = None
            try:
                # DEFERRED isolation takes the write lock lazily, shortening
                # the window in which other writers are blocked.
                conn = sqlite3.connect(
                    str(self.cache_db),
                    timeout=300.0,
                    isolation_level='DEFERRED'
                )
                cursor = conn.cursor()
                cursor.execute('DELETE FROM networks_cache WHERE country_code = ?', (country_code,))
                cursor.execute('DELETE FROM cache_metadata WHERE country_code = ?', (country_code,))
                timestamp = datetime.now().isoformat()
                total_inserted = 0
                for i in range(0, len(networks), chunk_size):
                    chunk = networks[i:i+chunk_size]
                    cursor.executemany(
                        'INSERT INTO networks_cache (country_code, network, source, created_at) VALUES (?, ?, ?, ?)',
                        [(country_code, network, source, timestamp) for network in chunk]
                    )
                    total_inserted += len(chunk)
                    # Progress log only for multi-chunk (large) inserts.
                    if len(networks) > chunk_size:
                        print(f"[CACHE] {country_code}: Inserted {total_inserted}/{len(networks)} networks...", flush=True)
                cursor.execute(
                    'INSERT INTO cache_metadata (country_code, last_scan, network_count, source) VALUES (?, ?, ?, ?)',
                    (country_code, timestamp, len(networks), source)
                )
                conn.commit()
                print(f"[CACHE] ✓ Saved {country_code}: {len(networks)} networks from {source}", flush=True)
                return True
            except sqlite3.OperationalError as e:
                # SQLITE_BUSY / SQLITE_LOCKED: another writer holds the lock.
                if 'locked' in str(e).lower() or 'busy' in str(e).lower():
                    print(f"[CACHE] Database locked for {country_code}, attempt {attempt+1}/{max_retries}", flush=True)
                    if attempt < max_retries - 1:
                        import time
                        time.sleep(10 * (attempt + 1))  # linear backoff: 10s, 20s
                    else:
                        print(f"[ERROR] Failed to save {country_code} after {max_retries} attempts", flush=True)
                        return False
                else:
                    print(f"[ERROR] SQLite error for {country_code}: {e}", flush=True)
                    import traceback
                    traceback.print_exc()
                    return False
            except Exception as e:
                print(f"[ERROR] Failed to save cache for {country_code}: {e}", flush=True)
                import traceback
                traceback.print_exc()
                return False
            finally:
                if conn:
                    try:
                        conn.close()
                    except:
                        pass
        return False

    def _update_cache_incremental(self, country_code: str, new_networks: list, source: str):
        """Diff-update the cached network list for a country.

        Compares the cached set with *new_networks* and only deletes rows
        that disappeared / inserts rows that are new, then rewrites the
        cache_metadata row. Same lock-retry scheme as _save_to_cache.
        Returns True on success.
        """
        if not new_networks:
            print(f"[CACHE] No networks to update for {country_code}", flush=True)
            return False
        max_retries = 3
        country_code = country_code.upper()
        chunk_size = 50000
        for attempt in range(max_retries):
            conn = None
            try:
                conn = sqlite3.connect(
                    str(self.cache_db),
                    timeout=300.0,
                    isolation_level='DEFERRED'
                )
                cursor = conn.cursor()
                cursor.execute(
                    'SELECT network FROM networks_cache WHERE country_code = ?',
                    (country_code,)
                )
                old_networks = set(row[0] for row in cursor.fetchall())
                new_networks_set = set(new_networks)
                # Set difference gives the minimal insert/delete workload.
                to_add = new_networks_set - old_networks
                to_remove = old_networks - new_networks_set
                timestamp = datetime.now().isoformat()
                if to_remove:
                    to_remove_list = list(to_remove)
                    for i in range(0, len(to_remove_list), chunk_size):
                        chunk = to_remove_list[i:i+chunk_size]
                        cursor.executemany(
                            'DELETE FROM networks_cache WHERE country_code = ? AND network = ?',
                            [(country_code, net) for net in chunk]
                        )
                    print(f"[CACHE] Removed {len(to_remove)} old networks from {country_code}", flush=True)
                if to_add:
                    to_add_list = list(to_add)
                    total_added = 0
                    for i in range(0, len(to_add_list), chunk_size):
                        chunk = to_add_list[i:i+chunk_size]
                        cursor.executemany(
                            'INSERT INTO networks_cache (country_code, network, source, created_at) VALUES (?, ?, ?, ?)',
                            [(country_code, network, source, timestamp) for network in chunk]
                        )
                        total_added += len(chunk)
                        # Progress log only for multi-chunk (large) inserts.
                        if len(to_add_list) > chunk_size:
                            print(f"[CACHE] {country_code}: Added {total_added}/{len(to_add_list)} new networks...", flush=True)
                    print(f"[CACHE] Added {len(to_add)} new networks to {country_code}", flush=True)
                # Metadata row is always rewritten to refresh last_scan.
                cursor.execute('DELETE FROM cache_metadata WHERE country_code = ?', (country_code,))
                cursor.execute(
                    'INSERT INTO cache_metadata (country_code, last_scan, network_count, source) VALUES (?, ?, ?, ?)',
                    (country_code, timestamp, len(new_networks), source)
                )
                conn.commit()
                unchanged = len(old_networks & new_networks_set)
                print(f"[CACHE] ✓ Updated {country_code}: +{len(to_add)} new, -{len(to_remove)} removed, ={unchanged} unchanged (total: {len(new_networks)})", flush=True)
                return True
            except sqlite3.OperationalError as e:
                # SQLITE_BUSY / SQLITE_LOCKED: another writer holds the lock.
                if 'locked' in str(e).lower() or 'busy' in str(e).lower():
                    print(f"[CACHE] Database locked for {country_code}, attempt {attempt+1}/{max_retries}", flush=True)
                    if attempt < max_retries - 1:
                        import time
                        time.sleep(10 * (attempt + 1))  # linear backoff: 10s, 20s
                    else:
                        print(f"[ERROR] Failed to update {country_code} after {max_retries} attempts", flush=True)
                        return False
                else:
                    print(f"[ERROR] SQLite error for {country_code}: {e}", flush=True)
                    return False
            except Exception as e:
                print(f"[ERROR] Failed to update cache for {country_code}: {e}", flush=True)
                import traceback
                traceback.print_exc()
                return False
            finally:
                if conn:
                    try:
                        conn.close()
                    except:
                        pass
        return False

    def get_countries_needing_scan(self, max_age_hours: int = 168) -> tuple:
        """Split config.COMMON_COUNTRIES into (missing, stale) code lists.

        'missing' have no cache_metadata row at all; 'stale' have one older
        than *max_age_hours* (or with an unparseable timestamp). On any
        database error every country is reported as missing.
        """
        import sys
        # NOTE(review): hard-coded deploy path pushed onto sys.path —
        # presumably needed when invoked outside the package; confirm whether
        # this is still required.
        sys.path.insert(0, '/opt/geoip_block_generator')
        all_countries = [c['code'] for c in config.COMMON_COUNTRIES]
        try:
            conn = sqlite3.connect(str(self.cache_db), timeout=30.0)
            cursor = conn.cursor()
            cursor.execute('SELECT country_code, last_scan FROM cache_metadata')
            cached_data = {row[0]: row[1] for row in cursor.fetchall()}
            conn.close()
            missing = []
            stale = []
            cutoff_time = datetime.now() - timedelta(hours=max_age_hours)
            for country in all_countries:
                if country not in cached_data:
                    missing.append(country)
                else:
                    try:
                        last_scan = datetime.fromisoformat(cached_data[country])
                        if last_scan < cutoff_time:
                            stale.append(country)
                    except:
                        # Corrupt timestamp: treat as stale so it gets rescanned.
                        stale.append(country)
            return missing, stale
        except Exception as e:
            print(f"[ERROR] Failed to check cache status: {e}", flush=True)
            # Fail-safe: schedule everything for scanning.
            return all_countries, []

    def load_config(self) -> dict:
        """Read the JSON state file; returns {} when missing or unreadable."""
        if self.config_file.exists():
            try:
                with open(self.config_file, 'r') as f:
                    return json.load(f)
            except:
                pass
        return {}

    def save_config(self, data: dict):
        """Persist the JSON state file (download bookkeeping)."""
        with open(self.config_file, 'w') as f:
            json.dump(data, f, indent=2)

    def needs_update(self) -> bool:
        """Return True when the .mmdb is missing or older than the configured
        MAXMIND_UPDATE_INTERVAL_DAYS (based on the recorded last_update)."""
        if not self.mmdb_file.exists():
            return True
        cfg = self.load_config()
        last_update = cfg.get('last_update')
        if not last_update:
            return True
        try:
            last_update_date = datetime.fromisoformat(last_update)
            days_old = (datetime.now() - last_update_date).days
            return days_old >= config.MAXMIND_UPDATE_INTERVAL_DAYS
        except:
            # Unparseable timestamp: force a refresh.
            return True

    def download_database(self) -> dict:
        """Download the GeoLite2 .mmdb, trying the primary URL then fallback.

        Streams the response to disk and records the download in the JSON
        state file. Returns {'success': bool, ...} with url/size or error.
        """
        urls = [config.MAXMIND_PRIMARY_URL, config.MAXMIND_FALLBACK_URL]
        for url in urls:
            try:
                print(f"Downloading database from {url}")
                response = requests.get(url, timeout=60, stream=True)
                response.raise_for_status()
                with open(self.mmdb_file, 'wb') as f:
                    for chunk in response.iter_content(chunk_size=8192):
                        f.write(chunk)
                file_size = self.mmdb_file.stat().st_size
                self.save_config({
                    'last_update': datetime.now().isoformat(),
                    'url': url,
                    'file_size': file_size
                })
                print(f"Database downloaded successfully ({file_size} bytes)")
                return {'success': True, 'url': url, 'size': file_size}
            except Exception as e:
                # Try the next source on any failure (network, HTTP, disk).
                print(f"Failed to download from {url}: {e}")
                continue
        return {'success': False, 'error': 'All download sources failed'}

    def check_and_update(self):
        """Download a fresh database if needs_update() says so."""
        if self.needs_update():
            print("Database update needed, downloading...")
            self.download_database()

    def _get_scan_ranges(self) -> list:
        """Return the list of /16 CIDR strings to probe during a scan.

        Covers first octets 1-223 (unicast space).
        NOTE(review): skipping 10/127/169/172/192 entirely also excludes
        public space in 172.* and 192.* — presumably a deliberate
        speed/noise trade-off; confirm.
        """
        scan_ranges = []
        for first_octet in range(1, 224):
            if first_octet in [10, 127, 169, 172, 192]:
                continue
            for second_octet in range(0, 256):
                scan_ranges.append(f"{first_octet}.{second_octet}.0.0/16")
        return scan_ranges

    def _scan_maxmind_for_country(self, country_code: str, progress_callback=None, workers=None) -> list:
        """Enumerate /24 networks belonging to *country_code* by sampling
        the local GeoLite2 database in parallel.

        For every /16 scan range, each contained /24 is probed with a single
        sample IP (network address + 1); a match on the country ISO code adds
        that /24 to the result. Work is chunked across a thread pool; each
        thread keeps its own mmdb Reader (thread-local).

        Returns a list of CIDR strings ([] when the mmdb is missing or the
        scan fails).
        """
        if not self.mmdb_file.exists():
            return []
        country_code = country_code.upper()
        scan_ranges = self._get_scan_ranges()
        total_ranges = len(scan_ranges)
        # workers default
        if workers is None or int(workers) <= 0:
            workers = min(32, max(4, cpu_count() * 2))
        else:
            workers = int(workers)
        # Chunk sizing: aim for ~tasks_per_worker tasks per thread, clamped
        # into [chunk_min, chunk_max] ranges per task.
        tasks_per_worker = getattr(config, "MAXMIND_CHUNK_TASKS_PER_WORKER", 12)
        chunk_min = getattr(config, "MAXMIND_CHUNK_MIN", 50)
        chunk_max = getattr(config, "MAXMIND_CHUNK_MAX", 2000)
        target_tasks = max(workers * int(tasks_per_worker), workers)
        chunk = int(math.ceil(total_ranges / float(target_tasks)))
        CHUNK = max(int(chunk_min), min(int(chunk_max), chunk))
        if progress_callback:
            progress_callback(f"Starting parallel MaxMind scan with {workers} workers...")
            progress_callback(f"Scanning {total_ranges} IP ranges...")
            progress_callback(f"Chunking: {CHUNK} ranges/task (~{int(math.ceil(total_ranges/float(CHUNK)))} tasks)")
        found_networks = set()
        found_networks_lock = threading.Lock()
        completed = 0
        completed_lock = threading.Lock()
        # One geoip2 Reader per thread (Reader use is kept thread-local).
        tls = threading.local()
        def get_reader():
            # Lazily open this thread's Reader on first use.
            r = getattr(tls, "reader", None)
            if r is None:
                tls.reader = geoip2.database.Reader(str(self.mmdb_file))
            return tls.reader
        def scan_one_range(reader, network_str: str):
            """Probe every /24 inside one /16 range; return matching subnets."""
            local = set()
            try:
                network = ipaddress.IPv4Network(network_str, strict=False)
                for subnet in network.subnets(new_prefix=24):
                    sample_ip = str(subnet.network_address + 1)
                    try:
                        resp = reader.country(sample_ip)
                        if resp.country.iso_code == country_code:
                            local.add(subnet)  # fewer allocations than str() inside the loop
                    except Exception:
                        # Address not found in the database: not this country.
                        pass
            except Exception:
                pass
            return local
        def scan_chunk(ranges):
            """Worker task: scan a batch of ranges, report coarse progress."""
            nonlocal completed
            reader = get_reader()
            local_chunk = set()
            for r in ranges:
                local_chunk.update(scan_one_range(reader, r))
            with completed_lock:
                completed += 1
                c = completed
            # Report progress more often (diagnostics only, does not affect the result).
            # NOTE(review): 'completed' counts finished CHUNKS, but is
            # compared/percentaged against total_ranges (a per-range count),
            # so the reported percentage under-reports — confirm intent.
            if progress_callback and (c % 500 == 0 or c == total_ranges):
                with found_networks_lock:
                    found_cnt = len(found_networks)
                pct = (c / float(total_ranges)) * 100.0
                progress_callback(
                    f"Scanning: {c}/{total_ranges} ranges ({pct:.1f}%), found {found_cnt} networks"
                )
            return local_chunk
        try:
            chunks = [scan_ranges[i:i + CHUNK] for i in range(0, total_ranges, CHUNK)]
            with ThreadPoolExecutor(max_workers=workers) as executor:
                futures = [executor.submit(scan_chunk, ch) for ch in chunks]
                for future in as_completed(futures):
                    local_nets = future.result()
                    if local_nets:
                        with found_networks_lock:
                            found_networks.update(local_nets)
            # Convert to strings once at the end (same result as converting in the loop).
            result = [str(n) for n in found_networks]
            if progress_callback:
                progress_callback(f"MaxMind scan complete: {len(result)} networks")
            return result
        except Exception as e:
            print(f"[ERROR] MaxMind scan failed for {country_code}: {e}", flush=True)
            import traceback
            traceback.print_exc()
            return []

    def fetch_country_networks(self, country_code: str, progress_callback=None) -> list:
        """Return the CIDR list for a country, using the cheapest source.

        Order: fresh SQLite cache -> MaxMind scan (merged with any extra
        networks from GitHub) -> GitHub list -> ipdeny list -> [].
        Successful non-cache results are written back to the cache.
        """
        country_code = country_code.upper()
        cached = self._get_cached_networks(country_code)
        if cached is not None:
            if progress_callback:
                progress_callback(f"Using cached data")
            return cached
        if progress_callback:
            progress_callback(f"No cache, starting parallel MaxMind scan")
        maxmind_networks = self._scan_maxmind_for_country(country_code, progress_callback)
        if maxmind_networks:
            if progress_callback:
                progress_callback(f"Checking GitHub for validation")
            # Union the scan with GitHub's list: keep anything GitHub knows
            # about that the /24 sampling missed.
            github_networks = self._fetch_from_github(country_code)
            if github_networks:
                maxmind_set = set(maxmind_networks)
                github_set = set(github_networks)
                missing = github_set - maxmind_set
                if missing:
                    maxmind_networks.extend(missing)
            self._save_to_cache(country_code, maxmind_networks, 'maxmind+github')
            return maxmind_networks
        github_networks = self._fetch_from_github(country_code)
        if github_networks:
            self._save_to_cache(country_code, github_networks, 'github')
            return github_networks
        ipdeny_networks = self._fetch_from_ipdeny(country_code)
        if ipdeny_networks:
            self._save_to_cache(country_code, ipdeny_networks, 'ipdeny')
            return ipdeny_networks
        return []

    def _fetch_from_github(self, country_code: str) -> list:
        """Fetch the country CIDR list from the configured GitHub source.

        Returns one CIDR string per non-comment line, or [] on any failure.
        """
        url = config.IP_RANGE_SOURCES['github'].format(country_lower=country_code.lower())
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            networks = [line.strip() for line in response.text.split('\n') if line.strip() and not line.startswith('#')]
            return networks
        except Exception as e:
            # Best-effort source: any failure just means "no data here".
            return []

    def _fetch_from_ipdeny(self, country_code: str) -> list:
        """Fetch the country CIDR list from the configured ipdeny source.

        Returns one CIDR string per non-comment line, or [] on any failure.
        """
        url = config.IP_RANGE_SOURCES['ipdeny'].format(country_lower=country_code.lower())
        try:
            response = requests.get(url, timeout=10)
            response.raise_for_status()
            networks = [line.strip() for line in response.text.split('\n') if line.strip() and not line.startswith('#')]
            return networks
        except Exception as e:
            # Best-effort source: any failure just means "no data here".
            return []
class ConfigGenerator:
@staticmethod
def _aggregate_networks(networks: list) -> list:
try:
if not networks:
return []
unique_networks = list(set(networks))
ip_objects = []
for network in unique_networks:
try:
ip_objects.append(ipaddress.IPv4Network(network, strict=False))
except ValueError:
continue
if ip_objects:
collapsed = list(ipaddress.collapse_addresses(ip_objects))
return sorted([str(net) for net in collapsed])
return sorted(unique_networks)
except Exception as e:
print(f"[ERROR] Aggregation failed: {e}")
return sorted(list(set(networks)))
@staticmethod
def generate_nginx_geo(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
"""Generate Nginx Geo Module configuration with detailed metadata header"""
# Get metadata
countries = sorted(country_networks.keys())
redis_stats = None
if redis_ips:
redis_stats = {
'total': len(redis_ips),
'unique': len(redis_ips),
'deduped': 0
}
handler = GeoIPHandler()
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
# Aggregate networks
all_networks = []
for networks in country_networks.values():
all_networks.extend(networks)
if redis_ips:
all_networks.extend(redis_ips)
if aggregate:
all_networks = ConfigGenerator._aggregate_networks(all_networks)
else:
all_networks = sorted(list(set(all_networks)))
# Generate header
config = "# " + "="*77 + "\n"
config += "# Nginx Geo Module Configuration\n"
config += f"# Generated: {metadata['timestamp']}\n"
config += "# " + "="*77 + "\n"
config += "# \n"
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
config += f"# Total networks: {len(all_networks):,}\n"
config += "# \n"
config += "# Data sources:\n"
config += metadata['sources_formatted'] + "\n"
config += "# \n"
if metadata['redis']:
config += f"# {metadata['redis']['formatted']}\n"
config += "# \n"
config += "# Cache settings:\n"
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
config += f"# Database: {metadata['cache_db_path']}\n"
config += "# \n"
config += "# " + "="*77 + "\n"
config += "\n"
# Generate geo block
config += "geo $blocked_country {\n"
config += " default 0;\n"
config += " \n"
for network in all_networks:
config += f" {network} 1;\n"
config += "}\n"
return config
@staticmethod
def generate_nginx_map(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
"""Generate Nginx Map Module configuration with detailed metadata header"""
# Get metadata
countries = sorted(country_networks.keys())
redis_stats = None
if redis_ips:
redis_stats = {
'total': len(redis_ips),
'unique': len(redis_ips),
'deduped': 0
}
handler = GeoIPHandler()
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
# Process networks per country
all_networks = []
for networks in country_networks.values():
all_networks.extend(networks)
if redis_ips:
all_networks.extend(redis_ips)
if aggregate:
all_networks = ConfigGenerator._aggregate_networks(all_networks)
else:
all_networks = sorted(list(set(all_networks)))
# Generate header
config = "# " + "="*77 + "\n"
config += "# Nginx Map Module Configuration\n"
config += f"# Generated: {metadata['timestamp']}\n"
config += "# " + "="*77 + "\n"
config += "# \n"
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
config += f"# Total networks: {len(all_networks):,}\n"
config += "# Note: Using regex patterns for CIDR matching (map module doesn't support CIDR natively)\n"
config += "# \n"
config += "# Data sources:\n"
config += metadata['sources_formatted'] + "\n"
config += "# \n"
if metadata['redis']:
config += f"# {metadata['redis']['formatted']}\n"
config += "# \n"
config += "# Cache settings:\n"
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
config += f"# Database: {metadata['cache_db_path']}\n"
config += "# \n"
config += "# " + "="*77 + "\n"
config += "\n"
# Generate map block with regex conversion
config += "map $remote_addr $blocked_country {\n"
config += " default 0;\n"
config += " \n"
converted_count = 0
failed_count = 0
for network in all_networks:
regex = cidr_to_nginx_regex(network)
if regex:
config += f" {regex} 1;\n"
converted_count += 1
else:
# Fallback - zapisz z ostrzeżeniem
config += f" # ERROR: Failed to convert: {network}\n"
failed_count += 1
config += "}\n"
# Log conversion statistics
#print(f"[INFO] Generated nginx map: {converted_count} regex patterns", flush=True)
if failed_count > 0:
print(f"[WARNING] Failed to convert {failed_count} networks to regex - check config file", flush=True)
return config
@staticmethod
def generate_nginx_deny(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
"""Generate Nginx Deny Directives configuration with detailed metadata header"""
# Get metadata
countries = sorted(country_networks.keys())
redis_stats = None
if redis_ips:
redis_stats = {
'total': len(redis_ips),
'unique': len(redis_ips),
'deduped': 0
}
handler = GeoIPHandler()
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
# Aggregate networks
all_networks = []
for networks in country_networks.values():
all_networks.extend(networks)
if redis_ips:
all_networks.extend(redis_ips)
if aggregate:
all_networks = ConfigGenerator._aggregate_networks(all_networks)
else:
all_networks = sorted(list(set(all_networks)))
# Generate header
config = "# " + "="*77 + "\n"
config += "# Nginx Deny Directives Configuration\n"
config += f"# Generated: {metadata['timestamp']}\n"
config += "# " + "="*77 + "\n"
config += "# \n"
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
config += f"# Total networks: {len(all_networks):,}\n"
config += "# \n"
config += "# Data sources:\n"
config += metadata['sources_formatted'] + "\n"
config += "# \n"
if metadata['redis']:
config += f"# {metadata['redis']['formatted']}\n"
config += "# \n"
config += "# Cache settings:\n"
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
config += f"# Database: {metadata['cache_db_path']}\n"
config += "# \n"
config += "# " + "="*77 + "\n"
config += "\n"
# Generate deny directives
for network in all_networks:
config += f"deny {network};\n"
config += "allow all;\n"
return config
@staticmethod
def generate_apache_24(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
"""Generate Apache 2.4 configuration with detailed metadata header"""
# Get metadata
countries = sorted(country_networks.keys())
redis_stats = None
if redis_ips:
redis_stats = {
'total': len(redis_ips),
'unique': len(redis_ips),
'deduped': 0
}
handler = GeoIPHandler()
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
# Aggregate networks
all_networks = []
for networks in country_networks.values():
all_networks.extend(networks)
if redis_ips:
all_networks.extend(redis_ips)
if aggregate:
all_networks = ConfigGenerator._aggregate_networks(all_networks)
else:
all_networks = sorted(list(set(all_networks)))
# Generate header
config = "# " + "="*77 + "\n"
config += "# Apache 2.4 Configuration\n"
config += f"# Generated: {metadata['timestamp']}\n"
config += "# " + "="*77 + "\n"
config += "# \n"
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
config += f"# Total networks: {len(all_networks):,}\n"
config += "# \n"
config += "# Data sources:\n"
config += metadata['sources_formatted'] + "\n"
config += "# \n"
if metadata['redis']:
config += f"# {metadata['redis']['formatted']}\n"
config += "# \n"
config += "# Cache settings:\n"
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
config += f"# Database: {metadata['cache_db_path']}\n"
config += "# \n"
config += "# " + "="*77 + "\n"
config += "\n"
# Generate Apache 2.4 rules
config += "<RequireAll>\n"
config += " Require all granted\n"
for network in all_networks:
config += f" Require not ip {network}\n"
config += "</RequireAll>\n"
return config
@staticmethod
def generate_apache_22(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
"""Generate Apache 2.2 configuration with detailed metadata header"""
# Get metadata
countries = sorted(country_networks.keys())
redis_stats = None
if redis_ips:
redis_stats = {
'total': len(redis_ips),
'unique': len(redis_ips),
'deduped': 0
}
handler = GeoIPHandler()
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
# Aggregate networks
all_networks = []
for networks in country_networks.values():
all_networks.extend(networks)
if redis_ips:
all_networks.extend(redis_ips)
if aggregate:
all_networks = ConfigGenerator._aggregate_networks(all_networks)
else:
all_networks = sorted(list(set(all_networks)))
# Generate header
config = "# " + "="*77 + "\n"
config += "# Apache 2.2 Configuration\n"
config += f"# Generated: {metadata['timestamp']}\n"
config += "# " + "="*77 + "\n"
config += "# \n"
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
config += f"# Total networks: {len(all_networks):,}\n"
config += "# \n"
config += "# Data sources:\n"
config += metadata['sources_formatted'] + "\n"
config += "# \n"
if metadata['redis']:
config += f"# {metadata['redis']['formatted']}\n"
config += "# \n"
config += "# Cache settings:\n"
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
config += f"# Database: {metadata['cache_db_path']}\n"
config += "# \n"
config += "# " + "="*77 + "\n"
config += "\n"
# Generate Apache 2.2 rules
config += "Order Allow,Deny\n"
config += "Allow from all\n"
for network in all_networks:
config += f"Deny from {network}\n"
return config
@staticmethod
def generate_haproxy_acl(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
"""Generate HAProxy ACL configuration with detailed metadata header"""
# Get metadata
countries = sorted(country_networks.keys())
redis_stats = None
if redis_ips:
redis_stats = {
'total': len(redis_ips),
'unique': len(redis_ips),
'deduped': 0
}
handler = GeoIPHandler()
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
# Aggregate networks
all_networks = []
for networks in country_networks.values():
all_networks.extend(networks)
if redis_ips:
all_networks.extend(redis_ips)
if aggregate:
all_networks = ConfigGenerator._aggregate_networks(all_networks)
else:
all_networks = sorted(list(set(all_networks)))
# Generate header
config = "# " + "="*77 + "\n"
config += "# HAProxy ACL Configuration\n"
config += f"# Generated: {metadata['timestamp']}\n"
config += "# " + "="*77 + "\n"
config += "# \n"
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
config += f"# Total networks: {len(all_networks):,}\n"
config += "# \n"
config += "# Data sources:\n"
config += metadata['sources_formatted'] + "\n"
config += "# \n"
if metadata['redis']:
config += f"# {metadata['redis']['formatted']}\n"
config += "# \n"
config += "# Cache settings:\n"
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
config += f"# Database: {metadata['cache_db_path']}\n"
config += "# \n"
config += "# Usage in HAProxy:\n"
config += "# acl banned_ips src -f /path/to/this_file.acl\n"
config += "# http-request deny if banned_ips\n"
config += "# \n"
config += "# " + "="*77 + "\n"
config += "\n"
# Generate ACL rules
config += "frontend http-in\n"
config += " bind *:80\n"
config += " \n"
for network in all_networks:
config += f" acl blocked_ip src {network}\n"
config += """
http-request deny if blocked_ip
default_backend servers
"""
return config
@staticmethod
def generate_haproxy_map(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """
    Generate HAProxy MAP file (CIDR COUNTRY format).

    Args:
        country_networks: Mapping of country code -> list of CIDR strings.
        aggregate: When True, collapse overlapping/adjacent networks
            *per country*, so every entry keeps its correct country label.
        redis_ips: Optional set of extra networks, emitted with the
            pseudo-country label "REDIS".

    Returns:
        MAP file content as a single string.
    """
    countries = sorted(country_networks.keys())
    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so total == unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }
    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    def _prepare(nets):
        # Normalization shared by country entries and Redis entries.
        if aggregate:
            return ConfigGenerator._aggregate_networks(nets)
        return sorted(set(nets))

    # Normalize once per country and reuse the result for both the header
    # count and the body.  (Previously the header counted a *globally*
    # aggregated list while the body aggregated per country, so the
    # "Total networks" figure could disagree with the emitted entries —
    # and every list was aggregated twice.)
    prepared = {
        code: _prepare(nets)
        for code, nets in sorted(country_networks.items())
        if nets
    }
    prepared_redis = _prepare(list(redis_ips)) if redis_ips else []
    total = sum(len(nets) for nets in prepared.values()) + len(prepared_redis)

    bar = "# " + "=" * 77
    out = [
        bar,
        "# HAProxy MAP Configuration",
        f"# Generated: {metadata['timestamp']}",
        bar,
        "# ",
        f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)",
        f"# Total networks: {total:,}",
        "# ",
        "# Data sources:",
        metadata['sources_formatted'],
        "# ",
    ]
    if metadata['redis']:
        out.append(f"# {metadata['redis']['formatted']}")
        out.append("# ")
    out += [
        "# Cache settings:",
        f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)",
        f"# Database: {metadata['cache_db_path']}",
        "# ",
        "# Usage in HAProxy:",
        "# map_beg(/path/to/geo.map) -m ip $src var(txn.country)",
        "# ",
        bar,
        "",
    ]
    # MAP body: per-country entries first, then the optional Redis block.
    for code, nets in prepared.items():
        out.extend(f"{network} {code}" for network in nets)
    out.extend(f"{network} REDIS" for network in prepared_redis)
    out.append("")
    return "\n".join(out)
@staticmethod
def generate_haproxy_lua(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """
    Generate HAProxy Lua script with detailed metadata header.

    Args:
        country_networks: Mapping of country code -> list of CIDR strings.
        aggregate: When True, collapse overlapping/adjacent networks.
        redis_ips: Optional set of extra networks to include.

    Returns:
        Lua source that defines a `blocked_networks` table and registers
        an `is_blocked` fetch, as a single string.
    """
    countries = sorted(country_networks.keys())
    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so total == unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }
    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Flatten every network (all countries plus optional Redis entries).
    all_networks = []
    for networks in country_networks.values():
        all_networks.extend(networks)
    if redis_ips:
        all_networks.extend(redis_ips)
    if aggregate:
        all_networks = ConfigGenerator._aggregate_networks(all_networks)
    else:
        all_networks = sorted(set(all_networks))

    # Accumulate lines and join once: repeated `config += ...` is
    # quadratic for large network lists.
    bar = "-- " + "=" * 76
    parts = [
        bar,
        "-- HAProxy Lua Script",
        f"-- Generated: {metadata['timestamp']}",
        bar,
        "-- ",
        f"-- Countries: {metadata['countries_string']} ({metadata['country_count']} countries)",
        f"-- Total networks: {len(all_networks):,}",
        "-- ",
        "-- Data sources:",
    ]
    # sources_formatted lines already start with "# "; prefix only the Lua
    # comment marker.  (The old "-- # " prefix doubled the hash marker,
    # producing "-- # # [XX] ..." unlike every other generator.)
    parts.extend(f"-- {line}" for line in metadata['sources_formatted'].split('\n'))
    parts.append("-- ")
    if metadata['redis']:
        parts.append(f"-- {metadata['redis']['formatted']}")
        parts.append("-- ")
    parts += [
        "-- Cache settings:",
        f"-- Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)",
        f"-- Database: {metadata['cache_db_path']}",
        "-- ",
        bar,
        "",
        "local blocked_networks = {",
    ]
    parts.extend(f' "{network}",' for network in all_networks)
    parts.append("}")
    parts.append("")
    config = "\n".join(parts) + "\n"
    # NOTE(review): string.match() treats each entry as a Lua *pattern*,
    # not a CIDR block ('.' matches any character, the "/len" suffix is
    # matched literally), so the generated script does substring/pattern
    # matching rather than true subnet matching — kept as-is to preserve
    # the emitted output; confirm intended behavior with the deployers.
    config += """
function check_blocked(txn)
local src_ip = txn.f:src()
for _, network in ipairs(blocked_networks) do
if string.match(src_ip, network) then
return true
end
end
return false
end
core.register_fetches("is_blocked", check_blocked)
"""
    return config
@staticmethod
def generate_raw_cidr(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """
    Generate a plain CIDR list with a detailed metadata header.

    Args:
        country_networks: Mapping of country code -> list of CIDR strings.
        aggregate: When True, collapse overlapping/adjacent networks.
        redis_ips: Optional set of extra networks to include.

    Returns:
        Commented header followed by one CIDR per line, as a string.
    """
    country_codes = sorted(country_networks.keys())
    stats = None
    if redis_ips:
        # Redis entries come from a set: already unique, nothing deduped.
        stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }
    geo = GeoIPHandler()
    meta = generate_metadata(country_codes, country_networks, stats, geo)

    # Merge every country's networks with the optional Redis entries.
    combined = [net for nets in country_networks.values() for net in nets]
    if redis_ips:
        combined.extend(redis_ips)
    if aggregate:
        combined = ConfigGenerator._aggregate_networks(combined)
    else:
        combined = sorted(set(combined))

    rule = "# " + "=" * 77
    out = [
        rule,
        "# Raw CIDR List",
        f"# Generated: {meta['timestamp']}",
        rule,
        "# ",
        f"# Countries: {meta['countries_string']} ({meta['country_count']} countries)",
        f"# Total networks: {len(combined):,}",
        f"# Aggregated: {aggregate}",
        "# ",
        "# Data sources:",
        meta['sources_formatted'],
        "# ",
    ]
    if meta['redis']:
        out.append(f"# {meta['redis']['formatted']}")
        out.append("# ")
    out += [
        "# Cache settings:",
        f"# Max age: {meta['cache_max_age_hours']} hours ({meta['cache_max_age_days']:.1f} days)",
        f"# Database: {meta['cache_db_path']}",
        "# ",
        rule,
        "",
    ]
    # Body: one network per line.
    out.extend(combined)
    out.append("")
    return "\n".join(out)
@staticmethod
def generate_csv(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """
    Generate CSV format (country,network,source) with a metadata header.

    Args:
        country_networks: Mapping of country code -> list of CIDR strings.
        aggregate: When True, collapse overlapping/adjacent networks
            per country.
        redis_ips: Optional set of extra networks, emitted with country
            "REDIS" and source "redis".

    Returns:
        CSV content (commented header + data rows) as a single string.
    """
    countries = sorted(country_networks.keys())
    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so total == unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }
    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Totals before normalization.
    total_before = sum(len(nets) for nets in country_networks.values())
    if redis_ips:
        total_before += len(redis_ips)

    # Normalize each country's networks up front so both header counts can
    # be emitted directly.  (Previously the "after aggregation" count was
    # patched into the finished string with str.replace(), which is
    # fragile should the marker text ever occur elsewhere in the output.)
    prepared = []
    for country_code, networks in sorted(country_networks.items()):
        if aggregate:
            networks = ConfigGenerator._aggregate_networks(networks)
        else:
            networks = sorted(set(networks))
        prepared.append((country_code, networks))
    redis_rows = []
    if redis_ips:
        redis_rows = (ConfigGenerator._aggregate_networks(list(redis_ips))
                      if aggregate else sorted(redis_ips))
    total_after = sum(len(nets) for _, nets in prepared) + len(redis_rows)

    bar = "# " + "=" * 77
    lines = [
        bar,
        "# CSV Export",
        f"# Generated: {metadata['timestamp']}",
        bar,
        "# ",
        f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)",
        f"# Aggregated: {aggregate}",
        f"# Networks before aggregation: {total_before:,}",
        f"# Networks after aggregation: {total_after:,}",
        "# ",
        "# Data sources:",
        metadata['sources_formatted'],
        "# ",
    ]
    if metadata['redis']:
        lines.append(f"# {metadata['redis']['formatted']}")
        lines.append("# ")
    lines += [
        "# Cache settings:",
        f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)",
        f"# Database: {metadata['cache_db_path']}",
        "# ",
        bar,
        "",
        "country,network,source",
    ]
    # Data rows: per-country cache entries, then optional Redis entries.
    for country_code, networks in prepared:
        lines.extend(f"{country_code},{network},cache" for network in networks)
    lines.extend(f"REDIS,{network},redis" for network in redis_rows)
    lines.append("")
    return "\n".join(lines)