1482 lines
55 KiB
Python
1482 lines
55 KiB
Python
"""
|
|
GeoIP Handler - Database management and IP network fetching
|
|
"""
|
|
|
|
import geoip2.database
|
|
import requests
|
|
import json
|
|
import ipaddress
|
|
import sqlite3
|
|
from pathlib import Path
|
|
from datetime import datetime, timedelta
|
|
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
import threading
|
|
import config
|
|
import ipaddress
|
|
import math
|
|
from multiprocessing import cpu_count
|
|
|
|
def generate_metadata(countries: list, country_data: dict, redis_stats: dict = None, handler: 'GeoIPHandler' = None) -> dict:
    """
    Generate metadata about the configuration for headers.

    Args:
        countries: List of country codes
        country_data: Dict mapping country codes to their networks
        redis_stats: Optional dict with Redis statistics {'total': int, 'unique': int, 'deduped': int}
        handler: Optional GeoIPHandler instance (will create new if None)

    Returns:
        Dict with metadata fields (timestamps, per-country source info,
        Redis summary, cache settings)
    """
    if handler is None:
        handler = GeoIPHandler()

    now = datetime.now()
    # astimezone() attaches the local timezone so %Z renders a real zone
    # name (it expands to an empty string on a naive datetime).
    timestamp = now.astimezone().strftime('%Y-%m-%d %H:%M:%S %Z')

    total_networks = sum(len(networks) for networks in country_data.values())

    # Build data sources info per country
    sources_info = []

    conn = sqlite3.connect(str(handler.cache_db))
    try:
        cursor = conn.cursor()

        for country in countries:
            count = len(country_data.get(country, []))

            # Cache metadata for this country (absent on a fresh scan).
            cursor.execute(
                'SELECT last_scan, source FROM cache_metadata WHERE country_code = ?',
                (country.upper(),)
            )
            row = cursor.fetchone()

            if row:
                last_scan_str, source = row
                try:
                    last_scan = datetime.fromisoformat(last_scan_str)
                    age_hours = (now - last_scan).total_seconds() / 3600
                    age_days = age_hours / 24

                    sources_info.append({
                        'country': country,
                        'count': count,
                        'source_type': 'cache',
                        'source_detail': source,
                        'last_scan': last_scan_str[:19],
                        'age_hours': age_hours,
                        'age_days': age_days,
                        'formatted': f"# [{country}] {count:,} networks - SQLite cache (source: {source}, scanned: {last_scan_str[:19]}, age: {age_days:.1f} days)"
                    })
                except (ValueError, TypeError):
                    # Timestamp could not be parsed: report the entry
                    # without age information.  Guard the slice so a NULL
                    # last_scan no longer crashes the formatted string.
                    scanned = last_scan_str[:19] if last_scan_str else 'unknown'
                    sources_info.append({
                        'country': country,
                        'count': count,
                        'source_type': 'cache',
                        'source_detail': source,
                        'last_scan': scanned,
                        'age_hours': None,
                        'age_days': None,
                        'formatted': f"# [{country}] {count:,} networks - SQLite cache (source: {source}, scanned: {scanned})"
                    })
            else:
                sources_info.append({
                    'country': country,
                    'count': count,
                    'source_type': 'fresh',
                    'source_detail': 'live_scan',
                    'last_scan': None,
                    'age_hours': 0,
                    'age_days': 0,
                    'formatted': f"# [{country}] {count:,} networks - Fresh scan (no cache)"
                })
    finally:
        # Always close the connection (the original leaked it on errors).
        conn.close()

    # Redis statistics
    redis_info = {}
    if redis_stats:
        redis_info = {
            'total': redis_stats.get('total', 0),
            'unique': redis_stats.get('unique', 0),
            'deduped': redis_stats.get('deduped', 0),
            'formatted': f"Redis bad IPs: {redis_stats.get('total', 0)} entries ({redis_stats.get('unique', 0)} unique after deduplication)"
        }

    return {
        'timestamp': timestamp,
        'timestamp_iso': now.isoformat(),
        'countries': countries,
        'countries_string': ', '.join(countries),
        'country_count': len(countries),
        'total_networks': total_networks,
        'sources': sources_info,
        'sources_formatted': '\n'.join([s['formatted'] for s in sources_info]),
        'redis': redis_info,
        'cache_max_age_hours': getattr(config, 'CACHE_MAX_AGE_HOURS', 168),
        'cache_max_age_days': getattr(config, 'CACHE_MAX_AGE_HOURS', 168) / 24,
        'cache_db_path': str(handler.cache_db)
    }
|
|
|
|
def _generate_range_regex(start: int, end: int) -> str:
|
|
"""Generate optimal regex for numeric range 0-255"""
|
|
|
|
if start == 0 and end == 255:
|
|
return "(25[0-5]|2[0-4][0-9]|1[0-9][0-9]|[1-9][0-9]|[0-9])"
|
|
|
|
if end - start < 10:
|
|
return "(" + "|".join(str(i) for i in range(start, end + 1)) + ")"
|
|
|
|
parts = []
|
|
current = start
|
|
while current <= end:
|
|
first_digit = current // 10
|
|
last_digit = current % 10
|
|
max_in_decade = min(end, (first_digit + 1) * 10 - 1)
|
|
|
|
if last_digit == 0 and max_in_decade == (first_digit + 1) * 10 - 1:
|
|
if first_digit == 0:
|
|
parts.append("[0-9]")
|
|
else:
|
|
parts.append(f"{first_digit}[0-9]")
|
|
current = max_in_decade + 1
|
|
elif current == max_in_decade:
|
|
parts.append(str(current))
|
|
current += 1
|
|
else:
|
|
if first_digit == 0:
|
|
parts.append(f"[{last_digit}-{max_in_decade % 10}]")
|
|
else:
|
|
parts.append(f"{first_digit}[{last_digit}-{max_in_decade % 10}]")
|
|
current = max_in_decade + 1
|
|
|
|
return "(" + "|".join(parts) + ")"
|
|
|
|
|
|
def cidr_to_nginx_regex(cidr: str) -> str:
    """
    Convert an IPv4 CIDR into a regex usable in an nginx `map` block.

    Returns a '~^...' pattern string, or None when the CIDR cannot be
    parsed.  Prefixes up to /24 emit a pattern ending in '\\.' (anchored
    by the following octet separator); /25-/32 anchor the full address
    with '$'.
    """
    try:
        network = ipaddress.IPv4Network(cidr, strict=False)
        prefix = network.prefixlen
        octets = str(network.network_address).split('.')

        # Single host: match the exact address.
        if prefix == 32:
            return f"~^{octets[0]}\\.{octets[1]}\\.{octets[2]}\\.{octets[3]}$"

        # /25-/31: range over the fourth octet.  Previously these fell
        # into the /24 branch and matched the entire /24, over-blocking
        # addresses outside the network.
        if prefix >= 25:
            start_fourth = int(octets[3])
            end_fourth = start_fourth + 2 ** (32 - prefix) - 1
            range_regex = _generate_range_regex(start_fourth, end_fourth)
            return f"~^{octets[0]}\\.{octets[1]}\\.{octets[2]}\\.{range_regex}$"

        if prefix == 24:
            return f"~^{octets[0]}\\.{octets[1]}\\.{octets[2]}\\."

        if prefix >= 16:
            # Range over the third octet.
            start_third = int(octets[2])
            num_subnets = 2 ** (24 - prefix)
            end_third = start_third + num_subnets - 1

            if start_third == end_third:
                return f"~^{octets[0]}\\.{octets[1]}\\.{start_third}\\."
            elif end_third - start_third == 1:
                return f"~^{octets[0]}\\.{octets[1]}\\.({start_third}|{end_third})\\."
            else:
                range_regex = _generate_range_regex(start_third, end_third)
                return f"~^{octets[0]}\\.{octets[1]}\\.{range_regex}\\."

        if prefix >= 8:
            # Range over the second octet.
            start_second = int(octets[1])
            num_subnets = 2 ** (16 - prefix)
            end_second = start_second + num_subnets - 1

            if start_second == end_second:
                return f"~^{octets[0]}\\.{start_second}\\."
            else:
                range_regex = _generate_range_regex(start_second, end_second)
                return f"~^{octets[0]}\\.{range_regex}\\."

        # Shorter than /8: range over the first octet.
        start_first = int(octets[0])
        num_subnets = 2 ** (8 - prefix)
        end_first = start_first + num_subnets - 1
        range_regex = _generate_range_regex(start_first, end_first)
        return f"~^{range_regex}\\."

    except Exception as e:
        print(f"[ERROR] CIDR conversion failed for {cidr}: {e}", flush=True)
        return None
|
|
|
|
class GeoIPHandler:
|
|
def __init__(self):
|
|
self.mmdb_file = config.GEOIP_DB_DIR / 'GeoLite2-Country.mmdb'
|
|
self.config_file = config.GEOIP_DB_DIR / 'config.json'
|
|
self.cache_db = config.GEOIP_DB_DIR / 'networks_cache.db'
|
|
config.GEOIP_DB_DIR.mkdir(parents=True, exist_ok=True)
|
|
self._init_cache_db()
|
|
|
|
def _init_cache_db(self):
|
|
conn = sqlite3.connect(str(self.cache_db), timeout=30.0)
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute('PRAGMA journal_mode=WAL;')
|
|
cursor.execute('PRAGMA synchronous=NORMAL;')
|
|
cursor.execute('PRAGMA cache_size=10000;')
|
|
cursor.execute('PRAGMA temp_store=MEMORY;')
|
|
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS networks_cache (
|
|
country_code TEXT NOT NULL,
|
|
network TEXT NOT NULL,
|
|
source TEXT NOT NULL,
|
|
created_at TEXT NOT NULL,
|
|
PRIMARY KEY (country_code, network)
|
|
)
|
|
''')
|
|
cursor.execute('''
|
|
CREATE TABLE IF NOT EXISTS cache_metadata (
|
|
country_code TEXT PRIMARY KEY,
|
|
last_scan TEXT NOT NULL,
|
|
network_count INTEGER NOT NULL,
|
|
source TEXT DEFAULT 'unknown'
|
|
)
|
|
''')
|
|
cursor.execute('''
|
|
CREATE INDEX IF NOT EXISTS idx_networks_country
|
|
ON networks_cache(country_code)
|
|
''')
|
|
|
|
conn.commit()
|
|
conn.close()
|
|
|
|
def _get_cached_networks(self, country_code: str) -> list:
|
|
"""Get networks from cache with chunked reading for large datasets"""
|
|
conn = sqlite3.connect(str(self.cache_db), timeout=600.0)
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute(
|
|
'SELECT last_scan, network_count FROM cache_metadata WHERE country_code = ?',
|
|
(country_code.upper(),)
|
|
)
|
|
row = cursor.fetchone()
|
|
|
|
if row:
|
|
last_scan_str, count = row
|
|
last_scan = datetime.fromisoformat(last_scan_str)
|
|
age_hours = (datetime.now() - last_scan).total_seconds() / 3600
|
|
|
|
if age_hours < config.CACHE_MAX_AGE_HOURS:
|
|
# Chunked reading for large datasets
|
|
chunk_size = 100000
|
|
all_networks = []
|
|
offset = 0
|
|
|
|
while offset < count:
|
|
cursor.execute(
|
|
'SELECT network FROM networks_cache WHERE country_code = ? LIMIT ? OFFSET ?',
|
|
(country_code.upper(), chunk_size, offset)
|
|
)
|
|
chunk = [row[0] for row in cursor.fetchall()]
|
|
|
|
if not chunk:
|
|
break
|
|
|
|
all_networks.extend(chunk)
|
|
offset += chunk_size
|
|
|
|
conn.close()
|
|
return all_networks
|
|
|
|
conn.close()
|
|
return None
|
|
|
|
    def _save_to_cache(self, country_code: str, networks: list, source: str):
        """
        Replace the cached network list for a country.

        Deletes any existing rows for the country, bulk-inserts the new
        networks in chunks, and rewrites the cache_metadata row, all inside
        one DEFERRED transaction.  Retries up to three times with a growing
        back-off when the database is locked by another writer.

        Args:
            country_code: ISO country code (normalized to upper case).
            networks: CIDR strings to store.
            source: Label recording where the data came from.

        Returns:
            True on success, False on failure or empty input.
        """
        if not networks:
            print(f"[CACHE] Skipping {country_code} - no networks to save", flush=True)
            return False

        max_retries = 3
        country_code = country_code.upper()
        chunk_size = 50000  # keeps each executemany batch at a modest size

        for attempt in range(max_retries):
            conn = None
            try:
                conn = sqlite3.connect(
                    str(self.cache_db),
                    timeout=300.0,
                    isolation_level='DEFERRED'
                )
                cursor = conn.cursor()

                # Full replace: drop the old rows before inserting the new set.
                cursor.execute('DELETE FROM networks_cache WHERE country_code = ?', (country_code,))
                cursor.execute('DELETE FROM cache_metadata WHERE country_code = ?', (country_code,))

                timestamp = datetime.now().isoformat()

                total_inserted = 0
                for i in range(0, len(networks), chunk_size):
                    chunk = networks[i:i+chunk_size]
                    cursor.executemany(
                        'INSERT INTO networks_cache (country_code, network, source, created_at) VALUES (?, ?, ?, ?)',
                        [(country_code, network, source, timestamp) for network in chunk]
                    )
                    total_inserted += len(chunk)

                    # Progress output only for multi-chunk (large) saves.
                    if len(networks) > chunk_size:
                        print(f"[CACHE] {country_code}: Inserted {total_inserted}/{len(networks)} networks...", flush=True)

                cursor.execute(
                    'INSERT INTO cache_metadata (country_code, last_scan, network_count, source) VALUES (?, ?, ?, ?)',
                    (country_code, timestamp, len(networks), source)
                )

                conn.commit()
                print(f"[CACHE] ✓ Saved {country_code}: {len(networks)} networks from {source}", flush=True)
                return True

            except sqlite3.OperationalError as e:
                # Lock/busy contention is retried; other SQLite errors are fatal.
                if 'locked' in str(e).lower() or 'busy' in str(e).lower():
                    print(f"[CACHE] Database locked for {country_code}, attempt {attempt+1}/{max_retries}", flush=True)
                    if attempt < max_retries - 1:
                        import time
                        time.sleep(10 * (attempt + 1))  # linear back-off: 10s, 20s
                    else:
                        print(f"[ERROR] Failed to save {country_code} after {max_retries} attempts", flush=True)
                        return False
                else:
                    print(f"[ERROR] SQLite error for {country_code}: {e}", flush=True)
                    import traceback
                    traceback.print_exc()
                    return False

            except Exception as e:
                print(f"[ERROR] Failed to save cache for {country_code}: {e}", flush=True)
                import traceback
                traceback.print_exc()
                return False

            finally:
                # Best-effort close on every path (success, retry, failure).
                if conn:
                    try:
                        conn.close()
                    except:
                        pass

        return False
|
|
|
|
    def _update_cache_incremental(self, country_code: str, new_networks: list, source: str):
        """
        Diff-update the cached networks for a country.

        Compares the stored set against new_networks and applies only the
        delta (DELETEs for vanished rows, INSERTs for new ones), then
        rewrites the cache_metadata row.  Cheaper than _save_to_cache when
        most rows are unchanged.  Retries on a locked database, with the
        same back-off policy as _save_to_cache.

        Returns:
            True on success, False on failure or empty input.
        """
        if not new_networks:
            print(f"[CACHE] No networks to update for {country_code}", flush=True)
            return False

        max_retries = 3
        country_code = country_code.upper()
        chunk_size = 50000  # batch size for executemany

        for attempt in range(max_retries):
            conn = None
            try:
                conn = sqlite3.connect(
                    str(self.cache_db),
                    timeout=300.0,
                    isolation_level='DEFERRED'
                )
                cursor = conn.cursor()

                # Load the currently cached set to compute the delta.
                cursor.execute(
                    'SELECT network FROM networks_cache WHERE country_code = ?',
                    (country_code,)
                )
                old_networks = set(row[0] for row in cursor.fetchall())
                new_networks_set = set(new_networks)

                to_add = new_networks_set - old_networks
                to_remove = old_networks - new_networks_set

                timestamp = datetime.now().isoformat()

                if to_remove:
                    to_remove_list = list(to_remove)
                    for i in range(0, len(to_remove_list), chunk_size):
                        chunk = to_remove_list[i:i+chunk_size]
                        cursor.executemany(
                            'DELETE FROM networks_cache WHERE country_code = ? AND network = ?',
                            [(country_code, net) for net in chunk]
                        )
                    print(f"[CACHE] Removed {len(to_remove)} old networks from {country_code}", flush=True)

                if to_add:
                    to_add_list = list(to_add)
                    total_added = 0
                    for i in range(0, len(to_add_list), chunk_size):
                        chunk = to_add_list[i:i+chunk_size]
                        cursor.executemany(
                            'INSERT INTO networks_cache (country_code, network, source, created_at) VALUES (?, ?, ?, ?)',
                            [(country_code, network, source, timestamp) for network in chunk]
                        )
                        total_added += len(chunk)

                        # Progress output only when several chunks are involved.
                        if len(to_add_list) > chunk_size:
                            print(f"[CACHE] {country_code}: Added {total_added}/{len(to_add_list)} new networks...", flush=True)

                    print(f"[CACHE] Added {len(to_add)} new networks to {country_code}", flush=True)

                # The metadata row is replaced wholesale (delete + insert).
                cursor.execute('DELETE FROM cache_metadata WHERE country_code = ?', (country_code,))
                cursor.execute(
                    'INSERT INTO cache_metadata (country_code, last_scan, network_count, source) VALUES (?, ?, ?, ?)',
                    (country_code, timestamp, len(new_networks), source)
                )

                conn.commit()

                unchanged = len(old_networks & new_networks_set)
                print(f"[CACHE] ✓ Updated {country_code}: +{len(to_add)} new, -{len(to_remove)} removed, ={unchanged} unchanged (total: {len(new_networks)})", flush=True)
                return True

            except sqlite3.OperationalError as e:
                # Lock/busy contention is retried; other SQLite errors are fatal.
                if 'locked' in str(e).lower() or 'busy' in str(e).lower():
                    print(f"[CACHE] Database locked for {country_code}, attempt {attempt+1}/{max_retries}", flush=True)
                    if attempt < max_retries - 1:
                        import time
                        time.sleep(10 * (attempt + 1))  # linear back-off: 10s, 20s
                    else:
                        print(f"[ERROR] Failed to update {country_code} after {max_retries} attempts", flush=True)
                        return False
                else:
                    print(f"[ERROR] SQLite error for {country_code}: {e}", flush=True)
                    return False

            except Exception as e:
                print(f"[ERROR] Failed to update cache for {country_code}: {e}", flush=True)
                import traceback
                traceback.print_exc()
                return False

            finally:
                # Best-effort close on every path (success, retry, failure).
                if conn:
                    try:
                        conn.close()
                    except:
                        pass

        return False
|
|
|
|
def get_countries_needing_scan(self, max_age_hours: int = 168) -> tuple:
|
|
import sys
|
|
sys.path.insert(0, '/opt/geoip_block_generator')
|
|
|
|
all_countries = [c['code'] for c in config.COMMON_COUNTRIES]
|
|
|
|
try:
|
|
conn = sqlite3.connect(str(self.cache_db), timeout=30.0)
|
|
cursor = conn.cursor()
|
|
|
|
cursor.execute('SELECT country_code, last_scan FROM cache_metadata')
|
|
cached_data = {row[0]: row[1] for row in cursor.fetchall()}
|
|
conn.close()
|
|
|
|
missing = []
|
|
stale = []
|
|
cutoff_time = datetime.now() - timedelta(hours=max_age_hours)
|
|
|
|
for country in all_countries:
|
|
if country not in cached_data:
|
|
missing.append(country)
|
|
else:
|
|
try:
|
|
last_scan = datetime.fromisoformat(cached_data[country])
|
|
if last_scan < cutoff_time:
|
|
stale.append(country)
|
|
except:
|
|
stale.append(country)
|
|
|
|
return missing, stale
|
|
|
|
except Exception as e:
|
|
print(f"[ERROR] Failed to check cache status: {e}", flush=True)
|
|
return all_countries, []
|
|
|
|
def load_config(self) -> dict:
|
|
if self.config_file.exists():
|
|
try:
|
|
with open(self.config_file, 'r') as f:
|
|
return json.load(f)
|
|
except:
|
|
pass
|
|
return {}
|
|
|
|
def save_config(self, data: dict):
|
|
with open(self.config_file, 'w') as f:
|
|
json.dump(data, f, indent=2)
|
|
|
|
def needs_update(self) -> bool:
|
|
if not self.mmdb_file.exists():
|
|
return True
|
|
|
|
cfg = self.load_config()
|
|
last_update = cfg.get('last_update')
|
|
|
|
if not last_update:
|
|
return True
|
|
|
|
try:
|
|
last_update_date = datetime.fromisoformat(last_update)
|
|
days_old = (datetime.now() - last_update_date).days
|
|
return days_old >= config.MAXMIND_UPDATE_INTERVAL_DAYS
|
|
except:
|
|
return True
|
|
|
|
def download_database(self) -> dict:
|
|
urls = [config.MAXMIND_PRIMARY_URL, config.MAXMIND_FALLBACK_URL]
|
|
|
|
for url in urls:
|
|
try:
|
|
print(f"Downloading database from {url}")
|
|
response = requests.get(url, timeout=60, stream=True)
|
|
response.raise_for_status()
|
|
|
|
with open(self.mmdb_file, 'wb') as f:
|
|
for chunk in response.iter_content(chunk_size=8192):
|
|
f.write(chunk)
|
|
|
|
file_size = self.mmdb_file.stat().st_size
|
|
|
|
self.save_config({
|
|
'last_update': datetime.now().isoformat(),
|
|
'url': url,
|
|
'file_size': file_size
|
|
})
|
|
|
|
print(f"Database downloaded successfully ({file_size} bytes)")
|
|
return {'success': True, 'url': url, 'size': file_size}
|
|
|
|
except Exception as e:
|
|
print(f"Failed to download from {url}: {e}")
|
|
continue
|
|
|
|
return {'success': False, 'error': 'All download sources failed'}
|
|
|
|
def check_and_update(self):
|
|
if self.needs_update():
|
|
print("Database update needed, downloading...")
|
|
self.download_database()
|
|
|
|
def _get_scan_ranges(self) -> list:
|
|
scan_ranges = []
|
|
|
|
for first_octet in range(1, 224):
|
|
if first_octet in [10, 127, 169, 172, 192]:
|
|
continue
|
|
|
|
for second_octet in range(0, 256):
|
|
scan_ranges.append(f"{first_octet}.{second_octet}.0.0/16")
|
|
|
|
return scan_ranges
|
|
|
|
    def _scan_maxmind_for_country(self, country_code: str, progress_callback=None, workers=None) -> list:
        """
        Sample the local MaxMind database to find /24 networks in a country.

        Walks every /16 from _get_scan_ranges(), probes one IP per
        contained /24 against the .mmdb reader, and collects subnets whose
        ISO country code matches.  Work is spread over a thread pool; each
        worker thread lazily creates its own geoip2 Reader (thread-local).

        Args:
            country_code: ISO code to look for (upper-cased internally).
            progress_callback: Optional callable receiving progress strings.
            workers: Thread count; defaults to min(32, max(4, 2*cpu_count())).

        Returns:
            List of /24 CIDR strings; [] when the database is missing or
            the scan fails.
        """
        if not self.mmdb_file.exists():
            return []

        country_code = country_code.upper()

        scan_ranges = self._get_scan_ranges()
        total_ranges = len(scan_ranges)

        # workers default
        if workers is None or int(workers) <= 0:
            workers = min(32, max(4, cpu_count() * 2))
        else:
            workers = int(workers)

        # Chunk sizing: aim for ~tasks_per_worker tasks per thread, with the
        # per-task range count clamped to [chunk_min, chunk_max].
        tasks_per_worker = getattr(config, "MAXMIND_CHUNK_TASKS_PER_WORKER", 12)
        chunk_min = getattr(config, "MAXMIND_CHUNK_MIN", 50)
        chunk_max = getattr(config, "MAXMIND_CHUNK_MAX", 2000)

        target_tasks = max(workers * int(tasks_per_worker), workers)
        chunk = int(math.ceil(total_ranges / float(target_tasks)))
        CHUNK = max(int(chunk_min), min(int(chunk_max), chunk))

        if progress_callback:
            progress_callback(f"Starting parallel MaxMind scan with {workers} workers...")
            progress_callback(f"Scanning {total_ranges} IP ranges...")
            progress_callback(f"Chunking: {CHUNK} ranges/task (~{int(math.ceil(total_ranges/float(CHUNK)))} tasks)")

        # Shared result set and counters, guarded by their own locks.
        found_networks = set()
        found_networks_lock = threading.Lock()

        completed = 0
        completed_lock = threading.Lock()

        # One geoip2 Reader per thread, created lazily on first use.
        tls = threading.local()

        def get_reader():
            # Return this thread's Reader, creating it on first call.
            r = getattr(tls, "reader", None)
            if r is None:
                tls.reader = geoip2.database.Reader(str(self.mmdb_file))
            return tls.reader

        def scan_one_range(reader, network_str: str):
            # Probe the first usable IP of every /24 inside this /16;
            # all lookup errors are ignored (unmapped space is expected).
            local = set()
            try:
                network = ipaddress.IPv4Network(network_str, strict=False)
                for subnet in network.subnets(new_prefix=24):
                    sample_ip = str(subnet.network_address + 1)
                    try:
                        resp = reader.country(sample_ip)
                        if resp.country.iso_code == country_code:
                            local.add(subnet)  # fewer allocations than str() inside the loop
                    except Exception:
                        pass
            except Exception:
                pass
            return local

        def scan_chunk(ranges):
            # Worker task: scan a batch of /16 ranges and return the hits.
            nonlocal completed
            reader = get_reader()
            local_chunk = set()

            for r in ranges:
                local_chunk.update(scan_one_range(reader, r))

            with completed_lock:
                completed += 1
                c = completed

            # report progress periodically (diagnostics only, does not affect the result)
            if progress_callback and (c % 500 == 0 or c == total_ranges):
                with found_networks_lock:
                    found_cnt = len(found_networks)
                pct = (c / float(total_ranges)) * 100.0
                progress_callback(
                    f"Scanning: {c}/{total_ranges} ranges ({pct:.1f}%), found {found_cnt} networks"
                )

            return local_chunk

        try:
            chunks = [scan_ranges[i:i + CHUNK] for i in range(0, total_ranges, CHUNK)]

            with ThreadPoolExecutor(max_workers=workers) as executor:
                futures = [executor.submit(scan_chunk, ch) for ch in chunks]
                for future in as_completed(futures):
                    local_nets = future.result()
                    if local_nets:
                        with found_networks_lock:
                            found_networks.update(local_nets)

            # convert to strings at the end (same result as before)
            result = [str(n) for n in found_networks]

            if progress_callback:
                progress_callback(f"MaxMind scan complete: {len(result)} networks")

            return result

        except Exception as e:
            print(f"[ERROR] MaxMind scan failed for {country_code}: {e}", flush=True)
            import traceback
            traceback.print_exc()
            return []
|
|
|
|
def fetch_country_networks(self, country_code: str, progress_callback=None) -> list:
|
|
country_code = country_code.upper()
|
|
|
|
cached = self._get_cached_networks(country_code)
|
|
if cached is not None:
|
|
if progress_callback:
|
|
progress_callback(f"Using cached data")
|
|
return cached
|
|
|
|
if progress_callback:
|
|
progress_callback(f"No cache, starting parallel MaxMind scan")
|
|
|
|
maxmind_networks = self._scan_maxmind_for_country(country_code, progress_callback)
|
|
|
|
if maxmind_networks:
|
|
if progress_callback:
|
|
progress_callback(f"Checking GitHub for validation")
|
|
|
|
github_networks = self._fetch_from_github(country_code)
|
|
if github_networks:
|
|
maxmind_set = set(maxmind_networks)
|
|
github_set = set(github_networks)
|
|
missing = github_set - maxmind_set
|
|
|
|
if missing:
|
|
maxmind_networks.extend(missing)
|
|
|
|
self._save_to_cache(country_code, maxmind_networks, 'maxmind+github')
|
|
return maxmind_networks
|
|
|
|
github_networks = self._fetch_from_github(country_code)
|
|
if github_networks:
|
|
self._save_to_cache(country_code, github_networks, 'github')
|
|
return github_networks
|
|
|
|
ipdeny_networks = self._fetch_from_ipdeny(country_code)
|
|
if ipdeny_networks:
|
|
self._save_to_cache(country_code, ipdeny_networks, 'ipdeny')
|
|
return ipdeny_networks
|
|
|
|
return []
|
|
|
|
def _fetch_from_github(self, country_code: str) -> list:
|
|
url = config.IP_RANGE_SOURCES['github'].format(country_lower=country_code.lower())
|
|
try:
|
|
response = requests.get(url, timeout=10)
|
|
response.raise_for_status()
|
|
networks = [line.strip() for line in response.text.split('\n') if line.strip() and not line.startswith('#')]
|
|
return networks
|
|
except Exception as e:
|
|
return []
|
|
|
|
def _fetch_from_ipdeny(self, country_code: str) -> list:
|
|
url = config.IP_RANGE_SOURCES['ipdeny'].format(country_lower=country_code.lower())
|
|
try:
|
|
response = requests.get(url, timeout=10)
|
|
response.raise_for_status()
|
|
networks = [line.strip() for line in response.text.split('\n') if line.strip() and not line.startswith('#')]
|
|
return networks
|
|
except Exception as e:
|
|
return []
|
|
|
|
|
|
class ConfigGenerator:
|
|
|
|
@staticmethod
|
|
def _aggregate_networks(networks: list) -> list:
|
|
try:
|
|
if not networks:
|
|
return []
|
|
|
|
unique_networks = list(set(networks))
|
|
|
|
ip_objects = []
|
|
for network in unique_networks:
|
|
try:
|
|
ip_objects.append(ipaddress.IPv4Network(network, strict=False))
|
|
except ValueError:
|
|
continue
|
|
|
|
if ip_objects:
|
|
collapsed = list(ipaddress.collapse_addresses(ip_objects))
|
|
return sorted([str(net) for net in collapsed])
|
|
|
|
return sorted(unique_networks)
|
|
except Exception as e:
|
|
print(f"[ERROR] Aggregation failed: {e}")
|
|
return sorted(list(set(networks)))
|
|
|
|
@staticmethod
|
|
def generate_nginx_geo(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
|
|
"""Generate Nginx Geo Module configuration with detailed metadata header"""
|
|
|
|
# Get metadata
|
|
countries = sorted(country_networks.keys())
|
|
redis_stats = None
|
|
if redis_ips:
|
|
redis_stats = {
|
|
'total': len(redis_ips),
|
|
'unique': len(redis_ips),
|
|
'deduped': 0
|
|
}
|
|
|
|
handler = GeoIPHandler()
|
|
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
|
|
|
|
# Aggregate networks
|
|
all_networks = []
|
|
for networks in country_networks.values():
|
|
all_networks.extend(networks)
|
|
|
|
if redis_ips:
|
|
all_networks.extend(redis_ips)
|
|
|
|
if aggregate:
|
|
all_networks = ConfigGenerator._aggregate_networks(all_networks)
|
|
else:
|
|
all_networks = sorted(list(set(all_networks)))
|
|
|
|
# Generate header
|
|
config = "# " + "="*77 + "\n"
|
|
config += "# Nginx Geo Module Configuration\n"
|
|
config += f"# Generated: {metadata['timestamp']}\n"
|
|
config += "# " + "="*77 + "\n"
|
|
config += "# \n"
|
|
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
|
|
config += f"# Total networks: {len(all_networks):,}\n"
|
|
config += "# \n"
|
|
config += "# Data sources:\n"
|
|
config += metadata['sources_formatted'] + "\n"
|
|
config += "# \n"
|
|
|
|
if metadata['redis']:
|
|
config += f"# {metadata['redis']['formatted']}\n"
|
|
config += "# \n"
|
|
|
|
config += "# Cache settings:\n"
|
|
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
|
|
config += f"# Database: {metadata['cache_db_path']}\n"
|
|
config += "# \n"
|
|
config += "# " + "="*77 + "\n"
|
|
config += "\n"
|
|
|
|
# Generate geo block
|
|
config += "geo $blocked_country {\n"
|
|
config += " default 0;\n"
|
|
config += " \n"
|
|
|
|
for network in all_networks:
|
|
config += f" {network} 1;\n"
|
|
|
|
config += "}\n"
|
|
return config
|
|
|
|
    @staticmethod
    def generate_nginx_map(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
        """
        Generate Nginx Map Module configuration with detailed metadata header.

        Because the map module matches strings rather than CIDRs, every
        network is converted to a regex via cidr_to_nginx_regex();
        networks that fail conversion are emitted as comment lines so the
        generated file stays syntactically valid.
        """

        # Get metadata
        countries = sorted(country_networks.keys())
        # redis_ips is a set, so total == unique and nothing was deduped here.
        redis_stats = None
        if redis_ips:
            redis_stats = {
                'total': len(redis_ips),
                'unique': len(redis_ips),
                'deduped': 0
            }

        handler = GeoIPHandler()
        metadata = generate_metadata(countries, country_networks, redis_stats, handler)

        # Process networks per country
        all_networks = []
        for networks in country_networks.values():
            all_networks.extend(networks)

        if redis_ips:
            all_networks.extend(redis_ips)

        if aggregate:
            all_networks = ConfigGenerator._aggregate_networks(all_networks)
        else:
            all_networks = sorted(list(set(all_networks)))

        # Generate header
        config = "# " + "="*77 + "\n"
        config += "# Nginx Map Module Configuration\n"
        config += f"# Generated: {metadata['timestamp']}\n"
        config += "# " + "="*77 + "\n"
        config += "# \n"
        config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
        config += f"# Total networks: {len(all_networks):,}\n"
        config += "# Note: Using regex patterns for CIDR matching (map module doesn't support CIDR natively)\n"
        config += "# \n"
        config += "# Data sources:\n"
        config += metadata['sources_formatted'] + "\n"
        config += "# \n"

        if metadata['redis']:
            config += f"# {metadata['redis']['formatted']}\n"
            config += "# \n"

        config += "# Cache settings:\n"
        config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
        config += f"# Database: {metadata['cache_db_path']}\n"
        config += "# \n"
        config += "# " + "="*77 + "\n"
        config += "\n"

        # Generate map block with regex conversion
        config += "map $remote_addr $blocked_country {\n"
        config += " default 0;\n"
        config += " \n"

        converted_count = 0
        failed_count = 0

        for network in all_networks:
            regex = cidr_to_nginx_regex(network)
            if regex:
                config += f" {regex} 1;\n"
                converted_count += 1
            else:
                # Fallback - save it with a warning
                config += f" # ERROR: Failed to convert: {network}\n"
                failed_count += 1

        config += "}\n"

        # Log conversion statistics
        #print(f"[INFO] Generated nginx map: {converted_count} regex patterns", flush=True)

        if failed_count > 0:
            print(f"[WARNING] Failed to convert {failed_count} networks to regex - check config file", flush=True)

        return config
|
|
|
|
|
|
@staticmethod
|
|
def generate_nginx_deny(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
|
|
"""Generate Nginx Deny Directives configuration with detailed metadata header"""
|
|
|
|
# Get metadata
|
|
countries = sorted(country_networks.keys())
|
|
redis_stats = None
|
|
if redis_ips:
|
|
redis_stats = {
|
|
'total': len(redis_ips),
|
|
'unique': len(redis_ips),
|
|
'deduped': 0
|
|
}
|
|
|
|
handler = GeoIPHandler()
|
|
metadata = generate_metadata(countries, country_networks, redis_stats, handler)
|
|
|
|
# Aggregate networks
|
|
all_networks = []
|
|
for networks in country_networks.values():
|
|
all_networks.extend(networks)
|
|
|
|
if redis_ips:
|
|
all_networks.extend(redis_ips)
|
|
|
|
if aggregate:
|
|
all_networks = ConfigGenerator._aggregate_networks(all_networks)
|
|
else:
|
|
all_networks = sorted(list(set(all_networks)))
|
|
|
|
# Generate header
|
|
config = "# " + "="*77 + "\n"
|
|
config += "# Nginx Deny Directives Configuration\n"
|
|
config += f"# Generated: {metadata['timestamp']}\n"
|
|
config += "# " + "="*77 + "\n"
|
|
config += "# \n"
|
|
config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
|
|
config += f"# Total networks: {len(all_networks):,}\n"
|
|
config += "# \n"
|
|
config += "# Data sources:\n"
|
|
config += metadata['sources_formatted'] + "\n"
|
|
config += "# \n"
|
|
|
|
if metadata['redis']:
|
|
config += f"# {metadata['redis']['formatted']}\n"
|
|
config += "# \n"
|
|
|
|
config += "# Cache settings:\n"
|
|
config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
|
|
config += f"# Database: {metadata['cache_db_path']}\n"
|
|
config += "# \n"
|
|
config += "# " + "="*77 + "\n"
|
|
config += "\n"
|
|
|
|
# Generate deny directives
|
|
for network in all_networks:
|
|
config += f"deny {network};\n"
|
|
|
|
config += "allow all;\n"
|
|
return config
|
|
|
|
    @staticmethod
    def generate_apache_24(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
        """
        Generate Apache 2.4 configuration with detailed metadata header.

        Emits a <RequireAll> block that grants everyone and then excludes
        each blocked network with 'Require not ip'.
        """

        # Get metadata
        countries = sorted(country_networks.keys())
        # redis_ips is a set, so total == unique and nothing was deduped here.
        redis_stats = None
        if redis_ips:
            redis_stats = {
                'total': len(redis_ips),
                'unique': len(redis_ips),
                'deduped': 0
            }

        handler = GeoIPHandler()
        metadata = generate_metadata(countries, country_networks, redis_stats, handler)

        # Aggregate networks
        all_networks = []
        for networks in country_networks.values():
            all_networks.extend(networks)

        if redis_ips:
            all_networks.extend(redis_ips)

        if aggregate:
            all_networks = ConfigGenerator._aggregate_networks(all_networks)
        else:
            all_networks = sorted(list(set(all_networks)))

        # Generate header
        config = "# " + "="*77 + "\n"
        config += "# Apache 2.4 Configuration\n"
        config += f"# Generated: {metadata['timestamp']}\n"
        config += "# " + "="*77 + "\n"
        config += "# \n"
        config += f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n"
        config += f"# Total networks: {len(all_networks):,}\n"
        config += "# \n"
        config += "# Data sources:\n"
        config += metadata['sources_formatted'] + "\n"
        config += "# \n"

        if metadata['redis']:
            config += f"# {metadata['redis']['formatted']}\n"
            config += "# \n"

        config += "# Cache settings:\n"
        config += f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n"
        config += f"# Database: {metadata['cache_db_path']}\n"
        config += "# \n"
        config += "# " + "="*77 + "\n"
        config += "\n"

        # Generate Apache 2.4 rules
        config += "<RequireAll>\n"
        config += " Require all granted\n"

        for network in all_networks:
            config += f" Require not ip {network}\n"

        config += "</RequireAll>\n"
        return config
|
|
|
|
|
|
@staticmethod
def generate_apache_22(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """Generate an Apache 2.2 access-control block with a metadata header.

    Uses the legacy ``Order Allow,Deny`` model: allow everyone, then emit
    one ``Deny from`` line per blocked network.

    Args:
        country_networks: Mapping of country code -> list of CIDR networks.
        aggregate: Merge adjacent/overlapping networks when True.
        redis_ips: Optional extra set of IPs/networks to block as well.

    Returns:
        Apache 2.2 configuration text (header comments + rules).
    """
    # Collect metadata for the header.
    countries = sorted(country_networks.keys())

    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so it is already unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }

    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Flatten every country's networks (plus optional Redis entries).
    all_networks = []
    for networks in country_networks.values():
        all_networks.extend(networks)

    if redis_ips:
        all_networks.extend(redis_ips)

    if aggregate:
        all_networks = ConfigGenerator._aggregate_networks(all_networks)
    else:
        all_networks = sorted(set(all_networks))

    # Build the output in a list and join once: avoids quadratic string
    # concatenation and avoids naming a local "config", which shadows the
    # imported `config` module.
    bar = "# " + "=" * 77 + "\n"
    parts = [
        bar,
        "# Apache 2.2 Configuration\n",
        f"# Generated: {metadata['timestamp']}\n",
        bar,
        "# \n",
        f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n",
        f"# Total networks: {len(all_networks):,}\n",
        "# \n",
        "# Data sources:\n",
        metadata['sources_formatted'] + "\n",
        "# \n",
    ]

    if metadata['redis']:
        parts.append(f"# {metadata['redis']['formatted']}\n")
        parts.append("# \n")

    parts.extend([
        "# Cache settings:\n",
        f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n",
        f"# Database: {metadata['cache_db_path']}\n",
        "# \n",
        bar,
        "\n",
        # Apache 2.2 rules: with Order Allow,Deny a matching Deny wins.
        "Order Allow,Deny\n",
        "Allow from all\n",
    ])

    for network in all_networks:
        parts.append(f"Deny from {network}\n")

    return "".join(parts)
@staticmethod
def generate_haproxy_acl(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """Generate a HAProxy ACL configuration snippet with a metadata header.

    Emits a ``frontend`` stanza with one ``acl blocked_ip src`` line per
    network, followed by a deny rule.

    Args:
        country_networks: Mapping of country code -> list of CIDR networks.
        aggregate: Merge adjacent/overlapping networks when True.
        redis_ips: Optional extra set of IPs/networks to block as well.

    Returns:
        HAProxy configuration text (header comments + frontend rules).
    """
    # Collect metadata for the header.
    countries = sorted(country_networks.keys())

    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so it is already unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }

    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Flatten every country's networks (plus optional Redis entries).
    all_networks = []
    for networks in country_networks.values():
        all_networks.extend(networks)

    if redis_ips:
        all_networks.extend(redis_ips)

    if aggregate:
        all_networks = ConfigGenerator._aggregate_networks(all_networks)
    else:
        all_networks = sorted(set(all_networks))

    # Build the output in a list and join once: avoids quadratic string
    # concatenation and avoids naming a local "config", which shadows the
    # imported `config` module.
    bar = "# " + "=" * 77 + "\n"
    parts = [
        bar,
        "# HAProxy ACL Configuration\n",
        f"# Generated: {metadata['timestamp']}\n",
        bar,
        "# \n",
        f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n",
        f"# Total networks: {len(all_networks):,}\n",
        "# \n",
        "# Data sources:\n",
        metadata['sources_formatted'] + "\n",
        "# \n",
    ]

    if metadata['redis']:
        parts.append(f"# {metadata['redis']['formatted']}\n")
        parts.append("# \n")

    parts.extend([
        "# Cache settings:\n",
        f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n",
        f"# Database: {metadata['cache_db_path']}\n",
        "# \n",
        "# Usage in HAProxy:\n",
        "# acl banned_ips src -f /path/to/this_file.acl\n",
        "# http-request deny if banned_ips\n",
        "# \n",
        bar,
        "\n",
        # ACL rules inside a frontend stanza.
        "frontend http-in\n",
        " bind *:80\n",
        " \n",
    ])

    for network in all_networks:
        parts.append(f" acl blocked_ip src {network}\n")

    parts.append("""
http-request deny if blocked_ip
default_backend servers
""")
    return "".join(parts)
@staticmethod
def generate_haproxy_map(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """Generate a HAProxy MAP file (``CIDR COUNTRY`` per line) with a header.

    Networks are aggregated per country so every entry keeps the correct
    country label; Redis entries are emitted under a separate ``REDIS`` label.

    Args:
        country_networks: Mapping of country code -> list of CIDR networks.
        aggregate: Merge adjacent/overlapping networks when True.
        redis_ips: Optional extra set of IPs/networks, labelled ``REDIS``.

    Returns:
        HAProxy map file text (header comments + map entries).
    """
    # Collect metadata for the header.
    countries = sorted(country_networks.keys())

    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so it is already unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }

    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Flattened/aggregated once more for the header count only (same style as
    # the ACL generator). NOTE(review): global aggregation can merge networks
    # across countries, so this count may differ slightly from the number of
    # body lines below — preserved from the original behaviour.
    all_networks = []
    for nets in country_networks.values():
        all_networks.extend(nets)

    if redis_ips:
        all_networks.extend(redis_ips)

    if aggregate:
        all_networks = ConfigGenerator._aggregate_networks(all_networks)
    else:
        all_networks = sorted(set(all_networks))

    # Build the output in a list and join once: avoids quadratic string
    # concatenation and avoids naming a local "config", which shadows the
    # imported `config` module.
    bar = "# " + "=" * 77 + "\n"
    parts = [
        bar,
        "# HAProxy MAP Configuration\n",
        f"# Generated: {metadata['timestamp']}\n",
        bar,
        "# \n",
        f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n",
        f"# Total networks: {len(all_networks):,}\n",
        "# \n",
        "# Data sources:\n",
        metadata['sources_formatted'] + "\n",
        "# \n",
    ]

    if metadata['redis']:
        parts.append(f"# {metadata['redis']['formatted']}\n")
        parts.append("# \n")

    parts.extend([
        "# Cache settings:\n",
        f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n",
        f"# Database: {metadata['cache_db_path']}\n",
        "# \n",
        "# Usage in HAProxy:\n",
        "# map_beg(/path/to/geo.map) -m ip $src var(txn.country)\n",
        "# \n",
        bar,
        "\n",
    ])

    # MAP body: aggregate per country so each network keeps the correct
    # country label (no cross-country merges, no unknown "XX" labels, no
    # empty results).
    for country_code, nets in sorted(country_networks.items()):
        if not nets:
            continue

        if aggregate:
            nets = ConfigGenerator._aggregate_networks(nets)
        else:
            nets = sorted(set(nets))

        for network in nets:
            parts.append(f"{network} {country_code}\n")

    # Redis IPs, optionally emitted under their own REDIS label.
    if redis_ips:
        redis_list = list(redis_ips)
        if aggregate:
            redis_list = ConfigGenerator._aggregate_networks(redis_list)
        else:
            redis_list = sorted(set(redis_list))

        for network in redis_list:
            parts.append(f"{network} REDIS\n")

    return "".join(parts)
@staticmethod
def generate_haproxy_lua(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """Generate a HAProxy Lua blocking script with a metadata header.

    Emits a Lua table of blocked networks plus a ``check_blocked`` fetch
    registered as ``is_blocked``.

    Args:
        country_networks: Mapping of country code -> list of CIDR networks.
        aggregate: Merge adjacent/overlapping networks when True.
        redis_ips: Optional extra set of IPs/networks to block as well.

    Returns:
        Lua script text (header comments + script body).
    """
    # Collect metadata for the header.
    countries = sorted(country_networks.keys())

    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so it is already unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }

    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Flatten every country's networks (plus optional Redis entries).
    all_networks = []
    for networks in country_networks.values():
        all_networks.extend(networks)

    if redis_ips:
        all_networks.extend(redis_ips)

    if aggregate:
        all_networks = ConfigGenerator._aggregate_networks(all_networks)
    else:
        all_networks = sorted(set(all_networks))

    # Build the output in a list and join once: avoids quadratic string
    # concatenation and avoids naming a local "config", which shadows the
    # imported `config` module.
    bar = "-- " + "=" * 76 + "\n"
    parts = [
        bar,
        "-- HAProxy Lua Script\n",
        f"-- Generated: {metadata['timestamp']}\n",
        bar,
        "-- \n",
        f"-- Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n",
        f"-- Total networks: {len(all_networks):,}\n",
        "-- \n",
        "-- Data sources:\n",
    ]

    # NOTE(review): sources_formatted appears to already carry a "#" prefix
    # (it is used verbatim in the hash-comment generators), so "-- # " likely
    # double-prefixes these lines — preserved as-is; confirm before changing.
    for line in metadata['sources_formatted'].split('\n'):
        parts.append(f"-- # {line}\n")
    parts.append("-- \n")

    if metadata['redis']:
        parts.append(f"-- {metadata['redis']['formatted']}\n")
        parts.append("-- \n")

    parts.extend([
        "-- Cache settings:\n",
        f"-- Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n",
        f"-- Database: {metadata['cache_db_path']}\n",
        "-- \n",
        bar,
        "\n",
        "local blocked_networks = {\n",
    ])

    for network in all_networks:
        parts.append(f' "{network}",\n')

    parts.append("}\n\n")
    # NOTE(review): the generated Lua uses string.match() against CIDR
    # strings, which is Lua pattern matching, not real CIDR containment
    # (dots are wildcards, prefixes are not expanded) — flagged only;
    # output preserved byte-for-byte.
    parts.append("""
function check_blocked(txn)
local src_ip = txn.f:src()
for _, network in ipairs(blocked_networks) do
if string.match(src_ip, network) then
return true
end
end
return false
end

core.register_fetches("is_blocked", check_blocked)
""")
    return "".join(parts)
@staticmethod
def generate_raw_cidr(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """Generate a plain CIDR list (one network per line) with a header.

    Args:
        country_networks: Mapping of country code -> list of CIDR networks.
        aggregate: Merge adjacent/overlapping networks when True.
        redis_ips: Optional extra set of IPs/networks to include as well.

    Returns:
        Raw CIDR list text (header comments + one network per line).
    """
    # Collect metadata for the header.
    countries = sorted(country_networks.keys())

    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so it is already unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }

    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Flatten every country's networks (plus optional Redis entries).
    all_networks = []
    for networks in country_networks.values():
        all_networks.extend(networks)

    if redis_ips:
        all_networks.extend(redis_ips)

    if aggregate:
        all_networks = ConfigGenerator._aggregate_networks(all_networks)
    else:
        all_networks = sorted(set(all_networks))

    # Build the output in a list and join once: avoids quadratic string
    # concatenation and avoids naming a local "config", which shadows the
    # imported `config` module.
    bar = "# " + "=" * 77 + "\n"
    parts = [
        bar,
        "# Raw CIDR List\n",
        f"# Generated: {metadata['timestamp']}\n",
        bar,
        "# \n",
        f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n",
        f"# Total networks: {len(all_networks):,}\n",
        f"# Aggregated: {aggregate}\n",
        "# \n",
        "# Data sources:\n",
        metadata['sources_formatted'] + "\n",
        "# \n",
    ]

    if metadata['redis']:
        parts.append(f"# {metadata['redis']['formatted']}\n")
        parts.append("# \n")

    parts.extend([
        "# Cache settings:\n",
        f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n",
        f"# Database: {metadata['cache_db_path']}\n",
        "# \n",
        bar,
        "\n",
    ])

    # Body: one CIDR per line, no labels.
    for network in all_networks:
        parts.append(f"{network}\n")

    return "".join(parts)
@staticmethod
def generate_csv(country_networks: dict, aggregate: bool = True, redis_ips: set = None) -> str:
    """Generate a CSV export (``country,network,source``) with a header.

    Args:
        country_networks: Mapping of country code -> list of CIDR networks.
        aggregate: Merge adjacent/overlapping networks when True.
        redis_ips: Optional extra set of IPs/networks, exported with
            country ``REDIS`` and source ``redis``.

    Returns:
        CSV text (comment header + column header + data rows).
    """
    # Collect metadata for the header.
    countries = sorted(country_networks.keys())

    redis_stats = None
    if redis_ips:
        # redis_ips is a set, so it is already unique and nothing was deduped.
        redis_stats = {
            'total': len(redis_ips),
            'unique': len(redis_ips),
            'deduped': 0
        }

    handler = GeoIPHandler()
    metadata = generate_metadata(countries, country_networks, redis_stats, handler)

    # Totals before aggregation.
    total_before = sum(len(nets) for nets in country_networks.values())
    if redis_ips:
        total_before += len(redis_ips)

    # Process all networks up front so the header can report the final count
    # directly, instead of patching it in afterwards with str.replace()
    # (which was fragile: it depended on an exact substring and would have
    # replaced any coincidental occurrence elsewhere in the output).
    processed = {}
    total_after = 0
    for country_code, networks in sorted(country_networks.items()):
        if aggregate:
            networks = ConfigGenerator._aggregate_networks(networks)
        else:
            networks = sorted(set(networks))
        processed[country_code] = networks
        total_after += len(networks)

    redis_list = []
    if redis_ips:
        redis_list = list(redis_ips)
        if aggregate:
            redis_list = ConfigGenerator._aggregate_networks(redis_list)
        else:
            redis_list = sorted(redis_list)
        total_after += len(redis_list)

    # Build the output in a list and join once: avoids quadratic string
    # concatenation and avoids naming a local "config", which shadows the
    # imported `config` module.
    bar = "# " + "=" * 77 + "\n"
    parts = [
        bar,
        "# CSV Export\n",
        f"# Generated: {metadata['timestamp']}\n",
        bar,
        "# \n",
        f"# Countries: {metadata['countries_string']} ({metadata['country_count']} countries)\n",
        f"# Aggregated: {aggregate}\n",
        f"# Networks before aggregation: {total_before:,}\n",
        f"# Networks after aggregation: {total_after:,}\n",
        "# \n",
        "# Data sources:\n",
        metadata['sources_formatted'] + "\n",
        "# \n",
    ]

    if metadata['redis']:
        parts.append(f"# {metadata['redis']['formatted']}\n")
        parts.append("# \n")

    parts.extend([
        "# Cache settings:\n",
        f"# Max age: {metadata['cache_max_age_hours']} hours ({metadata['cache_max_age_days']:.1f} days)\n",
        f"# Database: {metadata['cache_db_path']}\n",
        "# \n",
        bar,
        "\n",
        "country,network,source\n",
    ])

    # Data rows: per-country networks first (source "cache") ...
    for country_code, networks in processed.items():
        for network in networks:
            parts.append(f"{country_code},{network},cache\n")

    # ... then Redis entries under the REDIS pseudo-country (source "redis").
    for network in redis_list:
        parts.append(f"REDIS,{network},redis\n")

    return "".join(parts)