Files
geoip_block_generator/rescan_github_merge.py
Mateusz Gruszczyński c0afc1554d first commit
2026-02-17 09:04:09 +01:00

95 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""
Rescan script - merges GitHub networks with existing MaxMind cache
Updates source to 'maxmind+github' without rescanning MaxMind
"""
import sys
sys.path.insert(0, '/opt/geoip_block_generator')
import sqlite3
from geoip_handler import GeoIPHandler
from pathlib import Path
handler = GeoIPHandler()
conn = sqlite3.connect(str(handler.cache_db), timeout=30)
cursor = conn.cursor()
cursor.execute("SELECT country_code, network_count, source FROM cache_metadata ORDER BY country_code")
countries = cursor.fetchall()
print(f"Found {len(countries)} countries in cache\n")
for country_code, current_count, current_source in countries:
print(f"[{country_code}] Current: {current_count:,} networks, source: {current_source}")
cursor.execute(
"SELECT network FROM networks_cache WHERE country_code = ?",
(country_code,)
)
maxmind_networks = [row[0] for row in cursor.fetchall()]
if not maxmind_networks:
print(f" ⚠ Empty cache, skipping...")
continue
github_networks = handler._fetch_from_github(country_code)
if not github_networks:
print(f" GitHub: no data")
if current_source in ['unknown', None]:
cursor.execute(
"UPDATE cache_metadata SET source = ? WHERE country_code = ?",
('maxmind', country_code)
)
conn.commit()
print(f" ✓ Updated source: unknown → maxmind")
continue
maxmind_set = set(maxmind_networks)
github_set = set(github_networks)
missing = github_set - maxmind_set
if missing:
print(f" + GitHub: {len(github_networks):,} networks, {len(missing):,} NEW")
import datetime
timestamp = datetime.datetime.now().isoformat()
cursor.executemany(
"INSERT OR IGNORE INTO networks_cache (country_code, network, source, created_at) VALUES (?, ?, ?, ?)",
[(country_code, net, 'github', timestamp) for net in missing]
)
new_count = current_count + len(missing)
cursor.execute(
"UPDATE cache_metadata SET network_count = ?, source = ?, last_scan = ? WHERE country_code = ?",
(new_count, 'maxmind+github', timestamp, country_code)
)
conn.commit()
print(f" ✓ Updated: {current_count:,}{new_count:,} networks, source: maxmind+github")
else:
print(f" GitHub: {len(github_networks):,} networks, 0 new (all covered by MaxMind)")
cursor.execute(
"UPDATE cache_metadata SET source = ? WHERE country_code = ?",
('maxmind+github', country_code)
)
conn.commit()
print(f" ✓ Updated source: {current_source} → maxmind+github")
conn.close()
print("\n=== Summary ===")
conn = sqlite3.connect(str(handler.cache_db), timeout=30)
cursor = conn.cursor()
cursor.execute("SELECT source, COUNT(*), SUM(network_count) FROM cache_metadata GROUP BY source")
for source, count, total in cursor.fetchall():
print(f"{source}: {count} countries, {total:,} networks")
conn.close()