scan performance

This commit is contained in:
Mateusz Gruszczyński
2026-02-24 10:06:02 +01:00
parent 98acbc0119
commit 9ccb1651b6
4 changed files with 151 additions and 66 deletions

View File

@@ -13,7 +13,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
import threading
import config
import ipaddress
import math
from multiprocessing import cpu_count
def generate_metadata(countries: list, country_data: dict, redis_stats: dict = None, handler: 'GeoIPHandler' = None) -> dict:
"""
@@ -568,76 +569,106 @@ class GeoIPHandler:
return scan_ranges
def _scan_maxmind_for_country(self, country_code: str, progress_callback=None) -> list:
def _scan_maxmind_for_country(self, country_code: str, progress_callback=None, workers=None) -> list:
if not self.mmdb_file.exists():
return []
country_code = country_code.upper()
scan_ranges = self._get_scan_ranges()
total_ranges = len(scan_ranges)
# workers default
if workers is None or int(workers) <= 0:
workers = min(32, max(4, cpu_count() * 2))
else:
workers = int(workers)
tasks_per_worker = getattr(config, "MAXMIND_CHUNK_TASKS_PER_WORKER", 12)
chunk_min = getattr(config, "MAXMIND_CHUNK_MIN", 50)
chunk_max = getattr(config, "MAXMIND_CHUNK_MAX", 2000)
target_tasks = max(workers * int(tasks_per_worker), workers)
chunk = int(math.ceil(total_ranges / float(target_tasks)))
CHUNK = max(int(chunk_min), min(int(chunk_max), chunk))
if progress_callback:
progress_callback(f"Starting parallel MaxMind scan with {workers} workers...")
progress_callback(f"Scanning {total_ranges} IP ranges...")
progress_callback(f"Chunking: {CHUNK} ranges/task (~{int(math.ceil(total_ranges/float(CHUNK)))} tasks)")
found_networks = set()
found_networks_lock = threading.Lock()
try:
if progress_callback:
progress_callback(f"Starting parallel MaxMind scan with 32 workers...")
scan_ranges = self._get_scan_ranges()
total_ranges = len(scan_ranges)
if progress_callback:
progress_callback(f"Scanning {total_ranges} IP ranges...")
completed = 0
completed_lock = threading.Lock()
def scan_range(network_str):
nonlocal completed
reader = geoip2.database.Reader(str(self.mmdb_file))
local_networks = set()
try:
network = ipaddress.IPv4Network(network_str, strict=False)
for subnet in network.subnets(new_prefix=24):
sample_ip = str(subnet.network_address + 1)
try:
response = reader.country(sample_ip)
if response.country.iso_code == country_code:
local_networks.add(str(subnet))
except:
pass
except Exception as e:
pass
finally:
reader.close()
completed = 0
completed_lock = threading.Lock()
tls = threading.local()
def get_reader():
    """Return the calling thread's MaxMind reader, opening it on first use.

    The reader is cached as ``tls.reader`` so a worker thread that handles
    several chunks opens the database file only once.
    """
    if getattr(tls, "reader", None) is None:
        tls.reader = geoip2.database.Reader(str(self.mmdb_file))
    return tls.reader
def scan_one_range(reader, network_str: str):
    """Collect the /24 subnets of one range that resolve to the target country.

    Args:
        reader: Object exposing ``country(ip)``; the returned response's
            ``country.iso_code`` is compared with ``country_code``.
        network_str: IPv4 network in CIDR notation (host bits are allowed).

    Returns:
        A set of ``ipaddress.IPv4Network`` /24 blocks whose sampled address
        matched. The set is empty when the range cannot be parsed or split
        (for example a range narrower than /24), because those errors are
        swallowed on purpose to keep the scan best-effort.
    """
    matches = set()
    try:
        parent = ipaddress.IPv4Network(network_str, strict=False)
        for block in parent.subnets(new_prefix=24):
            # Probe a single representative address (the first host) per /24.
            probe = str(block.network_address + 1)
            try:
                if reader.country(probe).country.iso_code == country_code:
                    # Keep the network object; avoids a str() allocation
                    # per hit inside the loop.
                    matches.add(block)
            except Exception:
                # A failed lookup for one address just skips that /24.
                continue
    except Exception:
        # Unparsable or unsplittable range: return whatever was collected.
        pass
    return matches
def scan_chunk(ranges):
nonlocal completed
reader = get_reader()
local_chunk = set()
for r in ranges:
local_chunk.update(scan_one_range(reader, r))
with completed_lock:
completed += 1
if completed % 2000 == 0 and progress_callback:
with found_networks_lock:
progress_pct = (completed / total_ranges) * 100
progress_callback(f"Scanning: {completed}/{total_ranges} ranges ({progress_pct:.1f}%), found {len(found_networks)} networks")
return local_networks
with ThreadPoolExecutor(max_workers=32) as executor:
futures = {executor.submit(scan_range, r): r for r in scan_ranges}
c = completed
# report progress more often (diagnostics); does not affect the result
if progress_callback and (c % 500 == 0 or c == total_ranges):
with found_networks_lock:
found_cnt = len(found_networks)
pct = (c / float(total_ranges)) * 100.0
progress_callback(
f"Scanning: {c}/{total_ranges} ranges ({pct:.1f}%), found {found_cnt} networks"
)
return local_chunk
try:
chunks = [scan_ranges[i:i + CHUNK] for i in range(0, total_ranges, CHUNK)]
with ThreadPoolExecutor(max_workers=workers) as executor:
futures = [executor.submit(scan_chunk, ch) for ch in chunks]
for future in as_completed(futures):
local_nets = future.result()
with found_networks_lock:
found_networks.update(local_nets)
result = list(found_networks)
if local_nets:
with found_networks_lock:
found_networks.update(local_nets)
# convert to strings at the end (same result as before)
result = [str(n) for n in found_networks]
if progress_callback:
progress_callback(f"MaxMind scan complete: {len(result)} networks")
return result
except Exception as e:
print(f"[ERROR] MaxMind scan failed for {country_code}: {e}", flush=True)
import traceback