scan performance
This commit is contained in:
149
geoip_handler.py
149
geoip_handler.py
@@ -13,7 +13,8 @@ from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
import threading
|
||||
import config
|
||||
import ipaddress
|
||||
|
||||
import math
|
||||
from multiprocessing import cpu_count
|
||||
|
||||
def generate_metadata(countries: list, country_data: dict, redis_stats: dict = None, handler: 'GeoIPHandler' = None) -> dict:
|
||||
"""
|
||||
@@ -568,76 +569,106 @@ class GeoIPHandler:
|
||||
|
||||
return scan_ranges
|
||||
|
||||
def _scan_maxmind_for_country(self, country_code: str, progress_callback=None) -> list:
|
||||
def _scan_maxmind_for_country(self, country_code: str, progress_callback=None, workers=None) -> list:
|
||||
if not self.mmdb_file.exists():
|
||||
return []
|
||||
|
||||
|
||||
country_code = country_code.upper()
|
||||
|
||||
scan_ranges = self._get_scan_ranges()
|
||||
total_ranges = len(scan_ranges)
|
||||
|
||||
# workers default
|
||||
if workers is None or int(workers) <= 0:
|
||||
workers = min(32, max(4, cpu_count() * 2))
|
||||
else:
|
||||
workers = int(workers)
|
||||
|
||||
tasks_per_worker = getattr(config, "MAXMIND_CHUNK_TASKS_PER_WORKER", 12)
|
||||
chunk_min = getattr(config, "MAXMIND_CHUNK_MIN", 50)
|
||||
chunk_max = getattr(config, "MAXMIND_CHUNK_MAX", 2000)
|
||||
|
||||
target_tasks = max(workers * int(tasks_per_worker), workers)
|
||||
chunk = int(math.ceil(total_ranges / float(target_tasks)))
|
||||
CHUNK = max(int(chunk_min), min(int(chunk_max), chunk))
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(f"Starting parallel MaxMind scan with {workers} workers...")
|
||||
progress_callback(f"Scanning {total_ranges} IP ranges...")
|
||||
progress_callback(f"Chunking: {CHUNK} ranges/task (~{int(math.ceil(total_ranges/float(CHUNK)))} tasks)")
|
||||
|
||||
found_networks = set()
|
||||
found_networks_lock = threading.Lock()
|
||||
|
||||
try:
|
||||
if progress_callback:
|
||||
progress_callback(f"Starting parallel MaxMind scan with 32 workers...")
|
||||
|
||||
scan_ranges = self._get_scan_ranges()
|
||||
total_ranges = len(scan_ranges)
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(f"Scanning {total_ranges} IP ranges...")
|
||||
|
||||
completed = 0
|
||||
completed_lock = threading.Lock()
|
||||
|
||||
def scan_range(network_str):
|
||||
nonlocal completed
|
||||
|
||||
reader = geoip2.database.Reader(str(self.mmdb_file))
|
||||
local_networks = set()
|
||||
|
||||
try:
|
||||
network = ipaddress.IPv4Network(network_str, strict=False)
|
||||
|
||||
for subnet in network.subnets(new_prefix=24):
|
||||
sample_ip = str(subnet.network_address + 1)
|
||||
|
||||
try:
|
||||
response = reader.country(sample_ip)
|
||||
if response.country.iso_code == country_code:
|
||||
local_networks.add(str(subnet))
|
||||
except:
|
||||
pass
|
||||
|
||||
except Exception as e:
|
||||
pass
|
||||
finally:
|
||||
reader.close()
|
||||
|
||||
|
||||
completed = 0
|
||||
completed_lock = threading.Lock()
|
||||
|
||||
tls = threading.local()
|
||||
|
||||
def get_reader():
    """Return the calling thread's MaxMind reader, opening it on first use.

    The reader is cached on the ``tls`` thread-local object, so each worker
    thread opens the database file at most once instead of once per range.

    NOTE(review): nothing in the visible code closes these cached readers;
    confirm they are released elsewhere.
    """
    cached = getattr(tls, "reader", None)
    if cached is not None:
        return cached

    # First call on this thread: open the database and remember the handle.
    tls.reader = geoip2.database.Reader(str(self.mmdb_file))
    return tls.reader
|
||||
|
||||
def scan_one_range(reader, network_str: str):
    """Collect the /24 subnets of one range that resolve to ``country_code``.

    Args:
        reader: Object exposing ``country(ip)``; the caller passes the
            per-thread MaxMind reader.
        network_str: IPv4 network in CIDR notation (parsed non-strictly, so
            host bits may be set).

    Returns:
        A set of ``ipaddress.IPv4Network`` objects, one per matching /24.
        Best-effort: any failure yields whatever was collected so far.
    """
    matches = set()
    try:
        parent = ipaddress.IPv4Network(network_str, strict=False)
        for block in parent.subnets(new_prefix=24):
            # Probe a single address (network address + 1) per /24.
            probe_ip = str(block.network_address + 1)
            try:
                answer = reader.country(probe_ip)
                if answer.country.iso_code == country_code:
                    # Keep the network object: fewer allocations than
                    # calling str() inside the loop.
                    matches.add(block)
            except Exception:
                # A failed lookup only skips this /24.
                pass
    except Exception:
        # NOTE(review): a range narrower than /24 makes subnets() raise
        # ValueError, so it silently contributes nothing; confirm intended.
        pass
    return matches
|
||||
|
||||
def scan_chunk(ranges):
|
||||
nonlocal completed
|
||||
reader = get_reader()
|
||||
local_chunk = set()
|
||||
|
||||
for r in ranges:
|
||||
local_chunk.update(scan_one_range(reader, r))
|
||||
|
||||
with completed_lock:
|
||||
completed += 1
|
||||
if completed % 2000 == 0 and progress_callback:
|
||||
with found_networks_lock:
|
||||
progress_pct = (completed / total_ranges) * 100
|
||||
progress_callback(f"Scanning: {completed}/{total_ranges} ranges ({progress_pct:.1f}%), found {len(found_networks)} networks")
|
||||
|
||||
return local_networks
|
||||
|
||||
with ThreadPoolExecutor(max_workers=32) as executor:
|
||||
futures = {executor.submit(scan_range, r): r for r in scan_ranges}
|
||||
|
||||
c = completed
|
||||
|
||||
# report progress more often (diagnostics); does not affect the result
|
||||
if progress_callback and (c % 500 == 0 or c == total_ranges):
|
||||
with found_networks_lock:
|
||||
found_cnt = len(found_networks)
|
||||
pct = (c / float(total_ranges)) * 100.0
|
||||
progress_callback(
|
||||
f"Scanning: {c}/{total_ranges} ranges ({pct:.1f}%), found {found_cnt} networks"
|
||||
)
|
||||
|
||||
return local_chunk
|
||||
|
||||
try:
|
||||
chunks = [scan_ranges[i:i + CHUNK] for i in range(0, total_ranges, CHUNK)]
|
||||
|
||||
with ThreadPoolExecutor(max_workers=workers) as executor:
|
||||
futures = [executor.submit(scan_chunk, ch) for ch in chunks]
|
||||
for future in as_completed(futures):
|
||||
local_nets = future.result()
|
||||
|
||||
with found_networks_lock:
|
||||
found_networks.update(local_nets)
|
||||
|
||||
result = list(found_networks)
|
||||
|
||||
if local_nets:
|
||||
with found_networks_lock:
|
||||
found_networks.update(local_nets)
|
||||
|
||||
# convert to strings at the end (same result as before)
|
||||
result = [str(n) for n in found_networks]
|
||||
|
||||
if progress_callback:
|
||||
progress_callback(f"MaxMind scan complete: {len(result)} networks")
|
||||
|
||||
|
||||
return result
|
||||
|
||||
|
||||
except Exception as e:
|
||||
print(f"[ERROR] MaxMind scan failed for {country_code}: {e}", flush=True)
|
||||
import traceback
|
||||
|
||||
Reference in New Issue
Block a user