Add precache daemon configuration and a performance/stress test suite

This commit is contained in:
Mateusz Gruszczyński
2026-02-17 09:27:09 +01:00
parent 7b805cd177
commit 5d2c5aa451
4 changed files with 586 additions and 93 deletions

View File

@@ -49,3 +49,4 @@ REDIS_CACHE_TTL=86400
# Precache Daemon Settings
PRECACHE_INTERVAL_HOURS=168
PRECACHE_CHECK_INTERVAL=3600
PRECACHE_MIN_TTL_HOURS=7

View File

@@ -251,4 +251,9 @@ REDIS_PASSWORD = os.getenv('REDIS_PASSWORD', None)
REDIS_CACHE_TTL = int(os.getenv('REDIS_CACHE_TTL', '86400')) # 24h default
REDIS_ENABLED = os.getenv('REDIS_ENABLED', 'true').lower() == 'true'
CACHE_MAX_AGE_HOURS = 168 # 7 dni (7 * 24h)
CACHE_MAX_AGE_HOURS = 168
# Precache settings
PRECACHE_INTERVAL_HOURS = int(os.getenv('PRECACHE_INTERVAL_HOURS', 168))
PRECACHE_CHECK_INTERVAL = int(os.getenv('PRECACHE_CHECK_INTERVAL', 3600))
PRECACHE_MIN_TTL_HOURS = int(os.getenv('PRECACHE_MIN_TTL_HOURS', 7))

View File

@@ -1,6 +1,7 @@
#!/usr/bin/env python3
"""
Pre-cache individual countries in ALL config variants to Redis
Smart caching: only regenerates expired or missing entries
"""
import sys
@@ -41,6 +42,9 @@ APP_TYPES = [
'raw-csv',
]
# TTL written on every precached Redis entry; defaults to 168 h (7 days)
# when config.PRECACHE_INTERVAL_HOURS is not defined.
CACHE_TTL_SECONDS = getattr(config, 'PRECACHE_INTERVAL_HOURS', 168) * 3600
# Entries whose remaining TTL drops below this threshold are regenerated
# early instead of being allowed to expire between daemon runs.
MIN_TTL_THRESHOLD = getattr(config, 'PRECACHE_MIN_TTL_HOURS', 7) * 3600
def get_available_countries():
conn = sqlite3.connect(str(DB_PATH), timeout=30.0)
cursor = conn.cursor()
@@ -78,65 +82,92 @@ def fetch_country_networks(country_code):
conn.close()
return all_networks
start_time = datetime.now()
print(f"\n{'='*70}", flush=True)
print(f"[STRATEGY] Per-country cache (all config variants)", flush=True)
print(f" Each country: raw data + {len(APP_TYPES)} types × 2 aggregation = {len(APP_TYPES)*2} configs", flush=True)
print(f" Multi-country combos: generated on-demand", flush=True)
print(f"{'='*70}\n", flush=True)
available_countries = get_available_countries()
print(f"Found {len(available_countries)} countries\n", flush=True)
country_data_generated = 0
country_data_cached = 0
config_generated = 0
config_cached = 0
errors = 0
for idx, (country, count) in enumerate(available_countries.items(), 1):
print(f"[{idx}/{len(available_countries)}] {country}: {count:,} networks", flush=True)
def check_cache_validity(country):
    """Check if country data and configs are valid in Redis.

    Returns a ``(is_valid, reason)`` tuple.  The cache for *country* is
    considered valid only when the raw network-data key exists AND every
    app_type × aggregate config variant exists, all with a remaining TTL
    of at least MIN_TTL_THRESHOLD seconds.
    """
    redis_key_data = f"geoban:country:{country}"
    data_exists = redis_cache.redis_client.exists(redis_key_data)
    if not data_exists:
        return False, "Raw data missing"
    data_ttl = redis_cache.redis_client.ttl(redis_key_data)
    # NOTE(review): Redis TTL returns -1 for a key with no expiry, which this
    # comparison treats as "expiring soon".  That is harmless as long as every
    # writer uses setex (as this script does) — confirm no other writer
    # stores these keys without a TTL.
    if data_ttl < MIN_TTL_THRESHOLD:
        return False, f"Raw data expiring soon (TTL: {data_ttl}s)"
    missing_configs = []
    expiring_configs = []
    # Probe every config variant for this single country.
    for app_type in APP_TYPES:
        for aggregate in [True, False]:
            cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
            if not cached_config:
                missing_configs.append(f"{app_type}:{aggregate}")
            else:
                # Config exists — check whether it is about to expire.
                cache_key = redis_cache._generate_key([country], app_type, aggregate)
                config_ttl = redis_cache.redis_client.ttl(cache_key)
                if config_ttl < MIN_TTL_THRESHOLD:
                    expiring_configs.append(f"{app_type}:{aggregate}")
    if missing_configs:
        return False, f"Missing {len(missing_configs)} configs"
    if expiring_configs:
        return False, f"Expiring soon: {len(expiring_configs)} configs"
    return True, f"Valid (TTL: {data_ttl}s)"
def process_country(country, networks_count, force=False):
"""Process single country - fetch data and generate configs"""
redis_key_data = f"geoban:country:{country}"
if not force:
is_valid, reason = check_cache_validity(country)
if is_valid:
return {
'country': country,
'status': 'skipped',
'reason': reason,
'generated': 0,
'cached': len(APP_TYPES) * 2
}
data_exists = redis_cache.redis_client.exists(redis_key_data)
if data_exists:
country_data_cached += 1
print(f" ✓ Raw data: cached", flush=True)
try:
data = redis_cache.redis_client.get(redis_key_data)
if isinstance(data, bytes):
networks = json.loads(data.decode('utf-8'))
else:
networks = json.loads(data)
country_networks = {country: networks}
except Exception as e:
print(f" ✗ Error loading: {e}", flush=True)
errors += 1
continue
print(f" ✗ Error loading cached data: {e}", flush=True)
networks = fetch_country_networks(country)
if not networks:
return {'country': country, 'status': 'error', 'reason': 'No networks', 'generated': 0, 'cached': 0}
redis_cache.redis_client.setex(redis_key_data, CACHE_TTL_SECONDS, json.dumps(networks))
else:
networks = fetch_country_networks(country)
if not networks:
print(f" ✗ No data", flush=True)
errors += 1
continue
return {'country': country, 'status': 'error', 'reason': 'No networks', 'generated': 0, 'cached': 0}
redis_cache.redis_client.setex(redis_key_data, CACHE_TTL_SECONDS, json.dumps(networks))
redis_cache.redis_client.setex(redis_key_data, 86400, json.dumps(networks))
country_data_generated += 1
print(f" ✓ Raw data: generated", flush=True)
country_networks = {country: networks}
configs_generated_this_country = 0
configs_cached_this_country = 0
configs_generated = 0
configs_cached = 0
errors = 0
for app_type in APP_TYPES:
for aggregate in [True, False]:
try:
if not force:
cached_config = redis_cache.get_cached_config([country], app_type, aggregate)
if cached_config:
config_cached += 1
configs_cached_this_country += 1
cache_key = redis_cache._generate_key([country], app_type, aggregate)
config_ttl = redis_cache.redis_client.ttl(cache_key)
if config_ttl > MIN_TTL_THRESHOLD:
configs_cached += 1
continue
if app_type.startswith('raw-'):
@@ -155,7 +186,7 @@ for idx, (country, count) in enumerate(available_countries.items(), 1):
elif format_type == 'csv':
config_text = 'network\n' + '\n'.join(networks)
else:
print(f" ✗ Unknown raw format: {format_type}", flush=True)
errors += 1
continue
else:
@@ -184,36 +215,81 @@ for idx, (country, count) in enumerate(available_countries.items(), 1):
success = redis_cache.save_config([country], app_type, aggregate, config_text, stats)
if success:
config_generated += 1
configs_generated_this_country += 1
configs_generated += 1
else:
errors += 1
except Exception as e:
print(f"{app_type} ({aggregate}): {e}", flush=True)
errors += 1
if configs_generated_this_country > 0:
print(f" → New configs: {configs_generated_this_country}", flush=True)
if configs_cached_this_country > 0:
print(f" → Cached configs: {configs_cached_this_country}", flush=True)
return {
'country': country,
'status': 'processed',
'generated': configs_generated,
'cached': configs_cached,
'errors': errors
}
def main(force=False):
start_time = datetime.now()
print(f"\n{'='*70}", flush=True)
print(f"[STRATEGY] Smart per-country cache", flush=True)
print(f" Mode: {'FORCE (regenerate all)' if force else 'SMART (skip valid cache)'}", flush=True)
print(f" Cache TTL: {CACHE_TTL_SECONDS}s ({CACHE_TTL_SECONDS/3600:.1f}h)", flush=True)
print(f" Min TTL to skip: {MIN_TTL_THRESHOLD}s ({MIN_TTL_THRESHOLD/3600:.1f}h)", flush=True)
print(f" Config types: {len(APP_TYPES)} × 2 = {len(APP_TYPES)*2} per country", flush=True)
print(f"{'='*70}\n", flush=True)
available_countries = get_available_countries()
print(f"Found {len(available_countries)} countries\n", flush=True)
results = {
'skipped': 0,
'processed': 0,
'errors': 0,
'configs_generated': 0,
'configs_cached': 0
}
for idx, (country, count) in enumerate(available_countries.items(), 1):
print(f"[{idx}/{len(available_countries)}] {country}: {count:,} networks", flush=True)
result = process_country(country, count, force=force)
if result['status'] == 'skipped':
results['skipped'] += 1
print(f" ⊘ SKIPPED: {result['reason']}", flush=True)
elif result['status'] == 'processed':
results['processed'] += 1
results['configs_generated'] += result['generated']
results['configs_cached'] += result['cached']
results['errors'] += result.get('errors', 0)
if result['generated'] > 0:
print(f" ✓ Generated: {result['generated']}, Cached: {result['cached']}", flush=True)
else:
print(f" ✓ All valid: {result['cached']} configs", flush=True)
else:
results['errors'] += 1
print(f" ✗ ERROR: {result.get('reason', 'Unknown')}", flush=True)
progress_pct = (idx / len(available_countries)) * 100
print(f" → Progress: {progress_pct:.1f}%\n", flush=True)
duration = (datetime.now() - start_time).total_seconds()
duration = (datetime.now() - start_time).total_seconds()
print(f"{'='*70}", flush=True)
print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
print(f"\n[Raw Country Data]", flush=True)
print(f" Generated: {country_data_generated}", flush=True)
print(f" Cached: {country_data_cached}", flush=True)
print(f"\n[Config Files]", flush=True)
print(f" Generated: {config_generated}", flush=True)
print(f" Cached: {config_cached}", flush=True)
print(f" Errors: {errors}", flush=True)
print(f"{'='*70}", flush=True)
print(f"[SUMMARY] Complete in {duration/60:.1f} minutes", flush=True)
print(f"\n[Countries]", flush=True)
print(f" Skipped (valid cache): {results['skipped']}", flush=True)
print(f" Processed: {results['processed']}", flush=True)
print(f" Errors: {results['errors']}", flush=True)
print(f"\n[Configs]", flush=True)
print(f" Generated: {results['configs_generated']}", flush=True)
print(f" Already cached: {results['configs_cached']}", flush=True)
print(f" Total valid: {results['configs_generated'] + results['configs_cached']}", flush=True)
try:
try:
total_keys = redis_cache.redis_client.dbsize()
cursor = 0
@@ -240,7 +316,21 @@ try:
print(f" Config keys: {config_keys}", flush=True)
print(f" Memory: {health.get('memory_used_mb', 0):.2f} MB", flush=True)
except Exception as e:
expected_configs = len(available_countries) * len(APP_TYPES) * 2
coverage = (config_keys / expected_configs * 100) if expected_configs > 0 else 0
print(f" Coverage: {config_keys}/{expected_configs} ({coverage:.1f}%)", flush=True)
except Exception as e:
print(f"\n[REDIS] Error: {e}", flush=True)
print(f"{'='*70}\n", flush=True)
print(f"{'='*70}\n", flush=True)
if __name__ == '__main__':
    # CLI entry point: --force bypasses the smart TTL checks and rebuilds
    # every cached entry from scratch.
    import argparse

    cli = argparse.ArgumentParser(description='Pre-cache GeoIP configs to Redis')
    cli.add_argument(
        '--force',
        action='store_true',
        help='Force regenerate all configs (ignore TTL)',
    )
    options = cli.parse_args()
    main(force=options.force)

397
test_performance.py Normal file
View File

@@ -0,0 +1,397 @@
#!/usr/bin/env python3
"""
Performance & Stress Testing Suite for GeoIP Ban API
Simulates real production load with concurrent users
"""
import requests
import time
import random
import statistics
from datetime import datetime
from concurrent.futures import ThreadPoolExecutor, as_completed
from collections import defaultdict
import sys
import argparse
import threading
class PerformanceTest:
    """Load/stress driver for the GeoIP Ban API.

    Collects per-request timing results grouped by scenario name and prints
    aggregate latency, throughput, cache-hit and error statistics.
    """

    def __init__(self, base_url):
        self.base_url = base_url
        self.results = defaultdict(list)  # scenario name -> list of request result dicts
        self.errors = []                  # all failed requests; appended under self.lock
        self.lock = threading.Lock()
        self.available_countries = self._fetch_available_countries()

    def log(self, msg):
        """Print *msg* prefixed with an HH:MM:SS timestamp, unbuffered."""
        print(f"[{datetime.now().strftime('%H:%M:%S')}] {msg}", flush=True)

    def _fetch_available_countries(self):
        """Fetch available countries from API.

        Falls back to a static default list when the API is unreachable or
        returns no countries.
        """
        self.log("Fetching available countries from API...")
        try:
            resp = requests.get(f"{self.base_url}/api/database/sqlite/status", timeout=10)
            if resp.status_code == 200:
                data = resp.json()
                countries = [item['country_code'] for item in data.get('countries', [])]
                if countries:
                    self.log(f"Loaded {len(countries)} countries from API")
                    return countries
        except Exception as e:
            self.log(f"Failed to fetch countries: {e}")
        self.log("Using default country list")
        return ['CN', 'US', 'RU', 'DE', 'FR', 'GB', 'JP', 'KR', 'IN', 'BR']

    def make_request(self, method, endpoint, data=None, timeout=30):
        """Single API request with timing.

        Returns a result dict (status/duration/size/success/endpoint/
        cache_type).  Transport errors are recorded in ``self.errors`` and
        reported as a failed result rather than raised.
        """
        url = f"{self.base_url}{endpoint}"
        start = time.time()
        try:
            if method == 'GET':
                resp = requests.get(url, timeout=timeout)
            elif method == 'POST':
                resp = requests.post(url, json=data, timeout=timeout)
            else:
                raise ValueError(f"Unsupported method: {method}")
            duration = time.time() - start
            # FIX: resp.json() raises on non-JSON bodies (the raw text/CSV
            # endpoints), which previously turned a successful 200 response
            # into a logged error.  Parse defensively instead.
            cache_type = None
            if resp.status_code == 200:
                try:
                    cache_type = resp.json().get('cache_type')
                except (ValueError, AttributeError):
                    cache_type = None
            return {
                'status': resp.status_code,
                'duration': duration,
                'size': len(resp.content),
                'success': resp.status_code == 200,
                'endpoint': endpoint,
                'cache_type': cache_type
            }
        except Exception as e:
            duration = time.time() - start
            with self.lock:
                self.errors.append({
                    'endpoint': endpoint,
                    'error': str(e),
                    'duration': duration
                })
            return {
                'status': 0,
                'duration': duration,
                'size': 0,
                'success': False,
                'endpoint': endpoint,
                'error': str(e)
            }

    def simulate_user(self, user_id, duration_seconds, think_time_range=(1, 5)):
        """Simulate single user behavior - ONLY single countries.

        Loops for *duration_seconds*, randomly choosing between config
        generation, raw-format download and a status check, sleeping a
        random "think time" between requests.
        """
        formats = [
            ('nginx', 'geo'),
            ('nginx', 'map'),
            ('nginx', 'deny'),
            ('apache', '24'),
            ('haproxy', 'acl'),
        ]
        raw_formats = ['raw-cidr_txt', 'raw-newline_txt', 'raw-json', 'raw-csv']
        user_requests = []
        start_time = time.time()
        while (time.time() - start_time) < duration_seconds:
            # 'generate' is weighted 3:1:1 to mimic the dominant real-world action.
            action = random.choice(['generate', 'generate', 'generate', 'raw', 'status'])
            if action == 'generate':
                country = random.choice(self.available_countries)
                app_type, variant = random.choice(formats)
                result = self.make_request(
                    'POST', '/api/generate/preview',
                    data={
                        'countries': [country],
                        'app_type': app_type,
                        'app_variant': variant,
                        'aggregate': random.choice([True, False])
                    }
                )
            elif action == 'raw':
                country = random.choice(self.available_countries)
                raw_format = random.choice(raw_formats)
                result = self.make_request(
                    'POST', '/api/generate/raw',
                    data={
                        'countries': [country],
                        'app_type': raw_format,
                        'aggregate': random.choice([True, False])
                    }
                )
            else:
                result = self.make_request('GET', '/api/database/status')
            user_requests.append(result)
            think_time = random.uniform(*think_time_range)
            time.sleep(think_time)
        return user_requests

    def test_concurrent_users(self, num_users, duration_seconds, think_time_range=(1, 5)):
        """Simulate multiple concurrent users via a thread pool, one thread per user."""
        self.log(f"\n{'='*70}")
        self.log(f"USER SIMULATION TEST")
        self.log(f"  Users: {num_users}")
        self.log(f"  Duration: {duration_seconds}s")
        self.log(f"  Think time: {think_time_range[0]}-{think_time_range[1]}s")
        self.log(f"  Countries: {len(self.available_countries)}")
        self.log(f"{'='*70}")
        all_results = []
        start_time = time.time()
        with ThreadPoolExecutor(max_workers=num_users) as executor:
            futures = [
                executor.submit(self.simulate_user, i, duration_seconds, think_time_range)
                for i in range(num_users)
            ]
            completed = 0
            for future in as_completed(futures):
                user_results = future.result()
                all_results.extend(user_results)
                completed += 1
                self.log(f"  User {completed}/{num_users} completed ({len(user_results)} requests)")
        total_time = time.time() - start_time
        self.analyze_results(f"Concurrent Users ({num_users} users)", all_results, total_time)
        self.results[f"user_simulation_{num_users}"] = all_results
        return all_results

    def test_scenario(self, name, method, endpoint, data=None, count=10, concurrent=1):
        """Run test scenario with multiple requests.

        Executes *count* identical requests, either sequentially
        (concurrent=1) or via a thread pool, then analyzes and stores the
        results under *name*.
        """
        self.log(f"\n{'='*70}")
        self.log(f"TEST: {name}")
        self.log(f"  Method: {method} {endpoint}")
        self.log(f"  Requests: {count} (concurrent: {concurrent})")
        self.log(f"{'='*70}")
        results = []
        start_time = time.time()
        if concurrent == 1:
            for i in range(count):
                result = self.make_request(method, endpoint, data)
                results.append(result)
                if (i + 1) % 10 == 0:
                    self.log(f"  Progress: {i+1}/{count}")
        else:
            with ThreadPoolExecutor(max_workers=concurrent) as executor:
                futures = [executor.submit(self.make_request, method, endpoint, data)
                           for _ in range(count)]
                for i, future in enumerate(as_completed(futures), 1):
                    results.append(future.result())
                    if i % 10 == 0:
                        self.log(f"  Progress: {i}/{count}")
        total_time = time.time() - start_time
        self.analyze_results(name, results, total_time)
        self.results[name] = results
        return results

    def analyze_results(self, name, results, total_time):
        """Analyze and display results: success rate, latency percentiles, cache stats."""
        successful = [r for r in results if r['success']]
        failed = [r for r in results if not r['success']]
        if not successful:
            self.log(f"\nERROR: ALL REQUESTS FAILED!")
            return
        durations = [r['duration'] for r in successful]
        sizes = [r['size'] for r in successful]
        # 'redis-full'/'hybrid' mark cache hits, 'fresh' a miss; other values ignored.
        cache_hits = sum(1 for r in successful if r.get('cache_type') in ['redis-full', 'hybrid'])
        cache_misses = sum(1 for r in successful if r.get('cache_type') == 'fresh')
        self.log(f"\nRESULTS: {name}")
        self.log(f"  Total time: {total_time:.2f}s")
        self.log(f"  Requests: {len(results)} ({len(successful)} success, {len(failed)} failed)")
        self.log(f"  Success rate: {len(successful)/len(results)*100:.1f}%")
        self.log(f"  Throughput: {len(successful)/total_time:.2f} req/s")
        self.log(f"\nTIMING:")
        self.log(f"  Min: {min(durations)*1000:.0f}ms")
        self.log(f"  Max: {max(durations)*1000:.0f}ms")
        self.log(f"  Mean: {statistics.mean(durations)*1000:.0f}ms")
        self.log(f"  Median: {statistics.median(durations)*1000:.0f}ms")
        # Percentiles only when the sample is large enough for the cut count.
        if len(durations) >= 20:
            self.log(f"  P95: {statistics.quantiles(durations, n=20)[18]*1000:.0f}ms")
        if len(durations) >= 100:
            self.log(f"  P99: {statistics.quantiles(durations, n=100)[98]*1000:.0f}ms")
        self.log(f"\nCACHE:")
        self.log(f"  Hits: {cache_hits}")
        self.log(f"  Misses: {cache_misses}")
        if cache_hits + cache_misses > 0:
            self.log(f"  Hit rate: {cache_hits/(cache_hits+cache_misses)*100:.1f}%")
        self.log(f"\nRESPONSE SIZE:")
        self.log(f"  Min: {min(sizes)/1024:.1f}KB")
        self.log(f"  Max: {max(sizes)/1024:.1f}KB")
        self.log(f"  Mean: {statistics.mean(sizes)/1024:.1f}KB")
        if failed:
            self.log(f"\nERRORS: {len(failed)}")
            for err in failed[:5]:
                self.log(f"  - {err.get('error', 'Unknown error')}")

    def final_report(self):
        """Generate final summary report across all recorded scenarios."""
        self.log(f"\n\n{'='*70}")
        self.log(f"FINAL PERFORMANCE REPORT")
        self.log(f"{'='*70}\n")
        for name, results in self.results.items():
            successful = [r for r in results if r['success']]
            if successful:
                durations = [r['duration'] for r in successful]
                self.log(f"{name:40s} {statistics.mean(durations)*1000:6.0f}ms avg, "
                         f"{len(successful):3d}/{len(results):3d} success")
        if self.errors:
            self.log(f"\nTOTAL ERRORS: {len(self.errors)}")
            # Group errors by (truncated) message to surface the dominant failure modes.
            error_types = defaultdict(int)
            for err in self.errors:
                error_types[err.get('error', 'Unknown')[:50]] += 1
            for error, count in sorted(error_types.items(), key=lambda x: -x[1])[:10]:
                self.log(f"  {count:3d}x {error}")
def run_user_simulation(base_url, num_users, duration):
    """Run only user simulation"""
    banner = "=" * 70
    tester = PerformanceTest(base_url)
    print("\n" + banner)
    print("GEOIP BAN API - USER SIMULATION")
    print(f"Target: {base_url}")
    print(f"Simulating {num_users} concurrent users for {duration}s")
    print(banner + "\n")
    # Single scenario: realistic users with 1-5 s think time, then a summary.
    tester.test_concurrent_users(num_users, duration, think_time_range=(1, 5))
    tester.final_report()
def run_quick_test(base_url):
    """Quick performance test - single countries only"""
    tester = PerformanceTest(base_url)
    banner = "=" * 70
    print("\n" + banner)
    print("GEOIP BAN API - QUICK PERFORMANCE TEST")
    print(f"Target: {base_url}")
    print(banner + "\n")
    if not tester.available_countries:
        print("ERROR: No countries available from API")
        return
    test_country = tester.available_countries[0]
    payload = {
        'countries': [test_country],
        'app_type': 'nginx',
        'app_variant': 'geo',
        'aggregate': True,
    }
    # Prime the cache with a small batch before the measured run.
    tester.test_scenario(
        "Warm-up",
        "POST", "/api/generate/preview",
        data=payload,
        count=10, concurrent=2
    )
    tester.test_scenario(
        "Single Country (Cached)",
        "POST", "/api/generate/preview",
        data=payload,
        count=50, concurrent=10
    )
    # Short mixed-traffic user simulation to finish.
    tester.test_concurrent_users(num_users=10, duration_seconds=30, think_time_range=(1, 3))
    tester.final_report()
def run_full_test_suite(base_url):
    """Execute complete test suite - single countries only"""
    tester = PerformanceTest(base_url)
    banner = "=" * 70
    print("\n" + banner)
    print("GEOIP BAN API - FULL PERFORMANCE TEST SUITE")
    print(f"Target: {base_url}")
    print(banner + "\n")
    if not tester.available_countries:
        print("ERROR: No countries available from API")
        return
    # 1) Lightweight health-check endpoint under moderate concurrency.
    tester.test_scenario("Health Check", "GET", "/api/database/status", count=50, concurrent=10)
    time.sleep(1)
    test_country = tester.available_countries[0]
    # 2) Cached single-country config generation.
    tester.test_scenario(
        "Single Country (Cached)",
        "POST", "/api/generate/preview",
        data={'countries': [test_country], 'app_type': 'nginx', 'app_variant': 'geo', 'aggregate': True},
        count=100, concurrent=20
    )
    time.sleep(1)
    # 3) Sustained heavy load.
    tester.test_scenario(
        "Heavy Load (50 concurrent)",
        "POST", "/api/generate/preview",
        data={'countries': [test_country], 'app_type': 'nginx', 'app_variant': 'map', 'aggregate': True},
        count=200, concurrent=50
    )
    time.sleep(2)
    # 4) Spike: all 100 requests fired at once.
    tester.log("\nSPIKE TEST - Sudden burst of 100 requests")
    spike_started = time.time()
    tester.test_scenario(
        "Spike Test",
        "POST", "/api/generate/preview",
        data={'countries': [test_country], 'app_type': 'apache', 'app_variant': '24', 'aggregate': True},
        count=100, concurrent=100
    )
    tester.log(f"  Spike handled in: {time.time() - spike_started:.2f}s")
    time.sleep(2)
    # 5) Two user-simulation rounds: relaxed, then aggressive.
    tester.test_concurrent_users(num_users=10, duration_seconds=60, think_time_range=(1, 5))
    time.sleep(2)
    tester.test_concurrent_users(num_users=50, duration_seconds=30, think_time_range=(0.5, 2))
    tester.final_report()
if __name__ == '__main__':
    # CLI entry point: pick a test mode and target URL; Ctrl-C exits cleanly.
    parser = argparse.ArgumentParser(description='Performance testing for GeoIP Ban API')
    parser.add_argument('--url', default='http://127.0.0.1:5000', help='API base URL')
    parser.add_argument(
        '--mode', choices=['quick', 'full', 'users'], default='quick',
        help='Test mode: quick (5min), full (15min), users (simulation only)',
    )
    parser.add_argument('--users', type=int, default=10, help='Number of concurrent users (for users mode)')
    parser.add_argument('--duration', type=int, default=60, help='Test duration in seconds (for users mode)')
    args = parser.parse_args()
    try:
        if args.mode == 'users':
            run_user_simulation(args.url, args.users, args.duration)
        elif args.mode == 'full':
            run_full_test_suite(args.url)
        else:
            # argparse restricts --mode to the three choices, so this is 'quick'.
            run_quick_test(args.url)
    except KeyboardInterrupt:
        print("\n\nTest interrupted by user")
        sys.exit(1)