Files
SashegDev 0ee8077787 Server: Reduce rate limit log spam - periodic summary only
- Instead of logging every rate limit warning, now logs summary every 60s
- Shows: IP_blocked=X, rate_limited=Y
2026-05-07 17:56:46 +00:00

206 lines
7.1 KiB
Python

# middleware.py
import logging
import re
import time
import traceback
import uuid
from collections import defaultdict
from typing import Optional
from urllib.parse import unquote

import httpx
from fastapi import Request, Response
from starlette.middleware.base import BaseHTTPMiddleware
logger = logging.getLogger(__name__)

# Public blocklist URLs. Both lists live in the same GitHub repository, so
# the full URL is the shared raw-content prefix plus the netset file name.
_BLOCKLIST_BASE_URL = "https://raw.githubusercontent.com/firehol/blocklist-ipsets/master"
BLOCKLIST_URLS = [
    f"{_BLOCKLIST_BASE_URL}/{netset_name}"
    for netset_name in ("firehol_level1.netset", "iblocklist_isp.netset")
]
# A dotted-quad IPv4 address with an optional CIDR prefix length.
_BLOCKLIST_ENTRY_RE = re.compile(r"^\d+\.\d+\.\d+\.\d+(/\d+)?$")


def load_blocklist_from_url(url: str, timeout: int = 10) -> set[str]:
    """Download an IP blocklist and return the addresses it contains.

    The response body is read line by line. Blank lines and lines starting
    with ``#`` are skipped; every remaining line that looks like an IPv4
    address (optionally followed by ``/prefix``) contributes one entry.

    NOTE: the CIDR prefix is discarded, so a range such as ``10.0.0.0/8``
    contributes only the single address ``10.0.0.0``. An exact-match lookup
    against the returned set therefore does not cover the rest of the range.

    Args:
        url: Location of the blocklist to download.
        timeout: Timeout passed to ``httpx.get``.

    Returns:
        The set of address strings found. Empty when the download fails or
        the server answers with a status other than 200.
    """
    ips: set[str] = set()
    try:
        response = httpx.get(url, timeout=timeout, follow_redirects=True)
        if response.status_code != 200:
            # A non-200 answer used to be dropped silently; report it so an
            # empty blocklist can be traced back to its cause.
            logger.warning(f"Failed to load blocklist from {url}: HTTP {response.status_code}")
            return ips
        for raw_line in response.text.splitlines():
            entry = raw_line.strip()
            if not entry or entry.startswith("#"):
                continue
            if _BLOCKLIST_ENTRY_RE.match(entry):
                # Keep only the address part; see the NOTE in the docstring.
                ips.add(entry.split("/")[0])
        logger.info(f"Loaded {len(ips)} IPs from blocklist: {url}")
    except Exception as e:
        # Deliberately broad: loading a blocklist is best-effort and must
        # never prevent the caller from continuing.
        logger.warning(f"Failed to load blocklist from {url}: {e}")
    return ips
def load_public_blocklists() -> set[str]:
    """Fetch every list in BLOCKLIST_URLS and return the union of their IPs."""
    # Each URL is downloaded in order; a list that fails to load simply
    # contributes an empty set.
    per_source = [load_blocklist_from_url(source_url) for source_url in BLOCKLIST_URLS]
    merged: set[str] = set().union(*per_source)
    logger.info(f"Total blocked IPs from public lists: {len(merged)}")
    return merged
# --- Rate limiting ---------------------------------------------------------
# At most RATE_LIMIT_REQUESTS requests per client IP within any
# RATE_LIMIT_WINDOW seconds.
RATE_LIMIT_REQUESTS = 60
RATE_LIMIT_WINDOW = 60
# Per-IP timestamps (time.time() values) of recently accepted requests.
_ip_request_counts: dict[str, list[float]] = defaultdict(list)

# --- IP blocking (populated from main.py) ----------------------------------
BLOCKED_IPS: set[str] = set()

# --- Request counters for the periodic summary log -------------------------
STATS_LOG_INTERVAL = 60  # seconds between summaries
_stats = dict.fromkeys(("blocked", "rate_limited", "total"), 0)
_stats_last_log = time.time()
# Paths that legitimate clients never request; asking for one indicates
# automated scanning. Grouped by the kind of target being probed.
SUSPICIOUS_PATHS = (
    # dotenv files and their backup/editor variants
    {
        ".env", ".env.local", ".env.production", ".env.development",
        ".env.bak", ".env.old", ".env.backup", ".env.orig", ".env.save",
        ".env~", ".env.swp", ".env.copy", ".env.1", ".ENV",
    }
    # .NET configuration files
    | {
        "appsettings.json", "appsettings.Development.json",
        "appsettings.Production.json", "appsettings.Staging.json",
        "web.config",
    }
    # PHP info/config pages
    | {
        "phpinfo.php", "info.php", "test.php", "i.php", "phpi.php",
        "php.php", "phptest.php", "server-info.php", "phpinformation.php",
        "infophp.php", "php_info.php", "config.php",
    }
    # actuator-style environment endpoints
    | {
        "actuator/env", "actuator/configprops", "actuator",
        "manage/env", "admin/env", "env",
        "actuator/env/aws", "actuator/env/cloud",
    }
    # SharePoint layout pages
    | {"_layouts/15/", "_layouts/15/ToolPane.aspx"}
    # CMS and admin panels
    | {
        "wp-admin", "wp-login.php", "wordpress",
        "administrator", "phpmyadmin",
    }
    # version-control metadata directories
    | {".git", ".svn", ".hg"}
)
def get_client_ip(request: Request) -> str:
    """Return the client IP address for *request*.

    The first entry of the ``X-Forwarded-For`` header wins when it is
    present and non-empty. Otherwise the address of the connected peer is
    used, or the string ``"unknown"`` when the request carries no client
    information.

    NOTE(security): ``X-Forwarded-For`` is supplied by the client unless a
    trusted proxy overwrites it. If this server is reachable directly, a
    caller can pick any value here. Confirm that a proxy sanitises the
    header before relying on the result for blocking or rate limiting.
    """
    peer_ip = request.client.host if request.client else "unknown"
    forwarded = request.headers.get("x-forwarded-for")
    if forwarded:
        first_hop = forwarded.split(",")[0].strip()
        # A header such as " , 1.2.3.4" has an empty first entry; fall back
        # to the peer address instead of returning an empty string.
        if first_hop:
            return first_hop
    return peer_ip
def is_ip_blocked(client_ip: str) -> bool:
    """Return True when *client_ip* is an exact member of BLOCKED_IPS."""
    blocklist = BLOCKED_IPS
    return client_ip in blocklist
# time.time() value of the last sweep of idle entries in _ip_request_counts.
_last_rate_limit_sweep = 0.0


def _sweep_idle_ips(now: float) -> None:
    """Forget IPs whose recorded requests have all left the rate window.

    Runs at most once per RATE_LIMIT_WINDOW seconds so the scan over all
    tracked IPs is not paid on every request.
    """
    global _last_rate_limit_sweep
    if now - _last_rate_limit_sweep < RATE_LIMIT_WINDOW:
        return
    _last_rate_limit_sweep = now
    idle_ips = [
        ip
        for ip, stamps in _ip_request_counts.items()
        if all(now - t >= RATE_LIMIT_WINDOW for t in stamps)
    ]
    for ip in idle_ips:
        del _ip_request_counts[ip]


def check_rate_limit(client_ip: str) -> bool:
    """Record a request from *client_ip* and report whether it is allowed.

    Keeps a per-IP list of request timestamps covering the last
    RATE_LIMIT_WINDOW seconds. A request is allowed, and recorded, while
    fewer than RATE_LIMIT_REQUESTS timestamps remain in that window.
    Rejected requests are not recorded.

    Returns:
        True if the request is within the limit, False if it exceeds it.
    """
    now = time.time()
    # Without this, every IP ever seen would stay in the table forever.
    _sweep_idle_ips(now)

    # Drop timestamps that have aged out of the window.
    recent = [t for t in _ip_request_counts[client_ip] if now - t < RATE_LIMIT_WINDOW]
    _ip_request_counts[client_ip] = recent

    if len(recent) >= RATE_LIMIT_REQUESTS:
        return False
    recent.append(now)
    return True
# Substrings whose presence anywhere in a request path marks it as a probe.
_SUSPICIOUS_SUBSTRINGS = (
    ".env", "phpinfo", "actuator", "wp-", "phpmyadmin",
    ".git", ".svn",
)


def is_suspicious_path(path: str) -> bool:
    """Return True when *path* looks like automated scanning.

    A path is suspicious when any of the following holds, checked against
    both the path as given and its percent-decoded form, case-insensitively:

    * with leading and trailing slashes removed, it equals an entry of
      SUSPICIOUS_PATHS;
    * it contains one of the known probe substrings (``.env``, ``wp-``, ...);
    * its decoded form contains ``..`` (path traversal), including when the
      dots are percent-encoded once or twice (``%2e``, ``%252e``).
    """
    raw_lower = path.lower()
    # Decode twice so both %2e and the double-encoded %252e become ".".
    decoded_lower = unquote(unquote(path)).lower()

    # Request paths start with "/" while the entries do not, and some
    # entries are mixed-case; normalise both sides before comparing.
    known_paths = {entry.lower().strip("/") for entry in SUSPICIOUS_PATHS}

    for candidate in (raw_lower, decoded_lower):
        trimmed = candidate.strip("/")
        if trimmed and trimmed in known_paths:
            return True
        if any(fragment in candidate for fragment in _SUSPICIOUS_SUBSTRINGS):
            return True

    # Decoding never removes a literal "..", so this also covers plain input.
    return ".." in decoded_lower
def set_ip_config(blocked: Optional[set[str]] = None):
    """Install the set of blocked IPs (intended to be called from main.py).

    Passing ``None``, or nothing, leaves the current BLOCKED_IPS untouched.
    The given set object itself is stored, not a copy.
    """
    global BLOCKED_IPS
    if blocked is None:
        return
    BLOCKED_IPS = blocked
def _log_blocked_summary_if_due() -> None:
    """Log the blocked/rate-limited summary once per STATS_LOG_INTERVAL.

    When the interval has elapsed, a single warning is emitted if any
    request was blocked or rate limited since the last summary. The counters
    and the timer are then reset, whether or not anything was logged.
    """
    global _stats_last_log
    now = time.time()
    if now - _stats_last_log <= STATS_LOG_INTERVAL:
        return
    if _stats["blocked"] > 0 or _stats["rate_limited"] > 0:
        logger.warning(
            f"Blocked requests: IP_blocked={_stats['blocked']}, rate_limited={_stats['rate_limited']}"
        )
    # Reset in place so any other reference to the dict sees the new values.
    _stats.update(blocked=0, rate_limited=0, total=0)
    _stats_last_log = now


class LoggingMiddleware(BaseHTTPMiddleware):
    """Filter unwanted traffic and log the requests that get through.

    For each request, in order:

    1. Blocked IPs receive an empty 404.
    2. IPs over the rate limit receive a 429.
    3. Requests for suspicious paths receive an empty 404, with no log line.
    4. Everything else is forwarded, timed and logged, and the response
       gets an ``X-Request-ID`` header.

    Blocked and rate-limited requests are only counted; the counts are
    reported in a periodic summary rather than one log line per request.
    """

    async def dispatch(self, request: Request, call_next):
        """Process one request; see the class docstring for the steps.

        Exceptions raised by the downstream handler are logged with their
        traceback and re-raised unchanged.
        """
        request_id = str(uuid.uuid4())[:8]
        client_ip = get_client_ip(request)

        # Blocked IP: silent 404, counted for the summary.
        if is_ip_blocked(client_ip):
            _stats["blocked"] += 1
            # Flush here too, so the summary still appears when the only
            # traffic is blocked traffic.
            _log_blocked_summary_if_due()
            return Response(status_code=404, content="")

        # Over the rate limit: 429, counted for the summary.
        if not check_rate_limit(client_ip):
            _stats["rate_limited"] += 1
            _log_blocked_summary_if_due()
            return Response(status_code=429, content="Too many requests")

        # Scanner probe: 404 without logging, to give bots nothing to go on.
        path = request.url.path
        if is_suspicious_path(path):
            return Response(status_code=404, content="")

        # File downloads are not logged individually, to avoid log spam.
        is_file_download = path.startswith("/pack/") and "/file/" in path

        _stats["total"] += 1

        start_time = time.time()
        if not is_file_download:
            logger.info(f"{request.method} {path} (IP: {client_ip}, ID: {request_id})")

        try:
            response = await call_next(request)
        except Exception as e:
            error_traceback = traceback.format_exc()
            logger.error(f"{request.method} {path} → ERROR: {str(e)} (ID: {request_id})\n{error_traceback}")
            raise

        duration = (time.time() - start_time) * 1000
        if not is_file_download:
            logger.info(f"{request.method} {path} → {response.status_code} ({duration:.0f}ms) [ID: {request_id}]")

        _log_blocked_summary_if_due()

        response.headers["X-Request-ID"] = request_id
        return response