""" Lookup/caching helpers shared across local plugins. Depends on VisiData (`vd`) because options are stored in vd.options. """ from __future__ import annotations import hashlib import os import pickle import sqlite3 import threading import time from email.utils import parsedate_to_datetime from typing import Any, Callable, Dict, Optional from visidata import vd vd.option( "tke_cache_db_path", os.path.expanduser("~/.visidata_cache.db"), "sqlite cache db path for local lookups (pickle-serialized)", sheettype=None, ) vd.option( "tke_lookup_cache_ttl", 60 * 60 * 24, "lookup cache ttl in seconds", sheettype=None ) vd.option( "tke_lookup_error_ttl", 5 * 60, "cache ttl in seconds for failed lookups (to avoid tight loops)", sheettype=None, ) vd.option("tke_lookup_timeout", 10, "HTTP lookup timeout in seconds", sheettype=None) vd.option( "tke_http_retries", 1, "number of retries for transient HTTP failures", sheettype=None, ) # Provider-specific minimum delay between requests (seconds). vd.option( "tke_throttle_default_sec", 0.0, "default min delay between HTTP requests", sheettype=None, ) vd.option( "tke_throttle_vt_sec", 16.0, "min delay between VirusTotal API requests", sheettype=None, ) vd.option( "tke_throttle_ipinfo_sec", 0.5, "min delay between ipinfo requests", sheettype=None ) vd.option( "tke_throttle_ipapi_sec", 1.0, "min delay between ipapi.co requests", sheettype=None ) vd.option( "tke_throttle_ipwho_sec", 0.5, "min delay between ipwho.is requests", sheettype=None ) vd.option( "tke_throttle_rdap_sec", 1.0, "min delay between RDAP requests", sheettype=None ) vd.option( "tke_throttle_mb_sec", 1.0, "min delay between MalwareBazaar requests", sheettype=None, ) # API keys/tokens (optional unless otherwise stated by the provider). 
vd.option("tke_ipinfo_token", "", "ipinfo token (optional)", sheettype=None) vd.option("tke_ipapi_key", "", "ipapi.co API key (optional)", sheettype=None) vd.option( "tke_vt_api_key", "", "VirusTotal API key (required for VT lookups)", sheettype=None ) vd.option( "tke_maxmind_mmdb_path", "", "path to GeoLite2/GeoIP2 .mmdb file for offline MaxMind lookups", sheettype=None, ) def opt(name: str, default: Any = "") -> Any: try: return getattr(vd.options, name) except Exception: return default def cache_path() -> str: p = str(opt("tke_cache_db_path", "") or os.path.expanduser("~/.visidata_cache.db")) return os.path.expanduser(p) def auth_tag(secret: str) -> str: if not secret: return "noauth" return hashlib.sha256(secret.encode("utf-8")).hexdigest()[:12] def cache_ttl() -> int: try: return int(opt("tke_lookup_cache_ttl", 60 * 60 * 24)) except Exception: return 60 * 60 * 24 def error_ttl() -> int: try: return int(opt("tke_lookup_error_ttl", 5 * 60)) except Exception: return 5 * 60 def http_timeout() -> int: try: return int(opt("tke_lookup_timeout", 10)) except Exception: return 10 def sqlite_getset( key: str, fn: Callable[[], Any], *, max_age: Optional[int] = None, error_max_age: Optional[int] = None, ): """SQLite+pickle cache. Falls back to computing if db can't be used. `key` should NOT contain secrets; include `auth_tag()` instead. 
""" try: path = cache_path() os.makedirs(os.path.dirname(path), exist_ok=True) with sqlite3.connect(path, timeout=2) as conn: conn.execute("PRAGMA journal_mode=WAL") conn.execute("PRAGMA synchronous=NORMAL") conn.execute( "CREATE TABLE IF NOT EXISTS cache (key TEXT PRIMARY KEY, value BLOB, timestamp INTEGER)" ) cur = conn.cursor() cur.execute("SELECT value, timestamp FROM cache WHERE key=?", (key,)) row = cur.fetchone() now = int(time.time()) if row: val_blob, ts = row cached_val = pickle.loads(val_blob) age = now - int(ts) ttl = max_age if cached_val is None and error_max_age is not None: ttl = error_max_age if ttl is None or age <= int(ttl): return cached_val val = fn() cur.execute( "INSERT OR REPLACE INTO cache (key, value, timestamp) VALUES (?, ?, ?)", (key, pickle.dumps(val), now), ) conn.commit() return val except Exception: return fn() def read_key_from_file(path: str) -> str: try: with open(os.path.expanduser(path)) as f: return f.readline().strip() except Exception: return "" _rate_lock = threading.Lock() _next_allowed_at: Dict[str, float] = {} _retry_after_until: Dict[str, float] = {} def _provider_for_url(url: str) -> str: u = str(url).lower() if "virustotal.com" in u: return "vt" if "ipinfo.io" in u: return "ipinfo" if "ipapi.co" in u: return "ipapi" if "ipwho.is" in u: return "ipwho" if "rdap" in u: return "rdap" if "mb-api.abuse.ch" in u: return "mb" return "default" def _provider_delay(provider: str) -> float: optname = { "vt": "tke_throttle_vt_sec", "ipinfo": "tke_throttle_ipinfo_sec", "ipapi": "tke_throttle_ipapi_sec", "ipwho": "tke_throttle_ipwho_sec", "rdap": "tke_throttle_rdap_sec", "mb": "tke_throttle_mb_sec", }.get(provider, "tke_throttle_default_sec") try: return max(0.0, float(opt(optname, 0.0))) except Exception: return 0.0 def _wait_for_slot(provider: str) -> None: now = time.monotonic() with _rate_lock: ready = max( now, _next_allowed_at.get(provider, 0.0), _retry_after_until.get(provider, 0.0), ) _next_allowed_at[provider] = ready + 
_provider_delay(provider) if ready > now: time.sleep(ready - now) def _mark_retry_after(provider: str, retry_after_s: float) -> None: if retry_after_s <= 0: return until = time.monotonic() + retry_after_s with _rate_lock: prev = _retry_after_until.get(provider, 0.0) if until > prev: _retry_after_until[provider] = until def _parse_retry_after(value: str) -> Optional[float]: v = (value or "").strip() if not v: return None try: sec = float(v) if sec >= 0: return sec except Exception: pass try: dt = parsedate_to_datetime(v) if dt is None: return None # parsedate_to_datetime can return naive dt; treat as UTC then. if dt.tzinfo is None: return max(0.0, dt.timestamp() - time.time()) return max(0.0, dt.timestamp() - time.time()) except Exception: return None def _request_json( method: str, url: str, *, headers: Optional[Dict[str, str]] = None, data: Optional[Dict[str, Any]] = None, provider: Optional[str] = None, ) -> Optional[Dict[str, Any]]: try: import requests # optional dep except Exception: return None prov = provider or _provider_for_url(url) try: retries = max(0, int(opt("tke_http_retries", 1))) except Exception: retries = 1 for attempt in range(retries + 1): _wait_for_slot(prov) try: r = requests.request( method, url, headers=headers, data=data, timeout=http_timeout(), ) except Exception: if attempt < retries: continue return None if r.status_code == 429: ra = _parse_retry_after(r.headers.get("Retry-After", "")) if ra is None: ra = max(1.0, _provider_delay(prov)) _mark_retry_after(prov, ra) if attempt < retries: continue return None if not r.ok: if 500 <= r.status_code < 600 and attempt < retries: _mark_retry_after(prov, max(1.0, _provider_delay(prov))) continue return None try: return r.json() except Exception: return None return None def http_get_json( url: str, *, headers: Optional[Dict[str, str]] = None, provider: Optional[str] = None, ) -> Optional[Dict[str, Any]]: return _request_json("GET", url, headers=headers, provider=provider) def http_post_json( url: 
str,
    data: Dict[str, Any],
    *,
    headers: Optional[Dict[str, str]] = None,
    provider: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """POST form-encoded *data* to *url*; return decoded JSON or None on failure."""
    return _request_json("POST", url, headers=headers, data=data, provider=provider)