# Centralize provider caching and rate-limit handling, plus safer key
# resolution, so lookups stay reliable on free-tier APIs.
"""
Lookup/caching helpers shared across local plugins.

Depends on VisiData (`vd`) because options are stored in vd.options.
"""

from __future__ import annotations

import hashlib
import os
import pickle
import sqlite3
import threading
import time
from datetime import timezone
from email.utils import parsedate_to_datetime
from typing import Any, Callable, Dict, Optional

from visidata import vd


vd.option(
|
|
"tke_cache_db_path",
|
|
os.path.expanduser("~/.visidata_cache.db"),
|
|
"sqlite cache db path for local lookups (pickle-serialized)",
|
|
sheettype=None,
|
|
)
|
|
vd.option(
|
|
"tke_lookup_cache_ttl", 60 * 60 * 24, "lookup cache ttl in seconds", sheettype=None
|
|
)
|
|
vd.option(
|
|
"tke_lookup_error_ttl",
|
|
5 * 60,
|
|
"cache ttl in seconds for failed lookups (to avoid tight loops)",
|
|
sheettype=None,
|
|
)
|
|
vd.option("tke_lookup_timeout", 10, "HTTP lookup timeout in seconds", sheettype=None)
|
|
vd.option(
|
|
"tke_http_retries",
|
|
1,
|
|
"number of retries for transient HTTP failures",
|
|
sheettype=None,
|
|
)
|
|
|
|
# Provider-specific minimum delay between requests (seconds).
|
|
vd.option(
|
|
"tke_throttle_default_sec",
|
|
0.0,
|
|
"default min delay between HTTP requests",
|
|
sheettype=None,
|
|
)
|
|
vd.option(
|
|
"tke_throttle_vt_sec",
|
|
16.0,
|
|
"min delay between VirusTotal API requests",
|
|
sheettype=None,
|
|
)
|
|
vd.option(
|
|
"tke_throttle_ipinfo_sec", 0.5, "min delay between ipinfo requests", sheettype=None
|
|
)
|
|
vd.option(
|
|
"tke_throttle_ipapi_sec", 1.0, "min delay between ipapi.co requests", sheettype=None
|
|
)
|
|
vd.option(
|
|
"tke_throttle_ipwho_sec", 0.5, "min delay between ipwho.is requests", sheettype=None
|
|
)
|
|
vd.option(
|
|
"tke_throttle_rdap_sec", 1.0, "min delay between RDAP requests", sheettype=None
|
|
)
|
|
vd.option(
|
|
"tke_throttle_mb_sec",
|
|
1.0,
|
|
"min delay between MalwareBazaar requests",
|
|
sheettype=None,
|
|
)
|
|
|
|
# API keys/tokens (optional unless otherwise stated by the provider).
|
|
vd.option("tke_ipinfo_token", "", "ipinfo token (optional)", sheettype=None)
|
|
vd.option("tke_ipapi_key", "", "ipapi.co API key (optional)", sheettype=None)
|
|
vd.option(
|
|
"tke_vt_api_key", "", "VirusTotal API key (required for VT lookups)", sheettype=None
|
|
)
|
|
vd.option(
|
|
"tke_maxmind_mmdb_path",
|
|
"",
|
|
"path to GeoLite2/GeoIP2 .mmdb file for offline MaxMind lookups",
|
|
sheettype=None,
|
|
)
|
|
|
|
|
|
def opt(name: str, default: Any = "") -> Any:
|
|
try:
|
|
return getattr(vd.options, name)
|
|
except Exception:
|
|
return default
|
|
|
|
|
|
def cache_path() -> str:
|
|
p = str(opt("tke_cache_db_path", "") or os.path.expanduser("~/.visidata_cache.db"))
|
|
return os.path.expanduser(p)
|
|
|
|
|
|
def auth_tag(secret: str) -> str:
|
|
if not secret:
|
|
return "noauth"
|
|
return hashlib.sha256(secret.encode("utf-8")).hexdigest()[:12]
|
|
|
|
|
|
def cache_ttl() -> int:
|
|
try:
|
|
return int(opt("tke_lookup_cache_ttl", 60 * 60 * 24))
|
|
except Exception:
|
|
return 60 * 60 * 24
|
|
|
|
|
|
def error_ttl() -> int:
|
|
try:
|
|
return int(opt("tke_lookup_error_ttl", 5 * 60))
|
|
except Exception:
|
|
return 5 * 60
|
|
|
|
|
|
def http_timeout() -> int:
|
|
try:
|
|
return int(opt("tke_lookup_timeout", 10))
|
|
except Exception:
|
|
return 10
|
|
|
|
|
|
def sqlite_getset(
|
|
key: str,
|
|
fn: Callable[[], Any],
|
|
*,
|
|
max_age: Optional[int] = None,
|
|
error_max_age: Optional[int] = None,
|
|
):
|
|
"""SQLite+pickle cache. Falls back to computing if db can't be used.
|
|
|
|
`key` should NOT contain secrets; include `auth_tag()` instead.
|
|
"""
|
|
try:
|
|
path = cache_path()
|
|
os.makedirs(os.path.dirname(path), exist_ok=True)
|
|
with sqlite3.connect(path, timeout=2) as conn:
|
|
conn.execute("PRAGMA journal_mode=WAL")
|
|
conn.execute("PRAGMA synchronous=NORMAL")
|
|
conn.execute(
|
|
"CREATE TABLE IF NOT EXISTS cache (key TEXT PRIMARY KEY, value BLOB, timestamp INTEGER)"
|
|
)
|
|
cur = conn.cursor()
|
|
cur.execute("SELECT value, timestamp FROM cache WHERE key=?", (key,))
|
|
row = cur.fetchone()
|
|
now = int(time.time())
|
|
if row:
|
|
val_blob, ts = row
|
|
cached_val = pickle.loads(val_blob)
|
|
age = now - int(ts)
|
|
ttl = max_age
|
|
if cached_val is None and error_max_age is not None:
|
|
ttl = error_max_age
|
|
if ttl is None or age <= int(ttl):
|
|
return cached_val
|
|
val = fn()
|
|
cur.execute(
|
|
"INSERT OR REPLACE INTO cache (key, value, timestamp) VALUES (?, ?, ?)",
|
|
(key, pickle.dumps(val), now),
|
|
)
|
|
conn.commit()
|
|
return val
|
|
except Exception:
|
|
return fn()
|
|
|
|
|
|
def read_key_from_file(path: str) -> str:
|
|
try:
|
|
with open(os.path.expanduser(path)) as f:
|
|
return f.readline().strip()
|
|
except Exception:
|
|
return ""
|
|
|
|
|
|
_rate_lock = threading.Lock()
|
|
_next_allowed_at: Dict[str, float] = {}
|
|
_retry_after_until: Dict[str, float] = {}
|
|
|
|
|
|
def _provider_for_url(url: str) -> str:
|
|
u = str(url).lower()
|
|
if "virustotal.com" in u:
|
|
return "vt"
|
|
if "ipinfo.io" in u:
|
|
return "ipinfo"
|
|
if "ipapi.co" in u:
|
|
return "ipapi"
|
|
if "ipwho.is" in u:
|
|
return "ipwho"
|
|
if "rdap" in u:
|
|
return "rdap"
|
|
if "mb-api.abuse.ch" in u:
|
|
return "mb"
|
|
return "default"
|
|
|
|
|
|
def _provider_delay(provider: str) -> float:
|
|
optname = {
|
|
"vt": "tke_throttle_vt_sec",
|
|
"ipinfo": "tke_throttle_ipinfo_sec",
|
|
"ipapi": "tke_throttle_ipapi_sec",
|
|
"ipwho": "tke_throttle_ipwho_sec",
|
|
"rdap": "tke_throttle_rdap_sec",
|
|
"mb": "tke_throttle_mb_sec",
|
|
}.get(provider, "tke_throttle_default_sec")
|
|
try:
|
|
return max(0.0, float(opt(optname, 0.0)))
|
|
except Exception:
|
|
return 0.0
|
|
|
|
|
|
def _wait_for_slot(provider: str) -> None:
|
|
now = time.monotonic()
|
|
with _rate_lock:
|
|
ready = max(
|
|
now,
|
|
_next_allowed_at.get(provider, 0.0),
|
|
_retry_after_until.get(provider, 0.0),
|
|
)
|
|
_next_allowed_at[provider] = ready + _provider_delay(provider)
|
|
if ready > now:
|
|
time.sleep(ready - now)
|
|
|
|
|
|
def _mark_retry_after(provider: str, retry_after_s: float) -> None:
|
|
if retry_after_s <= 0:
|
|
return
|
|
until = time.monotonic() + retry_after_s
|
|
with _rate_lock:
|
|
prev = _retry_after_until.get(provider, 0.0)
|
|
if until > prev:
|
|
_retry_after_until[provider] = until
|
|
|
|
|
|
def _parse_retry_after(value: str) -> Optional[float]:
|
|
v = (value or "").strip()
|
|
if not v:
|
|
return None
|
|
try:
|
|
sec = float(v)
|
|
if sec >= 0:
|
|
return sec
|
|
except Exception:
|
|
pass
|
|
try:
|
|
dt = parsedate_to_datetime(v)
|
|
if dt is None:
|
|
return None
|
|
# parsedate_to_datetime can return naive dt; treat as UTC then.
|
|
if dt.tzinfo is None:
|
|
return max(0.0, dt.timestamp() - time.time())
|
|
return max(0.0, dt.timestamp() - time.time())
|
|
except Exception:
|
|
return None
|
|
|
|
|
|
def _request_json(
    method: str,
    url: str,
    *,
    headers: Optional[Dict[str, str]] = None,
    data: Optional[Dict[str, Any]] = None,
    provider: Optional[str] = None,
) -> Optional[Dict[str, Any]]:
    """Issue one throttled HTTP request and return its decoded JSON body.

    Returns None on any failure: missing `requests` dependency, network
    error, final 429 or other non-2xx status, or an unparseable body.
    Transient failures (connection errors, 429, 5xx) are retried up to
    `tke_http_retries` times; 429 Retry-After headers feed the shared
    per-provider throttle state so other callers back off too.
    """
    try:
        import requests  # optional dep
    except Exception:
        # Without requests installed, lookups silently degrade to "no data".
        return None

    # Throttle bucket: explicit override, else guessed from the URL.
    prov = provider or _provider_for_url(url)
    try:
        retries = max(0, int(opt("tke_http_retries", 1)))
    except Exception:
        retries = 1

    for attempt in range(retries + 1):
        # Respect per-provider pacing before every attempt.
        _wait_for_slot(prov)
        try:
            r = requests.request(
                method,
                url,
                headers=headers,
                data=data,
                timeout=http_timeout(),
            )
        except Exception:
            # Network-level failure: retry if attempts remain.
            if attempt < retries:
                continue
            return None

        if r.status_code == 429:
            # Rate limited: honor Retry-After, or fall back to at least the
            # provider's configured delay, before any further requests.
            ra = _parse_retry_after(r.headers.get("Retry-After", ""))
            if ra is None:
                ra = max(1.0, _provider_delay(prov))
            _mark_retry_after(prov, ra)
            if attempt < retries:
                continue
            return None

        if not r.ok:
            # Only 5xx responses are treated as transient; 4xx fail fast.
            if 500 <= r.status_code < 600 and attempt < retries:
                _mark_retry_after(prov, max(1.0, _provider_delay(prov)))
                continue
            return None

        try:
            return r.json()
        except Exception:
            return None

    return None


def http_get_json(
|
|
url: str,
|
|
*,
|
|
headers: Optional[Dict[str, str]] = None,
|
|
provider: Optional[str] = None,
|
|
) -> Optional[Dict[str, Any]]:
|
|
return _request_json("GET", url, headers=headers, provider=provider)
|
|
|
|
|
|
def http_post_json(
|
|
url: str,
|
|
data: Dict[str, Any],
|
|
*,
|
|
headers: Optional[Dict[str, str]] = None,
|
|
provider: Optional[str] = None,
|
|
) -> Optional[Dict[str, Any]]:
|
|
return _request_json("POST", url, headers=headers, data=data, provider=provider)
|