visidata: add IOC types with cached, throttled lookups
Centralize provider caching and rate-limit handling, then add Domain/URL/Hash IOC types and safer VT/IPInfo key resolution so lookups stay reliable on free-tier APIs.
This commit is contained in:
336
config/visidata/plugins/lookupcore.py
Normal file
336
config/visidata/plugins/lookupcore.py
Normal file
@@ -0,0 +1,336 @@
|
||||
"""
|
||||
Lookup/caching helpers shared across local plugins.
|
||||
|
||||
Depends on VisiData (`vd`) because options are stored in vd.options.
|
||||
"""
|
||||
|
||||
from __future__ import annotations

import hashlib
import os
import pickle
import sqlite3
import threading
import time
from datetime import timezone
from email.utils import parsedate_to_datetime
from typing import Any, Callable, Dict, Optional

from visidata import vd
|
||||
|
||||
|
||||
vd.option(
|
||||
"tke_cache_db_path",
|
||||
os.path.expanduser("~/.visidata_cache.db"),
|
||||
"sqlite cache db path for local lookups (pickle-serialized)",
|
||||
sheettype=None,
|
||||
)
|
||||
vd.option(
|
||||
"tke_lookup_cache_ttl", 60 * 60 * 24, "lookup cache ttl in seconds", sheettype=None
|
||||
)
|
||||
vd.option(
|
||||
"tke_lookup_error_ttl",
|
||||
5 * 60,
|
||||
"cache ttl in seconds for failed lookups (to avoid tight loops)",
|
||||
sheettype=None,
|
||||
)
|
||||
vd.option("tke_lookup_timeout", 10, "HTTP lookup timeout in seconds", sheettype=None)
|
||||
vd.option(
|
||||
"tke_http_retries",
|
||||
1,
|
||||
"number of retries for transient HTTP failures",
|
||||
sheettype=None,
|
||||
)
|
||||
|
||||
# Provider-specific minimum delay between requests (seconds).
|
||||
vd.option(
|
||||
"tke_throttle_default_sec",
|
||||
0.0,
|
||||
"default min delay between HTTP requests",
|
||||
sheettype=None,
|
||||
)
|
||||
vd.option(
|
||||
"tke_throttle_vt_sec",
|
||||
16.0,
|
||||
"min delay between VirusTotal API requests",
|
||||
sheettype=None,
|
||||
)
|
||||
vd.option(
|
||||
"tke_throttle_ipinfo_sec", 0.5, "min delay between ipinfo requests", sheettype=None
|
||||
)
|
||||
vd.option(
|
||||
"tke_throttle_ipapi_sec", 1.0, "min delay between ipapi.co requests", sheettype=None
|
||||
)
|
||||
vd.option(
|
||||
"tke_throttle_ipwho_sec", 0.5, "min delay between ipwho.is requests", sheettype=None
|
||||
)
|
||||
vd.option(
|
||||
"tke_throttle_rdap_sec", 1.0, "min delay between RDAP requests", sheettype=None
|
||||
)
|
||||
vd.option(
|
||||
"tke_throttle_mb_sec",
|
||||
1.0,
|
||||
"min delay between MalwareBazaar requests",
|
||||
sheettype=None,
|
||||
)
|
||||
|
||||
# API keys/tokens (optional unless otherwise stated by the provider).
|
||||
vd.option("tke_ipinfo_token", "", "ipinfo token (optional)", sheettype=None)
|
||||
vd.option("tke_ipapi_key", "", "ipapi.co API key (optional)", sheettype=None)
|
||||
vd.option(
|
||||
"tke_vt_api_key", "", "VirusTotal API key (required for VT lookups)", sheettype=None
|
||||
)
|
||||
vd.option(
|
||||
"tke_maxmind_mmdb_path",
|
||||
"",
|
||||
"path to GeoLite2/GeoIP2 .mmdb file for offline MaxMind lookups",
|
||||
sheettype=None,
|
||||
)
|
||||
|
||||
|
||||
def opt(name: str, default: Any = "") -> Any:
|
||||
try:
|
||||
return getattr(vd.options, name)
|
||||
except Exception:
|
||||
return default
|
||||
|
||||
|
||||
def cache_path() -> str:
|
||||
p = str(opt("tke_cache_db_path", "") or os.path.expanduser("~/.visidata_cache.db"))
|
||||
return os.path.expanduser(p)
|
||||
|
||||
|
||||
def auth_tag(secret: str) -> str:
|
||||
if not secret:
|
||||
return "noauth"
|
||||
return hashlib.sha256(secret.encode("utf-8")).hexdigest()[:12]
|
||||
|
||||
|
||||
def cache_ttl() -> int:
|
||||
try:
|
||||
return int(opt("tke_lookup_cache_ttl", 60 * 60 * 24))
|
||||
except Exception:
|
||||
return 60 * 60 * 24
|
||||
|
||||
|
||||
def error_ttl() -> int:
|
||||
try:
|
||||
return int(opt("tke_lookup_error_ttl", 5 * 60))
|
||||
except Exception:
|
||||
return 5 * 60
|
||||
|
||||
|
||||
def http_timeout() -> int:
|
||||
try:
|
||||
return int(opt("tke_lookup_timeout", 10))
|
||||
except Exception:
|
||||
return 10
|
||||
|
||||
|
||||
def sqlite_getset(
|
||||
key: str,
|
||||
fn: Callable[[], Any],
|
||||
*,
|
||||
max_age: Optional[int] = None,
|
||||
error_max_age: Optional[int] = None,
|
||||
):
|
||||
"""SQLite+pickle cache. Falls back to computing if db can't be used.
|
||||
|
||||
`key` should NOT contain secrets; include `auth_tag()` instead.
|
||||
"""
|
||||
try:
|
||||
path = cache_path()
|
||||
os.makedirs(os.path.dirname(path), exist_ok=True)
|
||||
with sqlite3.connect(path, timeout=2) as conn:
|
||||
conn.execute("PRAGMA journal_mode=WAL")
|
||||
conn.execute("PRAGMA synchronous=NORMAL")
|
||||
conn.execute(
|
||||
"CREATE TABLE IF NOT EXISTS cache (key TEXT PRIMARY KEY, value BLOB, timestamp INTEGER)"
|
||||
)
|
||||
cur = conn.cursor()
|
||||
cur.execute("SELECT value, timestamp FROM cache WHERE key=?", (key,))
|
||||
row = cur.fetchone()
|
||||
now = int(time.time())
|
||||
if row:
|
||||
val_blob, ts = row
|
||||
cached_val = pickle.loads(val_blob)
|
||||
age = now - int(ts)
|
||||
ttl = max_age
|
||||
if cached_val is None and error_max_age is not None:
|
||||
ttl = error_max_age
|
||||
if ttl is None or age <= int(ttl):
|
||||
return cached_val
|
||||
val = fn()
|
||||
cur.execute(
|
||||
"INSERT OR REPLACE INTO cache (key, value, timestamp) VALUES (?, ?, ?)",
|
||||
(key, pickle.dumps(val), now),
|
||||
)
|
||||
conn.commit()
|
||||
return val
|
||||
except Exception:
|
||||
return fn()
|
||||
|
||||
|
||||
def read_key_from_file(path: str) -> str:
|
||||
try:
|
||||
with open(os.path.expanduser(path)) as f:
|
||||
return f.readline().strip()
|
||||
except Exception:
|
||||
return ""
|
||||
|
||||
|
||||
_rate_lock = threading.Lock()
|
||||
_next_allowed_at: Dict[str, float] = {}
|
||||
_retry_after_until: Dict[str, float] = {}
|
||||
|
||||
|
||||
def _provider_for_url(url: str) -> str:
|
||||
u = str(url).lower()
|
||||
if "virustotal.com" in u:
|
||||
return "vt"
|
||||
if "ipinfo.io" in u:
|
||||
return "ipinfo"
|
||||
if "ipapi.co" in u:
|
||||
return "ipapi"
|
||||
if "ipwho.is" in u:
|
||||
return "ipwho"
|
||||
if "rdap" in u:
|
||||
return "rdap"
|
||||
if "mb-api.abuse.ch" in u:
|
||||
return "mb"
|
||||
return "default"
|
||||
|
||||
|
||||
def _provider_delay(provider: str) -> float:
|
||||
optname = {
|
||||
"vt": "tke_throttle_vt_sec",
|
||||
"ipinfo": "tke_throttle_ipinfo_sec",
|
||||
"ipapi": "tke_throttle_ipapi_sec",
|
||||
"ipwho": "tke_throttle_ipwho_sec",
|
||||
"rdap": "tke_throttle_rdap_sec",
|
||||
"mb": "tke_throttle_mb_sec",
|
||||
}.get(provider, "tke_throttle_default_sec")
|
||||
try:
|
||||
return max(0.0, float(opt(optname, 0.0)))
|
||||
except Exception:
|
||||
return 0.0
|
||||
|
||||
|
||||
def _wait_for_slot(provider: str) -> None:
|
||||
now = time.monotonic()
|
||||
with _rate_lock:
|
||||
ready = max(
|
||||
now,
|
||||
_next_allowed_at.get(provider, 0.0),
|
||||
_retry_after_until.get(provider, 0.0),
|
||||
)
|
||||
_next_allowed_at[provider] = ready + _provider_delay(provider)
|
||||
if ready > now:
|
||||
time.sleep(ready - now)
|
||||
|
||||
|
||||
def _mark_retry_after(provider: str, retry_after_s: float) -> None:
|
||||
if retry_after_s <= 0:
|
||||
return
|
||||
until = time.monotonic() + retry_after_s
|
||||
with _rate_lock:
|
||||
prev = _retry_after_until.get(provider, 0.0)
|
||||
if until > prev:
|
||||
_retry_after_until[provider] = until
|
||||
|
||||
|
||||
def _parse_retry_after(value: str) -> Optional[float]:
|
||||
v = (value or "").strip()
|
||||
if not v:
|
||||
return None
|
||||
try:
|
||||
sec = float(v)
|
||||
if sec >= 0:
|
||||
return sec
|
||||
except Exception:
|
||||
pass
|
||||
try:
|
||||
dt = parsedate_to_datetime(v)
|
||||
if dt is None:
|
||||
return None
|
||||
# parsedate_to_datetime can return naive dt; treat as UTC then.
|
||||
if dt.tzinfo is None:
|
||||
return max(0.0, dt.timestamp() - time.time())
|
||||
return max(0.0, dt.timestamp() - time.time())
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def _request_json(
|
||||
method: str,
|
||||
url: str,
|
||||
*,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
data: Optional[Dict[str, Any]] = None,
|
||||
provider: Optional[str] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
try:
|
||||
import requests # optional dep
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
prov = provider or _provider_for_url(url)
|
||||
try:
|
||||
retries = max(0, int(opt("tke_http_retries", 1)))
|
||||
except Exception:
|
||||
retries = 1
|
||||
|
||||
for attempt in range(retries + 1):
|
||||
_wait_for_slot(prov)
|
||||
try:
|
||||
r = requests.request(
|
||||
method,
|
||||
url,
|
||||
headers=headers,
|
||||
data=data,
|
||||
timeout=http_timeout(),
|
||||
)
|
||||
except Exception:
|
||||
if attempt < retries:
|
||||
continue
|
||||
return None
|
||||
|
||||
if r.status_code == 429:
|
||||
ra = _parse_retry_after(r.headers.get("Retry-After", ""))
|
||||
if ra is None:
|
||||
ra = max(1.0, _provider_delay(prov))
|
||||
_mark_retry_after(prov, ra)
|
||||
if attempt < retries:
|
||||
continue
|
||||
return None
|
||||
|
||||
if not r.ok:
|
||||
if 500 <= r.status_code < 600 and attempt < retries:
|
||||
_mark_retry_after(prov, max(1.0, _provider_delay(prov)))
|
||||
continue
|
||||
return None
|
||||
|
||||
try:
|
||||
return r.json()
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
return None
|
||||
|
||||
|
||||
def http_get_json(
|
||||
url: str,
|
||||
*,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
provider: Optional[str] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
return _request_json("GET", url, headers=headers, provider=provider)
|
||||
|
||||
|
||||
def http_post_json(
|
||||
url: str,
|
||||
data: Dict[str, Any],
|
||||
*,
|
||||
headers: Optional[Dict[str, str]] = None,
|
||||
provider: Optional[str] = None,
|
||||
) -> Optional[Dict[str, Any]]:
|
||||
return _request_json("POST", url, headers=headers, data=data, provider=provider)
|
||||
Reference in New Issue
Block a user