"""
IOC datatypes for VisiData: domains, URLs, and hashes.

Features:
- Domain normalization and lookups: RDAP, DNS, VirusTotal domain report.
- URL parsing and VT URL report.
- Hash detection + VT file report and MalwareBazaar fallback.

All network lookups are cached in the local sqlite cache db
(`options.tke_cache_db_path`).
"""

from __future__ import annotations

import functools
import os
import re
from dataclasses import dataclass
from typing import Any, Dict, Optional, Tuple
from urllib.parse import urlsplit

from visidata import vd
from visidata.sheets import TableSheet

from .iplib import JSONNode, VTInfo, parse_vt_domain, parse_vt_file, parse_vt_url
from .ioclib import MBInfo, URLParts, parse_mb_info, vt_url_id
from .lookupcore import (
    auth_tag,
    cache_ttl,
    error_ttl,
    http_get_json,
    http_post_json,
    opt,
    sqlite_getset,
)

vd.option(
    "tke_rdap_base", "https://rdap.org", "base URL for RDAP queries", sheettype=None
)
vd.option(
    "tke_mb_api_base",
    "https://mb-api.abuse.ch/api/v1/",
    "base URL for MalwareBazaar API",
    sheettype=None,
)


def _is_nullish(v: Any) -> bool:
    """Return True for values treated as "no value": None, "" or "null"."""
    return v is None or v == "" or v == "null"


def _vt_key() -> str:
    """Return the VirusTotal API key, or "" when none is configured.

    Sources are tried in order: the `tke_vt_api_key` option, the
    `VT_API_KEY` and `VIRUSTOTAL_API_KEY` environment variables, then the
    file `~/.virustotal_api_key`.
    """
    from .lookupcore import read_key_from_file

    return str(
        opt("tke_vt_api_key", "")
        or os.getenv("VT_API_KEY")
        or os.getenv("VIRUSTOTAL_API_KEY")
        or read_key_from_file("~/.virustotal_api_key")
        or ""
    )


def _rdap_base() -> str:
    """Return the RDAP base URL without trailing slashes."""
    return str(opt("tke_rdap_base", "https://rdap.org") or "https://rdap.org").rstrip(
        "/"
    )


def _mb_base() -> str:
    """Return the MalwareBazaar API URL, or "" when the option is falsy."""
    return str(opt("tke_mb_api_base", "https://mb-api.abuse.ch/api/v1/") or "").strip()


# NOTE(review): every fetcher below is wrapped in functools.lru_cache, so its
# result -- including the None returned when no API key / base URL is
# configured -- is memoized for the life of the process.  A key or base URL
# configured after the first lookup of a value is therefore not picked up for
# that value until the in-memory cache is cleared.


@functools.lru_cache(maxsize=4096)
def _rdap_domain_raw(domain: str) -> Optional[Dict[str, Any]]:
    """Fetch the RDAP record for `domain` through the sqlite cache."""
    base = _rdap_base()
    url = f"{base}/domain/{domain}"
    return sqlite_getset(
        f"rdap_domain:{domain}",
        lambda: http_get_json(url, provider="rdap"),
        max_age=cache_ttl(),
        error_max_age=error_ttl(),
    )


@functools.lru_cache(maxsize=4096)
def _vt_domain_raw(domain: str) -> Optional[Dict[str, Any]]:
    """Fetch the VirusTotal domain report; None when no API key is set."""
    key = _vt_key()
    if not key:
        return None
    # The cache key carries auth_tag(key) rather than the API key itself.
    tag = auth_tag(key)
    url = f"https://www.virustotal.com/api/v3/domains/{domain}"
    return sqlite_getset(
        f"vt_domain:{tag}:{domain}",
        lambda: http_get_json(url, headers={"x-apikey": key}, provider="vt"),
        max_age=cache_ttl(),
        error_max_age=error_ttl(),
    )


@functools.lru_cache(maxsize=4096)
def _vt_url_raw(url: str) -> Optional[Dict[str, Any]]:
    """Fetch the VirusTotal URL report; None when no API key is set."""
    key = _vt_key()
    if not key:
        return None
    tag = auth_tag(key)
    # VT addresses URL objects by an identifier derived from the URL.
    url_id = vt_url_id(url)
    api = f"https://www.virustotal.com/api/v3/urls/{url_id}"
    return sqlite_getset(
        f"vt_url:{tag}:{url_id}",
        lambda: http_get_json(api, headers={"x-apikey": key}, provider="vt"),
        max_age=cache_ttl(),
        error_max_age=error_ttl(),
    )


@functools.lru_cache(maxsize=4096)
def _vt_file_raw(h: str) -> Optional[Dict[str, Any]]:
    """Fetch the VirusTotal file report for hash `h`; None without a key."""
    key = _vt_key()
    if not key:
        return None
    tag = auth_tag(key)
    url = f"https://www.virustotal.com/api/v3/files/{h}"
    return sqlite_getset(
        f"vt_file:{tag}:{h}",
        lambda: http_get_json(url, headers={"x-apikey": key}, provider="vt"),
        max_age=cache_ttl(),
        error_max_age=error_ttl(),
    )


@functools.lru_cache(maxsize=4096)
def _mb_hash_raw(h: str) -> Optional[Dict[str, Any]]:
    """Query MalwareBazaar `get_info` for hash `h`; None when no base URL."""
    base = _mb_base()
    if not base:
        return None
    return sqlite_getset(
        f"mb_hash:{h}",
        lambda: http_post_json(base, {"query": "get_info", "hash": h}, provider="mb"),
        max_age=cache_ttl(),
        error_max_age=error_ttl(),
    )


@dataclass(frozen=True)
class DNSInfo:
    """DNS answers for one domain, one tuple of record strings per type."""

    a: Tuple[str, ...] = ()
    aaaa: Tuple[str, ...] = ()
    cname: Tuple[str, ...] = ()
    mx: Tuple[str, ...] = ()
    ns: Tuple[str, ...] = ()
    txt: Tuple[str, ...] = ()
    # Same answers keyed by upper-case record type, as built in _dns_info().
    raw: Optional[Dict[str, Any]] = None
    # "dns" for a resolved result, "" for the empty fallback.
    source: str = ""

    @property
    def data(self) -> JSONNode:
        """Return `raw` wrapped in a JSONNode."""
        return JSONNode(self.raw)


def _dns_resolve(domain: str, rtype: str) -> Tuple[str, ...]:
    """Resolve `rtype` records for `domain`; return () on any failure.

    dnspython is an optional dependency: a missing package and every
    resolver error both yield an empty tuple (deliberate best effort).
    """
    domain = domain.rstrip(".")
    rtype = rtype.upper()
    try:
        import dns.resolver  # optional dep

        ans = dns.resolver.resolve(domain, rtype)
        return tuple(str(r) for r in ans)
    except Exception:
        return ()


@functools.lru_cache(maxsize=4096)
def _dns_info(domain: str) -> DNSInfo:
    """Return A/AAAA/CNAME/MX/NS/TXT answers for `domain` via the cache.

    Falls back to an empty `DNSInfo(source="")` when the cache layer
    returns a falsy value.
    """

    def _do() -> DNSInfo:
        a = _dns_resolve(domain, "A")
        aaaa = _dns_resolve(domain, "AAAA")
        cname = _dns_resolve(domain, "CNAME")
        mx = _dns_resolve(domain, "MX")
        ns = _dns_resolve(domain, "NS")
        txt = _dns_resolve(domain, "TXT")
        raw = {"A": a, "AAAA": aaaa, "CNAME": cname, "MX": mx, "NS": ns, "TXT": txt}
        return DNSInfo(
            a=a, aaaa=aaaa, cname=cname, mx=mx, ns=ns, txt=txt, raw=raw, source="dns"
        )

    # NOTE(review): unlike the HTTP fetchers, the cached value here is a
    # DNSInfo object, not a plain dict -- confirm sqlite_getset round-trips it.
    return sqlite_getset(
        f"dns:{domain}",
        _do,
        max_age=cache_ttl(),
        error_max_age=error_ttl(),
    ) or DNSInfo(source="")


@functools.total_ordering
class DomainValue:
    """A normalized domain name with lazy RDAP / DNS / VirusTotal lookups.

    Equality, hashing and ordering are based on the domain string and are
    only defined against other `DomainValue` instances.
    """

    __slots__ = ("_d",)

    def __init__(self, domain: str):
        self._d = domain

    @property
    def domain(self) -> str:
        """The domain string."""
        return self._d

    def __str__(self) -> str:
        return self._d

    def __repr__(self) -> str:
        return f"DomainValue({self._d!r})"

    def __hash__(self) -> int:
        return hash(self._d)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, DomainValue) and self._d == other._d

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, DomainValue):
            return NotImplemented
        return self._d < other._d

    @property
    def rdap(self) -> JSONNode:
        """RDAP record for this domain wrapped in a JSONNode."""
        return JSONNode(_rdap_domain_raw(self._d))

    @property
    def dns(self) -> DNSInfo:
        """DNS answers for this domain."""
        return _dns_info(self._d)

    @property
    def vt(self) -> VTInfo:
        """VirusTotal domain report, or an empty VTInfo when unavailable."""
        data = _vt_domain_raw(self._d)
        return parse_vt_domain(data) if data else VTInfo(object_type="domain")

    @property
    def resolveipv4(self):
        """Tuple of converted A records; values `ip()` maps to None are dropped."""
        from .iptype import ip

        return tuple(iv for iv in map(ip, self.dns.a) if iv is not None)

    @property
    def resolveipv6(self):
        """Tuple of converted AAAA records; values `ip()` maps to None are dropped."""
        from .iptype import ip

        return tuple(iv for iv in map(ip, self.dns.aaaa) if iv is not None)

    @property
    def resolveips(self):
        """All resolved addresses, IPv4 first, then IPv6."""
        return self.resolveipv4 + self.resolveipv6

    @property
    def resolveip(self):
        """First resolved address (IPv4 preferred), or None if there is none."""
        ips4 = self.resolveipv4
        if ips4:
            return ips4[0]
        ips6 = self.resolveipv6
        if ips6:
            return ips6[0]
        return None


def _normalize_domain(s: str) -> str:
    """Reduce `s` to a bare lower-case host name.

    Handles full URLs (`scheme://host/...`), protocol-relative URLs
    (`//host/...`) and scheme-less input with a trailing path, query or
    fragment (`host/path`, `host?x=1`, `host#frag`).  Surrounding
    whitespace, trailing dots and IPv6 brackets are removed.

    Returns "" when nothing usable remains.
    """
    s = s.strip().lower()
    if not s:
        return ""

    if "://" in s or s.startswith("//"):
        # Strip scheme/path if the input is a URL.
        try:
            host = urlsplit(s).hostname
        except ValueError:
            # urlsplit rejects malformed netlocs; keep the text as given.
            host = None
        if host:
            s = host
    else:
        # No scheme: a host name never contains "/", "?" or "#", so anything
        # from the first of those onwards is path/query/fragment.
        for sep in "/?#":
            s = s.partition(sep)[0]

    s = s.strip().rstrip(".")

    # Strip brackets around IPv6 host literals if accidentally passed.
    if s.startswith("[") and s.endswith("]"):
        s = s[1:-1]
    return s


def domain(val: Any) -> Optional[DomainValue]:
    """Convert `val` to a DomainValue, or None for null-ish / empty input."""
    if _is_nullish(val):
        return None
    if isinstance(val, DomainValue):
        return val
    s = _normalize_domain(str(val))
    if not s:
        return None
    return DomainValue(s)


@functools.total_ordering
class URLValue:
    """A URL string together with its parsed components.

    Equality, hashing and ordering are based on the URL string and are
    only defined against other `URLValue` instances.
    """

    __slots__ = ("_u", "_parts")

    def __init__(self, url: str, parts: URLParts):
        self._u = url
        self._parts = parts

    @property
    def url(self) -> str:
        """The URL string."""
        return self._u

    def __str__(self) -> str:
        return self._u

    def __repr__(self) -> str:
        return f"URLValue({self._u!r})"

    def __hash__(self) -> int:
        return hash(self._u)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, URLValue) and self._u == other._u

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, URLValue):
            return NotImplemented
        return self._u < other._u

    @property
    def parts(self) -> URLParts:
        """Parsed components of the URL."""
        return self._parts

    @property
    def host(self) -> str:
        """Host component of the URL."""
        return self._parts.host

    @property
    def domain(self) -> Optional[DomainValue]:
        """Host as a DomainValue, or None when the URL has no host."""
        return domain(self._parts.host)

    @property
    def vt(self) -> VTInfo:
        """VirusTotal URL report, or an empty VTInfo when unavailable."""
        data = _vt_url_raw(self._u)
        return parse_vt_url(data) if data else VTInfo(object_type="url")


def url_ioc(val: Any) -> Optional[URLValue]:
    """Convert `val` to a URLValue.

    Returns None for null-ish or blank input and when parsing raises.
    Input without "://" that looks like `host.tld` optionally followed by
    "/" gets an "http://" prefix before parsing.
    """
    if _is_nullish(val):
        return None
    if isinstance(val, URLValue):
        return val
    s = str(val).strip()
    if not s:
        return None

    # Accept bare domains by prefixing scheme (so parsing is consistent).
    if "://" not in s and re.match(r"^[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}(/|$)", s):
        s = "http://" + s

    try:
        sp = urlsplit(s)
        parts = URLParts(
            scheme=sp.scheme or "",
            username=sp.username or "",
            password=sp.password or "",
            host=sp.hostname or "",
            port=sp.port,  # raises ValueError for an invalid port
            path=sp.path or "",
            query=sp.query or "",
            fragment=sp.fragment or "",
        )
        return URLValue(s, parts)
    except Exception:
        return None


@functools.total_ordering
class HashValue:
    """A hex digest string with VirusTotal and MalwareBazaar lookups.

    Equality, hashing and ordering are based on the digest string and are
    only defined against other `HashValue` instances.
    """

    __slots__ = ("_h",)

    def __init__(self, h: str):
        self._h = h

    @property
    def hash(self) -> str:
        """The digest string."""
        return self._h

    def __str__(self) -> str:
        return self._h

    def __repr__(self) -> str:
        return f"HashValue({self._h!r})"

    def __hash__(self) -> int:
        return hash(self._h)

    def __eq__(self, other: object) -> bool:
        return isinstance(other, HashValue) and self._h == other._h

    def __lt__(self, other: object) -> bool:
        if not isinstance(other, HashValue):
            return NotImplemented
        return self._h < other._h

    @property
    def kind(self) -> str:
        """Digest type inferred from length: md5, sha1, sha256, or ""."""
        n = len(self._h)
        if n == 32:
            return "md5"
        if n == 40:
            return "sha1"
        if n == 64:
            return "sha256"
        return ""

    @property
    def vt(self) -> VTInfo:
        """VirusTotal file report, or an empty VTInfo when unavailable."""
        data = _vt_file_raw(self._h)
        return parse_vt_file(data) if data else VTInfo(object_type="file")

    @property
    def mb(self) -> MBInfo:
        """MalwareBazaar record, or an empty MBInfo when unavailable."""
        data = _mb_hash_raw(self._h)
        return parse_mb_info(data) if data else MBInfo()


# Exactly 32, 40 or 64 hex characters (md5 / sha1 / sha256 lengths).
_HASH_RE = re.compile(r"^[A-Fa-f0-9]{32}$|^[A-Fa-f0-9]{40}$|^[A-Fa-f0-9]{64}$")


def hash_ioc(val: Any) -> Optional[HashValue]:
    """Convert `val` to a lower-cased HashValue, or None if it is not a hash."""
    if _is_nullish(val):
        return None
    if isinstance(val, HashValue):
        return val
    s = str(val).strip()
    if not s:
        return None
    if not _HASH_RE.match(s):
        return None
    return HashValue(s.lower())


# Make custom converters available in command/expr globals.
vd.addGlobals(domain=domain, url_ioc=url_ioc, hash_ioc=hash_ioc)


def _format_ioc(fmt, v):
    """Render an IOC cell as text; None is shown as an empty string."""
    return "" if v is None else str(v)


# Register each converter as a column type: (converter, icon, display name).
for _conv, _icon, _type_name in (
    (domain, "🌐", "Domain"),
    (url_ioc, "🔗", "URL"),
    (hash_ioc, "🔐", "Hash"),
):
    vd.addType(_conv, icon=_icon, formatter=_format_ioc, name=_type_name)

# One row per type command: (longname, exec string, help text, menu label, key).
_IOC_TYPE_COMMANDS = (
    (
        "type-domain",
        "cursorCol.type=domain",
        "set type of current column to Domain",
        "Domain",
        ";d",
    ),
    (
        "type-url-ioc",
        "cursorCol.type=url_ioc",
        "set type of current column to URL (IOC)",
        "URL (IOC)",
        ";u",
    ),
    (
        "type-hash",
        "cursorCol.type=hash_ioc",
        "set type of current column to Hash (md5/sha1/sha256)",
        "Hash",
        ";h",
    ),
)

# Commands are added without a key binding (None); keys are bound further down.
for _longname, _execstr, _helpstr, _menu_label, _keys in _IOC_TYPE_COMMANDS:
    TableSheet.addCommand(None, _longname, _execstr, _helpstr)

for _longname, _execstr, _helpstr, _menu_label, _keys in _IOC_TYPE_COMMANDS:
    vd.addMenuItem("Column", "Type", _menu_label, _longname)

# Best effort: bind a shortcut only where a probe sheet reports no command
# for that key yet; any failure while probing or binding is ignored.
try:
    _probe = TableSheet("_probe")
    for _longname, _execstr, _helpstr, _menu_label, _keys in _IOC_TYPE_COMMANDS:
        if _probe.getCommand(_keys) is None:
            TableSheet.bindkey(_keys, _longname)
except Exception:
    pass