Restructure repository: organize tools by purpose, create 'what' search tool

- Move single-file tools to tools/ organized by category (security, forensics, data, etc.)
- Move multi-file projects to projects/ (go-tools, puzzlebox, timesketch, rust-tools)
- Move system scripts to scripts/ (proxy, display, setup, windows)
- Organize config files in config/ (shell, visidata, applications)
- Move experimental tools to archive/experimental
- Create 'what' fuzzy search tool with progressive enhancement (ollama->fzf->grep)
- Add initial metadata database for intelligent tool discovery
- Preserve git history using 'git mv' commands
This commit is contained in:
tobias
2025-08-24 19:50:00 +02:00
parent 9518290544
commit 619b0bc432
124 changed files with 1063 additions and 0 deletions

423
what Executable file
View File

@@ -0,0 +1,423 @@
#!/usr/bin/env python3
"""
'what' - Smart repository search tool with progressive enhancement
Fallback hierarchy:
1. Ollama + Gemma2 (natural language search)
2. fzf (fuzzy finding)
3. grep (simple text search)
Usage:
what <query> # Find tools matching query
what -h # Show help
what -l # List all tools with short descriptions
what -a <filepath> # Add new file to database
"""
import os
import sys
import json
import argparse
import subprocess
import shutil
from pathlib import Path
import re
# Configuration: every path is anchored to the directory holding this script.
REPO_ROOT = Path(__file__).parent.absolute()
# Tool metadata database, kept as a hidden JSON file in the repository root.
DB_FILE = REPO_ROOT.joinpath(".what_db.json")
class WhatTool:
    """Catalogue of repository tools with layered search back-ends.

    Tool metadata is kept in a JSON database (``DB_FILE``). A search tries
    Ollama first, then fzf, then a plain text match, using whichever of
    those is available on the current machine.
    """

    # Model tag passed to every ``ollama run`` call. check_ollama() looks for
    # this exact tag so that detection and usage cannot disagree.
    OLLAMA_MODEL = "gemma2:2b"

    # Leading list decoration ("- ", "* ", "1. ", "2) ") that language models
    # tend to add even when asked for bare names.
    _LIST_MARKER = re.compile(r'^(?:[-*+]\s+|\d+[.)]\s+)')

    # Characters stripped from both ends of a name echoed back by the model.
    _NAME_WRAPPERS = '`"\'* '

    def __init__(self):
        self.db_path = DB_FILE
        self.data = self.load_db()
        # Detect available tools
        self.has_ollama = self.check_ollama()
        self.has_fzf = shutil.which('fzf') is not None

    def load_db(self):
        """Load the tool database.

        Returns:
            The parsed database dict. A fresh empty database is returned when
            the file is missing, is not valid UTF-8 JSON, or does not contain
            a ``"tools"`` mapping (the latter two print a warning).
        """
        if self.db_path.exists():
            try:
                with open(self.db_path, 'r', encoding='utf-8') as f:
                    data = json.load(f)
            except (json.JSONDecodeError, UnicodeDecodeError):
                data = None
            # Valid JSON of the wrong shape would only fail later with a
            # KeyError, so treat it the same as a corrupted file.
            if isinstance(data, dict) and isinstance(data.get("tools"), dict):
                return data
            print(f"Warning: Corrupted database {self.db_path}, creating new one")
        return {
            "version": "1.0",
            "tools": {}
        }

    def save_db(self):
        """Write the in-memory database to ``self.db_path`` as sorted JSON."""
        with open(self.db_path, 'w', encoding='utf-8') as f:
            json.dump(self.data, f, indent=2, sort_keys=True)

    def check_ollama(self):
        """Check if ollama with the configured model is available.

        Returns:
            True when ``ollama list`` succeeds and mentions
            ``OLLAMA_MODEL``; False when ollama is missing, cannot be run,
            times out, or does not list the model.
        """
        try:
            result = subprocess.run(
                ['ollama', 'list'], capture_output=True, text=True, timeout=5
            )
        except (subprocess.SubprocessError, OSError):
            # OSError covers a missing binary as well as permission problems.
            return False
        if result.returncode != 0:
            return False
        return self.OLLAMA_MODEL.lower() in result.stdout.lower()

    def get_file_type(self, filepath):
        """Determine a human-readable file type for ``filepath``.

        Args:
            filepath: ``pathlib.Path`` to classify.

        Returns:
            ``"missing"``, ``"directory"``, or a label derived from the file
            extension and the executable bit.
        """
        if not filepath.exists():
            return "missing"
        if filepath.is_dir():
            return "directory"

        is_executable = os.access(filepath, os.X_OK)
        suffix = filepath.suffix.lower()

        if suffix == '.py':
            return "python script" if is_executable else "python module"

        known_suffixes = {
            '.sh': "shell script",
            '.go': "go program",
            '.js': "javascript",
            '.ps1': "powershell script",
            '.rs': "rust program",
            '.c': "c/c++ source",
            '.cpp': "c/c++ source",
            '.awk': "awk script",
        }
        if suffix in known_suffixes:
            return known_suffixes[suffix]
        if not suffix:
            return "binary executable" if is_executable else "script"
        return f"{suffix[1:]} file"

    def analyze_file_with_ollama(self, filepath):
        """Ask the Ollama model to describe a file.

        Args:
            filepath: ``pathlib.Path`` of the file to analyse.

        Returns:
            A dict parsed from the model's JSON reply (expected keys:
            ``summary``, ``purpose``, ``short_description``), or None when
            the analysis fails for any reason.
        """
        try:
            if filepath.stat().st_size > 50000:  # Skip very large files
                content = "[File too large for analysis]"
            else:
                try:
                    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                        # Only the first 2000 characters go into the prompt.
                        content = f.read(2000)
                except OSError:
                    content = "[Binary or unreadable file]"
                else:
                    # NUL bytes mark binary data and cannot be passed in a
                    # process argument (subprocess raises ValueError).
                    if '\x00' in content:
                        content = "[Binary or unreadable file]"

            prompt = f"""
Analyze this code/script file and provide ONLY a JSON response with these fields:
Filename: {filepath.name}
File type: {self.get_file_type(filepath)}
Content preview:
{content}
Respond with ONLY this JSON structure:
{{
"summary": "Brief 1-2 sentence summary of what this tool does and how it works",
"purpose": "What this tool is used for (e.g., 'Network analysis', 'File processing', 'Security scanning')",
"short_description": "Very short description for listings (e.g., 'like md5sum but for files inside tarballs')"
}}
"""
            result = subprocess.run(
                ['ollama', 'run', self.OLLAMA_MODEL, prompt],
                capture_output=True, text=True, timeout=30
            )
            if result.returncode == 0:
                # The model may wrap the JSON in prose; grab the outermost braces.
                json_match = re.search(r'\{.*\}', result.stdout.strip(), re.DOTALL)
                if json_match:
                    analysis = json.loads(json_match.group())
                    if isinstance(analysis, dict):
                        return analysis
        except Exception as e:
            # Deliberately broad: the analysis is best-effort and the caller
            # falls back to manual input on None.
            print(f"Ollama analysis failed: {e}")
        return None

    @staticmethod
    def _prompt_with_default(label, default):
        """Prompt for ``label`` and return ``default`` when the reply is empty."""
        return input(f"{label} [{default}]: ").strip() or default

    def add_file_interactive(self, filepath):
        """Add a file to the database, prompting the user for its metadata.

        When Ollama is available its analysis is offered as the default for
        each prompt; otherwise all three fields are typed in manually. The
        database is saved afterwards.

        Args:
            filepath: ``pathlib.Path`` located under ``REPO_ROOT``.

        Raises:
            ValueError: if ``filepath`` is not under ``REPO_ROOT``
                (from ``Path.relative_to``).
        """
        rel_path = str(filepath.relative_to(REPO_ROOT))
        file_type = self.get_file_type(filepath)
        print(f"\nAdding: {rel_path}")
        print(f"Type: {file_type}")
        print()

        analysis = None
        if self.has_ollama:
            print("Analyzing with Ollama Gemma2...")
            analysis = self.analyze_file_with_ollama(filepath)
            if analysis:
                print("AI Analysis complete. Review and edit if needed:")
            else:
                print("AI analysis failed, using manual input:")
        else:
            print("Manual input (Ollama not available):")

        if analysis:
            # Empty replies keep the AI suggestion.
            summary = self._prompt_with_default("Summary", analysis.get('summary', ''))
            purpose = self._prompt_with_default("Purpose", analysis.get('purpose', ''))
            short_desc = self._prompt_with_default(
                "Short description", analysis.get('short_description', '')
            )
        else:
            summary = input("Summary (what it does and how): ").strip()
            purpose = input("Purpose (what it's used for): ").strip()
            short_desc = input("Short description (for listings): ").strip()

        # Store in database
        self.data["tools"][rel_path] = {
            "path": rel_path,
            "name": filepath.name,
            "type": file_type,
            "summary": summary,
            "purpose": purpose,
            "short_description": short_desc,
            "executable": os.access(filepath, os.X_OK)
        }
        self.save_db()
        print(f"✓ Added {rel_path} to database")

    def _match_tool_names(self, response, limit=10):
        """Map the lines of a model reply onto database entries.

        List markers, surrounding quotes/backticks/asterisks and a trailing
        ``": description"`` are tolerated. Unknown names and repeats are
        skipped; when several tools share a name the first one wins.

        Args:
            response: raw text returned by the model.
            limit: maximum number of tools to return.

        Returns:
            Matching tool dicts in the order the model listed them.
        """
        by_name = {}
        for tool_data in self.data["tools"].values():
            by_name.setdefault(tool_data['name'], tool_data)

        matches = []
        seen = set()
        for raw_line in response.splitlines():
            candidate = self._LIST_MARKER.sub('', raw_line.strip())
            candidate = candidate.strip(self._NAME_WRAPPERS)
            if not candidate:
                continue
            if candidate not in by_name:
                # The model often echoes the "name: summary" line it was given.
                candidate = candidate.split(':', 1)[0].strip(self._NAME_WRAPPERS)
            if candidate in by_name and candidate not in seen:
                seen.add(candidate)
                matches.append(by_name[candidate])
                if len(matches) >= limit:
                    break
        return matches

    def search_with_ollama(self, query):
        """Search using natural language with Ollama.

        Args:
            query: free-text search query.

        Returns:
            Up to 10 matching tool dicts in the model's order, or None when
            the call fails or none of the returned names is a known tool, so
            that the caller can fall back to the next search method.
        """
        try:
            tools_text = "\n".join(
                f"{tool_data['name']}: {tool_data['summary']} (Purpose: {tool_data['purpose']})"
                for tool_data in self.data["tools"].values()
            )
            prompt = f"""
Given this query: "{query}"
Find the most relevant tools from this list. Respond with ONLY the tool names (one per line) in order of relevance:
{tools_text}
Query: {query}
Response (tool names only, one per line, max 10):
"""
            result = subprocess.run(
                ['ollama', 'run', self.OLLAMA_MODEL, prompt],
                capture_output=True, text=True, timeout=20
            )
            if result.returncode == 0:
                # An unusable reply is reported as None rather than [] so it
                # does not hide results the simpler back-ends would find.
                return self._match_tool_names(result.stdout) or None
        except Exception as e:
            print(f"Ollama search failed: {e}")
        return None

    def search_with_fzf(self, query):
        """Search using fzf fuzzy finder in non-interactive filter mode.

        Args:
            query: fuzzy pattern handed to ``fzf --filter``.

        Returns:
            Matching tool dicts in database order, or None when fzf exits
            with a non-zero status or cannot be run.
        """
        try:
            # One line per tool; the path goes last so it can be split off again.
            search_input = "\n".join(
                f"{tool_data['name']} # {tool_data['short_description']} | {tool_data['path']}"
                for tool_data in self.data["tools"].values()
            )
            result = subprocess.run(
                ['fzf', '--filter', query, '--no-sort'],
                input=search_input, capture_output=True, text=True
            )
            if result.returncode == 0:
                matches = []
                for line in result.stdout.strip().split('\n'):
                    _, separator, path = line.rpartition(' | ')
                    if separator and path in self.data["tools"]:
                        matches.append(self.data["tools"][path])
                return matches
        except Exception as e:
            print(f"fzf search failed: {e}")
        return None

    def search_with_grep(self, query):
        """Fallback search using grep-like, case-insensitive text matching.

        The query is split on whitespace and a tool matches when every term
        occurs in its name, summary, purpose or short description. A
        single-word query therefore behaves as a plain substring search.

        Args:
            query: search text.

        Returns:
            At most 20 tool dicts, best match first. Each term scores by the
            fields it appears in (name 10, short description 5, summary 3,
            purpose 2).
        """
        terms = query.lower().split()
        if not terms:
            return []

        field_weights = (
            ('name', 10),
            ('short_description', 5),
            ('summary', 3),
            ('purpose', 2),
        )

        def haystack(tool):
            return (
                f"{tool['name']} {tool['summary']} "
                f"{tool['purpose']} {tool['short_description']}"
            ).lower()

        def score_match(tool):
            return sum(
                weight
                for field, weight in field_weights
                for term in terms
                if term in tool[field].lower()
            )

        matches = [
            tool_data
            for tool_data in self.data["tools"].values()
            if all(term in haystack(tool_data) for term in terms)
        ]
        matches.sort(key=score_match, reverse=True)
        return matches[:20]  # Limit results

    def search(self, query):
        """Search using the best available method.

        Args:
            query: search text; an empty query returns no results.

        Returns:
            A list of tool dicts (possibly empty).
        """
        if not query:
            return []
        print(f"Searching for: {query}")
        if not self.data["tools"]:
            # Nothing to search; avoid a pointless model call.
            return []

        # Try Ollama first
        if self.has_ollama:
            print("Using Ollama Gemma2 for natural language search...")
            results = self.search_with_ollama(query)
            if results is not None:
                return results
            print("Ollama search failed, falling back to fzf...")

        # Try fzf
        if self.has_fzf:
            print("Using fzf for fuzzy search...")
            results = self.search_with_fzf(query)
            if results is not None:
                return results
            print("fzf search failed, falling back to grep...")

        # Fallback to grep
        print("Using basic text search...")
        return self.search_with_grep(query)

    def list_all_tools(self):
        """Print every tool, sorted by name, with its short description.

        Executable tools are marked with a leading ``*``.
        """
        if not self.data["tools"]:
            print("No tools in database. Use 'what -a <file>' to add tools.")
            return
        print("Available tools:")
        print()
        tools = sorted(self.data["tools"].values(), key=lambda x: x['name'])
        # Pad names to a common width so the descriptions line up.
        max_name_len = max(len(tool['name']) for tool in tools)
        for tool in tools:
            executable_mark = "*" if tool.get('executable', False) else " "
            name_padded = tool['name'].ljust(max_name_len)
            print(f"{executable_mark}{name_padded} # {tool['short_description']}")

    def show_search_results(self, results):
        """Print a numbered listing of search results.

        Args:
            results: list of tool dicts as returned by ``search``.
        """
        if not results:
            print("No tools found matching your query.")
            return
        print(f"\nFound {len(results)} tool(s):")
        print()
        for i, tool in enumerate(results, 1):
            executable_mark = "*" if tool.get('executable', False) else " "
            print(f"{i:2d}. {executable_mark}{tool['name']}")
            print(f" Path: {tool['path']}")
            print(f" Type: {tool['type']}")
            print(f" Purpose: {tool['purpose']}")
            print(f" Summary: {tool['summary']}")
            print()
def main():
    """Parse the command line and dispatch to list, add, or search.

    With no arguments the help text is printed, followed by which search
    back-ends are available. Exits with status 1 when the path given to
    ``-a`` does not exist or lies outside the repository.
    """
    parser = argparse.ArgumentParser(description="Smart repository search tool")
    parser.add_argument("query", nargs="?", help="Search query")
    parser.add_argument("-l", "--list", action="store_true",
                        help="List all tools with short descriptions")
    parser.add_argument("-a", "--add", metavar="PATH",
                        help="Add new file to database")
    args = parser.parse_args()
    tool = WhatTool()

    if args.list:
        tool.list_all_tools()
        return

    if args.add:
        # is_relative_to()/relative_to() compare paths lexically, so a
        # relative argument such as "tools/foo.py" would never match the
        # absolute REPO_ROOT. abspath anchors it to the current directory and
        # collapses ".." without resolving symlinks, which mirrors how
        # REPO_ROOT is built (Path.absolute()).
        filepath = Path(os.path.abspath(args.add))
        if not filepath.exists():
            print(f"Error: File {filepath} does not exist")
            sys.exit(1)
        if not filepath.is_relative_to(REPO_ROOT):
            print(f"Error: File must be within the repository ({REPO_ROOT})")
            sys.exit(1)
        tool.add_file_interactive(filepath)
        return

    if not args.query:
        parser.print_help()
        print()
        print("Available search methods:")
        if tool.has_ollama:
            print(" ✓ Ollama + Gemma2 (natural language)")
        else:
            print(" ✗ Ollama + Gemma2 (not available)")
        if tool.has_fzf:
            print(" ✓ fzf (fuzzy finding)")
        else:
            print(" ✗ fzf (not available)")
        print(" ✓ grep (basic text search)")
        return

    # Perform search
    results = tool.search(args.query)
    tool.show_search_results(results)


if __name__ == "__main__":
    main()