Restructure repository: organize tools by purpose, create 'what' search tool

- Move single-file tools to tools/, organized by category (security, forensics, data, etc.)
- Move multi-file projects to projects/ (go-tools, puzzlebox, timesketch, rust-tools)
- Move system scripts to scripts/ (proxy, display, setup, windows)
- Organize config files in config/ (shell, visidata, applications)
- Move experimental tools to archive/experimental
- Create 'what' fuzzy search tool with progressive enhancement (ollama -> fzf -> grep)
- Add initial metadata database for intelligent tool discovery
- Preserve git history using 'git mv' commands
2025-08-24 19:50:00 +02:00
parent 9518290544
commit 619b0bc432
124 changed files with 1063 additions and 0 deletions
@@ -0,0 +1,423 @@
+#!/usr/bin/env python3
+
+"""
+'what' - Smart repository search tool with progressive enhancement
+
+Fallback hierarchy:
+1. Ollama + Gemma2 (natural language search) 
+2. fzf (fuzzy finding)
+3. grep (simple text search)
+
+Usage:
+    what <query>        # Find tools matching query
+    what -h             # Show help
+    what -l             # List all tools with short descriptions  
+    what -a <filepath>  # Add new file to database
+"""
+
+import os
+import sys
+import json
+import argparse
+import subprocess
+import shutil
+from pathlib import Path
+import re
+
+# Configuration
+REPO_ROOT = Path(__file__).parent.absolute()
+DB_FILE = REPO_ROOT / ".what_db.json"
+
+class WhatTool:
+    def __init__(self):
+        self.db_path = DB_FILE
+        self.data = self.load_db()
+        
+        # Detect available tools
+        self.has_ollama = self.check_ollama()
+        self.has_fzf = shutil.which('fzf') is not None
+        
+    def load_db(self):
+        """Load the tool database"""
+        if self.db_path.exists():
+            try:
+                with open(self.db_path, 'r') as f:
+                    return json.load(f)
+            except json.JSONDecodeError:
+                print(f"Warning: Corrupted database {self.db_path}, creating new one")
+        
+        return {
+            "version": "1.0",
+            "tools": {}
+        }
+    
+    def save_db(self):
+        """Save the tool database"""
+        with open(self.db_path, 'w') as f:
+            json.dump(self.data, f, indent=2, sort_keys=True)
+    
+    def check_ollama(self):
+        """Check if ollama with gemma2 is available"""
+        try:
+            result = subprocess.run(['ollama', 'list'], capture_output=True, text=True, timeout=5)
+            if result.returncode == 0:
+                # Check if gemma2 model is available
+                models = result.stdout.lower()
+                return 'gemma2' in models
+        except (subprocess.TimeoutExpired, FileNotFoundError, subprocess.SubprocessError):
+            pass
+        return False
+    
+    def get_file_type(self, filepath):
+        """Determine file type"""
+        if not filepath.exists():
+            return "missing"
+            
+        if filepath.is_dir():
+            return "directory"
+            
+        # Check if executable
+        is_executable = os.access(filepath, os.X_OK)
+        
+        # Check extension
+        suffix = filepath.suffix.lower()
+        
+        if suffix == '.py':
+            return "python script" if is_executable else "python module"
+        elif suffix == '.sh':
+            return "shell script"
+        elif suffix == '.go':
+            return "go program"
+        elif suffix == '.js':
+            return "javascript"
+        elif suffix == '.ps1':
+            return "powershell script"
+        elif suffix == '.rs':
+            return "rust program"
+        elif suffix in ['.c', '.cpp']:
+            return "c/c++ source"
+        elif suffix == '.awk':
+            return "awk script"
+        elif not suffix and is_executable:
+            return "binary executable"
+        elif not suffix:
+            return "script"
+        else:
+            return f"{suffix[1:]} file"
+    
+    def analyze_file_with_ollama(self, filepath):
+        """Analyze file using Ollama Gemma2"""
+        try:
+            # Read file content (limit size for analysis)
+            content = ""
+            if filepath.stat().st_size > 50000:  # Skip very large files
+                content = "[File too large for analysis]"
+            else:
+                try:
+                    with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
+                        content = f.read()[:10000]  # First 10KB
+                except:
+                    content = "[Binary or unreadable file]"
+            
+            prompt = f"""
+Analyze this code/script file and provide ONLY a JSON response with these fields:
+
+Filename: {filepath.name}
+File type: {self.get_file_type(filepath)}
+Content preview:
+{content[:2000]}
+
+Respond with ONLY this JSON structure:
+{{
+    "summary": "Brief 1-2 sentence summary of what this tool does and how it works",
+    "purpose": "What this tool is used for (e.g., 'Network analysis', 'File processing', 'Security scanning')",
+    "short_description": "Very short description for listings (e.g., 'like md5sum but for files inside tarballs')"
+}}
+"""
+
+            result = subprocess.run([
+                'ollama', 'run', 'gemma2:2b', prompt
+            ], capture_output=True, text=True, timeout=30)
+            
+            if result.returncode == 0:
+                # Extract JSON from response
+                response = result.stdout.strip()
+                
+                # Try to find JSON in the response
+                json_match = re.search(r'\{.*\}', response, re.DOTALL)
+                if json_match:
+                    return json.loads(json_match.group())
+                
+        except (subprocess.TimeoutExpired, json.JSONDecodeError, Exception) as e:
+            print(f"Ollama analysis failed: {e}")
+        
+        return None
+    
+    def add_file_interactive(self, filepath):
+        """Add file with interactive prompts"""
+        rel_path = str(filepath.relative_to(REPO_ROOT))
+        file_type = self.get_file_type(filepath)
+        
+        print(f"\nAdding: {rel_path}")
+        print(f"Type: {file_type}")
+        print()
+        
+        if self.has_ollama:
+            print("Analyzing with Ollama Gemma2...")
+            analysis = self.analyze_file_with_ollama(filepath)
+            
+            if analysis:
+                print("AI Analysis complete. Review and edit if needed:")
+                summary = input(f"Summary [{analysis.get('summary', '')}]: ").strip()
+                purpose = input(f"Purpose [{analysis.get('purpose', '')}]: ").strip()
+                short_desc = input(f"Short description [{analysis.get('short_description', '')}]: ").strip()
+                
+                # Use AI suggestions if user didn't provide alternatives
+                summary = summary or analysis.get('summary', '')
+                purpose = purpose or analysis.get('purpose', '')
+                short_desc = short_desc or analysis.get('short_description', '')
+            else:
+                print("AI analysis failed, using manual input:")
+                summary = input("Summary (what it does and how): ").strip()
+                purpose = input("Purpose (what it's used for): ").strip()
+                short_desc = input("Short description (for listings): ").strip()
+        else:
+            print("Manual input (Ollama not available):")
+            summary = input("Summary (what it does and how): ").strip()
+            purpose = input("Purpose (what it's used for): ").strip()
+            short_desc = input("Short description (for listings): ").strip()
+        
+        # Store in database
+        self.data["tools"][rel_path] = {
+            "path": rel_path,
+            "name": filepath.name,
+            "type": file_type,
+            "summary": summary,
+            "purpose": purpose,
+            "short_description": short_desc,
+            "executable": os.access(filepath, os.X_OK)
+        }
+        
+        self.save_db()
+        print(f"✓ Added {rel_path} to database")
+    
+    def search_with_ollama(self, query):
+        """Search using natural language with Ollama"""
+        try:
+            tools_info = []
+            for tool_data in self.data["tools"].values():
+                tools_info.append(f"{tool_data['name']}: {tool_data['summary']} (Purpose: {tool_data['purpose']})")
+            
+            tools_text = "\n".join(tools_info)
+            
+            prompt = f"""
+Given this query: "{query}"
+
+Find the most relevant tools from this list. Respond with ONLY the tool names (one per line) in order of relevance:
+
+{tools_text}
+
+Query: {query}
+
+Response (tool names only, one per line, max 10):
+"""
+            
+            result = subprocess.run([
+                'ollama', 'run', 'gemma2:2b', prompt
+            ], capture_output=True, text=True, timeout=20)
+            
+            if result.returncode == 0:
+                tool_names = [line.strip() for line in result.stdout.strip().split('\n') if line.strip()]
+                
+                # Find matching tools in database
+                matches = []
+                for tool_name in tool_names[:10]:  # Limit to top 10
+                    for tool_data in self.data["tools"].values():
+                        if tool_data['name'] == tool_name:
+                            matches.append(tool_data)
+                            break
+                
+                return matches
+                
+        except Exception as e:
+            print(f"Ollama search failed: {e}")
+        
+        return None
+    
+    def search_with_fzf(self, query):
+        """Search using fzf fuzzy finder"""
+        try:
+            # Prepare search data for fzf
+            search_lines = []
+            for tool_data in self.data["tools"].values():
+                line = f"{tool_data['name']} # {tool_data['short_description']} | {tool_data['path']}"
+                search_lines.append(line)
+            
+            search_input = "\n".join(search_lines)
+            
+            # Run fzf with initial query
+            result = subprocess.run([
+                'fzf', '--filter', query, '--no-sort'
+            ], input=search_input, capture_output=True, text=True)
+            
+            if result.returncode == 0:
+                matches = []
+                for line in result.stdout.strip().split('\n'):
+                    if ' | ' in line:
+                        path = line.split(' | ')[-1]
+                        if path in self.data["tools"]:
+                            matches.append(self.data["tools"][path])
+                
+                return matches
+            
+        except Exception as e:
+            print(f"fzf search failed: {e}")
+        
+        return None
+    
+    def search_with_grep(self, query):
+        """Fallback search using grep-like functionality"""
+        matches = []
+        query_lower = query.lower()
+        
+        for tool_data in self.data["tools"].values():
+            # Search in name, summary, purpose, and short description
+            searchable = f"{tool_data['name']} {tool_data['summary']} {tool_data['purpose']} {tool_data['short_description']}".lower()
+            
+            if query_lower in searchable:
+                matches.append(tool_data)
+        
+        # Simple relevance scoring
+        def score_match(tool):
+            score = 0
+            query_lower = query.lower()
+            if query_lower in tool['name'].lower():
+                score += 10
+            if query_lower in tool['short_description'].lower():
+                score += 5
+            if query_lower in tool['summary'].lower():
+                score += 3
+            if query_lower in tool['purpose'].lower():
+                score += 2
+            return score
+        
+        matches.sort(key=score_match, reverse=True)
+        return matches[:20]  # Limit results
+    
+    def search(self, query):
+        """Search using the best available method"""
+        if not query:
+            return []
+        
+        print(f"Searching for: {query}")
+        
+        # Try Ollama first
+        if self.has_ollama:
+            print("Using Ollama Gemma2 for natural language search...")
+            results = self.search_with_ollama(query)
+            if results is not None:
+                return results
+            print("Ollama search failed, falling back to fzf...")
+        
+        # Try fzf
+        if self.has_fzf:
+            print("Using fzf for fuzzy search...")
+            results = self.search_with_fzf(query)
+            if results is not None:
+                return results
+            print("fzf search failed, falling back to grep...")
+        
+        # Fallback to grep
+        print("Using basic text search...")
+        return self.search_with_grep(query)
+    
+    def list_all_tools(self):
+        """List all tools with short descriptions"""
+        if not self.data["tools"]:
+            print("No tools in database. Use 'what -a <file>' to add tools.")
+            return
+        
+        print("Available tools:")
+        print()
+        
+        # Sort by name
+        tools = sorted(self.data["tools"].values(), key=lambda x: x['name'])
+        
+        # Calculate max name length for alignment
+        max_name_len = max(len(tool['name']) for tool in tools)
+        
+        for tool in tools:
+            executable_mark = "*" if tool.get('executable', False) else " "
+            name_padded = tool['name'].ljust(max_name_len)
+            print(f"{executable_mark}{name_padded}  #  {tool['short_description']}")
+    
+    def show_search_results(self, results):
+        """Display search results"""
+        if not results:
+            print("No tools found matching your query.")
+            return
+        
+        print(f"\nFound {len(results)} tool(s):")
+        print()
+        
+        for i, tool in enumerate(results, 1):
+            executable_mark = "*" if tool.get('executable', False) else " "
+            print(f"{i:2d}. {executable_mark}{tool['name']}")
+            print(f"    Path: {tool['path']}")
+            print(f"    Type: {tool['type']}")
+            print(f"    Purpose: {tool['purpose']}")
+            print(f"    Summary: {tool['summary']}")
+            print()
+
+def main():
+    parser = argparse.ArgumentParser(description="Smart repository search tool")
+    parser.add_argument("query", nargs="?", help="Search query")
+    parser.add_argument("-l", "--list", action="store_true", 
+                       help="List all tools with short descriptions")
+    parser.add_argument("-a", "--add", metavar="PATH", 
+                       help="Add new file to database")
+    
+    args = parser.parse_args()
+    
+    tool = WhatTool()
+    
+    if args.list:
+        tool.list_all_tools()
+        return
+    
+    if args.add:
+        filepath = Path(args.add)
+        if not filepath.exists():
+            print(f"Error: File {filepath} does not exist")
+            sys.exit(1)
+        
+        if not filepath.is_relative_to(REPO_ROOT):
+            print(f"Error: File must be within the repository ({REPO_ROOT})")
+            sys.exit(1)
+        
+        tool.add_file_interactive(filepath)
+        return
+    
+    if not args.query:
+        parser.print_help()
+        print()
+        print("Available search methods:")
+        if tool.has_ollama:
+            print("  ✓ Ollama + Gemma2 (natural language)")
+        else:
+            print("  ✗ Ollama + Gemma2 (not available)")
+        
+        if tool.has_fzf:
+            print("  ✓ fzf (fuzzy finding)")
+        else:
+            print("  ✗ fzf (not available)")
+        
+        print("  ✓ grep (basic text search)")
+        return
+    
+    # Perform search
+    results = tool.search(args.query)
+    tool.show_search_results(results)
+
+# Run the command-line interface only when executed as a script, not on import.
+if __name__ == "__main__":
+    main()