#!/usr/bin/env python3
"""
'what' - Smart repository search tool with progressive enhancement

Fallback hierarchy:
1. Ollama + Gemma2 (natural language search)
2. fzf (fuzzy finding)
3. grep (simple text search)

Usage:
    what QUERY      # Find tools matching query
    what -h         # Show help
    what -l         # List all tools with short descriptions
    what -a PATH    # Add new file to database
"""

import os
import sys
import json
import argparse
import subprocess
import shutil
from pathlib import Path
import re

# Configuration
REPO_ROOT = Path(__file__).parent.absolute()
DB_FILE = REPO_ROOT / ".what_db.json"

# Model passed to `ollama run`; the availability check looks for this exact
# name so we never try to run a model that is not installed.
_OLLAMA_MODEL = "gemma2:2b"

# Files larger than this many bytes are not read for AI analysis.
_MAX_ANALYZABLE_BYTES = 50000
# Number of characters read from a file / embedded in the analysis prompt.
_READ_CHARS = 10000
_PREVIEW_CHARS = 2000

# Extension -> human readable type, for extensions whose label does not
# depend on the executable bit.
_SUFFIX_TYPES = {
    '.sh': "shell script",
    '.go': "go program",
    '.js': "javascript",
    '.ps1': "powershell script",
    '.rs': "rust program",
    '.c': "c/c++ source",
    '.cpp': "c/c++ source",
    '.awk': "awk script",
}

# (database key, prompt label, hint shown when there is no AI suggestion)
_METADATA_FIELDS = (
    ("summary", "Summary", "what it does and how"),
    ("purpose", "Purpose", "what it's used for"),
    ("short_description", "Short description", "for listings"),
)

# Leading list decoration a language model tends to add: "- ", "* ", "1. ", "2) ".
# Whitespace after the marker is required so names like "7zip.sh" are untouched.
_LIST_MARKER = re.compile(r"^(?:[-*\u2022]|\d+[.)])\s+")


class WhatTool:
    """Tool database plus the search back ends (Ollama, fzf, substring)."""

    def __init__(self):
        self.db_path = DB_FILE
        self.data = self.load_db()

        # Detect available tools
        self.has_ollama = self.check_ollama()
        self.has_fzf = shutil.which('fzf') is not None

    def load_db(self):
        """Load the tool database.

        Returns the parsed JSON document, or a fresh empty database when the
        file is missing, is not valid JSON/UTF-8, or does not have the expected
        shape (an object whose "tools" entry is an object).
        """
        if self.db_path.exists():
            try:
                with open(self.db_path, 'r', encoding='utf-8') as f:
                    loaded = json.load(f)
            except ValueError:
                # json.JSONDecodeError and UnicodeDecodeError are both ValueErrors.
                loaded = None
            if isinstance(loaded, dict) and isinstance(loaded.setdefault("tools", {}), dict):
                return loaded
            print(f"Warning: Corrupted database {self.db_path}, creating new one")

        return {
            "version": "1.0",
            "tools": {}
        }

    def save_db(self):
        """Save the tool database as indented, key-sorted JSON."""
        with open(self.db_path, 'w', encoding='utf-8') as f:
            json.dump(self.data, f, indent=2, sort_keys=True)

    def check_ollama(self):
        """Check if ollama with gemma2 is available.

        Returns True only when `ollama list` succeeds and mentions the exact
        model that the analysis/search calls run.
        """
        try:
            result = subprocess.run(['ollama', 'list'],
                                    capture_output=True, text=True, timeout=5)
        except (OSError, subprocess.SubprocessError):
            # Not installed, not runnable, or timed out.
            return False
        return result.returncode == 0 and _OLLAMA_MODEL in result.stdout.lower()

    def get_file_type(self, filepath):
        """Return a short human readable type label for `filepath` (a Path)."""
        if not filepath.exists():
            return "missing"
        if filepath.is_dir():
            return "directory"

        is_executable = os.access(filepath, os.X_OK)
        suffix = filepath.suffix.lower()

        if suffix == '.py':
            return "python script" if is_executable else "python module"
        if suffix in _SUFFIX_TYPES:
            return _SUFFIX_TYPES[suffix]
        if not suffix:
            return "binary executable" if is_executable else "script"
        return f"{suffix[1:]} file"

    @staticmethod
    def _repo_relative(filepath):
        """Return `filepath` relative to REPO_ROOT as a string, or None if outside.

        Relative inputs are interpreted against the current directory. A purely
        lexical comparison is tried first (so a symlink that lives inside the
        repository is accepted under its own name); if that fails both sides
        are symlink-resolved, which covers a repository reached through a
        symlinked directory.
        """
        lexical = Path(os.path.abspath(filepath))
        attempts = (
            (lexical, REPO_ROOT),
            (lexical.resolve(), REPO_ROOT.resolve()),
        )
        for path, root in attempts:
            try:
                return str(path.relative_to(root))
            except ValueError:
                continue
        return None

    @staticmethod
    def _run_ollama(prompt, timeout):
        """Run the model on `prompt`; return stripped stdout, or None on non-zero exit.

        Raises OSError / subprocess.SubprocessError / ValueError from
        subprocess.run; callers treat those as "Ollama failed".
        """
        result = subprocess.run(['ollama', 'run', _OLLAMA_MODEL, prompt],
                                capture_output=True, text=True, timeout=timeout)
        if result.returncode != 0:
            return None
        return result.stdout.strip()

    @staticmethod
    def _read_preview(filepath):
        """Return the leading text of `filepath`, or a placeholder string."""
        if filepath.stat().st_size > _MAX_ANALYZABLE_BYTES:
            return "[File too large for analysis]"
        try:
            with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
                text = f.read()[:_READ_CHARS]
        except OSError:
            return "[Binary or unreadable file]"
        # NUL bytes cannot be passed in a command-line argument.
        return text.replace('\x00', '')

    def analyze_file_with_ollama(self, filepath):
        """Analyze file using Ollama Gemma2.

        Returns the dict parsed from the first {...} span of the model's reply
        (expected keys: summary, purpose, short_description), or None when the
        model fails, times out, or returns no parseable JSON.
        """
        try:
            content = self._read_preview(filepath)

            prompt = f"""
Analyze this code/script file and provide ONLY a JSON response with these fields:

Filename: {filepath.name}
File type: {self.get_file_type(filepath)}
Content preview:
{content[:_PREVIEW_CHARS]}

Respond with ONLY this JSON structure:
{{
    "summary": "Brief 1-2 sentence summary of what this tool does and how it works",
    "purpose": "What this tool is used for (e.g., 'Network analysis', 'File processing', 'Security scanning')",
    "short_description": "Very short description for listings (e.g., 'like md5sum but for files inside tarballs')"
}}
"""
            response = self._run_ollama(prompt, timeout=30)
            if response is None:
                return None

            # The model may wrap the JSON in prose or code fences.
            json_match = re.search(r'\{.*\}', response, re.DOTALL)
            if json_match:
                return json.loads(json_match.group())
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            # ValueError covers json.JSONDecodeError.
            print(f"Ollama analysis failed: {e}")

        return None

    @staticmethod
    def _prompt_metadata(suggestions=None):
        """Ask the user for summary/purpose/short description.

        With `suggestions` (a dict), each suggestion is shown in brackets and
        used when the user just presses Enter.
        """
        answers = {}
        for key, label, hint in _METADATA_FIELDS:
            if suggestions is None:
                answers[key] = input(f"{label} ({hint}): ").strip()
            else:
                suggested = suggestions.get(key, '')
                answers[key] = input(f"{label} [{suggested}]: ").strip() or suggested
        return answers

    def add_file_interactive(self, filepath):
        """Add file with interactive prompts and persist the database.

        Raises ValueError when `filepath` is not inside the repository.
        """
        rel_path = self._repo_relative(filepath)
        if rel_path is None:
            raise ValueError(f"{filepath} is not within the repository ({REPO_ROOT})")
        file_type = self.get_file_type(filepath)

        print(f"\nAdding: {rel_path}")
        print(f"Type: {file_type}")
        print()

        if self.has_ollama:
            print("Analyzing with Ollama Gemma2...")
            analysis = self.analyze_file_with_ollama(filepath)

            if analysis:
                print("AI Analysis complete. Review and edit if needed:")
                answers = self._prompt_metadata(analysis)
            else:
                print("AI analysis failed, using manual input:")
                answers = self._prompt_metadata()
        else:
            print("Manual input (Ollama not available):")
            answers = self._prompt_metadata()

        # Store in database
        self.data["tools"][rel_path] = {
            "path": rel_path,
            "name": filepath.name,
            "type": file_type,
            "summary": answers["summary"],
            "purpose": answers["purpose"],
            "short_description": answers["short_description"],
            "executable": os.access(filepath, os.X_OK)
        }

        self.save_db()
        print(f"✓ Added {rel_path} to database")

    @staticmethod
    def _clean_tool_name(line):
        """Strip list markers and quoting/markdown decoration from a reply line."""
        text = _LIST_MARKER.sub("", line.strip())
        return text.strip("`*\"' ")

    @classmethod
    def _match_tool_name(cls, line, by_name):
        """Map one reply line to a tool entry, or None.

        Tries the line verbatim, then with decoration removed, then the part
        before the first ':' (the model sometimes echoes "name: summary").
        """
        cleaned = cls._clean_tool_name(line)
        before_colon = cls._clean_tool_name(cleaned.partition(':')[0])
        for candidate in (line, cleaned, before_colon):
            if candidate in by_name:
                return by_name[candidate]
        return None

    def search_with_ollama(self, query):
        """Search using natural language with Ollama.

        Returns a list of matching tool entries (possibly empty) in the order
        the model ranked them, or None when Ollama could not be run.
        """
        try:
            tools = list(self.data["tools"].values())
            tools_text = "\n".join(
                f"{t.get('name', '')}: {t.get('summary', '')} (Purpose: {t.get('purpose', '')})"
                for t in tools
            )

            prompt = f"""
Given this query: "{query}"

Find the most relevant tools from this list. Respond with ONLY the tool names (one per line) in order of relevance:

{tools_text}

Query: {query}
Response (tool names only, one per line, max 10):
"""
            response = self._run_ollama(prompt, timeout=20)
            if response is None:
                return None

            # First entry wins when several tools share a name.
            by_name = {}
            for t in tools:
                by_name.setdefault(t.get('name'), t)

            reply_lines = [line.strip() for line in response.split('\n') if line.strip()]

            matches = []
            seen = set()
            for line in reply_lines[:10]:  # Limit to top 10
                found = self._match_tool_name(line, by_name)
                if found is not None and id(found) not in seen:
                    seen.add(id(found))
                    matches.append(found)
            return matches
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            print(f"Ollama search failed: {e}")

        return None

    def search_with_fzf(self, query):
        """Search using fzf fuzzy finder (non-interactive --filter mode).

        Returns the matching tool entries, or None when fzf exits non-zero
        (which includes "no match") or cannot be run.
        """
        try:
            # One line per tool; the path goes last so it can be split off again.
            search_input = "\n".join(
                f"{t.get('name', '')} # {t.get('short_description', '')} | {t.get('path', key)}"
                for key, t in self.data["tools"].items()
            )

            result = subprocess.run(['fzf', '--filter', query, '--no-sort'],
                                    input=search_input, capture_output=True, text=True)

            if result.returncode == 0:
                matches = []
                for line in result.stdout.strip().split('\n'):
                    _, sep, path = line.rpartition(' | ')
                    if sep and path in self.data["tools"]:
                        matches.append(self.data["tools"][path])
                return matches
        except (OSError, subprocess.SubprocessError, ValueError) as e:
            print(f"fzf search failed: {e}")

        return None

    def search_with_grep(self, query):
        """Fallback search: case-insensitive substring match over the metadata.

        Returns at most 20 entries, best score first. A hit in the name counts
        10, short description 5, summary 3, purpose 2; ties keep database order.
        Entries with missing fields are treated as having empty text there.
        """
        query_lower = query.lower()
        fields = ('name', 'summary', 'purpose', 'short_description')

        matches = [
            tool for tool in self.data["tools"].values()
            if query_lower in " ".join(str(tool.get(f, '')) for f in fields).lower()
        ]

        # Simple relevance scoring
        weights = (('name', 10), ('short_description', 5), ('summary', 3), ('purpose', 2))

        def score_match(tool):
            return sum(weight for field, weight in weights
                       if query_lower in str(tool.get(field, '')).lower())

        matches.sort(key=score_match, reverse=True)
        return matches[:20]  # Limit results

    def search(self, query):
        """Search using the best available method; returns a list of tool entries."""
        if not query:
            return []

        print(f"Searching for: {query}")

        # Try Ollama first
        if self.has_ollama:
            print("Using Ollama Gemma2 for natural language search...")
            results = self.search_with_ollama(query)
            if results is not None:
                return results
            print("Ollama search failed, falling back to fzf...")

        # Try fzf
        if self.has_fzf:
            print("Using fzf for fuzzy search...")
            results = self.search_with_fzf(query)
            if results is not None:
                return results
            print("fzf search failed, falling back to grep...")

        # Fallback to grep
        print("Using basic text search...")
        return self.search_with_grep(query)

    def list_all_tools(self):
        """List all tools with short descriptions, sorted by name.

        Executable tools are prefixed with '*'.
        """
        tools = self.data["tools"]
        if not tools:
            print("No tools in database. Use 'what -a PATH' to add tools.")
            return

        print("Available tools:")
        print()

        ordered = sorted(tools.values(), key=lambda entry: entry.get('name', ''))

        # Pad names to a common width so the descriptions line up.
        width = max(len(entry.get('name', '')) for entry in ordered)

        for entry in ordered:
            executable_mark = "*" if entry.get('executable', False) else " "
            name_padded = entry.get('name', '').ljust(width)
            print(f"{executable_mark}{name_padded} # {entry.get('short_description', '')}")

    def show_search_results(self, results):
        """Display search results as a numbered list with details."""
        if not results:
            print("No tools found matching your query.")
            return

        print(f"\nFound {len(results)} tool(s):")
        print()

        for i, tool in enumerate(results, 1):
            executable_mark = "*" if tool.get('executable', False) else " "
            print(f"{i:2d}. {executable_mark}{tool.get('name', '')}")
            print(f" Path: {tool.get('path', '')}")
            print(f" Type: {tool.get('type', '')}")
            print(f" Purpose: {tool.get('purpose', '')}")
            print(f" Summary: {tool.get('summary', '')}")
            print()


def main():
    """Command-line entry point: list, add, or search depending on arguments."""
    parser = argparse.ArgumentParser(description="Smart repository search tool")
    parser.add_argument("query", nargs="?", help="Search query")
    parser.add_argument("-l", "--list", action="store_true",
                        help="List all tools with short descriptions")
    parser.add_argument("-a", "--add", metavar="PATH",
                        help="Add new file to database")

    args = parser.parse_args()

    tool = WhatTool()

    if args.list:
        tool.list_all_tools()
        return

    if args.add:
        filepath = Path(args.add)
        if not filepath.exists():
            print(f"Error: File {filepath} does not exist")
            sys.exit(1)

        # Relative paths are resolved against the current directory before
        # being compared with the (absolute) repository root.
        if WhatTool._repo_relative(filepath) is None:
            print(f"Error: File must be within the repository ({REPO_ROOT})")
            sys.exit(1)

        tool.add_file_interactive(filepath)
        return

    if not args.query:
        parser.print_help()
        print()
        print("Available search methods:")
        if tool.has_ollama:
            print(" ✓ Ollama + Gemma2 (natural language)")
        else:
            print(" ✗ Ollama + Gemma2 (not available)")

        if tool.has_fzf:
            print(" ✓ fzf (fuzzy finding)")
        else:
            print(" ✗ fzf (not available)")

        print(" ✓ grep (basic text search)")
        return

    # Perform search
    results = tool.search(args.query)
    tool.show_search_results(results)


if __name__ == "__main__":
    main()