// maxLineBytes caps the length of a single input line. bufio.Scanner's default
// cap is bufio.MaxScanTokenSize (64 KiB); a longer line stops the scan with
// bufio.ErrTooLong, so main raises the cap to this value.
const maxLineBytes = 16 * 1024 * 1024

// hashLine returns the 32-bit FNV-1a hash of s.
//
// Deduplication does not key on this value: distinct lines can share a 32-bit
// hash, and keying on it made the tool treat such lines as duplicates of each
// other. The function is retained so that any other code in the package that
// calls it keeps compiling.
func hashLine(s string) uint32 {
	hasher := fnv.New32a()
	hasher.Write([]byte(s)) // hash.Hash documents that Write never returns an error.
	return hasher.Sum32()
}

// uniqLines reads lines from sc and writes a subset of them to out, each
// followed by a newline, then flushes out.
//
// With dupOnly false, every distinct line is written once, at its first
// occurrence. With dupOnly true, only lines that occur more than once are
// written, once each, at their second occurrence. Lines are compared by their
// full text.
//
// The returned error is prefixed with "read input: " when the scanner failed
// and with "write output: " when writing or flushing out failed.
func uniqLines(sc *bufio.Scanner, out *bufio.Writer, dupOnly bool) error {
	// Occurrence count per line, saturating at 2: nothing below needs to
	// distinguish a second occurrence from later ones.
	counts := make(map[string]int)

	for sc.Scan() {
		line := sc.Text()
		n := counts[line]
		if n >= 2 {
			continue
		}
		counts[line] = n + 1

		// Normal mode emits on the first occurrence, -d mode on the second.
		emit := (n == 0 && !dupOnly) || (n == 1 && dupOnly)
		if !emit {
			continue
		}
		// bufio.Writer remembers the first write error and reports it from
		// Flush, so the individual results are not checked here.
		out.WriteString(line)
		out.WriteByte('\n')
	}

	if err := sc.Err(); err != nil {
		// Best effort: deliver what was produced before the read failure. The
		// read error is the one worth reporting, so a flush error is dropped.
		_ = out.Flush()
		return fmt.Errorf("read input: %w", err)
	}
	if err := out.Flush(); err != nil {
		return fmt.Errorf("write output: %w", err)
	}
	return nil
}

// main parses the command line and deduplicates the lines of the first
// positional argument (a file name) or, when there is none, of standard input.
// Results go to standard output.
//
// Flags:
//
//	-d  print only lines that appear more than once
//	-h  print usage information and exit with status 0
func main() {
	dupOnly := flag.Bool("d", false, "Print only lines that appear more than once.")
	help := flag.Bool("h", false, "Display help and usage information.")
	flag.Usage = func() {
		// Send the whole usage text to one stream; PrintDefaults already
		// writes to flag.CommandLine.Output().
		w := flag.CommandLine.Output()
		fmt.Fprintf(w, "Usage of %s:\n", os.Args[0])
		fmt.Fprintln(w, "This program reads from a file or standard input, deduplicates lines, and outputs the results.")
		fmt.Fprintln(w, "Options:")
		flag.PrintDefaults()
		fmt.Fprintln(w, "Example usage:")
		fmt.Fprintln(w, "\t", os.Args[0], "[options] [filename]")
		fmt.Fprintln(w, "\t", os.Args[0], "-d filename # Only print duplicates")
		fmt.Fprintln(w, "\t", "cat /some/text/file |", os.Args[0], "# Read from standard input")
	}
	flag.Parse()

	if *help {
		flag.Usage()
		os.Exit(0)
	}

	// Determine the input source (file or stdin).
	input := os.Stdin
	if flag.NArg() > 0 {
		f, err := os.Open(flag.Arg(0))
		if err != nil {
			log.Fatalf("Failed to open file: %v", err)
		}
		defer f.Close()
		input = f
	}

	sc := bufio.NewScanner(input)
	// Start with the default buffer size and let it grow up to maxLineBytes.
	sc.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	if err := uniqLines(sc, bufio.NewWriter(os.Stdout), *dupOnly); err != nil {
		// err already starts with "read input: " or "write output: ".
		log.Fatalf("Failed to %v", err)
	}
}
a/tools/uniqrs b/tools/uniqrs deleted file mode 100755 index 2f5f633..0000000 Binary files a/tools/uniqrs and /dev/null differ