Files
gists/tools/go/gouniq/gouniq.go
Tobias Kessels f44d0fb652 Replace uniqrs with gouniq
Replaced the uniqrs binary with a platform-independent Go rewrite.
gouniq removes duplicate lines from an unsorted text file while maintaining the original order of the lines.
2024-08-01 10:51:37 +02:00

76 lines
1.7 KiB
Go

package main
import (
"bufio"
"flag"
"fmt"
"hash/fnv"
"log"
"os"
)
// hashLine returns the 32-bit FNV-1a checksum of the bytes of s.
func hashLine(s string) uint32 {
	digest := fnv.New32a()
	// The hash.Hash contract states that Write never returns an error,
	// so both results are deliberately discarded.
	_, _ = digest.Write([]byte(s))
	return digest.Sum32()
}
// maxLineBytes is the longest input line main accepts. bufio.Scanner's default
// limit is 64 KiB (bufio.MaxScanTokenSize); longer lines make the scan fail
// with a "token too long" error, so the limit is raised here.
const maxLineBytes = 1024 * 1024

// shouldPrint records one more occurrence of line in seen and reports whether
// that occurrence belongs in the output.
//
// With duplicatesOnly false, only the first occurrence of a line is reported.
// With duplicatesOnly true, every occurrence after the first is reported, so a
// line that appears three times is reported twice.
//
// Lines are keyed by their full text rather than by a fixed-width hash: two
// different lines can share a hash value, and such a collision would make a
// distinct line look like a repeat.
func shouldPrint(seen map[string]int, line string, duplicatesOnly bool) bool {
	count := seen[line] + 1
	seen[line] = count
	if duplicatesOnly {
		return count > 1
	}
	return count == 1
}

// main reads lines from the file named by the first positional argument, or
// from standard input when no argument is given, and prints a filtered copy
// to standard output in the original order.
//
// Flags:
//
//	-d  print only repeat occurrences of lines instead of first occurrences
//	-h  print usage information to standard output and exit with status 0
//
// The process exits via log.Fatalf if the input cannot be opened or read.
func main() {
	// Define command line flags.
	duplicatesOnly := flag.Bool("d", false, "Print only lines that appear more than once.")
	help := flag.Bool("h", false, "Display help and usage information.")
	flag.Usage = func() {
		// Write the whole message to a single stream; flag.PrintDefaults
		// also writes to flag.CommandLine.Output().
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "Usage of %s:\n", os.Args[0])
		fmt.Fprintln(out, "This program reads from a file or standard input, deduplicates lines, and outputs the results.")
		fmt.Fprintln(out, "Options:")
		flag.PrintDefaults()
		fmt.Fprintln(out, "Example usage:")
		fmt.Fprintln(out, "\t", os.Args[0], "[options] [filename]")
		fmt.Fprintln(out, "\t", os.Args[0], "-d filename # Only print duplicates")
		fmt.Fprintln(out, "\t", "cat /some/text/file |", os.Args[0], "# Read from standard input")
	}
	flag.Parse()

	// Help that was explicitly requested is regular output, so send all of it
	// to stdout; usage printed because of a parse error stays on stderr.
	if *help {
		flag.CommandLine.SetOutput(os.Stdout)
		flag.Usage()
		os.Exit(0)
	}

	// Determine the input source (file or stdin).
	input := os.Stdin
	if flag.NArg() > 0 {
		file, err := os.Open(flag.Arg(0))
		if err != nil {
			log.Fatalf("Failed to open file: %v\n", err)
		}
		defer file.Close()
		input = file
	}

	seen := make(map[string]int)
	scanner := bufio.NewScanner(input)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	// Each line is printed as soon as it is read, so the tool stays usable
	// in streaming pipelines.
	for scanner.Scan() {
		line := scanner.Text()
		if shouldPrint(seen, line, *duplicatesOnly) {
			fmt.Println(line)
		}
	}

	// Check for errors during scanning.
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read input: %v\n", err)
	}
}