Replace uniqrs with gouniq

Replaced the uniqrs binary with a platform-independent Go rewrite.
gouniq removes duplicate lines from an unsorted text file while maintaining the original order of the lines.
This commit is contained in:
Tobias Kessels
2024-08-01 10:51:37 +02:00
parent 1380c7df75
commit f44d0fb652
2 changed files with 75 additions and 0 deletions

75
tools/go/gouniq/gouniq.go Normal file
View File

@@ -0,0 +1,75 @@
package main
import (
"bufio"
"flag"
"fmt"
"hash/fnv"
"log"
"os"
)
// hashLine returns the 32-bit FNV-1a digest of s.
//
// The digest is only 32 bits wide, so two different strings can map to the
// same value; equal digests do not prove that the strings are equal.
func hashLine(s string) uint32 {
	digest := fnv.New32a()
	// hash.Hash documents that Write never returns an error, so the result
	// is discarded explicitly.
	_, _ = digest.Write([]byte(s))
	return digest.Sum32()
}
// main implements the gouniq command line tool.
//
// It reads lines from the file named by the first positional argument, or
// from standard input when no argument is given, and writes a filtered copy
// to standard output while preserving the input order:
//
//   - default: every distinct line is printed once, at its first occurrence;
//   - -d:      a line is printed on every occurrence after its first, so a
//     line that appears k times in the input is printed k-1 times.
//
// Lines are compared by their full text. Comparing 32-bit hashes instead
// would treat two different lines with colliding hashes as the same line and
// silently drop one of them.
func main() {
	// maxLineBytes is the longest line the scanner accepts. bufio.Scanner
	// defaults to 64 KiB and aborts with "token too long" beyond that; the
	// buffer only grows on demand, so a generous ceiling costs nothing for
	// ordinary input.
	const maxLineBytes = 64 << 20

	// Define command line flags.
	reverse := flag.Bool("d", false, "Print only lines that appear more than once.")
	help := flag.Bool("h", false, "Display help and usage information.")
	flag.Usage = func() {
		// Send the whole usage text to one writer so it cannot be split
		// between stdout and stderr.
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "Usage of %s:\n", os.Args[0])
		fmt.Fprintln(out, "This program reads from a file or standard input, deduplicates lines, and outputs the results.")
		fmt.Fprintln(out, "Options:")
		flag.PrintDefaults()
		fmt.Fprintln(out, "Example usage:")
		fmt.Fprintln(out, "\t", os.Args[0], "[options] [filename]")
		fmt.Fprintln(out, "\t", os.Args[0], "-d filename # Only print duplicates")
		fmt.Fprintln(out, "\t", "cat /some/text/file |", os.Args[0], "# Read from standard input")
	}
	flag.Parse()

	// Check for help flag.
	if *help {
		flag.Usage()
		os.Exit(0)
	}

	// Determine the input source (file or stdin).
	inputSource := os.Stdin
	if flag.NArg() > 0 {
		file, err := os.Open(flag.Arg(0))
		if err != nil {
			log.Fatalf("Failed to open file: %v\n", err)
		}
		defer file.Close()
		inputSource = file
	}

	// seenLines counts how often each distinct line has occurred so far.
	seenLines := make(map[string]int)

	scanner := bufio.NewScanner(inputSource)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	// Read the input line by line. Output is written immediately (unbuffered)
	// so that streaming input produces streaming output.
	for scanner.Scan() {
		line := scanner.Text()
		seenLines[line]++
		repeated := seenLines[line] > 1

		// Normal mode prints first occurrences only; -d prints repeats only.
		if repeated != *reverse {
			continue
		}
		if _, err := fmt.Println(line); err != nil {
			log.Fatalf("Failed to write output: %v\n", err)
		}
	}

	// Check for errors during scanning.
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read input: %v\n", err)
	}
}