Replace uniqrs with gouniq
Replaced the uniqrs binary with a platform-independent Go rewrite. gouniq removes duplicate lines from an unsorted text file while preserving the original order of the remaining lines.
This commit is contained in:
75
tools/go/gouniq/gouniq.go
Normal file
75
tools/go/gouniq/gouniq.go
Normal file
@@ -0,0 +1,75 @@
|
||||
package main
|
||||
|
||||
import (
|
||||
"bufio"
|
||||
"flag"
|
||||
"fmt"
|
||||
"hash/fnv"
|
||||
"log"
|
||||
"os"
|
||||
)
|
||||
|
||||
// hashLine returns the 32-bit FNV-1a hash of s.
//
// The result is only 32 bits wide, so two different strings can produce the
// same value; callers must not treat equal hashes as proof of equal input.
func hashLine(s string) uint32 {
	digest := fnv.New32a()
	// Write's error result is deliberately discarded, as in the original.
	_, _ = digest.Write([]byte(s))
	return digest.Sum32()
}
|
||||
|
||||
// main implements gouniq: it reads lines from the file named by the first
// positional argument (or from standard input when none is given) and writes
// a filtered copy to standard output, keeping the input order.
//
// Default mode prints the first occurrence of every distinct line. With -d it
// prints each occurrence of a line that has already been seen earlier in the
// input, i.e. every repeat after the first.
//
// Lines are compared by their full content rather than by a 32-bit hash, so
// two different lines can never be mistaken for duplicates of each other. The
// cost is that every distinct line is kept in memory for the whole run.
func main() {
	// Define command line flags.
	duplicatesOnly := flag.Bool("d", false, "Print only lines that appear more than once.")
	showHelp := flag.Bool("h", false, "Display help and usage information.")
	flag.Usage = func() {
		// Send the whole usage text to one stream so it is not split between
		// stdout and the flag package's output (stderr by default).
		w := flag.CommandLine.Output()
		fmt.Fprintf(w, "Usage of %s:\n", os.Args[0])
		fmt.Fprintln(w, "This program reads from a file or standard input, deduplicates lines, and outputs the results.")
		fmt.Fprintln(w, "Options:")
		flag.PrintDefaults()
		fmt.Fprintln(w, "Example usage:")
		fmt.Fprintln(w, "\t", os.Args[0], "[options] [filename]")
		fmt.Fprintln(w, "\t", os.Args[0], "-d filename # Only print duplicates")
		fmt.Fprintln(w, "\t", "cat /some/text/file |", os.Args[0], "# Read from standard input")
	}
	flag.Parse()

	// Check for help flag.
	if *showHelp {
		flag.Usage()
		os.Exit(0)
	}

	// Determine the input source (file or stdin).
	input := os.Stdin
	if flag.NArg() > 0 {
		f, err := os.Open(flag.Arg(0))
		if err != nil {
			log.Fatalf("Failed to open file: %v\n", err)
		}
		defer f.Close()
		input = f
	}

	// bufio.Scanner rejects lines longer than 64 KiB by default; raise the
	// ceiling so unusually long lines do not abort the run. The buffer still
	// starts small and only grows when a long line is actually encountered.
	const maxLineBytes = 64 << 20
	scanner := bufio.NewScanner(input)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	// Buffer stdout so that each printed line does not cost its own write.
	out := bufio.NewWriter(os.Stdout)

	// seen holds every distinct line read so far.
	seen := make(map[string]struct{})

	// Read lines.
	for scanner.Scan() {
		line := scanner.Text()
		_, repeated := seen[line]
		if !repeated {
			seen[line] = struct{}{}
		}
		// Default mode prints first occurrences (repeated == false);
		// -d mode prints repeats (repeated == true).
		if repeated == *duplicatesOnly {
			fmt.Fprintln(out, line)
		}
	}

	// Flush whatever was produced before reporting any error, so output
	// generated ahead of a failure is not lost.
	flushErr := out.Flush()

	// Check for errors during scanning.
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read input: %v\n", err)
	}
	if flushErr != nil {
		log.Fatalf("Failed to write output: %v\n", flushErr)
	}
}
|
||||
Reference in New Issue
Block a user