Replace uniqrs with gouniq
Replaced the uniqrs binary with a platform-independent Go rewrite. gouniq removes duplicate lines from an unsorted text file while preserving the original order of the lines.
This commit is contained in:
75
tools/go/gouniq/gouniq.go
Normal file
75
tools/go/gouniq/gouniq.go
Normal file
@@ -0,0 +1,75 @@
|
|||||||
|
package main
|
||||||
|
|
||||||
|
import (
|
||||||
|
"bufio"
|
||||||
|
"flag"
|
||||||
|
"fmt"
|
||||||
|
"hash/fnv"
|
||||||
|
"log"
|
||||||
|
"os"
|
||||||
|
)
|
||||||
|
|
||||||
|
// hashLine returns the 32-bit FNV-1a digest of s.
//
// The digest is not collision-free: two different strings may map to the
// same value.
func hashLine(s string) uint32 {
	h := fnv.New32a()
	// hash.Hash documents that Write never returns an error, so both
	// results are discarded explicitly.
	_, _ = h.Write([]byte(s))
	return h.Sum32()
}
|
||||||
|
|
||||||
|
// main implements the gouniq command line tool.
//
// It reads lines from the file named by the first positional argument, or
// from standard input when no argument is given, and writes to standard
// output:
//
//   - by default, the first occurrence of every distinct line, in input order;
//   - with -d, every occurrence of a line after its first one.
//
// With -h it prints the usage text and exits with status 0. If the input
// cannot be opened or read, it logs the error and exits with a non-zero
// status.
func main() {
	// Upper bound for a single input line. bufio.Scanner stops with
	// "token too long" at 64 KiB unless a larger limit is configured.
	const maxLineBytes = 16 * 1024 * 1024

	// Define command line flags.
	reverse := flag.Bool("d", false, "Print only lines that appear more than once.")
	help := flag.Bool("h", false, "Display help and usage information.")
	flag.Usage = func() {
		// Write the whole help text to the stream the flag package itself
		// uses, so the header, the flag defaults and the examples are not
		// split between standard error and standard output.
		out := flag.CommandLine.Output()
		fmt.Fprintf(out, "Usage of %s:\n", os.Args[0])
		fmt.Fprintln(out, "This program reads from a file or standard input, deduplicates lines, and outputs the results.")
		fmt.Fprintln(out, "Options:")
		flag.PrintDefaults()
		fmt.Fprintln(out, "Example usage:")
		fmt.Fprintln(out, "\t", os.Args[0], "[options] [filename]")
		fmt.Fprintln(out, "\t", os.Args[0], "-d filename # Only print duplicates")
		fmt.Fprintln(out, "\t", "cat /some/text/file |", os.Args[0], "# Read from standard input")
	}
	flag.Parse()

	// Check for help flag.
	if *help {
		flag.Usage()
		os.Exit(0)
	}

	// Determine the input source (file or stdin).
	inputSource := os.Stdin
	if flag.NArg() > 0 {
		f, err := os.Open(flag.Arg(0))
		if err != nil {
			log.Fatalf("Failed to open file: %v\n", err)
		}
		defer f.Close()
		inputSource = f
	}

	// Lines are remembered by their full text. Remembering only a 32-bit
	// hash would treat two different lines with the same hash as duplicates
	// and silently drop (or, with -d, wrongly print) one of them.
	seenLines := make(map[string]struct{})

	scanner := bufio.NewScanner(inputSource)
	scanner.Buffer(make([]byte, 0, bufio.MaxScanTokenSize), maxLineBytes)

	// Read the input line by line.
	for scanner.Scan() {
		line := scanner.Text()
		_, duplicate := seenLines[line]
		if !duplicate {
			seenLines[line] = struct{}{}
		}

		// Normal mode prints a line only the first time it is seen; -d
		// prints it only when it has been seen before. Both cases reduce
		// to "duplicate equals the -d flag".
		if duplicate == *reverse {
			fmt.Println(line)
		}
	}

	// Check for errors during scanning.
	if err := scanner.Err(); err != nil {
		log.Fatalf("Failed to read input: %v\n", err)
	}
}
|
||||||
BIN
tools/uniqrs
BIN
tools/uniqrs
Binary file not shown.
Reference in New Issue
Block a user