// Package extract streams text input line by line and emits raw IPv4, IPv6,
// or MAC address candidates found on each line.
package extract
|
|
|
|
import (
|
|
"bufio"
|
|
"context"
|
|
"errors"
|
|
"io"
|
|
"regexp"
|
|
)
|
|
|
|
// Mode selects which kind of address Grep searches for.
type Mode int

const (
	// ModeIP searches for IP address candidates. It is the zero value of Mode.
	ModeIP Mode = iota
	// ModeMAC searches for MAC address candidates.
	ModeMAC
)
|
|
|
|
type Options struct {
|
|
Mode Mode
|
|
IPv6 bool
|
|
}
|
|
|
|
// Grep scans r line-by-line and emits raw matches via emit.
|
|
// It avoids reading the full input into memory.
|
|
func Grep(ctx context.Context, r io.Reader, opts Options, emit func(string)) error {
|
|
if emit == nil {
|
|
return errors.New("emit func is nil")
|
|
}
|
|
|
|
var re *regexp.Regexp
|
|
switch opts.Mode {
|
|
case ModeMAC:
|
|
re = macRE
|
|
case ModeIP:
|
|
if opts.IPv6 {
|
|
re = ipAnyRE
|
|
} else {
|
|
re = ipv4RE
|
|
}
|
|
default:
|
|
return errors.New("unknown mode")
|
|
}
|
|
|
|
sc := bufio.NewScanner(r)
|
|
// Large-ish buffer for long lines (logs, JSON blobs, etc.).
|
|
buf := make([]byte, 64*1024)
|
|
sc.Buffer(buf, 1024*1024)
|
|
|
|
for sc.Scan() {
|
|
select {
|
|
case <-ctx.Done():
|
|
return ctx.Err()
|
|
default:
|
|
}
|
|
line := sc.Bytes()
|
|
matches := re.FindAll(line, -1)
|
|
for _, m := range matches {
|
|
emit(string(m))
|
|
}
|
|
}
|
|
if err := sc.Err(); err != nil {
|
|
return err
|
|
}
|
|
return nil
|
|
}
|
|
|
|
// Pattern sources shared by the compiled matchers below, so the combined
// IPv4/IPv6 matcher cannot drift from the single-family ones.
const (
	// Four dot-separated groups of 1-3 digits; octets are not range-checked.
	ipv4Pattern = `\b(?:\d{1,3}\.){3}\d{1,3}\b`
	// Two to seven colon-terminated groups of 0-4 hex digits plus a final
	// group. Compile it with the (?i) flag to accept upper-case hex digits.
	ipv6Pattern = `\b(?:[0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}\b`
)

// Note: These regexes intentionally find candidates only; callers should
// validate/normalize as needed.
var (
	// \b based candidate matcher, validated later.
	ipv4RE = regexp.MustCompile(ipv4Pattern)

	// Rough IPv6 candidate matcher (validation happens later if caller
	// chooses to). Because of the \b anchors, a candidate that begins or ends
	// with ':' is only found when a word character sits directly next to that
	// colon; a standalone "::1", for example, is not matched.
	ipv6RE = regexp.MustCompile(`(?i)` + ipv6Pattern)

	// Matches either an IPv4 or an IPv6 candidate, trying IPv4 first.
	ipAnyRE = regexp.MustCompile(`(?i)(?:` + ipv4Pattern + `|` + ipv6Pattern + `)`)

	// Six hex pairs separated by ':' or '-', or three dot-separated groups
	// of four hex digits.
	macRE = regexp.MustCompile(`(?i)\b(?:[0-9a-f]{2}[:-]){5}[0-9a-f]{2}\b|\b(?:[0-9a-f]{4}\.){2}[0-9a-f]{4}\b`)
)
|