package extract

import (
	"bufio"
	"context"
	"errors"
	"io"
	"regexp"
)

// Mode selects which kind of address Grep searches for.
type Mode int

const (
	// ModeIP extracts IP address candidates (IPv4 only unless Options.IPv6 is set).
	ModeIP Mode = iota
	// ModeMAC extracts MAC address candidates.
	ModeMAC
)

// Options configures a single Grep call.
type Options struct {
	// Mode picks the matcher to run.
	Mode Mode
	// IPv6 adds IPv6 candidates to the IPv4 ones. It is only consulted when
	// Mode is ModeIP.
	IPv6 bool
}

const (
	// initialLineBuf is the starting size of the scanner's line buffer.
	initialLineBuf = 64 * 1024

	// maxLineLen is the longest line the scanner accepts. A longer line stops
	// the scan and Grep returns bufio.ErrTooLong.
	maxLineLen = 1024 * 1024
)

// Grep scans r line by line and passes every raw candidate match to emit,
// in input order. Input is streamed, so the whole of r is never held in
// memory; as a consequence a candidate cannot span a line break.
//
// The matches are candidates only: nothing is validated or normalized here.
//
// Grep returns an error when emit is nil or opts.Mode is not a known mode,
// ctx.Err() when ctx is cancelled (checked before reading starts and again
// after each line is read), and otherwise the scanner's error, which is
// bufio.ErrTooLong for a line longer than 1 MiB.
func Grep(ctx context.Context, r io.Reader, opts Options, emit func(string)) error {
	if emit == nil {
		return errors.New("emit func is nil")
	}

	var re *regexp.Regexp
	switch opts.Mode {
	case ModeMAC:
		re = macRE
	case ModeIP:
		re = ipv4RE
		if opts.IPv6 {
			re = ipAnyRE
		}
	default:
		return errors.New("unknown mode")
	}

	// The first Scan may block on r, so do not even start reading when the
	// caller has already given up.
	if err := ctx.Err(); err != nil {
		return err
	}

	sc := bufio.NewScanner(r)
	// Large-ish buffer for long lines (logs, JSON blobs, etc.).
	sc.Buffer(make([]byte, initialLineBuf), maxLineLen)

	for sc.Scan() {
		if err := ctx.Err(); err != nil {
			return err
		}
		// sc.Bytes() is only valid until the next Scan; string(m) copies each
		// match out before it is handed to the callback.
		for _, m := range re.FindAll(sc.Bytes(), -1) {
			emit(string(m))
		}
	}
	return sc.Err()
}

// Note: these patterns intentionally find candidates only; callers should
// validate/normalize as needed. They are written without the (?i) flag so
// they can be composed; every compiled regexp below adds it.
const (
	// ipv4Pattern is a \b-delimited dotted quad; octet ranges are not checked.
	ipv4Pattern = `\b(?:\d{1,3}\.){3}\d{1,3}\b`

	// ipv6Pattern is a rough IPv6 candidate matcher made of three
	// alternatives, tried in this order at each position:
	//
	//  1. the general colon-separated form, delimited by \b on both sides;
	//  2. a form starting with "::" (e.g. "::1", "::ffff:192.0.2.1"), which
	//     has no word boundary in front of it after whitespace or at the start
	//     of a line and is therefore never matched by the first alternative;
	//  3. a form ending in "::" (e.g. "fe80::", "2001:db8::/32"), which has no
	//     word boundary after it for the same reason.
	//
	// The pattern is deliberately loose and also matches non-addresses such as
	// "12:34:56".
	ipv6Pattern = `(?:` +
		`\b(?:[0-9a-f]{0,4}:){2,7}[0-9a-f]{0,4}\b` +
		`|::(?:[0-9a-f]{1,4}:){0,6}(?:` + ipv4Pattern + `|[0-9a-f]{1,4}\b)` +
		`|\b(?:[0-9a-f]{1,4}:){1,7}:` +
		`)`
)

var (
	// ipv4RE matches IPv4 candidates only.
	ipv4RE = regexp.MustCompile(ipv4Pattern)

	// ipv6RE matches IPv6 candidates only.
	ipv6RE = regexp.MustCompile(`(?i)` + ipv6Pattern)

	// ipAnyRE matches IPv4 or IPv6 candidates; the IPv4 alternative is tried
	// first.
	ipAnyRE = regexp.MustCompile(`(?i)(?:` + ipv4Pattern + `|` + ipv6Pattern + `)`)

	// macRE matches six hex pairs separated by ':' or '-', or three
	// dot-separated groups of four hex digits.
	macRE = regexp.MustCompile(`(?i)\b(?:[0-9a-f]{2}[:-]){5}[0-9a-f]{2}\b|\b(?:[0-9a-f]{4}\.){2}[0-9a-f]{4}\b`)
)