1
0
Fork 0
collect/collector/matcher.go
Andrew Tomaka e63426f7c7 Add directory filtering with --include-dir and --exclude-dir flags
Implement comprehensive directory filtering functionality that allows users
to control which directories are traversed during file collection.

Features:
- --include-dir: Only traverse directories matching specified patterns
- --exclude-dir: Skip directories matching specified patterns
- Support for glob patterns (e.g., 'temp-*', 'project-*')
- Multiple filters with OR logic (like existing --name/--match flags)
- Include filters take precedence over exclude filters when both specified
- Seamless integration with existing file matching functionality

Implementation:
- Add DirectoryFilter interface with Include/Exclude/Composite implementations
- Update Collector to accept optional DirectoryFilter and use filepath.SkipDir
- Add CLI flags and argument parsing for new directory filtering options
- Comprehensive test suite with 7 new test cases covering all scenarios
2025-06-12 22:12:45 -04:00

300 lines
8.1 KiB
Go

package collector
import (
"os"
"path/filepath"
"strings"
)
// Matcher determines if a file should be included in the collection.
//
// ShouldInclude receives the entry's path together with its os.FileInfo and
// reports whether that entry should be collected. NameMatcher and
// PatternMatcher in this file always report false for directories.
type Matcher interface {
ShouldInclude(path string, info os.FileInfo) bool
}
// NameMatcher matches files by exact name.
type NameMatcher struct {
// name is the file name an entry must have, compared with == against
// os.FileInfo.Name().
name string
}
// NewNameMatcher returns a NameMatcher that accepts only entries whose file
// name is exactly name.
func NewNameMatcher(name string) *NameMatcher {
	m := new(NameMatcher)
	m.name = name
	return m
}
// ShouldInclude reports whether info describes a non-directory entry whose
// name equals the matcher's configured name. Directories never match, and
// the path argument is not consulted.
func (m *NameMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	return !info.IsDir() && info.Name() == m.name
}
// PatternMatcher matches files within directories matching a glob pattern.
type PatternMatcher struct {
// pattern is the glob as stored by NewPatternMatcher, i.e. with a trailing
// slash removed.
pattern string
// matchedDirs records directory paths that were found to match the
// pattern; ShouldInclude only ever stores true values in it.
matchedDirs map[string]bool
// patternSegments is pattern split on os.PathSeparator.
patternSegments []string
}
// NewPatternMatcher creates a matcher for directory pattern matching.
//
// pattern is a filepath.Match glob. It may consist of several
// separator-delimited segments (for example "src/build-*"); a single trailing
// separator is ignored. Forward slashes are accepted on every platform: the
// pattern is converted to the OS separator before it is trimmed and split, so
// "a/b/" and the native spelling produce the same segments.
//
// The returned matcher starts with an empty, non-nil cache of matched
// directories.
func NewPatternMatcher(pattern string) *PatternMatcher {
	sep := string(os.PathSeparator)
	// Normalise first: trimming "/" while splitting on os.PathSeparator only
	// agrees where the two are the same character. FromSlash is a no-op on
	// platforms whose separator is "/".
	pattern = strings.TrimSuffix(filepath.FromSlash(pattern), sep)
	return &PatternMatcher{
		pattern:         pattern,
		matchedDirs:     make(map[string]bool),
		patternSegments: strings.Split(pattern, sep),
	}
}
// ShouldInclude reports whether a file lives below a directory that matches
// the pattern.
//
// Directories are never included themselves: a directory entry is only
// tested against the pattern and, on a match, remembered in matchedDirs.
// For any other entry the chain of parent directories is walked upwards,
// consulting the cache first and the pattern second, until filepath.Dir stops
// changing or the next parent would be ".". Errors from dirMatchesPattern are
// treated as "no match".
func (m *PatternMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	if info.IsDir() {
		if ok, err := m.dirMatchesPattern(path); err == nil && ok {
			m.matchedDirs[path] = true
		}
		return false
	}

	for cur := filepath.Dir(path); ; {
		// Cheap path: this ancestor was already recognised.
		if m.matchedDirs[cur] {
			return true
		}
		// The directory may not have been visited yet, so test it now and
		// remember a positive answer.
		if ok, err := m.dirMatchesPattern(cur); err == nil && ok {
			m.matchedDirs[cur] = true
			return true
		}
		up := filepath.Dir(cur)
		if up == cur || up == "." {
			return false
		}
		cur = up
	}
}
// dirMatchesPattern reports whether dirPath satisfies the matcher's glob.
//
// A pattern with exactly one segment is matched against the last element of
// dirPath only. Otherwise dirPath is split on os.PathSeparator and its
// trailing elements are compared, last element first, with the pattern
// segments; a path with fewer elements than the pattern never matches.
// The first filepath.Match error encountered is returned; a mismatch found
// earlier in that right-to-left scan returns (false, nil) without evaluating
// the remaining segments.
func (m *PatternMatcher) dirMatchesPattern(dirPath string) (bool, error) {
	want := m.patternSegments
	if len(want) == 1 {
		return filepath.Match(m.pattern, filepath.Base(dirPath))
	}

	have := strings.Split(dirPath, string(os.PathSeparator))
	// offset aligns the pattern with the tail of the path.
	offset := len(have) - len(want)
	if offset < 0 {
		return false, nil
	}

	for i := len(want) - 1; i >= 0; i-- {
		ok, err := filepath.Match(want[i], have[offset+i])
		if err != nil {
			return false, err
		}
		if !ok {
			return false, nil
		}
	}
	return true, nil
}
// CompositeMatcher combines multiple matchers with OR logic.
type CompositeMatcher struct {
// matchers are consulted in slice order; the first one that accepts an
// entry decides the result.
matchers []Matcher
}
// NewCompositeMatcher returns a CompositeMatcher that accepts an entry when
// any of the given matchers does. The slice is stored as passed, not copied.
func NewCompositeMatcher(matchers []Matcher) *CompositeMatcher {
	var c CompositeMatcher
	c.matchers = matchers
	return &c
}
// ShouldInclude reports whether at least one wrapped matcher accepts the
// entry. Matchers are asked in order and evaluation stops at the first one
// that returns true; with no matchers the result is false.
func (m *CompositeMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	for i := range m.matchers {
		if m.matchers[i].ShouldInclude(path, info) {
			return true
		}
	}
	return false
}
// DirectoryFilter determines if a directory should be traversed.
//
// ShouldTraverse receives the directory's path and the source directory it
// is evaluated against; the pattern-based filters in this file match on path
// made relative to sourceDir.
type DirectoryFilter interface {
ShouldTraverse(path string, sourceDir string) bool
}
// IncludeDirectoryFilter only traverses directories matching the patterns.
type IncludeDirectoryFilter struct {
// patterns are glob patterns handed to filepath.Match; a directory is
// traversed when any one of them matches.
patterns []string
}
// NewIncludeDirectoryFilter returns a filter that traverses only directories
// matching one of patterns. The slice is stored as passed, not copied.
func NewIncludeDirectoryFilter(patterns []string) *IncludeDirectoryFilter {
	var f IncludeDirectoryFilter
	f.patterns = patterns
	return &f
}
// ShouldTraverse reports whether the directory at path should be descended
// into.
//
// The source directory itself is always traversed. Any other directory is
// traversed only if its path relative to sourceDir (slash-separated) matches
// at least one include pattern. If path cannot be expressed relative to
// sourceDir the directory is not traversed.
func (f *IncludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical spelling of the source directory.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return false
	}
	// filepath.Rel yields "." when both arguments clean to the same
	// directory (e.g. "src/" vs "src"). That is still the source directory
	// and must not be subjected to the include patterns.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return true
		}
	}
	return false
}
// pathMatchesPattern reports whether the slash-separated path matches the
// glob pattern.
//
// A path matches when either the whole path matches the pattern or any
// single segment of it does. Because every parent directory name is one of
// those segments, this also covers "is below a matching directory"; no
// separate walk up the parent chain is required.
//
// A pattern that filepath.Match rejects as malformed matches nothing.
func (f *IncludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// Whole-path match; the only way a pattern containing "/" can match.
	if ok, err := filepath.Match(pattern, path); err == nil && ok {
		return true
	}

	// Segment match: the directory itself or any of its ancestors.
	for _, segment := range strings.Split(path, "/") {
		if ok, err := filepath.Match(pattern, segment); err == nil && ok {
			return true
		}
	}
	return false
}
// ExcludeDirectoryFilter skips directories matching the patterns.
type ExcludeDirectoryFilter struct {
// patterns are glob patterns handed to filepath.Match; a directory is
// skipped when any one of them matches.
patterns []string
}
// NewExcludeDirectoryFilter returns a filter that skips every directory
// matching one of patterns. The slice is stored as passed, not copied.
func NewExcludeDirectoryFilter(patterns []string) *ExcludeDirectoryFilter {
	var f ExcludeDirectoryFilter
	f.patterns = patterns
	return &f
}
// ShouldTraverse reports whether the directory at path should be descended
// into.
//
// The source directory itself is always traversed. Any other directory is
// skipped if its path relative to sourceDir (slash-separated) matches at
// least one exclude pattern. If path cannot be expressed relative to
// sourceDir the directory is traversed.
func (f *ExcludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical spelling of the source directory.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return true
	}
	// filepath.Rel yields "." when both arguments clean to the same
	// directory (e.g. "src/" vs "src"). Without this guard a pattern such as
	// ".*" would match "." and prune the entire source tree.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return false
		}
	}
	return true
}
// pathMatchesPattern reports whether the slash-separated path matches the
// glob pattern, either as a whole or through any one of its segments (the
// directory itself or one of its ancestors).
func (f *ExcludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// Try the full path first, then each segment from left to right.
	candidates := append([]string{path}, strings.Split(path, "/")...)
	for _, candidate := range candidates {
		// The error from filepath.Match is discarded; only the boolean
		// result is used.
		if ok, _ := filepath.Match(pattern, candidate); ok {
			return true
		}
	}
	return false
}
// CompositeDirectoryFilter combines multiple directory filters.
type CompositeDirectoryFilter struct {
// includeFilters, when non-empty, require at least one filter to accept
// the directory.
includeFilters []DirectoryFilter
// excludeFilters must all accept the directory; any single rejection
// prevents traversal.
excludeFilters []DirectoryFilter
}
// NewCompositeDirectoryFilter returns a filter that evaluates the given
// include and exclude filters together. Both slices are stored as passed,
// not copied.
func NewCompositeDirectoryFilter(includeFilters, excludeFilters []DirectoryFilter) *CompositeDirectoryFilter {
	c := new(CompositeDirectoryFilter)
	c.includeFilters = includeFilters
	c.excludeFilters = excludeFilters
	return c
}
// ShouldTraverse reports whether the directory passes both filter groups.
//
// Include filters are evaluated first, in order, stopping at the first one
// that accepts; if include filters exist and none accepts, the directory is
// rejected without consulting the exclude filters. Exclude filters are then
// evaluated in order and the first rejection ends the check.
//
// NOTE(review): a directory accepted by an include filter is still rejected
// when any exclude filter rejects it, so an include match does not override
// an exclude match.
func (f *CompositeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	if n := len(f.includeFilters); n > 0 {
		// Advance to the first include filter that accepts the directory.
		idx := 0
		for idx < n && !f.includeFilters[idx].ShouldTraverse(path, sourceDir) {
			idx++
		}
		if idx == n {
			return false
		}
	}

	for i := range f.excludeFilters {
		if !f.excludeFilters[i].ShouldTraverse(path, sourceDir) {
			return false
		}
	}
	return true
}