Implement comprehensive directory filtering functionality that allows users to control which directories are traversed during file collection.

Features:
- --include-dir: Only traverse directories matching specified patterns
- --exclude-dir: Skip directories matching specified patterns
- Support for glob patterns (e.g., 'temp-*', 'project-*')
- Multiple filters with OR logic (like existing --name/--match flags)
- Include filters take precedence over exclude filters when both are specified
- Seamless integration with existing file matching functionality

Implementation:
- Add DirectoryFilter interface with Include/Exclude/Composite implementations
- Update Collector to accept optional DirectoryFilter and use filepath.SkipDir
- Add CLI flags and argument parsing for new directory filtering options
- Comprehensive test suite with 7 new test cases covering all scenarios
300 lines
8.1 KiB
Go
300 lines
8.1 KiB
Go
package collector
|
|
|
|
import (
|
|
"os"
|
|
"path/filepath"
|
|
"strings"
|
|
)
|
|
|
|
// Matcher decides whether a filesystem entry belongs in the collection.
type Matcher interface {
	// ShouldInclude reports whether the entry at path, described by info,
	// should be collected.
	ShouldInclude(path string, info os.FileInfo) bool
}
|
|
|
|
// NameMatcher selects regular entries whose base name equals a fixed string.
type NameMatcher struct {
	name string
}

// NewNameMatcher returns a NameMatcher that accepts files named exactly name.
func NewNameMatcher(name string) *NameMatcher {
	m := new(NameMatcher)
	m.name = name
	return m
}

// ShouldInclude reports whether info describes a non-directory entry whose
// name is identical to the matcher's name. The path argument is not consulted.
func (m *NameMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	return !info.IsDir() && info.Name() == m.name
}
|
|
|
|
// PatternMatcher selects files that live anywhere beneath a directory whose
// path matches a glob pattern.
//
// pattern holds the normalized pattern, patternSegments the same pattern split
// on the OS path separator, and matchedDirs remembers directories already
// found to match so repeated lookups are cheap. The cache is a plain map and
// is written from ShouldInclude without any locking.
type PatternMatcher struct {
	pattern         string
	matchedDirs     map[string]bool
	patternSegments []string
}

// NewPatternMatcher creates a matcher for directory pattern matching.
//
// The pattern is normalized before use: forward slashes are converted to the
// OS path separator and every trailing separator is removed. Without this a
// pattern such as "temp-*//" would keep an empty final segment that no
// directory name can match.
func NewPatternMatcher(pattern string) *PatternMatcher {
	sep := string(os.PathSeparator)
	pattern = strings.TrimRight(filepath.FromSlash(pattern), sep)

	return &PatternMatcher{
		pattern:         pattern,
		matchedDirs:     make(map[string]bool),
		patternSegments: strings.Split(pattern, sep),
	}
}

// ShouldInclude reports whether path is a file located beneath a directory
// that matches the pattern.
//
// Directories themselves are never included; when info describes a matching
// directory it is only recorded in the cache. For files, every ancestor
// directory of path is tested, nearest first, until one matches or the walk
// reaches the filesystem root or ".". A malformed pattern (an error from
// filepath.Match) is treated as "no match".
func (m *PatternMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	if info.IsDir() {
		if matched, err := m.dirMatchesPattern(path); err == nil && matched {
			m.matchedDirs[path] = true
		}
		// Only files within a matching directory are collected.
		return false
	}

	dir := filepath.Dir(path)
	for {
		if m.matchedDirs[dir] {
			return true
		}

		// The directory may not have been visited yet, so test it directly.
		if matched, err := m.dirMatchesPattern(dir); err == nil && matched {
			m.matchedDirs[dir] = true
			return true
		}

		parent := filepath.Dir(dir)
		if parent == dir || parent == "." {
			break
		}
		dir = parent
	}

	return false
}

// dirMatchesPattern reports whether dirPath matches the glob pattern.
//
// A single-segment pattern is compared against the base name of dirPath only.
// A multi-segment pattern is compared segment by segment against the trailing
// segments of dirPath, starting from the last one. Any error returned by
// filepath.Match is passed back to the caller.
func (m *PatternMatcher) dirMatchesPattern(dirPath string) (bool, error) {
	if len(m.patternSegments) == 1 {
		return filepath.Match(m.pattern, filepath.Base(dirPath))
	}

	pathSegments := strings.Split(dirPath, string(os.PathSeparator))
	if len(pathSegments) < len(m.patternSegments) {
		return false, nil
	}

	// Walk both slices backwards so the pattern is anchored at the end of the path.
	for i := 0; i < len(m.patternSegments); i++ {
		patternIdx := len(m.patternSegments) - 1 - i
		pathIdx := len(pathSegments) - 1 - i

		matched, err := filepath.Match(m.patternSegments[patternIdx], pathSegments[pathIdx])
		if err != nil {
			return false, err
		}
		if !matched {
			return false, nil
		}
	}

	return true, nil
}
|
|
|
|
// CompositeMatcher combines multiple matchers with OR logic
|
|
type CompositeMatcher struct {
|
|
matchers []Matcher
|
|
}
|
|
|
|
// NewCompositeMatcher creates a matcher that combines multiple matchers
|
|
func NewCompositeMatcher(matchers []Matcher) *CompositeMatcher {
|
|
return &CompositeMatcher{matchers: matchers}
|
|
}
|
|
|
|
// ShouldInclude returns true if ANY of the matchers match the file
|
|
func (m *CompositeMatcher) ShouldInclude(path string, info os.FileInfo) bool {
|
|
for _, matcher := range m.matchers {
|
|
if matcher.ShouldInclude(path, info) {
|
|
return true
|
|
}
|
|
}
|
|
return false
|
|
}
|
|
|
|
// DirectoryFilter decides whether a directory should be descended into.
type DirectoryFilter interface {
	// ShouldTraverse reports whether the directory at path should be
	// traversed; sourceDir is the directory the traversal started from.
	ShouldTraverse(path, sourceDir string) bool
}
|
|
|
|
// IncludeDirectoryFilter only traverses directories that match one of its
// glob patterns, or that lie beneath a directory that does.
type IncludeDirectoryFilter struct {
	patterns []string
}

// NewIncludeDirectoryFilter creates a filter that only includes directories
// matching at least one of patterns. The slice is stored as given.
func NewIncludeDirectoryFilter(patterns []string) *IncludeDirectoryFilter {
	return &IncludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether path should be descended into.
//
// The source directory itself is always traversed, including when path and
// sourceDir are spelled differently but resolve to the same location (for
// example "src/" and "src"). Any other directory is traversed only when its
// path relative to sourceDir matches one of the include patterns. If the
// relative path cannot be computed the directory is not traversed.
//
// NOTE(review): a directory whose relative path does not match is rejected
// even if a matching directory exists somewhere below it; confirm this is the
// intended behaviour for nested matches.
func (f *IncludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return false
	}
	// "." means path is the source directory written another way.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return true
		}
	}

	return false
}

// pathMatchesPattern reports whether the slash-separated relative path, or any
// directory above it, matches pattern.
//
// Two candidates are tested at every depth: the bare directory name at that
// depth, and the leading portion of the path up to and including it. The
// first lets a simple pattern such as "temp-*" match at any level; the second
// lets a multi-segment pattern such as "a/b" also cover "a/b/c". A malformed
// pattern (filepath.ErrBadPattern) is treated as matching nothing.
func (f *IncludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	matches := func(candidate string) bool {
		ok, err := filepath.Match(pattern, candidate)
		return err == nil && ok
	}

	segments := strings.Split(path, "/")
	for i, segment := range segments {
		if matches(segment) {
			return true
		}
		// At depth 0 the prefix equals the segment that was just tested.
		if i > 0 && matches(strings.Join(segments[:i+1], "/")) {
			return true
		}
	}

	return false
}
|
|
|
|
// ExcludeDirectoryFilter skips directories that match one of its glob
// patterns, together with everything beneath them.
type ExcludeDirectoryFilter struct {
	patterns []string
}

// NewExcludeDirectoryFilter creates a filter that excludes directories
// matching any of patterns. The slice is stored as given.
func NewExcludeDirectoryFilter(patterns []string) *ExcludeDirectoryFilter {
	return &ExcludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether path should be descended into.
//
// The source directory itself is always traversed, including when path and
// sourceDir are spelled differently but resolve to the same location; without
// that check a pattern such as "*" or ".*" would match the relative path "."
// and exclude the whole tree. Any other directory is skipped when its path
// relative to sourceDir, or a directory above it, matches an exclude pattern.
// If the relative path cannot be computed the directory is traversed.
func (f *ExcludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return true
	}
	// "." means path is the source directory written another way.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return false
		}
	}

	return true
}

// pathMatchesPattern reports whether the slash-separated relative path, or any
// directory above it, matches pattern.
//
// Two candidates are tested at every depth: the bare directory name at that
// depth, and the leading portion of the path up to and including it. The
// first lets a simple pattern such as "node_modules" match at any level; the
// second lets a multi-segment pattern such as "a/b" also cover "a/b/c". A
// malformed pattern (filepath.ErrBadPattern) is treated as matching nothing,
// so it excludes nothing.
func (f *ExcludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	matches := func(candidate string) bool {
		ok, err := filepath.Match(pattern, candidate)
		return err == nil && ok
	}

	segments := strings.Split(path, "/")
	for i, segment := range segments {
		if matches(segment) {
			return true
		}
		// At depth 0 the prefix equals the segment that was just tested.
		if i > 0 && matches(strings.Join(segments[:i+1], "/")) {
			return true
		}
	}

	return false
}
|
|
|
|
// CompositeDirectoryFilter combines multiple directory filters
|
|
type CompositeDirectoryFilter struct {
|
|
includeFilters []DirectoryFilter
|
|
excludeFilters []DirectoryFilter
|
|
}
|
|
|
|
// NewCompositeDirectoryFilter creates a filter combining include and exclude filters
|
|
func NewCompositeDirectoryFilter(includeFilters, excludeFilters []DirectoryFilter) *CompositeDirectoryFilter {
|
|
return &CompositeDirectoryFilter{
|
|
includeFilters: includeFilters,
|
|
excludeFilters: excludeFilters,
|
|
}
|
|
}
|
|
|
|
// ShouldTraverse applies include filters first, then exclude filters
|
|
// Include filters take precedence when both are present
|
|
func (f *CompositeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
|
|
// If we have include filters, the directory must match at least one
|
|
if len(f.includeFilters) > 0 {
|
|
shouldInclude := false
|
|
for _, filter := range f.includeFilters {
|
|
if filter.ShouldTraverse(path, sourceDir) {
|
|
shouldInclude = true
|
|
break
|
|
}
|
|
}
|
|
if !shouldInclude {
|
|
return false
|
|
}
|
|
}
|
|
|
|
// Apply exclude filters
|
|
for _, filter := range f.excludeFilters {
|
|
if !filter.ShouldTraverse(path, sourceDir) {
|
|
return false
|
|
}
|
|
}
|
|
|
|
return true
|
|
}
|