diff --git a/collector/collector.go b/collector/collector.go index cb93fb7..59f7fbd 100644 --- a/collector/collector.go +++ b/collector/collector.go @@ -14,15 +14,25 @@ type FileEntry struct { FullPath string // Absolute path for reading } -// Collector handles file collection based on a matcher +// Collector handles file collection based on a matcher and optional directory filter type Collector struct { - matcher Matcher + matcher Matcher + dirFilter DirectoryFilter } // New creates a new collector with the specified matcher func New(matcher Matcher) *Collector { return &Collector{ - matcher: matcher, + matcher: matcher, + dirFilter: nil, + } +} + +// NewWithDirectoryFilter creates a new collector with the specified matcher and directory filter +func NewWithDirectoryFilter(matcher Matcher, dirFilter DirectoryFilter) *Collector { + return &Collector{ + matcher: matcher, + dirFilter: dirFilter, } } @@ -70,6 +80,13 @@ func (c *Collector) Collect(sourceDir string) ([]FileEntry, error) { return nil } + // Check directory filter for directories + if info.IsDir() && c.dirFilter != nil { + if !c.dirFilter.ShouldTraverse(path, absSourceDir) { + return filepath.SkipDir + } + } + // Check if this file should be included if c.matcher.ShouldInclude(path, info) { // Calculate relative path from source directory diff --git a/collector/matcher.go b/collector/matcher.go index 0fd0210..7774acd 100644 --- a/collector/matcher.go +++ b/collector/matcher.go @@ -137,3 +137,164 @@ func (m *CompositeMatcher) ShouldInclude(path string, info os.FileInfo) bool { } return false } + +// DirectoryFilter determines if a directory should be traversed +type DirectoryFilter interface { + ShouldTraverse(path string, sourceDir string) bool +} + +// IncludeDirectoryFilter only traverses directories matching the patterns +type IncludeDirectoryFilter struct { + patterns []string +} + +// NewIncludeDirectoryFilter creates a filter that only includes matching directories +func 
// IncludeDirectoryFilter only traverses directories matching the patterns.
type IncludeDirectoryFilter struct {
	patterns []string
}

// NewIncludeDirectoryFilter creates a filter that only traverses directories
// matching at least one of patterns. Patterns use filepath.Match glob syntax;
// a trailing "/" on a pattern (for example "src/") is ignored.
func NewIncludeDirectoryFilter(patterns []string) *IncludeDirectoryFilter {
	return &IncludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether the directory at path, located under
// sourceDir, should be descended into.
//
// The source directory itself is always traversed. Any other directory is
// traversed when its path relative to sourceDir matches an include pattern,
// or when any single segment of that relative path does, so everything below
// a matching directory is traversed as well. If the relative path cannot be
// computed the directory is not traversed.
func (f *IncludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical strings always denote the walk root.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return false
	}
	// Rel cleans both arguments, so "." also catches spellings of the root
	// that differ only cosmetically (e.g. "/src" vs "/src/"). Without this
	// the root would be tested against the patterns and could be pruned.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return true
		}
	}
	return false
}

// pathMatchesPattern reports whether the slash-separated relative path
// matches pattern, either as a whole or through any one of its segments.
//
// Checking every segment already covers every ancestor directory, so no
// separate walk up the parent chain is needed.
func (f *IncludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// Directory patterns are often written with a trailing slash; segments
	// never contain one, so strip it or the pattern could never match.
	pattern = strings.TrimRight(pattern, "/")
	if pattern == "" {
		return false
	}

	// The only error filepath.Match returns is ErrBadPattern; a malformed
	// pattern is deliberately treated as matching nothing.
	if matched, _ := filepath.Match(pattern, path); matched {
		return true
	}
	for _, segment := range strings.Split(path, "/") {
		if matched, _ := filepath.Match(pattern, segment); matched {
			return true
		}
	}
	return false
}
// ExcludeDirectoryFilter skips directories matching the patterns.
type ExcludeDirectoryFilter struct {
	patterns []string
}

// NewExcludeDirectoryFilter creates a filter that skips directories matching
// any of patterns. Patterns use filepath.Match glob syntax; a trailing "/" on
// a pattern (for example "temp-*/") is ignored.
func NewExcludeDirectoryFilter(patterns []string) *ExcludeDirectoryFilter {
	return &ExcludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether the directory at path, located under
// sourceDir, should be descended into.
//
// The source directory itself is always traversed. Any other directory is
// skipped when its path relative to sourceDir matches an exclude pattern, or
// when any single segment of that relative path does. If the relative path
// cannot be computed the directory is traversed.
func (f *ExcludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical strings always denote the walk root.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return true
	}
	// Rel cleans both arguments, so "." also catches spellings of the root
	// that differ only cosmetically (e.g. "/src" vs "/src/"). Without this
	// a broad pattern such as "*" would match "." and exclude the root.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return false
		}
	}
	return true
}

// pathMatchesPattern reports whether the slash-separated relative path
// matches pattern, either as a whole or through any one of its segments.
func (f *ExcludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// Directory patterns are often written with a trailing slash; segments
	// never contain one, so strip it or the pattern could never match.
	pattern = strings.TrimRight(pattern, "/")
	if pattern == "" {
		return false
	}

	// The only error filepath.Match returns is ErrBadPattern; a malformed
	// pattern is deliberately treated as matching nothing.
	if matched, _ := filepath.Match(pattern, path); matched {
		return true
	}
	for _, segment := range strings.Split(path, "/") {
		if matched, _ := filepath.Match(pattern, segment); matched {
			return true
		}
	}
	return false
}
excludeFilters []DirectoryFilter) *CompositeDirectoryFilter { + return &CompositeDirectoryFilter{ + includeFilters: includeFilters, + excludeFilters: excludeFilters, + } +} + +// ShouldTraverse applies include filters first, then exclude filters +// Include filters take precedence when both are present +func (f *CompositeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool { + // If we have include filters, the directory must match at least one + if len(f.includeFilters) > 0 { + shouldInclude := false + for _, filter := range f.includeFilters { + if filter.ShouldTraverse(path, sourceDir) { + shouldInclude = true + break + } + } + if !shouldInclude { + return false + } + } + + // Apply exclude filters + for _, filter := range f.excludeFilters { + if !filter.ShouldTraverse(path, sourceDir) { + return false + } + } + + return true +} diff --git a/main.go b/main.go index 3fb7a08..60a565e 100644 --- a/main.go +++ b/main.go @@ -34,13 +34,17 @@ func main() { // Define flags using custom type for multiple values var nameFlags stringSlice var matchFlags stringSlice + var includeDirFlags stringSlice + var excludeDirFlags stringSlice flag.Var(&nameFlags, "name", "Match exact filename (can be specified multiple times)") flag.Var(&matchFlags, "match", "Match directory pattern (can be specified multiple times)") + flag.Var(&includeDirFlags, "include-dir", "Only traverse directories matching pattern (can be specified multiple times)") + flag.Var(&excludeDirFlags, "exclude-dir", "Skip directories matching pattern (can be specified multiple times)") // Custom usage message flag.Usage = func() { - fmt.Fprintf(os.Stderr, "Usage: %s [--name ]... [--match ]... \n\n", os.Args[0]) + fmt.Fprintf(os.Stderr, "Usage: %s [--name ]... [--match ]... [--include-dir ]... [--exclude-dir ]... 
\n\n", os.Args[0]) fmt.Fprintf(os.Stderr, "Collects files recursively matching specific criteria and archives them.\n\n") fmt.Fprintf(os.Stderr, "Options:\n") flag.PrintDefaults() @@ -48,6 +52,7 @@ func main() { fmt.Fprintf(os.Stderr, " %s --name .mise.toml ./ backup.tgz\n", os.Args[0]) fmt.Fprintf(os.Stderr, " %s --match 'aet-*/' ./ backup.zip\n", os.Args[0]) fmt.Fprintf(os.Stderr, " %s --name .mise.toml --name README.md --match 'test-*' ./ backup.tgz\n", os.Args[0]) + fmt.Fprintf(os.Stderr, " %s --include-dir src --exclude-dir 'temp-*' --name '*.go' ./ backup.tgz\n", os.Args[0]) } flag.Parse() @@ -98,8 +103,32 @@ func main() { matcher = collector.NewCompositeMatcher(matchers) } + // Create directory filters + var dirFilter collector.DirectoryFilter + if len(includeDirFlags) > 0 || len(excludeDirFlags) > 0 { + var includeFilters []collector.DirectoryFilter + var excludeFilters []collector.DirectoryFilter + + // Create include filters + if len(includeDirFlags) > 0 { + includeFilters = append(includeFilters, collector.NewIncludeDirectoryFilter(includeDirFlags)) + } + + // Create exclude filters + if len(excludeDirFlags) > 0 { + excludeFilters = append(excludeFilters, collector.NewExcludeDirectoryFilter(excludeDirFlags)) + } + + dirFilter = collector.NewCompositeDirectoryFilter(includeFilters, excludeFilters) + } + // Create collector and collect files - c := collector.New(matcher) + var c *collector.Collector + if dirFilter != nil { + c = collector.NewWithDirectoryFilter(matcher, dirFilter) + } else { + c = collector.New(matcher) + } files, err := c.Collect(sourceDir) if err != nil { fmt.Fprintf(os.Stderr, "Error: %v\n", err) diff --git a/test.sh b/test.sh index 2e91b1a..c326286 100755 --- a/test.sh +++ b/test.sh @@ -166,6 +166,53 @@ run_test "combined name and match flags" \ "./collect --name .mise.toml --match 'aet-*' --name README.md test/ test-combined.tgz" verify_archive_contents "test-combined.tgz" ".mise.toml README.md deep/nested/aet-tools/deep.sh 
# The tests below share one fixture tree under test/ and build on each other's
# files, so their order matters.

# Test 15: Include directory filter - simple path
# Creates three sibling directories; only src/ should be traversed.
mkdir -p test/src test/docs test/examples
echo "source file" > test/src/main.go
echo "doc file" > test/docs/README.md
echo "example file" > test/examples/demo.txt
run_test "include directory - simple path" \
    "./collect --include-dir src --name 'main.go' test/ test-include-simple.tgz"
verify_archive_contents "test-include-simple.tgz" "src/main.go"

# Test 16: Include directory filter - glob pattern
# other/ does not match 'project-*', so nothing beneath it is visited.
mkdir -p test/project-one/src test/project-two/src test/other/src
echo "p1 source" > test/project-one/src/app.go
echo "p2 source" > test/project-two/src/lib.go
echo "other source" > test/other/src/util.go
run_test "include directory - glob pattern" \
    "./collect --include-dir 'project-*' --name 'app.go' --name 'lib.go' test/ test-include-glob.tgz"
verify_archive_contents "test-include-glob.tgz" "project-one/src/app.go project-two/src/lib.go"

# Test 17: Exclude directory filter - simple path
# Reuses docs/README.md from Test 15; it must be skipped.
# NOTE(review): the expected top-level README.md is not created in this
# section - presumably set up by an earlier test; confirm.
run_test "exclude directory - simple path" \
    "./collect --exclude-dir docs --name 'README.md' test/ test-exclude-simple.tgz"
verify_archive_contents "test-exclude-simple.tgz" "README.md"

# Test 18: Exclude directory filter - glob pattern
# temp-cache/data.txt must be skipped; examples/demo.txt comes from Test 15.
# NOTE(review): prefix-one/data.txt is not created in this section -
# presumably set up by an earlier test; confirm.
mkdir -p test/temp-cache test/temp-logs test/important
echo "cache file" > test/temp-cache/data.txt
echo "log file" > test/temp-logs/app.log
echo "important file" > test/important/config.txt
run_test "exclude directory - glob pattern" \
    "./collect --exclude-dir 'temp-*' --name 'data.txt' --name 'config.txt' --name 'demo.txt' test/ test-exclude-glob.tgz"
verify_archive_contents "test-exclude-glob.tgz" "examples/demo.txt important/config.txt prefix-one/data.txt"

# Test 19: Multiple include directories
# A directory is traversed if it matches any of the --include-dir patterns.
run_test "multiple include directories" \
    "./collect --include-dir src --include-dir docs --name 'main.go' --name 'README.md' test/ test-multi-include.tgz"
verify_archive_contents "test-multi-include.tgz" "docs/README.md src/main.go"

# Test 20: Multiple exclude directories
# A directory is skipped if it matches any of the --exclude-dir patterns.
run_test "multiple exclude directories" \
    "./collect --exclude-dir 'temp-*' --exclude-dir examples --name 'data.txt' --name 'config.txt' test/ test-multi-exclude.tgz"
verify_archive_contents "test-multi-exclude.tgz" "important/config.txt prefix-one/data.txt"

# Test 21: Combined include and exclude
# project-two matches both the include and the exclude pattern and is expected
# to be skipped, i.e. the exclude filter wins where the two overlap.
run_test "combined include and exclude filters" \
    "./collect --include-dir 'project-*' --exclude-dir 'project-two' --name 'app.go' --name 'lib.go' test/ test-include-exclude.tgz"
verify_archive_contents "test-include-exclude.tgz" "project-one/src/app.go"

# Clean up
# Removes the fixture tree, the generated archives, the collect binary and the
# temporary output file.
echo -e "\nCleaning up..."
rm -rf test test-*.tgz test-*.zip collect /tmp/test_output.txt