1
0
Fork 0

Add directory filtering with --include-dir and --exclude-dir flags

Implement comprehensive directory filtering functionality that allows users
to control which directories are traversed during file collection.

Features:
- --include-dir: Only traverse directories matching specified patterns
- --exclude-dir: Skip directories matching specified patterns
- Support for glob patterns (e.g., 'temp-*', 'project-*')
- Multiple filters with OR logic (like existing --name/--match flags)
- When both are specified, a directory must satisfy the include filters and must not match any exclude filter (an exclude match wins on overlap)
- Seamless integration with existing file matching functionality

Implementation:
- Add DirectoryFilter interface with Include/Exclude/Composite implementations
- Update Collector to accept optional DirectoryFilter and use filepath.SkipDir
- Add CLI flags and argument parsing for new directory filtering options
- Comprehensive test suite with 7 new test cases covering all scenarios
This commit is contained in:
Andrew Tomaka 2025-06-12 22:12:45 -04:00
parent bde7aeed90
commit e63426f7c7
4 changed files with 259 additions and 5 deletions

View file

@ -14,15 +14,25 @@ type FileEntry struct {
FullPath string // Absolute path for reading
}
// Collector handles file collection based on a matcher
// Collector handles file collection based on a matcher and optional directory filter
type Collector struct {
// matcher is asked, for each walked path, whether that entry should be collected.
matcher Matcher
matcher Matcher
// dirFilter is optional; when it is non-nil, Collect asks it whether each
// directory should be descended into and skips the directory otherwise.
dirFilter DirectoryFilter
}
// New creates a new collector with the specified matcher.
// The returned Collector has a nil dirFilter, i.e. no directory filter is configured.
func New(matcher Matcher) *Collector {
return &Collector{
matcher: matcher,
matcher: matcher,
dirFilter: nil,
}
}
// NewWithDirectoryFilter returns a Collector that selects files with matcher
// and consults dirFilter to decide which directories are descended into.
// Passing a nil dirFilter leaves directory filtering unconfigured.
func NewWithDirectoryFilter(matcher Matcher, dirFilter DirectoryFilter) *Collector {
	c := new(Collector)
	c.matcher = matcher
	c.dirFilter = dirFilter
	return c
}
@ -70,6 +80,13 @@ func (c *Collector) Collect(sourceDir string) ([]FileEntry, error) {
return nil
}
// Check directory filter for directories
if info.IsDir() && c.dirFilter != nil {
if !c.dirFilter.ShouldTraverse(path, absSourceDir) {
return filepath.SkipDir
}
}
// Check if this file should be included
if c.matcher.ShouldInclude(path, info) {
// Calculate relative path from source directory

View file

@ -137,3 +137,164 @@ func (m *CompositeMatcher) ShouldInclude(path string, info os.FileInfo) bool {
}
return false
}
// DirectoryFilter decides whether a directory walk may descend into a
// directory.
type DirectoryFilter interface {
	// ShouldTraverse reports whether the directory at path, located under
	// the walk root sourceDir, should be descended into.
	ShouldTraverse(path, sourceDir string) bool
}
// IncludeDirectoryFilter only traverses directories selected by one of its
// glob patterns.
//
// A directory is selected when its slash-separated path relative to the
// source directory matches a pattern as a whole, or when any single path
// segment (the directory's own name or the name of one of its ancestors below
// the source directory) matches a pattern.
//
// Note that a directory none of whose segments match is rejected even if it
// contains a matching descendant; a caller that prunes rejected directories
// will therefore never reach that descendant.
type IncludeDirectoryFilter struct {
	// patterns holds glob patterns in filepath.Match syntax.
	patterns []string
}

// NewIncludeDirectoryFilter creates a filter that only includes directories
// matching at least one of patterns. The slice is stored as given, not copied.
func NewIncludeDirectoryFilter(patterns []string) *IncludeDirectoryFilter {
	return &IncludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether the directory at path should be descended
// into, given that the walk is rooted at sourceDir.
//
// The source directory itself is always traversed, regardless of how it is
// spelled (for example with a trailing separator). Any other directory is
// traversed only if it matches at least one include pattern. If path cannot be
// made relative to sourceDir the directory is not traversed.
func (f *IncludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical spelling of the source directory.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return false
	}
	// filepath.Rel cleans both arguments, so "." means path names the source
	// directory even though the two strings differ.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return true
		}
	}
	return false
}

// pathMatchesPattern reports whether the slash-separated relative path matches
// pattern either as a whole or in any one of its segments.
//
// Errors from filepath.Match are deliberately ignored: the only possible error
// is filepath.ErrBadPattern, and a malformed pattern is treated as matching
// nothing.
func (f *IncludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// Whole-path match, which is what allows multi-segment patterns.
	if matched, _ := filepath.Match(pattern, path); matched {
		return true
	}

	// Per-segment match. This covers the directory's own name as well as the
	// name of every ancestor, so no separate walk up the parents is needed.
	for _, segment := range strings.Split(path, "/") {
		if matched, _ := filepath.Match(pattern, segment); matched {
			return true
		}
	}
	return false
}
// ExcludeDirectoryFilter skips directories selected by one of its glob
// patterns.
//
// A directory is selected when its slash-separated path relative to the
// source directory matches a pattern as a whole, or when any single path
// segment (the directory's own name or the name of one of its ancestors below
// the source directory) matches a pattern.
type ExcludeDirectoryFilter struct {
	// patterns holds glob patterns in filepath.Match syntax.
	patterns []string
}

// NewExcludeDirectoryFilter creates a filter that excludes directories
// matching at least one of patterns. The slice is stored as given, not copied.
func NewExcludeDirectoryFilter(patterns []string) *ExcludeDirectoryFilter {
	return &ExcludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether the directory at path should be descended
// into, given that the walk is rooted at sourceDir.
//
// The source directory itself is always traversed, regardless of how it is
// spelled (for example with a trailing separator) and regardless of the
// patterns. Any other directory is traversed unless it matches an exclude
// pattern. If path cannot be made relative to sourceDir the directory is
// traversed, since there is nothing to match the patterns against.
func (f *ExcludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical spelling of the source directory.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return true
	}
	// filepath.Rel cleans both arguments, so "." means path names the source
	// directory; without this guard a pattern such as "*" would match "." and
	// exclude the walk root.
	if relPath == "." {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return false
		}
	}
	return true
}

// pathMatchesPattern reports whether the slash-separated relative path matches
// pattern either as a whole or in any one of its segments.
//
// Errors from filepath.Match are deliberately ignored: the only possible error
// is filepath.ErrBadPattern, and a malformed pattern is treated as matching
// nothing (so it excludes nothing).
func (f *ExcludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// Whole-path match, which is what allows multi-segment patterns.
	if matched, _ := filepath.Match(pattern, path); matched {
		return true
	}

	// Per-segment match: the directory's own name or any ancestor's name.
	for _, segment := range strings.Split(path, "/") {
		if matched, _ := filepath.Match(pattern, segment); matched {
			return true
		}
	}
	return false
}
// CompositeDirectoryFilter combines a set of include filters with a set of
// exclude filters into a single DirectoryFilter.
type CompositeDirectoryFilter struct {
	includeFilters []DirectoryFilter
	excludeFilters []DirectoryFilter
}

// NewCompositeDirectoryFilter builds a composite from the given include and
// exclude filters. Either slice may be empty.
func NewCompositeDirectoryFilter(includeFilters, excludeFilters []DirectoryFilter) *CompositeDirectoryFilter {
	composite := new(CompositeDirectoryFilter)
	composite.includeFilters = includeFilters
	composite.excludeFilters = excludeFilters
	return composite
}

// ShouldTraverse reports whether the directory at path should be descended
// into.
//
// The include filters are consulted first: when there is at least one, the
// directory must be accepted by at least one of them. After that every exclude
// filter must accept the directory as well, so a rejection by an exclude
// filter wins even when an include filter accepted the directory.
func (f *CompositeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	if !f.passesInclude(path, sourceDir) {
		return false
	}
	for _, exclude := range f.excludeFilters {
		if !exclude.ShouldTraverse(path, sourceDir) {
			return false
		}
	}
	return true
}

// passesInclude reports whether the include stage lets the directory through:
// trivially true without include filters, otherwise true as soon as one
// include filter accepts it.
func (f *CompositeDirectoryFilter) passesInclude(path, sourceDir string) bool {
	if len(f.includeFilters) == 0 {
		return true
	}
	for _, include := range f.includeFilters {
		if include.ShouldTraverse(path, sourceDir) {
			return true
		}
	}
	return false
}

33
main.go
View file

@ -34,13 +34,17 @@ func main() {
// Define flags using custom type for multiple values
var nameFlags stringSlice
var matchFlags stringSlice
var includeDirFlags stringSlice
var excludeDirFlags stringSlice
flag.Var(&nameFlags, "name", "Match exact filename (can be specified multiple times)")
flag.Var(&matchFlags, "match", "Match directory pattern (can be specified multiple times)")
flag.Var(&includeDirFlags, "include-dir", "Only traverse directories matching pattern (can be specified multiple times)")
flag.Var(&excludeDirFlags, "exclude-dir", "Skip directories matching pattern (can be specified multiple times)")
// Custom usage message
flag.Usage = func() {
fmt.Fprintf(os.Stderr, "Usage: %s [--name <filename>]... [--match <pattern>]... <source-dir> <output-archive>\n\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Usage: %s [--name <filename>]... [--match <pattern>]... [--include-dir <pattern>]... [--exclude-dir <pattern>]... <source-dir> <output-archive>\n\n", os.Args[0])
fmt.Fprintf(os.Stderr, "Collects files recursively matching specific criteria and archives them.\n\n")
fmt.Fprintf(os.Stderr, "Options:\n")
flag.PrintDefaults()
@ -48,6 +52,7 @@ func main() {
fmt.Fprintf(os.Stderr, " %s --name .mise.toml ./ backup.tgz\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s --match 'aet-*/' ./ backup.zip\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s --name .mise.toml --name README.md --match 'test-*' ./ backup.tgz\n", os.Args[0])
fmt.Fprintf(os.Stderr, " %s --include-dir src --exclude-dir 'temp-*' --name '*.go' ./ backup.tgz\n", os.Args[0])
}
flag.Parse()
@ -98,8 +103,32 @@ func main() {
matcher = collector.NewCompositeMatcher(matchers)
}
// Create directory filters
var dirFilter collector.DirectoryFilter
if len(includeDirFlags) > 0 || len(excludeDirFlags) > 0 {
var includeFilters []collector.DirectoryFilter
var excludeFilters []collector.DirectoryFilter
// Create include filters
if len(includeDirFlags) > 0 {
includeFilters = append(includeFilters, collector.NewIncludeDirectoryFilter(includeDirFlags))
}
// Create exclude filters
if len(excludeDirFlags) > 0 {
excludeFilters = append(excludeFilters, collector.NewExcludeDirectoryFilter(excludeDirFlags))
}
dirFilter = collector.NewCompositeDirectoryFilter(includeFilters, excludeFilters)
}
// Create collector and collect files
c := collector.New(matcher)
var c *collector.Collector
if dirFilter != nil {
c = collector.NewWithDirectoryFilter(matcher, dirFilter)
} else {
c = collector.New(matcher)
}
files, err := c.Collect(sourceDir)
if err != nil {
fmt.Fprintf(os.Stderr, "Error: %v\n", err)

47
test.sh
View file

@ -166,6 +166,53 @@ run_test "combined name and match flags" \
"./collect --name .mise.toml --match 'aet-*' --name README.md test/ test-combined.tgz"
verify_archive_contents "test-combined.tgz" ".mise.toml README.md deep/nested/aet-tools/deep.sh subdir/.mise.toml subdir/aet-bin/.mise.toml subdir/aet-bin/tool subdir/aet-config/settings.conf"
# Test 15: Include directory filter - simple path
# Creates three sibling directories with one file each; only src/ matches the
# include pattern, so only src/main.go is expected in the archive.
mkdir -p test/src test/docs test/examples
echo "source file" > test/src/main.go
echo "doc file" > test/docs/README.md
echo "example file" > test/examples/demo.txt
run_test "include directory - simple path" \
"./collect --include-dir src --name 'main.go' test/ test-include-simple.tgz"
verify_archive_contents "test-include-simple.tgz" "src/main.go"
# Test 16: Include directory filter - glob pattern
# project-one/ and project-two/ match 'project-*'; other/ does not, so
# other/src/util.go must not be reachable (and is not named anyway).
mkdir -p test/project-one/src test/project-two/src test/other/src
echo "p1 source" > test/project-one/src/app.go
echo "p2 source" > test/project-two/src/lib.go
echo "other source" > test/other/src/util.go
run_test "include directory - glob pattern" \
"./collect --include-dir 'project-*' --name 'app.go' --name 'lib.go' test/ test-include-glob.tgz"
verify_archive_contents "test-include-glob.tgz" "project-one/src/app.go project-two/src/lib.go"
# Test 17: Exclude directory filter - simple path
# docs/README.md (created in Test 15) must be skipped; the expected README.md
# at the archive root is presumably created earlier in this script (not
# visible in this hunk).
run_test "exclude directory - simple path" \
"./collect --exclude-dir docs --name 'README.md' test/ test-exclude-simple.tgz"
verify_archive_contents "test-exclude-simple.tgz" "README.md"
# Test 18: Exclude directory filter - glob pattern
# temp-cache/ and temp-logs/ match 'temp-*' and are skipped, so
# temp-cache/data.txt is absent. prefix-one/data.txt is presumably a fixture
# from an earlier test in this script (not visible in this hunk).
mkdir -p test/temp-cache test/temp-logs test/important
echo "cache file" > test/temp-cache/data.txt
echo "log file" > test/temp-logs/app.log
echo "important file" > test/important/config.txt
run_test "exclude directory - glob pattern" \
"./collect --exclude-dir 'temp-*' --name 'data.txt' --name 'config.txt' --name 'demo.txt' test/ test-exclude-glob.tgz"
verify_archive_contents "test-exclude-glob.tgz" "examples/demo.txt important/config.txt prefix-one/data.txt"
# Test 19: Multiple include directories
# Repeated --include-dir flags are OR-ed: both src/ and docs/ are traversed.
# NOTE(review): Test 17 expects a README.md directly under test/; the directory
# filter appears to be consulted for directories only, so that root-level file
# may also end up in this archive - confirm how verify_archive_contents treats
# extra entries.
run_test "multiple include directories" \
"./collect --include-dir src --include-dir docs --name 'main.go' --name 'README.md' test/ test-multi-include.tgz"
verify_archive_contents "test-multi-include.tgz" "docs/README.md src/main.go"
# Test 20: Multiple exclude directories
# Repeated --exclude-dir flags are OR-ed: temp-*/ and examples/ are both skipped.
run_test "multiple exclude directories" \
"./collect --exclude-dir 'temp-*' --exclude-dir examples --name 'data.txt' --name 'config.txt' test/ test-multi-exclude.tgz"
verify_archive_contents "test-multi-exclude.tgz" "important/config.txt prefix-one/data.txt"
# Test 21: Combined include and exclude (exclude wins where both match)
# project-two/ matches the include glob 'project-*' but is also named by
# --exclude-dir, so only project-one/src/app.go is expected.
run_test "combined include and exclude filters" \
"./collect --include-dir 'project-*' --exclude-dir 'project-two' --name 'app.go' --name 'lib.go' test/ test-include-exclude.tgz"
verify_archive_contents "test-include-exclude.tgz" "project-one/src/app.go"
# Clean up
# Removes the fixture tree, all generated archives, the collect binary and the
# temporary output file.
echo -e "\nCleaning up..."
rm -rf test test-*.tgz test-*.zip collect /tmp/test_output.txt