Add directory filtering with --include-dir and --exclude-dir flags
Implement directory filtering that lets users control which directories are traversed during file collection.

Features:
- --include-dir: only traverse directories matching the specified patterns
- --exclude-dir: skip directories matching the specified patterns
- Support for glob patterns (e.g., 'temp-*', 'project-*')
- Multiple filters with OR logic (like the existing --name/--match flags)
- When both are specified, a directory must match an include pattern and must not match any exclude pattern
- Seamless integration with the existing file matching functionality

Implementation:
- Add DirectoryFilter interface with Include/Exclude/Composite implementations
- Update Collector to accept an optional DirectoryFilter and use filepath.SkipDir
- Add CLI flags and argument parsing for the new directory filtering options
- Comprehensive test suite with 7 new test cases covering all scenarios
This commit is contained in:
parent
bde7aeed90
commit
e63426f7c7
4 changed files with 259 additions and 5 deletions
|
@ -14,15 +14,25 @@ type FileEntry struct {
|
|||
FullPath string // Absolute path for reading
|
||||
}
|
||||
|
||||
// Collector handles file collection based on a matcher
|
||||
// Collector handles file collection based on a matcher and optional directory filter.
// While walking the source directory, directories are first offered to
// dirFilter (when it is non-nil), which can prune them from the walk; paths
// that remain are then offered to matcher.
|
||||
type Collector struct {
|
||||
matcher Matcher // decides, via ShouldInclude, which visited paths are collected
|
||||
dirFilter DirectoryFilter // optional; nil means no directory is pruned by a filter
|
||||
}
|
||||
|
||||
// New creates a new collector with the specified matcher
|
||||
func New(matcher Matcher) *Collector {
|
||||
return &Collector{
|
||||
matcher: matcher,
|
||||
dirFilter: nil,
|
||||
}
|
||||
}
|
||||
|
||||
// NewWithDirectoryFilter creates a new collector with the specified matcher and directory filter
|
||||
func NewWithDirectoryFilter(matcher Matcher, dirFilter DirectoryFilter) *Collector {
|
||||
return &Collector{
|
||||
matcher: matcher,
|
||||
dirFilter: dirFilter,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -70,6 +80,13 @@ func (c *Collector) Collect(sourceDir string) ([]FileEntry, error) {
|
|||
return nil
|
||||
}
|
||||
|
||||
// Check directory filter for directories
|
||||
if info.IsDir() && c.dirFilter != nil {
|
||||
if !c.dirFilter.ShouldTraverse(path, absSourceDir) {
|
||||
return filepath.SkipDir
|
||||
}
|
||||
}
|
||||
|
||||
// Check if this file should be included
|
||||
if c.matcher.ShouldInclude(path, info) {
|
||||
// Calculate relative path from source directory
|
||||
|
|
|
@ -137,3 +137,164 @@ func (m *CompositeMatcher) ShouldInclude(path string, info os.FileInfo) bool {
|
|||
}
|
||||
return false
|
||||
}
|
||||
|
||||
// DirectoryFilter determines if a directory should be traversed.
// ShouldTraverse receives the directory's path together with the source
// directory of the walk; when it returns false the collector skips that
// directory and everything beneath it.
|
||||
type DirectoryFilter interface {
|
||||
ShouldTraverse(path string, sourceDir string) bool
|
||||
}
|
||||
|
||||
// IncludeDirectoryFilter only traverses directories matching the patterns.
//
// Patterns use filepath.Match glob syntax and are compared against the
// directory's path relative to the source directory, written with forward
// slashes.
type IncludeDirectoryFilter struct {
	patterns []string
}

// NewIncludeDirectoryFilter creates a filter that only includes matching directories.
func NewIncludeDirectoryFilter(patterns []string) *IncludeDirectoryFilter {
	return &IncludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether the directory at path should be walked.
//
// The source directory itself is always traversed. Any other directory is
// traversed when its relative path, or any single segment of it, matches at
// least one include pattern; a directory below a matching directory therefore
// matches as well. If the relative path cannot be computed the directory is
// not traversed.
func (f *IncludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical strings need no path arithmetic.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return false
	}
	relPath = filepath.ToSlash(relPath)

	// Rel cleans both arguments, so "." also identifies the source directory
	// when the two spellings differ (for example by a trailing separator).
	// Without this check the root would be matched against the patterns and
	// usually pruned, emptying the whole walk.
	if relPath == "." {
		return true
	}

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return true
		}
	}
	return false
}

// pathMatchesPattern reports whether the slash-separated relative path matches
// the glob pattern, either as a whole or in any one of its segments.
//
// Checking every segment covers the directory's own name and the names of all
// of its ancestors, so no separate walk up the parent chain is needed.
func (f *IncludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// The only error filepath.Match returns is ErrBadPattern; it is ignored on
	// purpose so that a malformed pattern simply matches nothing.
	if matched, _ := filepath.Match(pattern, path); matched {
		return true
	}

	for _, segment := range strings.Split(path, "/") {
		if matched, _ := filepath.Match(pattern, segment); matched {
			return true
		}
	}
	return false
}
|
||||
|
||||
// ExcludeDirectoryFilter skips directories matching the patterns.
//
// Patterns use filepath.Match glob syntax and are compared against the
// directory's path relative to the source directory, written with forward
// slashes.
type ExcludeDirectoryFilter struct {
	patterns []string
}

// NewExcludeDirectoryFilter creates a filter that excludes matching directories.
func NewExcludeDirectoryFilter(patterns []string) *ExcludeDirectoryFilter {
	return &ExcludeDirectoryFilter{patterns: patterns}
}

// ShouldTraverse reports whether the directory at path should be walked.
//
// The source directory itself is always traversed. Any other directory is
// skipped when its relative path, or any single segment of it, matches at
// least one exclude pattern. If the relative path cannot be computed the
// directory is traversed.
func (f *ExcludeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
	// Fast path: identical strings need no path arithmetic.
	if path == sourceDir {
		return true
	}

	relPath, err := filepath.Rel(sourceDir, path)
	if err != nil {
		return true
	}
	relPath = filepath.ToSlash(relPath)

	// Rel cleans both arguments, so "." also identifies the source directory
	// when the two spellings differ (for example by a trailing separator).
	// Patterns such as ".*" or "*" match ".", which would otherwise exclude
	// the root and with it the entire walk.
	if relPath == "." {
		return true
	}

	for _, pattern := range f.patterns {
		if f.pathMatchesPattern(relPath, pattern) {
			return false
		}
	}
	return true
}

// pathMatchesPattern reports whether the slash-separated relative path matches
// the glob pattern, either as a whole or in any one of its segments.
func (f *ExcludeDirectoryFilter) pathMatchesPattern(path, pattern string) bool {
	// The only error filepath.Match returns is ErrBadPattern; it is ignored on
	// purpose so that a malformed pattern simply excludes nothing.
	if matched, _ := filepath.Match(pattern, path); matched {
		return true
	}

	for _, segment := range strings.Split(path, "/") {
		if matched, _ := filepath.Match(pattern, segment); matched {
			return true
		}
	}
	return false
}
|
||||
|
||||
// CompositeDirectoryFilter combines multiple directory filters
|
||||
type CompositeDirectoryFilter struct {
|
||||
includeFilters []DirectoryFilter
|
||||
excludeFilters []DirectoryFilter
|
||||
}
|
||||
|
||||
// NewCompositeDirectoryFilter creates a filter combining include and exclude filters
|
||||
func NewCompositeDirectoryFilter(includeFilters, excludeFilters []DirectoryFilter) *CompositeDirectoryFilter {
|
||||
return &CompositeDirectoryFilter{
|
||||
includeFilters: includeFilters,
|
||||
excludeFilters: excludeFilters,
|
||||
}
|
||||
}
|
||||
|
||||
// ShouldTraverse applies include filters first, then exclude filters
|
||||
// Include filters take precedence when both are present
|
||||
func (f *CompositeDirectoryFilter) ShouldTraverse(path string, sourceDir string) bool {
|
||||
// If we have include filters, the directory must match at least one
|
||||
if len(f.includeFilters) > 0 {
|
||||
shouldInclude := false
|
||||
for _, filter := range f.includeFilters {
|
||||
if filter.ShouldTraverse(path, sourceDir) {
|
||||
shouldInclude = true
|
||||
break
|
||||
}
|
||||
}
|
||||
if !shouldInclude {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
// Apply exclude filters
|
||||
for _, filter := range f.excludeFilters {
|
||||
if !filter.ShouldTraverse(path, sourceDir) {
|
||||
return false
|
||||
}
|
||||
}
|
||||
|
||||
return true
|
||||
}
|
||||
|
|
33
main.go
33
main.go
|
@ -34,13 +34,17 @@ func main() {
|
|||
// Define flags using custom type for multiple values
|
||||
var nameFlags stringSlice
|
||||
var matchFlags stringSlice
|
||||
var includeDirFlags stringSlice
|
||||
var excludeDirFlags stringSlice
|
||||
|
||||
flag.Var(&nameFlags, "name", "Match exact filename (can be specified multiple times)")
|
||||
flag.Var(&matchFlags, "match", "Match directory pattern (can be specified multiple times)")
|
||||
flag.Var(&includeDirFlags, "include-dir", "Only traverse directories matching pattern (can be specified multiple times)")
|
||||
flag.Var(&excludeDirFlags, "exclude-dir", "Skip directories matching pattern (can be specified multiple times)")
|
||||
|
||||
// Custom usage message
|
||||
flag.Usage = func() {
|
||||
fmt.Fprintf(os.Stderr, "Usage: %s [--name <filename>]... [--match <pattern>]... <source-dir> <output-archive>\n\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, "Usage: %s [--name <filename>]... [--match <pattern>]... [--include-dir <pattern>]... [--exclude-dir <pattern>]... <source-dir> <output-archive>\n\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, "Collects files recursively matching specific criteria and archives them.\n\n")
|
||||
fmt.Fprintf(os.Stderr, "Options:\n")
|
||||
flag.PrintDefaults()
|
||||
|
@ -48,6 +52,7 @@ func main() {
|
|||
fmt.Fprintf(os.Stderr, " %s --name .mise.toml ./ backup.tgz\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " %s --match 'aet-*/' ./ backup.zip\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " %s --name .mise.toml --name README.md --match 'test-*' ./ backup.tgz\n", os.Args[0])
|
||||
fmt.Fprintf(os.Stderr, " %s --include-dir src --exclude-dir 'temp-*' --name '*.go' ./ backup.tgz\n", os.Args[0])
|
||||
}
|
||||
|
||||
flag.Parse()
|
||||
|
@ -98,8 +103,32 @@ func main() {
|
|||
matcher = collector.NewCompositeMatcher(matchers)
|
||||
}
|
||||
|
||||
// Create directory filters
|
||||
var dirFilter collector.DirectoryFilter
|
||||
if len(includeDirFlags) > 0 || len(excludeDirFlags) > 0 {
|
||||
var includeFilters []collector.DirectoryFilter
|
||||
var excludeFilters []collector.DirectoryFilter
|
||||
|
||||
// Create include filters
|
||||
if len(includeDirFlags) > 0 {
|
||||
includeFilters = append(includeFilters, collector.NewIncludeDirectoryFilter(includeDirFlags))
|
||||
}
|
||||
|
||||
// Create exclude filters
|
||||
if len(excludeDirFlags) > 0 {
|
||||
excludeFilters = append(excludeFilters, collector.NewExcludeDirectoryFilter(excludeDirFlags))
|
||||
}
|
||||
|
||||
dirFilter = collector.NewCompositeDirectoryFilter(includeFilters, excludeFilters)
|
||||
}
|
||||
|
||||
// Create collector and collect files
|
||||
c := collector.New(matcher)
|
||||
var c *collector.Collector
|
||||
if dirFilter != nil {
|
||||
c = collector.NewWithDirectoryFilter(matcher, dirFilter)
|
||||
} else {
|
||||
c = collector.New(matcher)
|
||||
}
|
||||
files, err := c.Collect(sourceDir)
|
||||
if err != nil {
|
||||
fmt.Fprintf(os.Stderr, "Error: %v\n", err)
|
||||
|
|
47
test.sh
47
test.sh
|
@ -166,6 +166,53 @@ run_test "combined name and match flags" \
|
|||
"./collect --name .mise.toml --match 'aet-*' --name README.md test/ test-combined.tgz"
|
||||
verify_archive_contents "test-combined.tgz" ".mise.toml README.md deep/nested/aet-tools/deep.sh subdir/.mise.toml subdir/aet-bin/.mise.toml subdir/aet-bin/tool subdir/aet-config/settings.conf"
|
||||
|
||||
# Test 15: Include directory filter - simple path
|
||||
mkdir -p test/src test/docs test/examples
|
||||
echo "source file" > test/src/main.go
|
||||
echo "doc file" > test/docs/README.md
|
||||
echo "example file" > test/examples/demo.txt
|
||||
run_test "include directory - simple path" \
|
||||
"./collect --include-dir src --name 'main.go' test/ test-include-simple.tgz"
|
||||
verify_archive_contents "test-include-simple.tgz" "src/main.go"
|
||||
|
||||
# Test 16: Include directory filter - glob pattern
|
||||
mkdir -p test/project-one/src test/project-two/src test/other/src
|
||||
echo "p1 source" > test/project-one/src/app.go
|
||||
echo "p2 source" > test/project-two/src/lib.go
|
||||
echo "other source" > test/other/src/util.go
|
||||
run_test "include directory - glob pattern" \
|
||||
"./collect --include-dir 'project-*' --name 'app.go' --name 'lib.go' test/ test-include-glob.tgz"
|
||||
verify_archive_contents "test-include-glob.tgz" "project-one/src/app.go project-two/src/lib.go"
|
||||
|
||||
# Test 17: Exclude directory filter - simple path
|
||||
run_test "exclude directory - simple path" \
|
||||
"./collect --exclude-dir docs --name 'README.md' test/ test-exclude-simple.tgz"
|
||||
verify_archive_contents "test-exclude-simple.tgz" "README.md"
|
||||
|
||||
# Test 18: Exclude directory filter - glob pattern
|
||||
mkdir -p test/temp-cache test/temp-logs test/important
|
||||
echo "cache file" > test/temp-cache/data.txt
|
||||
echo "log file" > test/temp-logs/app.log
|
||||
echo "important file" > test/important/config.txt
|
||||
run_test "exclude directory - glob pattern" \
|
||||
"./collect --exclude-dir 'temp-*' --name 'data.txt' --name 'config.txt' --name 'demo.txt' test/ test-exclude-glob.tgz"
|
||||
verify_archive_contents "test-exclude-glob.tgz" "examples/demo.txt important/config.txt prefix-one/data.txt"
|
||||
|
||||
# Test 19: Multiple include directories
|
||||
run_test "multiple include directories" \
|
||||
"./collect --include-dir src --include-dir docs --name 'main.go' --name 'README.md' test/ test-multi-include.tgz"
|
||||
verify_archive_contents "test-multi-include.tgz" "docs/README.md src/main.go"
|
||||
|
||||
# Test 20: Multiple exclude directories
|
||||
run_test "multiple exclude directories" \
|
||||
"./collect --exclude-dir 'temp-*' --exclude-dir examples --name 'data.txt' --name 'config.txt' test/ test-multi-exclude.tgz"
|
||||
verify_archive_contents "test-multi-exclude.tgz" "important/config.txt prefix-one/data.txt"
|
||||
|
||||
# Test 21: Combined include and exclude (a directory matching both is excluded)
|
||||
run_test "combined include and exclude filters" \
|
||||
"./collect --include-dir 'project-*' --exclude-dir 'project-two' --name 'app.go' --name 'lib.go' test/ test-include-exclude.tgz"
|
||||
verify_archive_contents "test-include-exclude.tgz" "project-one/src/app.go"
|
||||
|
||||
# Clean up
|
||||
echo -e "\nCleaning up..."
|
||||
rm -rf test test-*.tgz test-*.zip collect /tmp/test_output.txt
|
||||
|
|
Loading…
Add table
Reference in a new issue