Implement collect CLI tool
- Add Go implementation with modular architecture
- Support --name flag for exact filename matching
- Support --match flag for directory glob pattern matching
- Create tar.gz and zip archives preserving directory structure
- Handle errors with appropriate exit codes
- Skip files with permission errors gracefully
- Add comprehensive test suite with 11 test cases
This commit is contained in:
parent
216461fa96
commit
eb88ef97c0
8 changed files with 684 additions and 0 deletions
118
collector/collector.go
Normal file
118
collector/collector.go
Normal file
|
@ -0,0 +1,118 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"fmt"
|
||||
"io/fs"
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// FileEntry describes one file that has been selected for archiving.
type FileEntry struct {
	// Path is the file's location relative to the source directory. Collect
	// stores it with forward slashes regardless of platform.
	Path string

	// FullPath is the path under which the file was found while walking the
	// absolute source directory; it is the one to open when reading the file.
	FullPath string
}
|
||||
|
||||
// Collector handles file collection based on a matcher
|
||||
type Collector struct {
|
||||
matcher Matcher
|
||||
}
|
||||
|
||||
// New creates a new collector with the specified matcher
|
||||
func New(matcher Matcher) *Collector {
|
||||
return &Collector{
|
||||
matcher: matcher,
|
||||
}
|
||||
}
|
||||
|
||||
// Collect walks the source directory and collects matching files
|
||||
func (c *Collector) Collect(sourceDir string) ([]FileEntry, error) {
|
||||
// Clean and convert to absolute path
|
||||
absSourceDir, err := filepath.Abs(sourceDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("failed to get absolute path: %w", err)
|
||||
}
|
||||
|
||||
// Check if source directory exists
|
||||
info, err := os.Stat(absSourceDir)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("source directory error: %w", err)
|
||||
}
|
||||
if !info.IsDir() {
|
||||
return nil, fmt.Errorf("source path is not a directory: %s", sourceDir)
|
||||
}
|
||||
|
||||
var files []FileEntry
|
||||
|
||||
err = filepath.WalkDir(absSourceDir, func(path string, d fs.DirEntry, err error) error {
|
||||
if err != nil {
|
||||
// Log permission errors but continue walking
|
||||
if os.IsPermission(err) {
|
||||
fmt.Fprintf(os.Stderr, "Warning: Permission denied: %s\n", path)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Get file info
|
||||
info, err := d.Info()
|
||||
if err != nil {
|
||||
if os.IsPermission(err) {
|
||||
fmt.Fprintf(os.Stderr, "Warning: Cannot stat file: %s\n", path)
|
||||
return nil
|
||||
}
|
||||
return err
|
||||
}
|
||||
|
||||
// Skip symlinks
|
||||
if info.Mode()&os.ModeSymlink != 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
// Check if this file should be included
|
||||
if c.matcher.ShouldInclude(path, info) {
|
||||
// Calculate relative path from source directory
|
||||
relPath, err := filepath.Rel(absSourceDir, path)
|
||||
if err != nil {
|
||||
return fmt.Errorf("failed to get relative path: %w", err)
|
||||
}
|
||||
|
||||
// Clean the relative path to ensure consistent formatting
|
||||
relPath = filepath.ToSlash(relPath)
|
||||
|
||||
files = append(files, FileEntry{
|
||||
Path: relPath,
|
||||
FullPath: path,
|
||||
})
|
||||
}
|
||||
|
||||
return nil
|
||||
})
|
||||
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("walk error: %w", err)
|
||||
}
|
||||
|
||||
// Check if any files were found
|
||||
if len(files) == 0 {
|
||||
return nil, fmt.Errorf("no files found matching criteria")
|
||||
}
|
||||
|
||||
return files, nil
|
||||
}
|
||||
|
||||
// GetArchiveFormat reports the archive format implied by the suffix of
// filename, compared case-insensitively: "tar.gz" for names ending in
// ".tar.gz" or ".tgz", "zip" for names ending in ".zip", and the empty string
// for every other name.
func GetArchiveFormat(filename string) string {
	name := strings.ToLower(filename)

	switch {
	case strings.HasSuffix(name, ".tar.gz"), strings.HasSuffix(name, ".tgz"):
		return "tar.gz"
	case strings.HasSuffix(name, ".zip"):
		return "zip"
	default:
		return ""
	}
}
|
119
collector/matcher.go
Normal file
119
collector/matcher.go
Normal file
|
@ -0,0 +1,119 @@
|
|||
package collector
|
||||
|
||||
import (
|
||||
"os"
|
||||
"path/filepath"
|
||||
"strings"
|
||||
)
|
||||
|
||||
// Matcher is the selection policy applied to each file system entry visited
// during collection.
type Matcher interface {
	// ShouldInclude reports whether the entry found at path and described by
	// info belongs in the collection.
	ShouldInclude(path string, info os.FileInfo) bool
}
|
||||
|
||||
// NameMatcher selects non-directory entries whose name equals a fixed string.
type NameMatcher struct {
	name string // exact entry name to look for
}

// NewNameMatcher returns a NameMatcher that accepts entries named exactly
// name.
func NewNameMatcher(name string) *NameMatcher {
	m := new(NameMatcher)
	m.name = name
	return m
}

// ShouldInclude reports whether info describes a non-directory whose name is
// exactly the configured one. The path argument is not consulted.
func (m *NameMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	return !info.IsDir() && info.Name() == m.name
}
|
||||
|
||||
// PatternMatcher selects files that live, at any depth, beneath a directory
// matching a glob pattern written in filepath.Match syntax.
//
// A pattern without path separators is compared with each directory's base
// name. A pattern with separators is compared, segment by segment, with the
// trailing segments of the directory path.
//
// A PatternMatcher memoizes its verdicts in a plain map and therefore must not
// be shared between goroutines without external locking.
type PatternMatcher struct {
	pattern         string          // normalized glob pattern
	matchedDirs     map[string]bool // memoized verdict for each directory seen
	patternSegments []string        // pattern split on the OS path separator
}

// NewPatternMatcher returns a matcher for the given directory glob pattern.
//
// The pattern is normalized with filepath.Clean so that spellings such as
// "build/", "./build" and "src//lib" behave like "build" and "src/lib"; on
// platforms with a different separator, "/" is converted as well. An empty
// pattern, or one that names only the root separator, matches nothing.
//
// NOTE(review): a malformed pattern is not reported here; matching simply
// never succeeds because the filepath.ErrBadPattern result is discarded by
// ShouldInclude. Reporting it would need a constructor that returns an error.
func NewPatternMatcher(pattern string) *PatternMatcher {
	sep := string(os.PathSeparator)

	// Clean would turn "" into ".", so leave the empty pattern alone.
	if pattern != "" {
		pattern = filepath.Clean(pattern)
	}
	// A bare root pattern used to be trimmed to "" (match nothing); keep that.
	if pattern == sep {
		pattern = ""
	}

	return &PatternMatcher{
		pattern:         pattern,
		matchedDirs:     make(map[string]bool),
		patternSegments: strings.Split(pattern, sep),
	}
}

// ShouldInclude reports whether the entry at path is a non-directory located
// beneath a directory that matches the pattern. Directories themselves are
// never included; visiting one only records whether it matches.
//
// NOTE(review): every ancestor of path is tested, up to the file system root
// (or up to, but excluding, "." for relative paths). Collect passes absolute
// paths, so a directory above the source directory whose name matches the
// pattern also causes a match. Confirm whether that is intended; restricting
// the search would require the matcher to know the source root.
func (m *PatternMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	if info.IsDir() {
		m.dirMatches(path) // prime the cache for the files inside
		return false
	}

	dir := filepath.Dir(path)
	for {
		if m.dirMatches(dir) {
			return true
		}

		parent := filepath.Dir(dir)
		if parent == dir || parent == "." {
			return false
		}
		dir = parent
	}
}

// dirMatches reports whether dir matches the pattern, remembering both
// positive and negative verdicts so each directory is pattern-matched once
// rather than once per file beneath it. A pattern error counts as "no match".
func (m *PatternMatcher) dirMatches(dir string) bool {
	if verdict, seen := m.matchedDirs[dir]; seen {
		return verdict
	}

	matched, err := m.dirMatchesPattern(dir)
	verdict := err == nil && matched

	// Keep a zero-value PatternMatcher from panicking on the write below.
	if m.matchedDirs == nil {
		m.matchedDirs = make(map[string]bool)
	}
	m.matchedDirs[dir] = verdict
	return verdict
}

// dirMatchesPattern reports whether dirPath matches the glob pattern. The
// error is non-nil only when filepath.Match rejects the pattern as malformed.
func (m *PatternMatcher) dirMatchesPattern(dirPath string) (bool, error) {
	switch len(m.patternSegments) {
	case 0:
		// Only possible for a matcher not built by NewPatternMatcher.
		return false, nil
	case 1:
		// Simple pattern: only the directory's own name matters.
		return filepath.Match(m.pattern, filepath.Base(dirPath))
	}

	// Multi-segment pattern: align it with the tail of the path.
	pathSegments := strings.Split(dirPath, string(os.PathSeparator))
	offset := len(pathSegments) - len(m.patternSegments)
	if offset < 0 {
		return false, nil
	}

	// Compare from the last segment backwards, as the deepest component is
	// the one most likely to differ.
	for i := len(m.patternSegments) - 1; i >= 0; i-- {
		matched, err := filepath.Match(m.patternSegments[i], pathSegments[offset+i])
		if err != nil || !matched {
			return false, err
		}
	}

	return true, nil
}
|
Loading…
Add table
Add a link
Reference in a new issue