Implement collect CLI tool

- Add Go implementation with modular architecture
- Support --name flag for exact filename matching
- Support --match flag for directory glob pattern matching
- Create tar.gz and zip archives preserving directory structure
- Handle errors with appropriate exit codes
- Skip files with permission errors gracefully
- Add comprehensive test suite with 11 test cases
2025-06-12 21:38:00 -04:00 · 2025-06-12 21:38:00 -04:00 · eb88ef97c0
commit eb88ef97c0
parent 216461fa96
8 changed files with 684 additions and 0 deletions
--- a/archiver/archiver.go
+++ b/archiver/archiver.go
@ -0,0 +1,8 @@
 package archiver
 import "github.com/atomaka/collect/collector"
 // Archiver defines the interface for creating archives.
 // The implementations in this package are TarArchiver (tar.gz) and
 // ZipArchiver (zip).
 type Archiver interface {
 	// Create writes the given file entries into a new archive at outputPath
 	// and returns a non-nil error if the archive could not be produced.
 	Create(outputPath string, files []collector.FileEntry) error
 }
--- a/archiver/tar.go
+++ b/archiver/tar.go
@ -0,0 +1,86 @@
 package archiver
 import (
 	"archive/tar"
 	"compress/gzip"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"github.com/atomaka/collect/collector"
 )
 // TarArchiver writes gzip-compressed tar archives. It holds no state, so a
 // single value can be reused for any number of archives.
 type TarArchiver struct{}
 // NewTarArchiver returns a ready-to-use tar.gz archiver.
 func NewTarArchiver() *TarArchiver {
 	return new(TarArchiver)
 }
 // Create writes a tar.gz archive containing files to outputPath.
 //
 // The output file is created (or truncated) first, then every entry is added
 // in order via addFileToTar. The first entry that fails aborts the archive
 // and its error is returned wrapped with the entry's relative path.
 //
 // The tar writer, gzip writer and output file are closed innermost-first.
 // Closing is what flushes the tar trailer and the gzip footer, so a close
 // failure means the archive on disk is incomplete; it is therefore reported
 // to the caller unless an earlier error is already being returned.
 func (a *TarArchiver) Create(outputPath string, files []collector.FileEntry) (err error) {
 	outFile, err := os.Create(outputPath)
 	if err != nil {
 		return fmt.Errorf("failed to create output file: %w", err)
 	}
 	gzipWriter := gzip.NewWriter(outFile)
 	tarWriter := tar.NewWriter(gzipWriter)
 	defer func() {
 		// Order matters: each writer flushes into the one that follows it.
 		for _, c := range []io.Closer{tarWriter, gzipWriter, outFile} {
 			if cerr := c.Close(); cerr != nil && err == nil {
 				err = fmt.Errorf("failed to finalize archive: %w", cerr)
 			}
 		}
 	}()
 	// Add each file to the archive
 	for _, file := range files {
 		if err := a.addFileToTar(tarWriter, file); err != nil {
 			return fmt.Errorf("failed to add file %s: %w", file.Path, err)
 		}
 	}
 	return nil
 }
 // addFileToTar appends one entry to the tar stream: a header derived from the
 // file's metadata, named by the entry's slash-separated relative path, and
 // then the file's contents. A file that cannot be opened because of a
 // permission error is reported on stderr and skipped (nil is returned); any
 // other failure is returned to the caller.
 func (a *TarArchiver) addFileToTar(tw *tar.Writer, file collector.FileEntry) error {
 	src, openErr := os.Open(file.FullPath)
 	switch {
 	case openErr == nil:
 		// opened fine; carry on below
 	case os.IsPermission(openErr):
 		fmt.Fprintf(os.Stderr, "Warning: Cannot read file: %s\n", file.FullPath)
 		return nil
 	default:
 		return openErr
 	}
 	defer src.Close()
 	stat, err := src.Stat()
 	if err != nil {
 		return err
 	}
 	hdr, err := tar.FileInfoHeader(stat, "")
 	if err != nil {
 		return err
 	}
 	// FileInfoHeader only knows the base name; store the path relative to
 	// the collection root so the directory structure is preserved.
 	hdr.Name = filepath.ToSlash(file.Path)
 	if err = tw.WriteHeader(hdr); err != nil {
 		return err
 	}
 	if _, err = io.Copy(tw, src); err != nil {
 		return err
 	}
 	return nil
 }
--- a/archiver/zip.go
+++ b/archiver/zip.go
@ -0,0 +1,85 @@
 package archiver
 import (
 	"archive/zip"
 	"fmt"
 	"io"
 	"os"
 	"path/filepath"
 	"github.com/atomaka/collect/collector"
 )
 // ZipArchiver writes zip archives. It holds no state, so a single value can
 // be reused for any number of archives.
 type ZipArchiver struct{}
 // NewZipArchiver returns a ready-to-use zip archiver.
 func NewZipArchiver() *ZipArchiver {
 	return new(ZipArchiver)
 }
 // Create writes a zip archive containing files to outputPath.
 //
 // The output file is created (or truncated) first, then every entry is added
 // in order via addFileToZip. The first entry that fails aborts the archive
 // and its error is returned wrapped with the entry's relative path.
 //
 // The zip writer is closed before the output file. Closing the zip writer is
 // what writes the central directory, so a close failure means the archive on
 // disk is unreadable; it is therefore reported to the caller unless an
 // earlier error is already being returned.
 func (a *ZipArchiver) Create(outputPath string, files []collector.FileEntry) (err error) {
 	outFile, err := os.Create(outputPath)
 	if err != nil {
 		return fmt.Errorf("failed to create output file: %w", err)
 	}
 	zipWriter := zip.NewWriter(outFile)
 	defer func() {
 		// Order matters: the zip writer flushes into the file.
 		for _, c := range []io.Closer{zipWriter, outFile} {
 			if cerr := c.Close(); cerr != nil && err == nil {
 				err = fmt.Errorf("failed to finalize archive: %w", cerr)
 			}
 		}
 	}()
 	// Add each file to the archive
 	for _, file := range files {
 		if err := a.addFileToZip(zipWriter, file); err != nil {
 			return fmt.Errorf("failed to add file %s: %w", file.Path, err)
 		}
 	}
 	return nil
 }
 // addFileToZip appends one deflate-compressed entry to the zip archive: a
 // header derived from the file's metadata, named by the entry's
 // slash-separated relative path, followed by the file's contents. A file that
 // cannot be opened because of a permission error is reported on stderr and
 // skipped (nil is returned); any other failure is returned to the caller.
 func (a *ZipArchiver) addFileToZip(zw *zip.Writer, file collector.FileEntry) error {
 	src, openErr := os.Open(file.FullPath)
 	switch {
 	case openErr == nil:
 		// opened fine; carry on below
 	case os.IsPermission(openErr):
 		fmt.Fprintf(os.Stderr, "Warning: Cannot read file: %s\n", file.FullPath)
 		return nil
 	default:
 		return openErr
 	}
 	defer src.Close()
 	stat, err := src.Stat()
 	if err != nil {
 		return err
 	}
 	hdr, err := zip.FileInfoHeader(stat)
 	if err != nil {
 		return err
 	}
 	// FileInfoHeader only knows the base name; store the path relative to
 	// the collection root so the directory structure is preserved.
 	hdr.Name = filepath.ToSlash(file.Path)
 	hdr.Method = zip.Deflate
 	dst, err := zw.CreateHeader(hdr)
 	if err != nil {
 		return err
 	}
 	if _, err = io.Copy(dst, src); err != nil {
 		return err
 	}
 	return nil
 }
--- a/collector/collector.go
+++ b/collector/collector.go
@ -0,0 +1,118 @@
 package collector
 import (
 	"fmt"
 	"io/fs"
 	"os"
 	"path/filepath"
 	"strings"
 )
 // FileEntry represents a file to be archived.
 // Collect produces these entries; the archivers open FullPath to read the
 // contents and use Path as the entry name inside the archive.
 type FileEntry struct {
 	Path     string // Relative path from sourceDir, slash-separated (Collect applies filepath.ToSlash)
 	FullPath string // Absolute path for reading (the path reported by the directory walk)
 }
 // Collector gathers the files selected by its Matcher.
 type Collector struct {
 	matcher Matcher
 }
 // New returns a Collector that delegates every include decision to matcher.
 func New(matcher Matcher) *Collector {
 	c := new(Collector)
 	c.matcher = matcher
 	return c
 }
 // Collect walks sourceDir recursively and returns one FileEntry for every
 // entry the matcher accepts.
 //
 // sourceDir is resolved to an absolute path and must be an existing
 // directory. Entries that cannot be read or stat'ed because of a permission
 // error are reported on stderr and skipped; symlinks are never offered to the
 // matcher. Any other walk error aborts the collection.
 //
 // An error is also returned when the walk succeeds but nothing matched.
 func (c *Collector) Collect(sourceDir string) ([]FileEntry, error) {
 	root, err := filepath.Abs(sourceDir)
 	if err != nil {
 		return nil, fmt.Errorf("failed to get absolute path: %w", err)
 	}
 	rootInfo, err := os.Stat(root)
 	switch {
 	case err != nil:
 		return nil, fmt.Errorf("source directory error: %w", err)
 	case !rootInfo.IsDir():
 		return nil, fmt.Errorf("source path is not a directory: %s", sourceDir)
 	}
 	var matches []FileEntry
 	visit := func(path string, d fs.DirEntry, walkErr error) error {
 		if walkErr != nil {
 			if !os.IsPermission(walkErr) {
 				return walkErr
 			}
 			// Unreadable entries are tolerated so one bad directory does
 			// not sink the whole run.
 			fmt.Fprintf(os.Stderr, "Warning: Permission denied: %s\n", path)
 			return nil
 		}
 		entryInfo, infoErr := d.Info()
 		if infoErr != nil {
 			if !os.IsPermission(infoErr) {
 				return infoErr
 			}
 			fmt.Fprintf(os.Stderr, "Warning: Cannot stat file: %s\n", path)
 			return nil
 		}
 		if entryInfo.Mode()&os.ModeSymlink != 0 {
 			return nil
 		}
 		// The matcher is consulted for directories as well as files.
 		if !c.matcher.ShouldInclude(path, entryInfo) {
 			return nil
 		}
 		rel, relErr := filepath.Rel(root, path)
 		if relErr != nil {
 			return fmt.Errorf("failed to get relative path: %w", relErr)
 		}
 		matches = append(matches, FileEntry{
 			Path:     filepath.ToSlash(rel),
 			FullPath: path,
 		})
 		return nil
 	}
 	if err := filepath.WalkDir(root, visit); err != nil {
 		return nil, fmt.Errorf("walk error: %w", err)
 	}
 	if len(matches) == 0 {
 		return nil, fmt.Errorf("no files found matching criteria")
 	}
 	return matches, nil
 }
 // GetArchiveFormat maps an output filename to the archive format implied by
 // its extension, ignoring case: "tar.gz" for names ending in .tar.gz or .tgz,
 // "zip" for names ending in .zip, and the empty string for anything else.
 func GetArchiveFormat(filename string) string {
 	name := strings.ToLower(filename)
 	switch {
 	case strings.HasSuffix(name, ".tar.gz"), strings.HasSuffix(name, ".tgz"):
 		return "tar.gz"
 	case strings.HasSuffix(name, ".zip"):
 		return "zip"
 	default:
 		return ""
 	}
 }
--- a/collector/matcher.go
+++ b/collector/matcher.go
@ -0,0 +1,119 @@
 package collector
 import (
 	"os"
 	"path/filepath"
 	"strings"
 )
 // Matcher determines if a file should be included in the collection.
 // Collect calls it for every non-symlink entry it visits, directories
 // included.
 type Matcher interface {
 	// ShouldInclude reports whether the entry at path, described by info,
 	// belongs in the collection.
 	ShouldInclude(path string, info os.FileInfo) bool
 }
 // NameMatcher selects non-directory entries whose base name equals a fixed
 // string.
 type NameMatcher struct {
 	name string
 }
 // NewNameMatcher returns a matcher that accepts entries named exactly name.
 func NewNameMatcher(name string) *NameMatcher {
 	m := new(NameMatcher)
 	m.name = name
 	return m
 }
 // ShouldInclude reports whether info describes a non-directory entry whose
 // name is identical to the configured one. The path argument is not consulted.
 func (m *NameMatcher) ShouldInclude(path string, info os.FileInfo) bool {
 	return !info.IsDir() && info.Name() == m.name
 }
 // PatternMatcher matches files within directories matching a glob pattern.
 type PatternMatcher struct {
 	pattern         string          // normalized pattern: OS separators, no trailing separator
 	matchedDirs     map[string]bool // directories already known to match the pattern
 	patternSegments []string        // pattern split on the OS path separator
 }
 // NewPatternMatcher creates a matcher for directory pattern matching.
 //
 // The pattern may be written with forward slashes on any platform; it is
 // converted to the OS separator before being split, so the segments line up
 // with the paths they are compared against. All trailing separators are
 // dropped ("aet-*/" and "aet-*//" both mean "aet-*"), because a trailing
 // separator would otherwise leave an empty final segment that can never
 // match a directory name.
 func NewPatternMatcher(pattern string) *PatternMatcher {
 	sep := string(os.PathSeparator)
 	pattern = strings.TrimRight(filepath.FromSlash(pattern), sep)
 	return &PatternMatcher{
 		pattern:         pattern,
 		matchedDirs:     make(map[string]bool),
 		patternSegments: strings.Split(pattern, sep),
 	}
 }
 // ShouldInclude reports whether a file lives somewhere beneath a directory
 // that matches the pattern. Directories themselves are never included; when
 // one matches it is only remembered so later lookups can be answered from
 // the cache.
 //
 // NOTE(review): the ancestor walk below is not bounded by the collection
 // root. Collect passes absolute paths, so a matching directory above the
 // source directory would also cause a file to be included. Confirm whether
 // that is intended.
 func (m *PatternMatcher) ShouldInclude(path string, info os.FileInfo) bool {
 	if info.IsDir() {
 		if ok, err := m.dirMatchesPattern(path); err == nil && ok {
 			m.matchedDirs[path] = true
 		}
 		return false
 	}
 	// Climb from the file's directory towards the root, stopping at the
 	// first ancestor that is cached as, or turns out to be, a match.
 	for current := filepath.Dir(path); ; {
 		if m.matchedDirs[current] {
 			return true
 		}
 		ok, err := m.dirMatchesPattern(current)
 		if err == nil && ok {
 			m.matchedDirs[current] = true
 			return true
 		}
 		next := filepath.Dir(current)
 		if next == current || next == "." {
 			// Reached the top of the path without finding a match.
 			return false
 		}
 		current = next
 	}
 }
 // dirMatchesPattern reports whether dirPath satisfies the glob pattern.
 //
 // A single-segment pattern is matched against the directory's base name
 // only. A multi-segment pattern is matched segment by segment against the
 // trailing components of dirPath, last component first. A malformed pattern
 // segment surfaces as the error returned by filepath.Match.
 func (m *PatternMatcher) dirMatchesPattern(dirPath string) (bool, error) {
 	want := m.patternSegments
 	if len(want) == 1 {
 		return filepath.Match(m.pattern, filepath.Base(dirPath))
 	}
 	have := strings.Split(dirPath, string(os.PathSeparator))
 	// offset aligns the pattern with the tail of the path.
 	offset := len(have) - len(want)
 	if offset < 0 {
 		return false, nil
 	}
 	for i := len(want) - 1; i >= 0; i-- {
 		ok, err := filepath.Match(want[i], have[offset+i])
 		if err != nil {
 			return false, err
 		}
 		if !ok {
 			return false, nil
 		}
 	}
 	return true, nil
 }
--- a/go.mod
+++ b/go.mod
@ -0,0 +1,3 @@
 module github.com/atomaka/collect
 go 1.21
--- a/main.go
+++ b/main.go
@ -0,0 +1,103 @@
 package main
 import (
 	"flag"
 	"fmt"
 	"os"
 	"path/filepath"
 	"github.com/atomaka/collect/archiver"
 	"github.com/atomaka/collect/collector"
 )
 // Process exit codes. The numeric values are part of the command-line
 // contract (the test script checks them), so the order below must not change.
 const (
 	exitSuccess      = iota // 0: archive written
 	exitNoFiles             // 1: nothing matched the criteria
 	exitArchiveError        // 2: collecting or archiving failed
 	exitInvalidArgs         // 3: bad flags, arguments or archive extension
 )
 // main is the command-line entry point. It validates the flags and the two
 // positional arguments, collects the matching files below the source
 // directory, and writes them to a tar.gz or zip archive chosen by the output
 // file's extension.
 //
 // Exit status: exitInvalidArgs for bad flags, arguments or extension;
 // exitNoFiles when nothing matched; exitArchiveError for any other
 // collection or archiving failure.
 func main() {
 	// Define flags
 	nameFlag := flag.String("name", "", "Match exact filename")
 	matchFlag := flag.String("match", "", "Match directory pattern")
 	// Custom usage message
 	flag.Usage = func() {
 		fmt.Fprintf(os.Stderr, "Usage: %s [--name <filename> | --match <pattern>] <source-dir> <output-archive>\n\n", os.Args[0])
 		fmt.Fprintf(os.Stderr, "Collects files recursively matching specific criteria and archives them.\n\n")
 		fmt.Fprintf(os.Stderr, "Options:\n")
 		flag.PrintDefaults()
 		fmt.Fprintf(os.Stderr, "\nExamples:\n")
 		fmt.Fprintf(os.Stderr, "  %s --name .mise.toml ./ backup.tgz\n", os.Args[0])
 		fmt.Fprintf(os.Stderr, "  %s --match 'aet-*/' ./ backup.zip\n", os.Args[0])
 	}
 	flag.Parse()
 	// The two selection modes are mutually exclusive and one is required,
 	// i.e. exactly one of the flags may be non-empty.
 	if (*nameFlag == "") == (*matchFlag == "") {
 		fmt.Fprintf(os.Stderr, "Error: Exactly one of --name or --match must be specified\n\n")
 		flag.Usage()
 		os.Exit(exitInvalidArgs)
 	}
 	// Check positional arguments
 	args := flag.Args()
 	if len(args) != 2 {
 		fmt.Fprintf(os.Stderr, "Error: Expected 2 arguments (source directory and output archive), got %d\n\n", len(args))
 		flag.Usage()
 		os.Exit(exitInvalidArgs)
 	}
 	sourceDir := args[0]
 	outputPath := args[1]
 	// Determine archive format before doing any filesystem work so an
 	// unsupported extension fails fast.
 	format := collector.GetArchiveFormat(outputPath)
 	if format == "" {
 		fmt.Fprintf(os.Stderr, "Error: Unsupported archive format. Use .tar.gz, .tgz, or .zip\n")
 		os.Exit(exitInvalidArgs)
 	}
 	// Create matcher
 	var matcher collector.Matcher
 	if *nameFlag != "" {
 		matcher = collector.NewNameMatcher(*nameFlag)
 	} else {
 		matcher = collector.NewPatternMatcher(*matchFlag)
 	}
 	// Create collector and collect files
 	c := collector.New(matcher)
 	files, err := c.Collect(sourceDir)
 	if err != nil {
 		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
 		// The "nothing matched" case is recognised by its message text, so
 		// this literal must stay in sync with the one Collect returns.
 		if err.Error() == "no files found matching criteria" {
 			os.Exit(exitNoFiles)
 		}
 		os.Exit(exitArchiveError)
 	}
 	// Report number of files found
 	fmt.Printf("Found %d files to archive\n", len(files))
 	// Create appropriate archiver
 	var arch archiver.Archiver
 	switch format {
 	case "tar.gz":
 		arch = archiver.NewTarArchiver()
 	case "zip":
 		arch = archiver.NewZipArchiver()
 	default:
 		// Unreachable today, but guards against GetArchiveFormat gaining a
 		// format that has no archiver here (arch would be nil below).
 		fmt.Fprintf(os.Stderr, "Error: Unsupported archive format. Use .tar.gz, .tgz, or .zip\n")
 		os.Exit(exitInvalidArgs)
 	}
 	// Create archive
 	if err := arch.Create(outputPath, files); err != nil {
 		fmt.Fprintf(os.Stderr, "Error creating archive: %v\n", err)
 		os.Exit(exitArchiveError)
 	}
 	// Prefer the absolute path for cleaner output, but fall back to the path
 	// as given rather than printing an empty string if it cannot be resolved.
 	absOutput, err := filepath.Abs(outputPath)
 	if err != nil {
 		absOutput = outputPath
 	}
 	fmt.Printf("Archive created successfully: %s\n", absOutput)
 }
--- a/test.sh
+++ b/test.sh
@ -0,0 +1,162 @@
 #!/bin/bash
 # Integration test suite for the collect CLI: builds the binary, creates a
 # fixture tree under ./test, runs the tool and checks exit codes and archive
 # contents.
 # Abort on the first command that fails outside an explicitly guarded section.
 set -e
 # Colors for output (ANSI escape sequences, interpreted by `echo -e`)
 RED='\033[0;31m'
 GREEN='\033[0;32m'
 YELLOW='\033[1;33m'
 NC='\033[0m' # No Color
 # Test counters, updated by run_test and reported in the summary
 TESTS_PASSED=0
 TESTS_FAILED=0
 # Function to run a test
 #   $1  human-readable test name
 #   $2  command line to execute (evaluated with eval)
 #   $3  expected exit code (default 0)
 # Combined stdout/stderr of the command goes to /tmp/test_output.txt and is
 # shown only when the exit code differs from the expected one.
 # Updates TESTS_PASSED / TESTS_FAILED; returns 0 on a match, 1 otherwise.
 run_test() {
     local test_name="$1"
     local command="$2"
     local expected_exit_code="${3:-0}"
     echo -n "Testing $test_name... "
     # Run command and capture exit code; errexit is suspended so a non-zero
     # status from the command under test does not abort the script.
     set +e
     eval "$command" > /tmp/test_output.txt 2>&1
     local exit_code=$?
     set -e
     if [ "$exit_code" -eq "$expected_exit_code" ]; then
         echo -e "${GREEN}PASSED${NC}"
         # Plain assignment rather than ((VAR++)): the latter returns status 1
         # when the old value is 0, which aborts the script under `set -e`.
         TESTS_PASSED=$((TESTS_PASSED + 1))
         return 0
     else
         echo -e "${RED}FAILED${NC}"
         echo "  Expected exit code: $expected_exit_code, got: $exit_code"
         echo "  Output:"
         sed 's/^/    /' /tmp/test_output.txt
         TESTS_FAILED=$((TESTS_FAILED + 1))
         return 1
     fi
 }
 # Function to verify archive contents
 #   $1  archive path (.tgz, .tar.gz or .zip)
 #   $2  space-separated list of entry names the archive must contain
 # The comparison is order-independent: both lists are sorted first.
 # Returns 0 when the lists are identical, 1 otherwise or for an unknown
 # archive extension.
 verify_archive_contents() {
     local archive="$1"
     local expected_files="$2"
     local actual_files expected_sorted
     echo -n "  Verifying contents of $archive... "
     if [[ "$archive" == *.tgz || "$archive" == *.tar.gz ]]; then
         actual_files=$(tar -tzf "$archive" | sort | tr '\n' ' ' | sed 's/ $//')
     elif [[ "$archive" == *.zip ]]; then
         # zipinfo mode (-Z1) prints bare entry names, one per line, so no
         # header/footer scraping is needed and names containing words such
         # as "files" are not filtered out by accident.
         actual_files=$(unzip -Z1 "$archive" | sort | tr '\n' ' ' | sed 's/ $//')
     else
         echo -e "${RED}Unknown archive format${NC}"
         return 1
     fi
     expected_sorted=$(echo "$expected_files" | tr ' ' '\n' | sort | tr '\n' ' ' | sed 's/ $//')
     if [ "$actual_files" = "$expected_sorted" ]; then
         echo -e "${GREEN}OK${NC}"
         return 0
     else
         echo -e "${RED}FAILED${NC}"
         echo "    Expected: $expected_sorted"
         echo "    Got:      $actual_files"
         return 1
     fi
 }
 echo -e "${YELLOW}=== Collect CLI Test Suite ===${NC}\n"
 # Clean up any previous test artifacts (fixture tree, archives, old binary)
 echo "Cleaning up previous test artifacts..."
 rm -rf test test-*.tgz test-*.zip collect
 # Build the tool into ./collect; nothing else can run without it
 echo "Building collect tool..."
 if ! go build -o collect; then
    echo -e "${RED}Failed to build collect tool${NC}"
    exit 1
 fi
 echo -e "${GREEN}Build successful${NC}\n"
 # Create test directory structure:
 #   three aet-* directories at different depths (targets for --match),
 #   two .mise.toml files (targets for --name),
 #   plus files that must NOT be collected (other/file.txt, regular.txt).
 echo "Setting up test environment..."
 mkdir -p test/subdir/aet-bin test/subdir/aet-config test/other test/deep/nested/aet-tools
 echo "test config" > test/.mise.toml
 echo "another config" > test/subdir/.mise.toml
 echo "binary" > test/subdir/aet-bin/tool
 echo "config" > test/subdir/aet-config/settings.conf
 echo "other file" > test/other/file.txt
 echo "deep tool" > test/deep/nested/aet-tools/deep.sh
 echo "not in aet dir" > test/deep/nested/regular.txt
 echo -e "${GREEN}Test environment ready${NC}\n"
 # Each case below calls run_test with a name, a command line and, where it is
 # not 0, the expected exit code. Cases that produce an archive additionally
 # check its entry list with verify_archive_contents.
 # Test 1: Name matching with tar.gz
 run_test "name matching (.mise.toml) with tar.gz" \
    "./collect --name .mise.toml test/ test-name.tgz"
 verify_archive_contents "test-name.tgz" ".mise.toml subdir/.mise.toml"
 # Test 2: Pattern matching with zip
 run_test "pattern matching (aet-*) with zip" \
    "./collect --match 'aet-*' test/ test-pattern.zip"
 verify_archive_contents "test-pattern.zip" "subdir/aet-bin/tool subdir/aet-config/settings.conf deep/nested/aet-tools/deep.sh"
 # Test 3: Pattern matching with tgz
 run_test "pattern matching (aet-*) with tgz" \
    "./collect --match 'aet-*' test/ test-pattern.tgz"
 verify_archive_contents "test-pattern.tgz" "deep/nested/aet-tools/deep.sh subdir/aet-bin/tool subdir/aet-config/settings.conf"
 # Test 4: No files found (should exit with code 1)
 run_test "no files found error" \
    "./collect --name nonexistent.file test/ test-empty.zip" 1
 # Test 5: Invalid arguments - no flags (exit code 3)
 run_test "invalid arguments - no flags" \
    "./collect test/ output.zip" 3
 # Test 6: Invalid arguments - both flags (exit code 3)
 run_test "invalid arguments - both flags" \
    "./collect --name .mise.toml --match 'aet-*' test/ output.zip" 3
 # Test 7: Invalid arguments - missing output file (exit code 3)
 run_test "invalid arguments - missing output file" \
    "./collect --name .mise.toml test/" 3
 # Test 8: Invalid archive format (exit code 3)
 run_test "invalid archive format" \
    "./collect --name .mise.toml test/ output.txt" 3
 # Test 9: Non-existent source directory (exit code 2)
 run_test "non-existent source directory" \
    "./collect --name .mise.toml nonexistent/ output.zip" 2
 # Test 10: Help flag (expected to exit 0)
 run_test "help flag output" \
    "./collect --help" 0
 # Test 11: Name matching with subdirectory pattern
 # Adds a third .mise.toml inside an aet-* directory to show that --name
 # matching is independent of directory names.
 echo "subdir config" > test/subdir/aet-bin/.mise.toml
 run_test "name matching finds files in pattern dirs too" \
    "./collect --name .mise.toml test/ test-name-all.tgz"
 verify_archive_contents "test-name-all.tgz" ".mise.toml subdir/.mise.toml subdir/aet-bin/.mise.toml"
 # Clean up the fixture tree, generated archives, the built binary and the
 # captured-output scratch file
 echo -e "\nCleaning up..."
 rm -rf test test-*.tgz test-*.zip collect /tmp/test_output.txt
 # Summary: print the counters and exit non-zero if any test failed
 # NOTE(review): the script runs under `set -e`, so a run_test or
 # verify_archive_contents call that returns 1 appears to abort the script
 # before this point is reached. Confirm whether fail-fast is intended.
 echo -e "\n${YELLOW}=== Test Summary ===${NC}"
 echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}"
 echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}"
 if [ $TESTS_FAILED -eq 0 ]; then
    echo -e "\n${GREEN}All tests passed!${NC}"
    exit 0
 else
    echo -e "\n${RED}Some tests failed!${NC}"
    exit 1
 fi
		`@ -0,0 +1,3 @@`
							`module github.com/atomaka/collect`

							`go 1.21`