diff --git a/archiver/archiver.go b/archiver/archiver.go
new file mode 100644
index 0000000..96aa2f8
--- /dev/null
+++ b/archiver/archiver.go
@@ -0,0 +1,8 @@
+package archiver
+
+import "github.com/atomaka/collect/collector"
+
+// Archiver defines the interface for creating archives
+type Archiver interface {
+	Create(outputPath string, files []collector.FileEntry) error
+}
\ No newline at end of file
diff --git a/archiver/tar.go b/archiver/tar.go
new file mode 100644
index 0000000..edc4566
--- /dev/null
+++ b/archiver/tar.go
@@ -0,0 +1,95 @@
+package archiver
+
+import (
+	"archive/tar"
+	"compress/gzip"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+
+	"github.com/atomaka/collect/collector"
+)
+
+// TarArchiver creates tar.gz archives
+type TarArchiver struct{}
+
+// NewTarArchiver creates a new tar archiver
+func NewTarArchiver() *TarArchiver {
+	return &TarArchiver{}
+}
+
+// Create writes a gzip-compressed tar archive at outputPath containing files.
+// It returns an error if the output cannot be created, if any entry cannot be
+// added, or if finalizing the tar, gzip or file layer fails.
+func (a *TarArchiver) Create(outputPath string, files []collector.FileEntry) (err error) {
+	// Create output file
+	outFile, err := os.Create(outputPath)
+	if err != nil {
+		return fmt.Errorf("failed to create output file: %w", err)
+	}
+
+	gzipWriter := gzip.NewWriter(outFile)
+	tarWriter := tar.NewWriter(gzipWriter)
+
+	// Close innermost-first so every layer flushes into the next one. The
+	// tar trailer and gzip footer are only written on Close, so a failure
+	// here means the archive is incomplete and must be reported.
+	defer func() {
+		closeErr := errors.Join(tarWriter.Close(), gzipWriter.Close(), outFile.Close())
+		if err == nil && closeErr != nil {
+			err = fmt.Errorf("failed to finalize archive: %w", closeErr)
+		}
+	}()
+
+	// Add each file to the archive
+	for _, file := range files {
+		if err := a.addFileToTar(tarWriter, file); err != nil {
+			return fmt.Errorf("failed to add file %s: %w", file.Path, err)
+		}
+	}
+
+	return nil
+}
+
+// addFileToTar appends one file to the archive under its relative path.
+// A file that cannot be opened for lack of permission is skipped with a
+// warning on stderr instead of failing the whole archive.
+func (a *TarArchiver) addFileToTar(tw *tar.Writer, file collector.FileEntry) error {
+	// Open the file
+	f, err := os.Open(file.FullPath)
+	if err != nil {
+		// Skip files we can't read with a warning
+		if os.IsPermission(err) {
+			fmt.Fprintf(os.Stderr, "Warning: Cannot read file: %s\n", file.FullPath)
+			return nil
+		}
+		return err
+	}
+	defer f.Close()
+
+	// Get file info
+	info, err := f.Stat()
+	if err != nil {
+		return err
+	}
+
+	// Create tar header
+	header, err := tar.FileInfoHeader(info, "")
+	if err != nil {
+		return err
+	}
+
+	// Use the relative path in the archive
+	header.Name = filepath.ToSlash(file.Path)
+
+	// Write header
+	if err := tw.WriteHeader(header); err != nil {
+		return err
+	}
+
+	// Copy file contents
+	_, err = io.Copy(tw, f)
+	return err
+}
\ No newline at end of file
diff --git a/archiver/zip.go b/archiver/zip.go
new file mode 100644
index 0000000..8ae20f7
--- /dev/null
+++ b/archiver/zip.go
@@ -0,0 +1,96 @@
+package archiver
+
+import (
+	"archive/zip"
+	"errors"
+	"fmt"
+	"io"
+	"os"
+	"path/filepath"
+
+	"github.com/atomaka/collect/collector"
+)
+
+// ZipArchiver creates zip archives
+type ZipArchiver struct{}
+
+// NewZipArchiver creates a new zip archiver
+func NewZipArchiver() *ZipArchiver {
+	return &ZipArchiver{}
+}
+
+// Create writes a zip archive at outputPath containing files.
+// It returns an error if the output cannot be created, if any entry cannot be
+// added, or if finalizing the zip or file layer fails.
+func (a *ZipArchiver) Create(outputPath string, files []collector.FileEntry) (err error) {
+	// Create output file
+	outFile, err := os.Create(outputPath)
+	if err != nil {
+		return fmt.Errorf("failed to create output file: %w", err)
+	}
+
+	zipWriter := zip.NewWriter(outFile)
+
+	// The central directory is only written by zipWriter.Close, so a failure
+	// here means the archive is incomplete and must be reported.
+	defer func() {
+		closeErr := errors.Join(zipWriter.Close(), outFile.Close())
+		if err == nil && closeErr != nil {
+			err = fmt.Errorf("failed to finalize archive: %w", closeErr)
+		}
+	}()
+
+	// Add each file to the archive
+	for _, file := range files {
+		if err := a.addFileToZip(zipWriter, file); err != nil {
+			return fmt.Errorf("failed to add file %s: %w", file.Path, err)
+		}
+	}
+
+	return nil
+}
+
+// addFileToZip appends one file to the archive under its relative path,
+// compressed with Deflate. A file that cannot be opened for lack of
+// permission is skipped with a warning on stderr instead of failing.
+func (a *ZipArchiver) addFileToZip(zw *zip.Writer, file collector.FileEntry) error {
+	// Open the file
+	f, err := os.Open(file.FullPath)
+	if err != nil {
+		// Skip files we can't read with a warning
+		if os.IsPermission(err) {
+			fmt.Fprintf(os.Stderr, "Warning: Cannot read file: %s\n", file.FullPath)
+			return nil
+		}
+		return err
+	}
+	defer f.Close()
+
+	// Get file info
+	info, err := f.Stat()
+	if err != nil {
+		return err
+	}
+
+	// Create zip file header
+	header, err := zip.FileInfoHeader(info)
+	if err != nil {
+		return err
+	}
+
+	// Use the relative path in the archive
+	header.Name = filepath.ToSlash(file.Path)
+
+	// Set compression method
+	header.Method = zip.Deflate
+
+	// Create writer for this file
+	writer, err := zw.CreateHeader(header)
+	if err != nil {
+		return err
+	}
+
+	// Copy file contents
+	_, err = io.Copy(writer, f)
+	return err
+}
\ No newline at end of file
diff --git a/collector/collector.go b/collector/collector.go
new file mode 100644
index 0000000..701ca10
--- /dev/null
+++ b/collector/collector.go
@@ -0,0 +1,140 @@
+package collector
+
+import (
+	"errors"
+	"fmt"
+	"io/fs"
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// ErrNoFiles is returned by Collect when the walk succeeds but no file
+// matches. Callers should test for it with errors.Is.
+var ErrNoFiles = errors.New("no files found matching criteria")
+
+// FileEntry represents a file to be archived
+type FileEntry struct {
+	Path     string // Relative path from sourceDir
+	FullPath string // Absolute path for reading
+}
+
+// Collector handles file collection based on a matcher
+type Collector struct {
+	matcher Matcher
+}
+
+// rootSetter is implemented by matchers that want to know where the walk
+// starts. Collect calls SetRoot with the absolute source directory.
+type rootSetter interface {
+	SetRoot(root string)
+}
+
+// New creates a new collector with the specified matcher
+func New(matcher Matcher) *Collector {
+	return &Collector{
+		matcher: matcher,
+	}
+}
+
+// Collect walks sourceDir recursively and returns every regular file accepted
+// by the matcher. Entries that cannot be read because of permissions are
+// reported on stderr and skipped. It returns ErrNoFiles when nothing matches.
+func (c *Collector) Collect(sourceDir string) ([]FileEntry, error) {
+	// Clean and convert to absolute path
+	absSourceDir, err := filepath.Abs(sourceDir)
+	if err != nil {
+		return nil, fmt.Errorf("failed to get absolute path: %w", err)
+	}
+
+	// Check if source directory exists
+	info, err := os.Stat(absSourceDir)
+	if err != nil {
+		return nil, fmt.Errorf("source directory error: %w", err)
+	}
+	if !info.IsDir() {
+		return nil, fmt.Errorf("source path is not a directory: %s", sourceDir)
+	}
+
+	// Tell root-aware matchers where the walk starts so they never match
+	// against directories above the source directory.
+	if rs, ok := c.matcher.(rootSetter); ok {
+		rs.SetRoot(absSourceDir)
+	}
+
+	var files []FileEntry
+
+	err = filepath.WalkDir(absSourceDir, func(path string, d fs.DirEntry, err error) error {
+		if err != nil {
+			// Log permission errors but continue walking
+			if os.IsPermission(err) {
+				fmt.Fprintf(os.Stderr, "Warning: Permission denied: %s\n", path)
+				return nil
+			}
+			return err
+		}
+
+		// Get file info
+		info, err := d.Info()
+		if err != nil {
+			if os.IsPermission(err) {
+				fmt.Fprintf(os.Stderr, "Warning: Cannot stat file: %s\n", path)
+				return nil
+			}
+			return err
+		}
+
+		// Directories are passed on so the matcher can see them; of the
+		// rest only regular files are archived. Symlinks, devices, sockets
+		// and named pipes are skipped because reading them can block or
+		// fail the whole archive.
+		if !info.IsDir() && !info.Mode().IsRegular() {
+			return nil
+		}
+
+		// Check if this file should be included
+		if c.matcher.ShouldInclude(path, info) {
+			// Calculate relative path from source directory
+			relPath, err := filepath.Rel(absSourceDir, path)
+			if err != nil {
+				return fmt.Errorf("failed to get relative path: %w", err)
+			}
+
+			// Clean the relative path to ensure consistent formatting
+			relPath = filepath.ToSlash(relPath)
+
+			files = append(files, FileEntry{
+				Path:     relPath,
+				FullPath: path,
+			})
+		}
+
+		return nil
+	})
+
+	if err != nil {
+		return nil, fmt.Errorf("walk error: %w", err)
+	}
+
+	// Check if any files were found
+	if len(files) == 0 {
+		return nil, ErrNoFiles
+	}
+
+	return files, nil
+}
+
+// GetArchiveFormat determines the archive format from the filename
+func GetArchiveFormat(filename string) string {
+	lower := strings.ToLower(filename)
+
+	if strings.HasSuffix(lower, ".tar.gz") || strings.HasSuffix(lower, ".tgz") {
+		return "tar.gz"
+	}
+
+	if strings.HasSuffix(lower, ".zip") {
+		return "zip"
+	}
+
+	return ""
+}
\ No newline at end of file
diff --git a/collector/matcher.go b/collector/matcher.go
new file mode 100644
index 0000000..779cef0
--- /dev/null
+++ b/collector/matcher.go
@@ -0,0 +1,136 @@
+package collector
+
+import (
+	"os"
+	"path/filepath"
+	"strings"
+)
+
+// Matcher determines if a file should be included in the collection
+type Matcher interface {
+	ShouldInclude(path string, info os.FileInfo) bool
+}
+
+// NameMatcher matches files by exact name
+type NameMatcher struct {
+	name string
+}
+
+// NewNameMatcher creates a matcher for exact filename matching
+func NewNameMatcher(name string) *NameMatcher {
+	return &NameMatcher{name: name}
+}
+
+// ShouldInclude returns true if the file matches the exact name
+func (m *NameMatcher) ShouldInclude(path string, info os.FileInfo) bool {
+	if info.IsDir() {
+		return false
+	}
+	return info.Name() == m.name
+}
+
+// PatternMatcher matches files within directories matching a glob pattern
+type PatternMatcher struct {
+	pattern         string
+	matchedDirs     map[string]bool
+	patternSegments []string
+	root            string // walk root set by SetRoot; "" leaves the climb unbounded
+}
+
+// NewPatternMatcher creates a matcher for directory pattern matching
+func NewPatternMatcher(pattern string) *PatternMatcher {
+	// Remove trailing slash if present
+	pattern = strings.TrimSuffix(pattern, "/")
+
+	return &PatternMatcher{
+		pattern:         pattern,
+		matchedDirs:     make(map[string]bool),
+		patternSegments: strings.Split(pattern, string(os.PathSeparator)),
+	}
+}
+
+// SetRoot records the directory the walk starts from. ShouldInclude then
+// stops climbing at root, so directories above it can never cause a match.
+func (m *PatternMatcher) SetRoot(root string) {
+	m.root = filepath.Clean(root)
+}
+
+// ShouldInclude reports whether path is a file located, at any depth, inside
+// a directory matching the pattern. Directories themselves are never
+// included; they are only inspected so that matches can be cached.
+func (m *PatternMatcher) ShouldInclude(path string, info os.FileInfo) bool {
+	// For directories, check if they match the pattern and cache the result
+	if info.IsDir() {
+		matched, err := m.dirMatchesPattern(path)
+		if err == nil && matched {
+			m.matchedDirs[path] = true
+		}
+		return false // Don't include the directory itself, only files within
+	}
+
+	// For files, check if any parent directory is in the matched set
+	dir := filepath.Dir(path)
+	for {
+		if m.matchedDirs[dir] {
+			return true
+		}
+
+		// Also check if this directory matches the pattern (in case we haven't seen it yet)
+		if matched, err := m.dirMatchesPattern(dir); err == nil && matched {
+			m.matchedDirs[dir] = true
+			return true
+		}
+
+		// Never climb above the walk root: an ancestor of the source
+		// directory that happens to match must not pull in every file.
+		if dir == m.root {
+			break
+		}
+
+		parent := filepath.Dir(dir)
+		if parent == dir || parent == "." {
+			break
+		}
+		dir = parent
+	}
+
+	return false
+}
+
+// dirMatchesPattern checks if a directory path matches the glob pattern.
+// A pattern without separators is matched against the directory's base name;
+// otherwise its segments are matched against the trailing path segments.
+func (m *PatternMatcher) dirMatchesPattern(dirPath string) (bool, error) {
+	// Get the directory name
+	dirName := filepath.Base(dirPath)
+
+	// For simple patterns (no path separators), just match the directory name
+	if len(m.patternSegments) == 1 {
+		return filepath.Match(m.pattern, dirName)
+	}
+
+	// For complex patterns, we need to match the full path segments
+	pathSegments := strings.Split(dirPath, string(os.PathSeparator))
+
+	// Try to match the pattern segments against the path segments
+	if len(pathSegments) < len(m.patternSegments) {
+		return false, nil
+	}
+
+	// Check each pattern segment against the corresponding path segment
+	for i := 0; i < len(m.patternSegments); i++ {
+		// Start from the end of both slices
+		patternIdx := len(m.patternSegments) - 1 - i
+		pathIdx := len(pathSegments) - 1 - i
+
+		matched, err := filepath.Match(m.patternSegments[patternIdx], pathSegments[pathIdx])
+		if err != nil {
+			return false, err
+		}
+		if !matched {
+			return false, nil
+		}
+	}
+
+	return true, nil
+}
\ No newline at end of file
diff --git a/go.mod b/go.mod
new file mode 100644
index 0000000..effdc55
--- /dev/null
+++ b/go.mod
@@ -0,0 +1,3 @@
+module github.com/atomaka/collect
+
+go 1.21
\ No newline at end of file
diff --git a/main.go b/main.go
new file mode 100644
index 0000000..c21f09f
--- /dev/null
+++ b/main.go
@@ -0,0 +1,111 @@
+// Command collect gathers files matching a name or a directory pattern from a
+// source tree and writes them to a tar.gz or zip archive.
+package main
+
+import (
+	"errors"
+	"flag"
+	"fmt"
+	"os"
+	"path/filepath"
+
+	"github.com/atomaka/collect/archiver"
+	"github.com/atomaka/collect/collector"
+)
+
+// Process exit codes.
+const (
+	exitSuccess      = 0
+	exitNoFiles      = 1
+	exitArchiveError = 2
+	exitInvalidArgs  = 3
+)
+
+func main() {
+	// Define flags
+	nameFlag := flag.String("name", "", "Match exact filename")
+	matchFlag := flag.String("match", "", "Match directory pattern")
+
+	// Custom usage message
+	flag.Usage = func() {
+		fmt.Fprintf(os.Stderr, "Usage: %s [--name <filename> | --match <pattern>] <source-dir> <output-archive>\n\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, "Collects files recursively matching specific criteria and archives them.\n\n")
+		fmt.Fprintf(os.Stderr, "Options:\n")
+		flag.PrintDefaults()
+		fmt.Fprintf(os.Stderr, "\nExamples:\n")
+		fmt.Fprintf(os.Stderr, " %s --name .mise.toml ./ backup.tgz\n", os.Args[0])
+		fmt.Fprintf(os.Stderr, " %s --match 'aet-*/' ./ backup.zip\n", os.Args[0])
+	}
+
+	flag.Parse()
+
+	// Validate flags
+	if (*nameFlag == "" && *matchFlag == "") || (*nameFlag != "" && *matchFlag != "") {
+		fmt.Fprintf(os.Stderr, "Error: Exactly one of --name or --match must be specified\n\n")
+		flag.Usage()
+		os.Exit(exitInvalidArgs)
+	}
+
+	// Check positional arguments
+	args := flag.Args()
+	if len(args) != 2 {
+		fmt.Fprintf(os.Stderr, "Error: Expected 2 arguments (source directory and output archive), got %d\n\n", len(args))
+		flag.Usage()
+		os.Exit(exitInvalidArgs)
+	}
+
+	sourceDir := args[0]
+	outputPath := args[1]
+
+	// Determine archive format
+	format := collector.GetArchiveFormat(outputPath)
+	if format == "" {
+		fmt.Fprintf(os.Stderr, "Error: Unsupported archive format. Use .tar.gz, .tgz, or .zip\n")
+		os.Exit(exitInvalidArgs)
+	}
+
+	// Create matcher
+	var matcher collector.Matcher
+	if *nameFlag != "" {
+		matcher = collector.NewNameMatcher(*nameFlag)
+	} else {
+		matcher = collector.NewPatternMatcher(*matchFlag)
+	}
+
+	// Create collector and collect files
+	c := collector.New(matcher)
+	files, err := c.Collect(sourceDir)
+	if err != nil {
+		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
+		// Compare against the sentinel instead of the message text
+		if errors.Is(err, collector.ErrNoFiles) {
+			os.Exit(exitNoFiles)
+		}
+		os.Exit(exitArchiveError)
+	}
+
+	// Report number of files found
+	fmt.Printf("Found %d files to archive\n", len(files))
+
+	// Create appropriate archiver
+	var arch archiver.Archiver
+	switch format {
+	case "tar.gz":
+		arch = archiver.NewTarArchiver()
+	case "zip":
+		arch = archiver.NewZipArchiver()
+	}
+
+	// Create archive
+	if err := arch.Create(outputPath, files); err != nil {
+		fmt.Fprintf(os.Stderr, "Error creating archive: %v\n", err)
+		os.Exit(exitArchiveError)
+	}
+
+	// Get absolute path for cleaner output; fall back to the path as given
+	absOutput, err := filepath.Abs(outputPath)
+	if err != nil {
+		absOutput = outputPath
+	}
+	fmt.Printf("Archive created successfully: %s\n", absOutput)
+}
\ No newline at end of file
diff --git a/test.sh b/test.sh
new file mode 100755
index 0000000..e98e322
--- /dev/null
+++ b/test.sh
@@ -0,0 +1,169 @@
+#!/bin/bash
+
+set -e
+
+# Colors for output
+RED='\033[0;31m'
+GREEN='\033[0;32m'
+YELLOW='\033[1;33m'
+NC='\033[0m' # No Color
+
+# Test counter
+TESTS_PASSED=0
+TESTS_FAILED=0
+
+# Function to run a test
+# Usage: run_test <name> <command> [expected_exit_code]
+# Records the result in TESTS_PASSED/TESTS_FAILED and always returns 0 so
+# that, with `set -e` active, a failing test does not abort the suite.
+run_test() {
+    local test_name="$1"
+    local command="$2"
+    local expected_exit_code="${3:-0}"
+
+    echo -n "Testing $test_name... "
+
+    # Run command and capture exit code
+    set +e
+    eval "$command" > /tmp/test_output.txt 2>&1
+    local exit_code=$?
+    set -e
+
+    if [ "$exit_code" -eq "$expected_exit_code" ]; then
+        echo -e "${GREEN}PASSED${NC}"
+        # Not ((VAR++)): it returns status 1 when VAR is 0 and trips `set -e`
+        TESTS_PASSED=$((TESTS_PASSED + 1))
+    else
+        echo -e "${RED}FAILED${NC}"
+        echo " Expected exit code: $expected_exit_code, got: $exit_code"
+        echo " Output:"
+        cat /tmp/test_output.txt | sed 's/^/ /'
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+    return 0
+}
+
+# Function to verify archive contents
+# Usage: verify_archive_contents <archive> "<space-separated expected paths>"
+# A mismatch is counted in TESTS_FAILED; the function always returns 0.
+verify_archive_contents() {
+    local archive="$1"
+    local expected_files="$2"
+    local actual_files expected_sorted
+
+    echo -n " Verifying contents of $archive... "
+
+    if [[ "$archive" == *.tgz || "$archive" == *.tar.gz ]]; then
+        actual_files=$(tar -tzf "$archive" | sort | tr '\n' ' ' | sed 's/ $//')
+    elif [[ "$archive" == *.zip ]]; then
+        actual_files=$(unzip -l "$archive" | grep -v "Archive:" | grep -v "Length" | grep -v -- "--------" | grep -v "files" | awk '{print $4}' | grep -v '^$' | sort | tr '\n' ' ' | sed 's/ $//')
+    else
+        echo -e "${RED}Unknown archive format${NC}"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+        return 0
+    fi
+
+    expected_sorted=$(echo "$expected_files" | tr ' ' '\n' | sort | tr '\n' ' ' | sed 's/ $//')
+
+    if [ "$actual_files" = "$expected_sorted" ]; then
+        echo -e "${GREEN}OK${NC}"
+    else
+        echo -e "${RED}FAILED${NC}"
+        echo " Expected: $expected_sorted"
+        echo " Got: $actual_files"
+        TESTS_FAILED=$((TESTS_FAILED + 1))
+    fi
+    return 0
+}
+
+echo -e "${YELLOW}=== Collect CLI Test Suite ===${NC}\n"
+
+# Clean up any previous test artifacts
+echo "Cleaning up previous test artifacts..."
+rm -rf test test-*.tgz test-*.zip collect
+
+# Build the tool
+echo "Building collect tool..."
+if ! go build -o collect; then
+    echo -e "${RED}Failed to build collect tool${NC}"
+    exit 1
+fi
+echo -e "${GREEN}Build successful${NC}\n"
+
+# Create test directory structure
+echo "Setting up test environment..."
+mkdir -p test/subdir/aet-bin test/subdir/aet-config test/other test/deep/nested/aet-tools
+echo "test config" > test/.mise.toml
+echo "another config" > test/subdir/.mise.toml
+echo "binary" > test/subdir/aet-bin/tool
+echo "config" > test/subdir/aet-config/settings.conf
+echo "other file" > test/other/file.txt
+echo "deep tool" > test/deep/nested/aet-tools/deep.sh
+echo "not in aet dir" > test/deep/nested/regular.txt
+echo -e "${GREEN}Test environment ready${NC}\n"
+
+# Test 1: Name matching with tar.gz
+run_test "name matching (.mise.toml) with tar.gz" \
+    "./collect --name .mise.toml test/ test-name.tgz"
+verify_archive_contents "test-name.tgz" ".mise.toml subdir/.mise.toml"
+
+# Test 2: Pattern matching with zip
+run_test "pattern matching (aet-*) with zip" \
+    "./collect --match 'aet-*' test/ test-pattern.zip"
+verify_archive_contents "test-pattern.zip" "subdir/aet-bin/tool subdir/aet-config/settings.conf deep/nested/aet-tools/deep.sh"
+
+# Test 3: Pattern matching with tgz
+run_test "pattern matching (aet-*) with tgz" \
+    "./collect --match 'aet-*' test/ test-pattern.tgz"
+verify_archive_contents "test-pattern.tgz" "deep/nested/aet-tools/deep.sh subdir/aet-bin/tool subdir/aet-config/settings.conf"
+
+# Test 4: No files found (should exit with code 1)
+run_test "no files found error" \
+    "./collect --name nonexistent.file test/ test-empty.zip" 1
+
+# Test 5: Invalid arguments - no flags
+run_test "invalid arguments - no flags" \
+    "./collect test/ output.zip" 3
+
+# Test 6: Invalid arguments - both flags
+run_test "invalid arguments - both flags" \
+    "./collect --name .mise.toml --match 'aet-*' test/ output.zip" 3
+
+# Test 7: Invalid arguments - missing output file
+run_test "invalid arguments - missing output file" \
+    "./collect --name .mise.toml test/" 3
+
+# Test 8: Invalid archive format
+run_test "invalid archive format" \
+    "./collect --name .mise.toml test/ output.txt" 3
+
+# Test 9: Non-existent source directory
+run_test "non-existent source directory" \
+    "./collect --name .mise.toml nonexistent/ output.zip" 2
+
+# Test 10: Help flag
+run_test "help flag output" \
+    "./collect --help" 0
+
+# Test 11: Name matching with subdirectory pattern
+echo "subdir config" > test/subdir/aet-bin/.mise.toml
+run_test "name matching finds files in pattern dirs too" \
+    "./collect --name .mise.toml test/ test-name-all.tgz"
+verify_archive_contents "test-name-all.tgz" ".mise.toml subdir/.mise.toml subdir/aet-bin/.mise.toml"
+
+# Clean up
+echo -e "\nCleaning up..."
+rm -rf test test-*.tgz test-*.zip collect /tmp/test_output.txt
+
+# Summary
+echo -e "\n${YELLOW}=== Test Summary ===${NC}"
+echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}"
+echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}"
+
+if [ $TESTS_FAILED -eq 0 ]; then
+    echo -e "\n${GREEN}All tests passed!${NC}"
+    exit 0
+else
+    echo -e "\n${RED}Some tests failed!${NC}"
+    exit 1
+fi
\ No newline at end of file