1
0
Fork 0

Implement collect CLI tool

- Add Go implementation with modular architecture
- Support --name flag for exact filename matching
- Support --match flag for directory glob pattern matching
- Create tar.gz and zip archives preserving directory structure
- Handle errors with appropriate exit codes
- Skip files with permission errors gracefully
- Add comprehensive test suite with 11 test cases
This commit is contained in:
Andrew Tomaka 2025-06-12 21:38:00 -04:00
parent 216461fa96
commit eb88ef97c0
8 changed files with 684 additions and 0 deletions

8
archiver/archiver.go Normal file
View file

@ -0,0 +1,8 @@
package archiver
import "github.com/atomaka/collect/collector"
// Archiver defines the interface for creating archives.
// TarArchiver and ZipArchiver in this package both provide a Create method
// with this signature.
type Archiver interface {
// Create writes an archive at outputPath containing the given files and
// returns a non-nil error if the archive could not be produced.
Create(outputPath string, files []collector.FileEntry) error
}

86
archiver/tar.go Normal file
View file

@ -0,0 +1,86 @@
package archiver
import (
"archive/tar"
"compress/gzip"
"fmt"
"io"
"os"
"path/filepath"
"github.com/atomaka/collect/collector"
)
// TarArchiver writes collected files into a gzip-compressed tar archive.
// The type has no fields.
type TarArchiver struct{}

// NewTarArchiver returns a pointer to a fresh TarArchiver.
func NewTarArchiver() *TarArchiver {
	return new(TarArchiver)
}
// Create writes a gzip-compressed tar archive to outputPath containing every
// entry in files, in order.
//
// The output file is created (truncating any existing file) before anything
// is written. When the function returns, the tar writer, the gzip writer and
// the file are closed innermost-first. Closing the writers flushes buffered
// archive data, so a close failure means the archive on disk is incomplete;
// the first such failure is returned unless an earlier error already occurred.
//
// It returns an error if the output file cannot be created, if any file
// cannot be added, or if finalizing the archive fails.
func (a *TarArchiver) Create(outputPath string, files []collector.FileEntry) (err error) {
	outFile, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("failed to create output file: %w", err)
	}

	gzipWriter := gzip.NewWriter(outFile)
	tarWriter := tar.NewWriter(gzipWriter)

	defer func() {
		// Close every layer even after a failure so nothing leaks, but only
		// surface a close error when it is the first thing that went wrong.
		for _, c := range []io.Closer{tarWriter, gzipWriter, outFile} {
			if closeErr := c.Close(); closeErr != nil && err == nil {
				err = fmt.Errorf("failed to finalize archive %s: %w", outputPath, closeErr)
			}
		}
	}()

	for _, file := range files {
		if addErr := a.addFileToTar(tarWriter, file); addErr != nil {
			return fmt.Errorf("failed to add file %s: %w", file.Path, addErr)
		}
	}
	return nil
}
// addFileToTar appends one file to the tar stream: a header built from the
// file's metadata, named by the entry's slash-separated relative path,
// followed by the file's contents.
//
// A file that cannot be opened because of a permission error is reported on
// stderr and skipped (nil is returned); any other failure is returned as is.
func (a *TarArchiver) addFileToTar(tw *tar.Writer, file collector.FileEntry) error {
	src, openErr := os.Open(file.FullPath)
	if openErr != nil {
		if !os.IsPermission(openErr) {
			return openErr
		}
		// Unreadable files produce a warning instead of failing the archive.
		fmt.Fprintf(os.Stderr, "Warning: Cannot read file: %s\n", file.FullPath)
		return nil
	}
	defer src.Close()

	stat, statErr := src.Stat()
	if statErr != nil {
		return statErr
	}

	hdr, hdrErr := tar.FileInfoHeader(stat, "")
	if hdrErr != nil {
		return hdrErr
	}
	// Store the entry under its relative path rather than the file's base name.
	hdr.Name = filepath.ToSlash(file.Path)

	if writeErr := tw.WriteHeader(hdr); writeErr != nil {
		return writeErr
	}

	_, copyErr := io.Copy(tw, src)
	return copyErr
}

85
archiver/zip.go Normal file
View file

@ -0,0 +1,85 @@
package archiver
import (
"archive/zip"
"fmt"
"io"
"os"
"path/filepath"
"github.com/atomaka/collect/collector"
)
// ZipArchiver writes collected files into a zip archive.
// The type has no fields.
type ZipArchiver struct{}

// NewZipArchiver returns a pointer to a fresh ZipArchiver.
func NewZipArchiver() *ZipArchiver {
	return new(ZipArchiver)
}
// Create writes a zip archive to outputPath containing every entry in files,
// in order.
//
// The output file is created (truncating any existing file) before anything
// is written. When the function returns, the zip writer and then the file are
// closed. zip.Writer.Close writes the central directory, so a close failure
// means the archive on disk is unusable; the first such failure is returned
// unless an earlier error already occurred.
//
// It returns an error if the output file cannot be created, if any file
// cannot be added, or if finalizing the archive fails.
func (a *ZipArchiver) Create(outputPath string, files []collector.FileEntry) (err error) {
	outFile, err := os.Create(outputPath)
	if err != nil {
		return fmt.Errorf("failed to create output file: %w", err)
	}

	zipWriter := zip.NewWriter(outFile)

	defer func() {
		// Close both layers even after a failure so nothing leaks, but only
		// surface a close error when it is the first thing that went wrong.
		for _, c := range []io.Closer{zipWriter, outFile} {
			if closeErr := c.Close(); closeErr != nil && err == nil {
				err = fmt.Errorf("failed to finalize archive %s: %w", outputPath, closeErr)
			}
		}
	}()

	for _, file := range files {
		if addErr := a.addFileToZip(zipWriter, file); addErr != nil {
			return fmt.Errorf("failed to add file %s: %w", file.Path, addErr)
		}
	}
	return nil
}
// addFileToZip appends one file to the zip archive: a header built from the
// file's metadata, named by the entry's slash-separated relative path and
// marked for Deflate compression, followed by the file's contents.
//
// A file that cannot be opened because of a permission error is reported on
// stderr and skipped (nil is returned); any other failure is returned as is.
func (a *ZipArchiver) addFileToZip(zw *zip.Writer, file collector.FileEntry) error {
	src, openErr := os.Open(file.FullPath)
	if openErr != nil {
		if !os.IsPermission(openErr) {
			return openErr
		}
		// Unreadable files produce a warning instead of failing the archive.
		fmt.Fprintf(os.Stderr, "Warning: Cannot read file: %s\n", file.FullPath)
		return nil
	}
	defer src.Close()

	stat, statErr := src.Stat()
	if statErr != nil {
		return statErr
	}

	hdr, hdrErr := zip.FileInfoHeader(stat)
	if hdrErr != nil {
		return hdrErr
	}
	// Store the entry under its relative path and compress it.
	hdr.Name = filepath.ToSlash(file.Path)
	hdr.Method = zip.Deflate

	entry, createErr := zw.CreateHeader(hdr)
	if createErr != nil {
		return createErr
	}

	_, copyErr := io.Copy(entry, src)
	return copyErr
}

118
collector/collector.go Normal file
View file

@ -0,0 +1,118 @@
package collector
import (
"fmt"
"io/fs"
"os"
"path/filepath"
"strings"
)
// FileEntry represents a file to be archived.
// Collect produces these; the archivers read FullPath and store the contents
// under Path.
type FileEntry struct {
Path string // Slash-separated path relative to the source directory; used as the name inside the archive
FullPath string // Path of the file as reported by the directory walk (rooted at the absolute source directory); used for reading
}
// Collector gathers the files under a directory that its matcher accepts.
type Collector struct {
	matcher Matcher
}

// New returns a Collector that selects files using matcher.
func New(matcher Matcher) *Collector {
	c := new(Collector)
	c.matcher = matcher
	return c
}
// Collect walks sourceDir recursively and returns one FileEntry for every
// file the matcher accepts.
//
// sourceDir is made absolute and, if it is a symbolic link to a directory,
// resolved to its target: filepath.WalkDir does not follow symbolic links,
// so without this step a symlinked source directory would never be entered.
//
// During the walk:
//   - permission errors are reported on stderr and the entry is skipped;
//   - only directories and regular files are offered to the matcher.
//     Symbolic links, named pipes, sockets and device nodes are skipped,
//     because the archivers open each entry and copy its contents, which
//     blocks or fails for such files.
//
// It returns an error if sourceDir cannot be resolved, does not exist or is
// not a directory, if the walk fails for a reason other than a permission
// error, or if no file matched. The "no files found matching criteria"
// message is compared verbatim by the command-line entry point to choose its
// exit code, so its text must stay unchanged.
func (c *Collector) Collect(sourceDir string) ([]FileEntry, error) {
	absSourceDir, err := filepath.Abs(sourceDir)
	if err != nil {
		return nil, fmt.Errorf("failed to get absolute path: %w", err)
	}

	// os.Stat follows symbolic links, so this accepts a link to a directory.
	rootInfo, err := os.Stat(absSourceDir)
	if err != nil {
		return nil, fmt.Errorf("source directory error: %w", err)
	}
	if !rootInfo.IsDir() {
		return nil, fmt.Errorf("source path is not a directory: %s", sourceDir)
	}

	// Walk the real directory so that a symlinked root is descended into.
	absSourceDir, err = filepath.EvalSymlinks(absSourceDir)
	if err != nil {
		return nil, fmt.Errorf("failed to resolve source directory: %w", err)
	}

	var files []FileEntry
	err = filepath.WalkDir(absSourceDir, func(path string, d fs.DirEntry, err error) error {
		if err != nil {
			// Log permission errors but continue walking.
			if os.IsPermission(err) {
				fmt.Fprintf(os.Stderr, "Warning: Permission denied: %s\n", path)
				return nil
			}
			return err
		}

		info, err := d.Info()
		if err != nil {
			if os.IsPermission(err) {
				fmt.Fprintf(os.Stderr, "Warning: Cannot stat file: %s\n", path)
				return nil
			}
			return err
		}

		// Skip anything that is neither a directory nor a regular file
		// (symlinks, FIFOs, sockets, devices): it cannot be archived by
		// copying its contents.
		if mode := info.Mode(); !mode.IsDir() && !mode.IsRegular() {
			return nil
		}

		if !c.matcher.ShouldInclude(path, info) {
			return nil
		}

		relPath, err := filepath.Rel(absSourceDir, path)
		if err != nil {
			return fmt.Errorf("failed to get relative path: %w", err)
		}
		files = append(files, FileEntry{
			// Archive entry names always use forward slashes.
			Path:     filepath.ToSlash(relPath),
			FullPath: path,
		})
		return nil
	})
	if err != nil {
		return nil, fmt.Errorf("walk error: %w", err)
	}

	if len(files) == 0 {
		return nil, fmt.Errorf("no files found matching criteria")
	}
	return files, nil
}
// GetArchiveFormat maps an output filename to an archive format by its
// extension, ignoring case. It returns "tar.gz" for names ending in .tar.gz
// or .tgz, "zip" for names ending in .zip, and "" for anything else.
func GetArchiveFormat(filename string) string {
	name := strings.ToLower(filename)
	switch {
	case strings.HasSuffix(name, ".tar.gz"), strings.HasSuffix(name, ".tgz"):
		return "tar.gz"
	case strings.HasSuffix(name, ".zip"):
		return "zip"
	default:
		return ""
	}
}

119
collector/matcher.go Normal file
View file

@ -0,0 +1,119 @@
package collector
import (
"os"
"path/filepath"
"strings"
)
// Matcher determines if a file should be included in the collection.
// Collect calls ShouldInclude for directories as well as files, so an
// implementation must decide for itself how to treat directories.
type Matcher interface {
// ShouldInclude reports whether the entry at path, described by info,
// belongs in the collection.
ShouldInclude(path string, info os.FileInfo) bool
}
// NameMatcher selects files whose base name equals a fixed string.
type NameMatcher struct {
	name string
}

// NewNameMatcher returns a NameMatcher that accepts files named exactly name.
func NewNameMatcher(name string) *NameMatcher {
	m := new(NameMatcher)
	m.name = name
	return m
}

// ShouldInclude reports whether info describes a non-directory whose name is
// exactly the configured name. The path argument is not consulted.
func (m *NameMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	return !info.IsDir() && info.Name() == m.name
}
// PatternMatcher selects files that live, at any depth, inside a directory
// whose path matches a glob pattern (filepath.Match syntax).
//
// The matcher is stateful: directories that matched are cached, and the
// first directory passed to ShouldInclude is remembered as the walk root.
// filepath.WalkDir always reports its root first, so when the matcher is
// driven by a walk this is the directory being collected. Ancestors above
// that root are never tested against the pattern; otherwise a matching
// directory name somewhere above the source tree would cause every file in
// the tree to be included. A matcher is meant to be used for a single walk.
type PatternMatcher struct {
	pattern         string
	matchedDirs     map[string]bool
	patternSegments []string
	root            string // first directory seen; upper bound for ancestor checks
	hasRoot         bool   // whether root has been recorded
}

// NewPatternMatcher returns a PatternMatcher for pattern. One trailing "/" is
// removed, so "aet-*/" and "aet-*" are equivalent. A pattern that contains
// the OS path separator is matched segment by segment against the trailing
// segments of a directory path.
func NewPatternMatcher(pattern string) *PatternMatcher {
	pattern = strings.TrimSuffix(pattern, "/")
	return &PatternMatcher{
		pattern:         pattern,
		matchedDirs:     make(map[string]bool),
		patternSegments: strings.Split(pattern, string(os.PathSeparator)),
	}
}

// ShouldInclude reports whether the file at path lies inside a directory
// that matches the pattern.
//
// For a directory it always returns false (directories themselves are not
// collected) but records the directory if it matches, and records it as the
// walk root if it is the first directory seen. For a file it checks the
// parent directory and then each ancestor in turn, stopping at the recorded
// root. If no directory has been seen yet, the ancestors are checked all the
// way up, as there is no known root to stop at.
//
// Errors from an invalid pattern are treated as "no match".
func (m *PatternMatcher) ShouldInclude(path string, info os.FileInfo) bool {
	if info.IsDir() {
		if !m.hasRoot {
			m.root, m.hasRoot = filepath.Clean(path), true
		}
		if matched, err := m.dirMatchesPattern(path); err == nil && matched {
			m.matchedDirs[path] = true
		}
		return false
	}

	dir := filepath.Dir(path)
	for {
		if m.matchedDirs[dir] {
			return true
		}
		// The directory may not have been offered to the matcher yet, so
		// evaluate it directly as well.
		if matched, err := m.dirMatchesPattern(dir); err == nil && matched {
			m.matchedDirs[dir] = true
			return true
		}
		// Never climb above the walk root: directories outside the source
		// tree must not influence the result.
		if m.hasRoot && dir == m.root {
			return false
		}
		parent := filepath.Dir(dir)
		if parent == dir || parent == "." {
			return false
		}
		dir = parent
	}
}

// dirMatchesPattern reports whether dirPath matches the pattern. A pattern
// with a single segment is matched against the directory's base name. A
// pattern with several segments is matched right-aligned: its last segment
// against the last path segment, the one before against the one before, and
// so on. The error is non-nil only when filepath.Match rejects the pattern.
func (m *PatternMatcher) dirMatchesPattern(dirPath string) (bool, error) {
	if len(m.patternSegments) == 1 {
		return filepath.Match(m.pattern, filepath.Base(dirPath))
	}

	pathSegments := strings.Split(dirPath, string(os.PathSeparator))
	offset := len(pathSegments) - len(m.patternSegments)
	if offset < 0 {
		// The path is shorter than the pattern and cannot match.
		return false, nil
	}

	// Compare from the last segment backwards.
	for i := len(m.patternSegments) - 1; i >= 0; i-- {
		matched, err := filepath.Match(m.patternSegments[i], pathSegments[offset+i])
		if err != nil {
			return false, err
		}
		if !matched {
			return false, nil
		}
	}
	return true, nil
}

3
go.mod Normal file
View file

@ -0,0 +1,3 @@
module github.com/atomaka/collect
go 1.21

103
main.go Normal file
View file

@ -0,0 +1,103 @@
package main
import (
"flag"
"fmt"
"os"
"path/filepath"
"github.com/atomaka/collect/archiver"
"github.com/atomaka/collect/collector"
)
// Process exit codes used by main.
const (
exitSuccess = 0 // declared for completeness; not referenced in the code shown here
exitNoFiles = 1 // collection finished but nothing matched
exitArchiveError = 2 // collection failed for another reason, or the archive could not be created
exitInvalidArgs = 3 // bad flag combination, wrong number of arguments, or unsupported archive extension
)
// main parses the command line, collects the matching files under the source
// directory and writes them to the requested archive.
//
// Exactly one of --name or --match must be given, followed by the source
// directory and the output archive path. The archive format is chosen from
// the output file's extension. The process exits with exitInvalidArgs for
// usage errors, exitNoFiles when nothing matched, and exitArchiveError for
// any other collection or archiving failure.
func main() {
	nameFlag := flag.String("name", "", "Match exact filename")
	matchFlag := flag.String("match", "", "Match directory pattern")

	flag.Usage = func() {
		fmt.Fprintf(os.Stderr, "Usage: %s [--name <filename> | --match <pattern>] <source-dir> <output-archive>\n\n", os.Args[0])
		fmt.Fprintf(os.Stderr, "Collects files recursively matching specific criteria and archives them.\n\n")
		fmt.Fprintf(os.Stderr, "Options:\n")
		flag.PrintDefaults()
		fmt.Fprintf(os.Stderr, "\nExamples:\n")
		fmt.Fprintf(os.Stderr, " %s --name .mise.toml ./ backup.tgz\n", os.Args[0])
		fmt.Fprintf(os.Stderr, " %s --match 'aet-*/' ./ backup.zip\n", os.Args[0])
	}
	flag.Parse()

	// Both empty or both set is an error: exactly one selector is required.
	if (*nameFlag == "") == (*matchFlag == "") {
		fmt.Fprintf(os.Stderr, "Error: Exactly one of --name or --match must be specified\n\n")
		flag.Usage()
		os.Exit(exitInvalidArgs)
	}

	args := flag.Args()
	if len(args) != 2 {
		fmt.Fprintf(os.Stderr, "Error: Expected 2 arguments (source directory and output archive), got %d\n\n", len(args))
		flag.Usage()
		os.Exit(exitInvalidArgs)
	}
	sourceDir, outputPath := args[0], args[1]

	format := collector.GetArchiveFormat(outputPath)
	if format == "" {
		fmt.Fprintf(os.Stderr, "Error: Unsupported archive format. Use .tar.gz, .tgz, or .zip\n")
		os.Exit(exitInvalidArgs)
	}

	var matcher collector.Matcher
	if *nameFlag != "" {
		matcher = collector.NewNameMatcher(*nameFlag)
	} else {
		matcher = collector.NewPatternMatcher(*matchFlag)
	}

	files, err := collector.New(matcher).Collect(sourceDir)
	if err != nil {
		fmt.Fprintf(os.Stderr, "Error: %v\n", err)
		// Collect reports an empty result with this exact message.
		if err.Error() == "no files found matching criteria" {
			os.Exit(exitNoFiles)
		}
		os.Exit(exitArchiveError)
	}

	fmt.Printf("Found %d files to archive\n", len(files))

	var arch archiver.Archiver
	switch format {
	case "tar.gz":
		arch = archiver.NewTarArchiver()
	case "zip":
		arch = archiver.NewZipArchiver()
	default:
		// Guards against a format being recognised by GetArchiveFormat
		// without an archiver here, which would leave arch nil.
		fmt.Fprintf(os.Stderr, "Error: Unsupported archive format. Use .tar.gz, .tgz, or .zip\n")
		os.Exit(exitInvalidArgs)
	}

	if err := arch.Create(outputPath, files); err != nil {
		fmt.Fprintf(os.Stderr, "Error creating archive: %v\n", err)
		os.Exit(exitArchiveError)
	}

	// Prefer the absolute path in the success message, but fall back to the
	// path as given rather than printing an empty string.
	absOutput, err := filepath.Abs(outputPath)
	if err != nil {
		absOutput = outputPath
	}
	fmt.Printf("Archive created successfully: %s\n", absOutput)
}

162
test.sh Executable file
View file

@ -0,0 +1,162 @@
#!/bin/bash
# Test suite for the collect CLI: builds the binary, creates a fixture tree,
# runs the tool and checks exit codes and archive contents.
# Exit immediately when an unguarded command returns a non-zero status.
set -e
# Colors for output (ANSI escape sequences, interpreted by `echo -e`)
RED='\033[0;31m'
GREEN='\033[0;32m'
YELLOW='\033[1;33m'
NC='\033[0m' # No Color
# Test counters, updated by run_test and reported in the summary
TESTS_PASSED=0
TESTS_FAILED=0
# run_test NAME COMMAND [EXPECTED_EXIT_CODE]
#
# Runs COMMAND through eval with stdout and stderr captured in
# /tmp/test_output.txt and compares its exit status with EXPECTED_EXIT_CODE
# (default 0). Increments TESTS_PASSED or TESTS_FAILED and prints the result;
# on failure the captured output is printed as well.
#
# Returns 0 when the exit status matched and 1 otherwise. The script runs
# under `set -e`, so an unguarded call that returns 1 aborts the script.
run_test() {
    local test_name="$1"
    local command="$2"
    local expected_exit_code="${3:-0}"

    echo -n "Testing $test_name... "

    # The command under test is allowed to fail; only its status matters.
    set +e
    eval "$command" > /tmp/test_output.txt 2>&1
    local exit_code=$?
    set -e

    if [ "$exit_code" -eq "$expected_exit_code" ]; then
        echo -e "${GREEN}PASSED${NC}"
        # Not ((TESTS_PASSED++)): that evaluates to 0 on the first pass,
        # returns status 1, and aborts the script under `set -e`.
        TESTS_PASSED=$((TESTS_PASSED + 1))
        return 0
    fi

    echo -e "${RED}FAILED${NC}"
    echo " Expected exit code: $expected_exit_code, got: $exit_code"
    echo " Output:"
    sed 's/^/ /' /tmp/test_output.txt
    TESTS_FAILED=$((TESTS_FAILED + 1))
    return 1
}
# verify_archive_contents ARCHIVE EXPECTED_FILES
#
# Lists the entries of ARCHIVE (.tgz/.tar.gz via tar, .zip via unzip) and
# compares the sorted list with the space-separated names in EXPECTED_FILES.
# Prints OK or the expected/actual lists. Returns 0 on a match and 1 on a
# mismatch or an unrecognised archive extension.
verify_archive_contents() {
    local archive="$1"
    local expected_files="$2"
    # Keep the working variables out of the global scope.
    local actual_files expected_sorted

    echo -n " Verifying contents of $archive... "

    if [[ "$archive" == *.tgz || "$archive" == *.tar.gz ]]; then
        actual_files=$(tar -tzf "$archive" | sort | tr '\n' ' ' | sed 's/ $//')
    elif [[ "$archive" == *.zip ]]; then
        # `unzip -Z1` prints one entry name per line and nothing else, so no
        # header/footer filtering is needed and names containing "files" or
        # spaces are not dropped or truncated.
        actual_files=$(unzip -Z1 "$archive" | sort | tr '\n' ' ' | sed 's/ $//')
    else
        echo -e "${RED}Unknown archive format${NC}"
        return 1
    fi

    expected_sorted=$(echo "$expected_files" | tr ' ' '\n' | sort | tr '\n' ' ' | sed 's/ $//')

    if [ "$actual_files" = "$expected_sorted" ]; then
        echo -e "${GREEN}OK${NC}"
        return 0
    fi

    echo -e "${RED}FAILED${NC}"
    echo " Expected: $expected_sorted"
    echo " Got: $actual_files"
    return 1
}
# Test driver. The script runs under `set -e`, so every run_test call is
# guarded with `|| true` and every verify_archive_contents call records its
# own failure: a failing check must not abort the run before the cleanup and
# the summary, and content mismatches must count as failures.
echo -e "${YELLOW}=== Collect CLI Test Suite ===${NC}\n"

# Clean up any previous test artifacts
echo "Cleaning up previous test artifacts..."
rm -rf test test-*.tgz test-*.zip collect

# Build the tool
echo "Building collect tool..."
if ! go build -o collect; then
    echo -e "${RED}Failed to build collect tool${NC}"
    exit 1
fi
echo -e "${GREEN}Build successful${NC}\n"

# Create test directory structure
echo "Setting up test environment..."
mkdir -p test/subdir/aet-bin test/subdir/aet-config test/other test/deep/nested/aet-tools
echo "test config" > test/.mise.toml
echo "another config" > test/subdir/.mise.toml
echo "binary" > test/subdir/aet-bin/tool
echo "config" > test/subdir/aet-config/settings.conf
echo "other file" > test/other/file.txt
echo "deep tool" > test/deep/nested/aet-tools/deep.sh
echo "not in aet dir" > test/deep/nested/regular.txt
echo -e "${GREEN}Test environment ready${NC}\n"

# Test 1: Name matching with tar.gz
run_test "name matching (.mise.toml) with tar.gz" \
    "./collect --name .mise.toml test/ test-name.tgz" || true
verify_archive_contents "test-name.tgz" ".mise.toml subdir/.mise.toml" \
    || TESTS_FAILED=$((TESTS_FAILED + 1))

# Test 2: Pattern matching with zip
run_test "pattern matching (aet-*) with zip" \
    "./collect --match 'aet-*' test/ test-pattern.zip" || true
verify_archive_contents "test-pattern.zip" "subdir/aet-bin/tool subdir/aet-config/settings.conf deep/nested/aet-tools/deep.sh" \
    || TESTS_FAILED=$((TESTS_FAILED + 1))

# Test 3: Pattern matching with tgz
run_test "pattern matching (aet-*) with tgz" \
    "./collect --match 'aet-*' test/ test-pattern.tgz" || true
verify_archive_contents "test-pattern.tgz" "deep/nested/aet-tools/deep.sh subdir/aet-bin/tool subdir/aet-config/settings.conf" \
    || TESTS_FAILED=$((TESTS_FAILED + 1))

# Test 4: No files found (should exit with code 1)
run_test "no files found error" \
    "./collect --name nonexistent.file test/ test-empty.zip" 1 || true

# Test 5: Invalid arguments - no flags
run_test "invalid arguments - no flags" \
    "./collect test/ output.zip" 3 || true

# Test 6: Invalid arguments - both flags
run_test "invalid arguments - both flags" \
    "./collect --name .mise.toml --match 'aet-*' test/ output.zip" 3 || true

# Test 7: Invalid arguments - missing output file
run_test "invalid arguments - missing output file" \
    "./collect --name .mise.toml test/" 3 || true

# Test 8: Invalid archive format
run_test "invalid archive format" \
    "./collect --name .mise.toml test/ output.txt" 3 || true

# Test 9: Non-existent source directory
run_test "non-existent source directory" \
    "./collect --name .mise.toml nonexistent/ output.zip" 2 || true

# Test 10: Help flag
run_test "help flag output" \
    "./collect --help" 0 || true

# Test 11: Name matching with subdirectory pattern
echo "subdir config" > test/subdir/aet-bin/.mise.toml
run_test "name matching finds files in pattern dirs too" \
    "./collect --name .mise.toml test/ test-name-all.tgz" || true
verify_archive_contents "test-name-all.tgz" ".mise.toml subdir/.mise.toml subdir/aet-bin/.mise.toml" \
    || TESTS_FAILED=$((TESTS_FAILED + 1))

# Clean up
echo -e "\nCleaning up..."
rm -rf test test-*.tgz test-*.zip collect /tmp/test_output.txt

# Summary
echo -e "\n${YELLOW}=== Test Summary ===${NC}"
echo -e "Tests passed: ${GREEN}$TESTS_PASSED${NC}"
echo -e "Tests failed: ${RED}$TESTS_FAILED${NC}"
if [ "$TESTS_FAILED" -eq 0 ]; then
    echo -e "\n${GREEN}All tests passed!${NC}"
    exit 0
else
    echo -e "\n${RED}Some tests failed!${NC}"
    exit 1
fi