kat/internal/utils/tar.go
2025-05-10 18:18:58 -04:00

88 lines
2.9 KiB
Go

package utils
import (
"archive/tar"
"compress/gzip"
"fmt"
"io"
"path/filepath"
"strings"
)
// Limits enforced by UntarQuadlets on the contents of a quadlet bundle.
const (
	// maxQuadletFileSize is the per-file limit (1MB) for entries in the tarball.
	maxQuadletFileSize = 1 * 1024 * 1024
	// maxTotalQuadletSize is the limit (5MB) for the total uncompressed size.
	maxTotalQuadletSize = 5 * 1024 * 1024
	// maxQuadletFiles is the maximum number of files in a quadlet bundle.
	// The same bound is applied to the number of directory entries.
	maxQuadletFiles = 20
)

// UntarQuadlets unpacks a tar.gz stream in memory and returns a map of
// fileName -> fileContent. Keys are the entry names exactly as stored in the
// archive.
//
// Validation performed on every entry:
//   - names containing ".." are rejected;
//   - directory entries are skipped (at most maxQuadletFiles of them);
//   - every other entry must sit at the top level of the archive, have a
//     ".kat" suffix (case-insensitive) and be a regular file;
//   - at most maxQuadletFiles regular files, each at most maxQuadletFileSize
//     bytes, with a combined size of at most maxTotalQuadletSize bytes.
//
// An error is returned if the stream is not valid gzip/tar, if any check
// fails, or if the archive contains no .kat files. On error the returned map
// is nil.
func UntarQuadlets(reader io.Reader) (map[string][]byte, error) {
	gzr, err := gzip.NewReader(reader)
	if err != nil {
		return nil, fmt.Errorf("failed to create gzip reader: %w", err)
	}
	defer gzr.Close()

	tr := tar.NewReader(gzr)
	files := make(map[string][]byte)
	var totalSize int64
	fileCount := 0
	dirCount := 0

	for {
		header, err := tr.Next()
		if err == io.EOF {
			break // End of archive
		}
		if err != nil {
			return nil, fmt.Errorf("failed to read tar header: %w", err)
		}

		// Basic security checks
		if strings.Contains(header.Name, "..") {
			return nil, fmt.Errorf("invalid file path in tar: %s (contains '..')", header.Name)
		}

		// Directories are ignored; we expect a flat structure of .kat files.
		// This must happen before the file-name checks below: entries such
		// as "./" (emitted by `tar -C dir .`) are neither "*.kat" nor
		// guaranteed to pass the flat-layout check, and would otherwise make
		// a valid bundle fail. Files inside a skipped directory are still
		// rejected by the subdirectory check when their own entry is read.
		if header.Typeflag == tar.TypeDir {
			dirCount++
			// Bound the skipped entries so a stream of highly compressible
			// directory headers cannot keep this loop busy indefinitely.
			if dirCount > maxQuadletFiles {
				return nil, fmt.Errorf("too many directory entries in quadlet bundle; limit %d", maxQuadletFiles)
			}
			continue
		}

		// Ensure files are *.kat and are not in subdirectories within the tarball
		// The Quadlet concept implies a flat directory of *.kat files.
		if dir := filepath.Dir(header.Name); dir != "." && dir != "" {
			return nil, fmt.Errorf("invalid file path in tar: %s (subdirectories are not allowed for Quadlet files)", header.Name)
		}
		if !strings.HasSuffix(strings.ToLower(header.Name), ".kat") {
			return nil, fmt.Errorf("invalid file type in tar: %s (only .kat files are allowed)", header.Name)
		}

		switch header.Typeflag {
		case tar.TypeReg: // Regular file
			fileCount++
			if fileCount > maxQuadletFiles {
				return nil, fmt.Errorf("too many files in quadlet bundle; limit %d", maxQuadletFiles)
			}
			// Check the declared sizes before reading so oversized entries
			// are rejected without buffering their content.
			if header.Size > maxQuadletFileSize {
				return nil, fmt.Errorf("file %s in tar is too large: %d bytes (max %d)", header.Name, header.Size, maxQuadletFileSize)
			}
			totalSize += header.Size
			if totalSize > maxTotalQuadletSize {
				return nil, fmt.Errorf("total size of files in tar is too large (max %d MB)", maxTotalQuadletSize/(1024*1024))
			}

			content, err := io.ReadAll(tr)
			if err != nil {
				return nil, fmt.Errorf("failed to read file content for %s from tar: %w", header.Name, err)
			}
			if int64(len(content)) != header.Size {
				return nil, fmt.Errorf("file %s in tar has inconsistent size: header %d, read %d", header.Name, header.Size, len(content))
			}
			files[header.Name] = content
		default:
			// Symlinks, char devices, etc. are not allowed.
			return nil, fmt.Errorf("unsupported file type in tar for %s: typeflag %c", header.Name, header.Typeflag)
		}
	}

	if len(files) == 0 {
		return nil, fmt.Errorf("no .kat files found in the provided archive")
	}
	return files, nil
}