package compression import ( "bytes" "crypto/sha256" "encoding/binary" "io" "sync" "github.com/klauspost/compress/zstd" ) var ( encOnce sync.Once decOnce sync.Once encoder *zstd.Encoder decoder *zstd.Decoder ) func getEncoder() *zstd.Encoder { encOnce.Do(func() { opts := []zstd.EOption{ zstd.WithEncoderLevel(zstd.SpeedBestCompression), zstd.WithWindowSize(1 << 24), } enc, _ := zstd.NewWriter(nil, opts...) encoder = enc }) return encoder } func getDecoder() *zstd.Decoder { decOnce.Do(func() { dec, _ := zstd.NewReader(nil) decoder = dec }) return decoder } type Chunk struct { Hash [32]byte Data []byte Offset int64 Size int } type DedupStore struct { mu sync.RWMutex chunks map[[32]byte][]byte } func NewDedupStore() *DedupStore { return &DedupStore{chunks: make(map[[32]byte][]byte)} } func chunkData(data []byte, chunkSize int) []Chunk { var chunks []Chunk for offset := 0; offset < len(data); offset += chunkSize { end := offset + chunkSize if end > len(data) { end = len(data) } chunk := data[offset:end] hash := sha256.Sum256(chunk) chunks = append(chunks, Chunk{ Hash: hash, Data: chunk, Offset: int64(offset), Size: len(chunk), }) } return chunks } func (ds *DedupStore) Deduplicate(data []byte, chunkSize int) ([]Chunk, int, error) { chunks := chunkData(data, chunkSize) uniqueSize := 0 ds.mu.Lock() defer ds.mu.Unlock() for i, c := range chunks { if existing, ok := ds.chunks[c.Hash]; ok { chunks[i].Data = existing chunks[i].Size = len(existing) } else { ds.chunks[c.Hash] = c.Data uniqueSize += len(c.Data) } } return chunks, uniqueSize, nil } func Compress(data []byte) ([]byte, error) { if len(data) == 0 { return nil, nil } var buf bytes.Buffer enc := getEncoder() enc.Reset(&buf) if _, err := enc.Write(data); err != nil { return nil, err } if err := enc.Close(); err != nil { return nil, err } return buf.Bytes(), nil } func Decompress(data []byte) ([]byte, error) { if len(data) == 0 { return nil, nil } dec := getDecoder() return dec.DecodeAll(data, nil) } type Compressor struct { ChunkSize int DedupStore *DedupStore } func New(chunkSize int) *Compressor { if chunkSize <= 0 { chunkSize = 65536 } return &Compressor{ ChunkSize: chunkSize, DedupStore: NewDedupStore(), } } type CompressResult struct { Data []byte OriginalSize int64 CompressedSize int64 Chunks int UniqueChunks int } func (c *Compressor) CompressFile(data []byte) (*CompressResult, error) { originalSize := int64(len(data)) chunks, uniqueSize, err := c.DedupStore.Deduplicate(data, c.ChunkSize) if err != nil { return nil, err } var buf bytes.Buffer buf.Write(binary.AppendVarint(nil, int64(len(chunks)))) for _, chunk := range chunks { buf.Write(chunk.Hash[:]) buf.Write(binary.AppendVarint(nil, int64(chunk.Offset))) buf.Write(binary.AppendVarint(nil, int64(chunk.Size))) } compressed, err := Compress(data) if err != nil { return nil, err } return &CompressResult{ Data: compressed, OriginalSize: originalSize, CompressedSize: int64(len(compressed)), Chunks: len(chunks), UniqueChunks: uniqueSize / c.ChunkSize, }, nil } func (c *Compressor) DecompressFile(compressed []byte) ([]byte, error) { return Decompress(compressed) } func CompressStream(r io.Reader, w io.Writer) error { enc := getEncoder() defer enc.Close() enc.Reset(w) if _, err := io.Copy(enc, r); err != nil { return err } return enc.Close() } func DecompressStream(r io.Reader, w io.Writer) error { dec := getDecoder() defer dec.Close() dec.Reset(r) if _, err := io.Copy(w, dec); err != nil { return err } return nil }