VAULT-20405 chunk decompression to prevent loading full decompressed data into memory at once (#26464)

* VAULT-20405 chunk decompression to prevent loading full decompressed data into memory at once

* Add changelog
This commit is contained in:
Violet Hynes 2024-04-18 10:13:56 -04:00 committed by GitHub
parent ade585a1e8
commit 85ed817034
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
3 changed files with 57 additions and 8 deletions

3
changelog/26464.txt Normal file
View file

@ -0,0 +1,3 @@
```release-note:improvement
sdk/decompression: DecompressWithCanary now decompresses data in fixed-size chunks to avoid loading the entire decompressed payload into memory at once.
```

View file

@ -11,7 +11,6 @@ import (
"io"
"github.com/golang/snappy"
"github.com/hashicorp/errwrap"
"github.com/pierrec/lz4"
)
@ -34,7 +33,7 @@ const (
CompressionCanaryLZ4 byte = '4'
)
// SnappyReadCloser embeds the snappy reader which implements the io.Reader
// CompressUtilReadCloser embeds the snappy reader which implements the io.Reader
// interface. The decompress procedure in this utility expects an
// io.ReadCloser. This type implements the io.Closer interface to retain the
// generic way of decompression.
@ -98,7 +97,7 @@ func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
// These are valid compression levels
default:
// If compression level is set to NoCompression or to
// any invalid value, fallback to Defaultcompression
// any invalid value, fallback to DefaultCompression
config.GzipCompressionLevel = gzip.DefaultCompression
}
writer, err = gzip.NewWriterLevel(&buf, config.GzipCompressionLevel)
@ -116,7 +115,7 @@ func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
}
if err != nil {
return nil, errwrap.Wrapf("failed to create a compression writer: {{err}}", err)
return nil, fmt.Errorf("failed to create a compression writer: %w", err)
}
if writer == nil {
@ -126,7 +125,7 @@ func Compress(data []byte, config *CompressionConfig) ([]byte, error) {
// Compress the input and place it in the same buffer containing the
// canary byte.
if _, err = writer.Write(data); err != nil {
return nil, errwrap.Wrapf("failed to compress input data: err: {{err}}", err)
return nil, fmt.Errorf("failed to compress input data: err: %w", err)
}
// Close the io.WriteCloser
@ -206,7 +205,7 @@ func DecompressWithCanary(data []byte) ([]byte, string, bool, error) {
return nil, "", true, nil
}
if err != nil {
return nil, "", false, errwrap.Wrapf("failed to create a compression reader: {{err}}", err)
return nil, "", false, fmt.Errorf("failed to create a compression reader: %w", err)
}
if reader == nil {
return nil, "", false, fmt.Errorf("failed to create a compression reader")
@ -217,8 +216,18 @@ func DecompressWithCanary(data []byte) ([]byte, string, bool, error) {
// Read all the compressed data into a buffer
var buf bytes.Buffer
if _, err = io.Copy(&buf, reader); err != nil {
return nil, "", false, err
// Read the compressed data into a buffer, but do so
// slowly to prevent reading all the data into memory
// at once (protecting against e.g. zip bombs).
for {
_, err := io.CopyN(&buf, reader, 1024)
if err != nil {
if err == io.EOF {
break
}
return nil, "", false, err
}
}
return buf.Bytes(), compressionType, false, nil

View file

@ -116,3 +116,40 @@ func TestCompressUtil_InvalidConfigurations(t *testing.T) {
t.Fatal("expected an error")
}
}
// TestDecompressWithCanaryLargeInput tests that DecompressWithCanary works
// as expected even with large values.
func TestDecompressWithCanaryLargeInput(t *testing.T) {
	t.Parallel()

	// Build a large, highly compressible JSON payload of the form
	// {"sample":"data and data and data ..."}. bytes.Repeat builds the
	// repeated section in one pass instead of quadratic string
	// concatenation in a loop.
	inputJSONBytes := append([]byte(`{"sample":"data`), bytes.Repeat([]byte(" and data"), 100000)...)
	inputJSONBytes = append(inputJSONBytes, `"}`...)

	compressedJSONBytes, err := Compress(inputJSONBytes, &CompressionConfig{Type: CompressionTypeGzip, GzipCompressionLevel: gzip.BestCompression})
	if err != nil {
		t.Fatal(err)
	}

	// Call DecompressWithCanary directly: it is the function under test
	// (it performs the chunked decompression), and its extra return values
	// are checked below.
	decompressedJSONBytes, _, wasNotCompressed, err := DecompressWithCanary(compressedJSONBytes)
	if err != nil {
		t.Fatal(err)
	}

	// Check if the input for decompress was not compressed in the first place
	if wasNotCompressed {
		t.Fatalf("bytes were not compressed as expected")
	}

	if len(decompressedJSONBytes) == 0 {
		t.Fatalf("decompressed output is unexpectedly empty")
	}

	// Compare the value after decompression
	if !bytes.Equal(inputJSONBytes, decompressedJSONBytes) {
		t.Fatalf("decompressed value differs: decompressed value;\nexpected: %q\nactual: %q", string(inputJSONBytes), string(decompressedJSONBytes))
	}
}