Files
docker-cli/vendor/github.com/klauspost/compress/compressible.go
Sebastiaan van Stijn dc9e069ff2 vendor: github.com/docker/docker v20.10.3-0.20220309172631-83b51522df43
Changed `matcher.Matches(file)` to `matcher.MatchesOrParentMatches(file)`:

    cli/command/image/build/context.go:95:9: SA1019: matcher.Matches is deprecated: This implementation is buggy (it only checks a single parent dir against the pattern) and will be removed soon. Use either MatchesOrParentMatches or MatchesUsingParentResults instead.  (staticcheck)
        return matcher.Matches(file)
               ^

And updated a test to match the JSON omitting empty RootFS.Type fields (in
practice, this field should never be empty in real situations, and always
be "layer"). Changed the test to use subtests to easier find which case
is failing.

full diff: 343665850e...83b51522df

Signed-off-by: Sebastiaan van Stijn <github@gone.nl>
2022-03-09 18:45:37 +01:00

86 lines
1.8 KiB
Go

package compress
import "math"
// Estimate returns a normalized compressibility estimate of block b.
// Values close to zero are likely uncompressible.
// Values above 0.1 are likely to be compressible.
// Values above 0.5 are very compressible.
// Very small lengths will return 0.
func Estimate(b []byte) float64 {
if len(b) < 16 {
return 0
}
// Correctly predicted order 1
hits := 0
lastMatch := false
var o1 [256]byte
var hist [256]int
c1 := byte(0)
for _, c := range b {
if c == o1[c1] {
// We only count a hit if there was two correct predictions in a row.
if lastMatch {
hits++
}
lastMatch = true
} else {
lastMatch = false
}
o1[c1] = c
c1 = c
hist[c]++
}
// Use x^0.6 to give better spread
prediction := math.Pow(float64(hits)/float64(len(b)), 0.6)
// Calculate histogram distribution
variance := float64(0)
avg := float64(len(b)) / 256
for _, v := range hist {
Δ := float64(v) - avg
variance += Δ * Δ
}
stddev := math.Sqrt(float64(variance)) / float64(len(b))
exp := math.Sqrt(1 / float64(len(b)))
// Subtract expected stddev
stddev -= exp
if stddev < 0 {
stddev = 0
}
stddev *= 1 + exp
// Use x^0.4 to give better spread
entropy := math.Pow(stddev, 0.4)
// 50/50 weight between prediction and histogram distribution
return math.Pow((prediction+entropy)/2, 0.9)
}
// ShannonEntropyBits returns the number of bits minimum required to represent
// an entropy encoding of the input bytes.
// https://en.wiktionary.org/wiki/Shannon_entropy
func ShannonEntropyBits(b []byte) int {
if len(b) == 0 {
return 0
}
var hist [256]int
for _, c := range b {
hist[c]++
}
shannon := float64(0)
invTotal := 1.0 / float64(len(b))
for _, v := range hist[:] {
if v > 0 {
n := float64(v)
shannon += math.Ceil(-math.Log2(n*invTotal) * n)
}
}
return int(math.Ceil(shannon))
}