Skip to content

Commit 998dce2

Browse files
committed
Add initial support for custom seeds
This adds support for custom seeds when using a Digest (but not for the one-shot functions Sum64 and Sum64String). The seed is not stored in the digest itself -- every Reset uses a zero seed and (Un)MarshalBinary is unchanged. This is simpler for backward compatibility but may be something to reconsider if we rework the API for a v3.
1 parent 21fc82b commit 998dce2

File tree

6 files changed

+76
-29
lines changed

6 files changed

+76
-29
lines changed

xxhash.go

Lines changed: 22 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -19,10 +19,13 @@ const (
1919
// Store the primes in an array as well.
2020
//
2121
// The consts are used when possible in Go code to avoid MOVs but we need a
22-
// contiguous array of the assembly code.
22+
// contiguous array for the assembly code.
2323
var primes = [...]uint64{prime1, prime2, prime3, prime4, prime5}
2424

2525
// Digest implements hash.Hash64.
26+
//
27+
// Note that a zero-valued Digest is not ready to receive writes.
28+
// Call Reset or create a Digest using New before calling other methods.
2629
type Digest struct {
2730
v1 uint64
2831
v2 uint64
@@ -33,19 +36,31 @@ type Digest struct {
3336
n int // how much of mem is used
3437
}
3538

36-
// New creates a new Digest that computes the 64-bit xxHash algorithm.
39+
// New creates a new Digest with a zero seed.
3740
func New() *Digest {
41+
return NewWithSeed(0)
42+
}
43+
44+
// NewWithSeed creates a new Digest with the given seed.
45+
func NewWithSeed(seed uint64) *Digest {
3846
var d Digest
39-
d.Reset()
47+
d.ResetWithSeed(seed)
4048
return &d
4149
}
4250

4351
// Reset clears the Digest's state so that it can be reused.
52+
// It uses a seed value of zero.
4453
func (d *Digest) Reset() {
45-
d.v1 = primes[0] + prime2
46-
d.v2 = prime2
47-
d.v3 = 0
48-
d.v4 = -primes[0]
54+
d.ResetWithSeed(0)
55+
}
56+
57+
// ResetWithSeed clears the Digest's state so that it can be reused.
58+
// It uses the given seed to initialize the state.
59+
func (d *Digest) ResetWithSeed(seed uint64) {
60+
d.v1 = seed + prime1 + prime2
61+
d.v2 = seed + prime2
62+
d.v3 = seed
63+
d.v4 = seed - prime1
4964
d.total = 0
5065
d.n = 0
5166
}

xxhash_asm.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66

77
package xxhash
88

9-
// Sum64 computes the 64-bit xxHash digest of b.
9+
// Sum64 computes the 64-bit xxHash digest of b with a zero seed.
1010
//
1111
//go:noescape
1212
func Sum64(b []byte) uint64

xxhash_other.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
package xxhash
55

6-
// Sum64 computes the 64-bit xxHash digest of b.
6+
// Sum64 computes the 64-bit xxHash digest of b with a zero seed.
77
func Sum64(b []byte) uint64 {
88
// A simpler version would be
99
// d := New()

xxhash_safe.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,7 +5,7 @@
55

66
package xxhash
77

8-
// Sum64String computes the 64-bit xxHash digest of s.
8+
// Sum64String computes the 64-bit xxHash digest of s with a zero seed.
99
func Sum64String(s string) uint64 {
1010
return Sum64([]byte(s))
1111
}

xxhash_test.go

Lines changed: 50 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -4,45 +4,60 @@ import (
44
"bytes"
55
"encoding/binary"
66
"fmt"
7+
"math"
78
"strings"
89
"testing"
910
)
1011

1112
func TestAll(t *testing.T) {
13+
// Exactly 63 characters, which exercises all code paths.
14+
const s63 = "Call me Ishmael. Some years ago--never mind how long precisely-"
1215
for _, tt := range []struct {
13-
name string
1416
input string
17+
seed uint64
1518
want uint64
1619
}{
17-
{"empty", "", 0xef46db3751d8e999},
18-
{"a", "a", 0xd24ec4f1a98c6e5b},
19-
{"as", "as", 0x1c330fb2d66be179},
20-
{"asd", "asd", 0x631c37ce72a97393},
21-
{"asdf", "asdf", 0x415872f599cea71e},
22-
{
23-
"len=63",
24-
// Exactly 63 characters, which exercises all code paths.
25-
"Call me Ishmael. Some years ago--never mind how long precisely-",
26-
0x02a2e85470d6fd96,
27-
},
20+
{"", 0, 0xef46db3751d8e999},
21+
{"a", 0, 0xd24ec4f1a98c6e5b},
22+
{"as", 0, 0x1c330fb2d66be179},
23+
{"asd", 0, 0x631c37ce72a97393},
24+
{"asdf", 0, 0x415872f599cea71e},
25+
{s63, 0, 0x02a2e85470d6fd96},
26+
27+
{"", 123, 0xe0db84de91f3e198},
28+
{"asdf", math.MaxUint64, 0x9a2fd8473be539b6},
29+
{s63, 54321, 0x1736d186daf5d1cd},
2830
} {
2931
lastChunkSize := len(tt.input)
3032
if lastChunkSize == 0 {
3133
lastChunkSize = 1
3234
}
35+
var name string
36+
if tt.input == "" {
37+
name = "input=empty"
38+
} else if len(tt.input) > 10 {
39+
name = fmt.Sprintf("input=len-%d", len(tt.input))
40+
} else {
41+
name = fmt.Sprintf("input=%q", tt.input)
42+
}
43+
if tt.seed != 0 {
44+
name += fmt.Sprintf(",seed=%d", tt.seed)
45+
}
3346
for chunkSize := 1; chunkSize <= lastChunkSize; chunkSize++ {
34-
name := fmt.Sprintf("%s,chunkSize=%d", tt.name, chunkSize)
47+
name := fmt.Sprintf("%s,chunkSize=%d", name, chunkSize)
3548
t.Run(name, func(t *testing.T) {
36-
testDigest(t, tt.input, chunkSize, tt.want)
49+
testDigest(t, tt.input, tt.seed, chunkSize, tt.want)
3750
})
3851
}
39-
t.Run(tt.name, func(t *testing.T) { testSum(t, tt.input, tt.want) })
52+
if tt.seed == 0 {
53+
t.Run(name, func(t *testing.T) { testSum(t, tt.input, tt.want) })
54+
}
4055
}
4156
}
4257

43-
func testDigest(t *testing.T, input string, chunkSize int, want uint64) {
44-
d := New()
45-
ds := New() // uses WriteString
58+
func testDigest(t *testing.T, input string, seed uint64, chunkSize int, want uint64) {
59+
d := NewWithSeed(seed)
60+
ds := NewWithSeed(seed) // uses WriteString
4661
for i := 0; i < len(input); i += chunkSize {
4762
chunk := input[i:]
4863
if len(chunk) > chunkSize {
@@ -96,6 +111,23 @@ func TestReset(t *testing.T) {
96111
}
97112
}
98113

114+
func TestResetWithSeed(t *testing.T) {
115+
parts := []string{"The quic", "k br", "o", "wn fox jumps", " ov", "er the lazy ", "dog."}
116+
d := NewWithSeed(123)
117+
for _, part := range parts {
118+
d.Write([]byte(part))
119+
}
120+
h0 := d.Sum64()
121+
122+
d.ResetWithSeed(123)
123+
d.Write([]byte(strings.Join(parts, "")))
124+
h1 := d.Sum64()
125+
126+
if h0 != h1 {
127+
t.Errorf("0x%x != 0x%x", h0, h1)
128+
}
129+
}
130+
99131
func TestBinaryMarshaling(t *testing.T) {
100132
d := New()
101133
d.WriteString("abc")

xxhash_unsafe.go

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,7 +33,7 @@ import (
3333
//
3434
// See https://github.com/golang/go/issues/42739 for discussion.
3535

36-
// Sum64String computes the 64-bit xxHash digest of s.
36+
// Sum64String computes the 64-bit xxHash digest of s with a zero seed.
3737
// It may be faster than Sum64([]byte(s)) by avoiding a copy.
3838
func Sum64String(s string) uint64 {
3939
b := *(*[]byte)(unsafe.Pointer(&sliceHeader{s, len(s)}))

0 commit comments

Comments
 (0)