Skip to content

Commit 925a3cd

Browse files
jub0bs authored and thepudds committed
unicode/utf8: make DecodeRune{,InString} inlineable
This change makes the fast path for ASCII characters inlineable in
DecodeRune and DecodeRuneInString and removes most instances of manual
inlining at call sites.

Here are some benchmark results (no change to allocations):

goos: darwin
goarch: amd64
pkg: unicode/utf8
cpu: Intel(R) Core(TM) i7-6700HQ CPU @ 2.60GHz
                             │     old      │                 new                  │
                             │    sec/op    │    sec/op     vs base                │
DecodeASCIIRune-8              2.4545n ± 2%   0.6253n ± 2%  -74.52% (p=0.000 n=20)
DecodeJapaneseRune-8            3.988n ± 1%    4.023n ± 1%   +0.86% (p=0.050 n=20)
DecodeASCIIRuneInString-8      2.4675n ± 1%   0.6264n ± 2%  -74.61% (p=0.000 n=20)
DecodeJapaneseRuneInString-8    3.992n ± 1%    4.001n ± 1%        ~ (p=0.625 n=20)
geomean                         3.134n         1.585n        -49.43%

Note: when #61502 gets resolved, DecodeRune and DecodeRuneInString should
be reverted to their idiomatic implementations.

Fixes #31666
Updates #48195

Change-Id: I4be25c4f52417dc28b3a7bd72f1b04018470f39d
GitHub-Last-Rev: 2e352a0
GitHub-Pull-Request: #75181
Reviewed-on: https://go-review.googlesource.com/c/go/+/699675
Reviewed-by: Sean Liao <[email protected]>
LUCI-TryBot-Result: Go LUCI <[email protected]>
Reviewed-by: Cherry Mui <[email protected]>
Reviewed-by: Michael Pratt <[email protected]>
1 parent 3e596d4 commit 925a3cd

File tree

14 files changed

+74
-108
lines changed

14 files changed

+74
-108
lines changed

src/bufio/bufio.go

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) {
311311
if b.r == b.w {
312312
return 0, 0, b.readErr()
313313
}
314-
r, size = rune(b.buf[b.r]), 1
315-
if r >= utf8.RuneSelf {
316-
r, size = utf8.DecodeRune(b.buf[b.r:b.w])
317-
}
314+
r, size = utf8.DecodeRune(b.buf[b.r:b.w])
318315
b.r += size
319316
b.lastByte = int(b.buf[b.r-1])
320317
b.lastRuneSize = size

src/bytes/bytes.go

Lines changed: 8 additions & 32 deletions
Original file line number | Diff line number | Diff line change
@@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte {
528528
// more efficient, possibly due to cache effects.
529529
start := -1 // valid span start if >= 0
530530
for i := 0; i < len(s); {
531-
size := 1
532-
r := rune(s[i])
533-
if r >= utf8.RuneSelf {
534-
r, size = utf8.DecodeRune(s[i:])
535-
}
531+
r, size := utf8.DecodeRune(s[i:])
536532
if f(r) {
537533
if start >= 0 {
538534
spans = append(spans, span{start, i})
@@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte {
614610
// fine. It could also shrink but that falls out naturally.
615611
b := make([]byte, 0, len(s))
616612
for i := 0; i < len(s); {
617-
wid := 1
618-
r := rune(s[i])
619-
if r >= utf8.RuneSelf {
620-
r, wid = utf8.DecodeRune(s[i:])
621-
}
613+
r, wid := utf8.DecodeRune(s[i:])
622614
r = mapping(r)
623615
if r >= 0 {
624616
b = utf8.AppendRune(b, r)
@@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int {
917909
func indexFunc(s []byte, f func(r rune) bool, truth bool) int {
918910
start := 0
919911
for start < len(s) {
920-
wid := 1
921-
r := rune(s[start])
922-
if r >= utf8.RuneSelf {
923-
r, wid = utf8.DecodeRune(s[start:])
924-
}
912+
r, wid := utf8.DecodeRune(s[start:])
925913
if f(r) == truth {
926914
return start
927915
}
@@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte {
10521040

10531041
func trimLeftUnicode(s []byte, cutset string) []byte {
10541042
for len(s) > 0 {
1055-
r, n := rune(s[0]), 1
1056-
if r >= utf8.RuneSelf {
1057-
r, n = utf8.DecodeRune(s)
1058-
}
1043+
r, n := utf8.DecodeRune(s)
10591044
if !containsRune(cutset, r) {
10601045
break
10611046
}
@@ -1251,19 +1236,10 @@ hasUnicode:
12511236
t = t[i:]
12521237
for len(s) != 0 && len(t) != 0 {
12531238
// Extract first rune from each.
1254-
var sr, tr rune
1255-
if s[0] < utf8.RuneSelf {
1256-
sr, s = rune(s[0]), s[1:]
1257-
} else {
1258-
r, size := utf8.DecodeRune(s)
1259-
sr, s = r, s[size:]
1260-
}
1261-
if t[0] < utf8.RuneSelf {
1262-
tr, t = rune(t[0]), t[1:]
1263-
} else {
1264-
r, size := utf8.DecodeRune(t)
1265-
tr, t = r, t[size:]
1266-
}
1239+
sr, size := utf8.DecodeRune(s)
1240+
s = s[size:]
1241+
tr, size := utf8.DecodeRune(t)
1242+
t = t[size:]
12671243

12681244
// If they match, keep going; if not, return false.
12691245

src/bytes/iter.go

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] {
117117
return func(yield func([]byte) bool) {
118118
start := -1
119119
for i := 0; i < len(s); {
120-
size := 1
121-
r := rune(s[i])
122-
if r >= utf8.RuneSelf {
123-
r, size = utf8.DecodeRune(s[i:])
124-
}
120+
r, size := utf8.DecodeRune(s[i:])
125121
if f(r) {
126122
if start >= 0 {
127123
if !yield(s[start:i:i]) {

src/cmd/compile/internal/test/inl_test.go

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) {
125125
"assemble64",
126126
},
127127
"unicode/utf8": {
128+
"DecodeRune",
129+
"DecodeRuneInString",
128130
"FullRune",
129131
"FullRuneInString",
130132
"RuneLen",

src/encoding/json/decode.go

Lines changed: 0 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) {
12141214
if c == '\\' || c == '"' || c < ' ' {
12151215
break
12161216
}
1217-
if c < utf8.RuneSelf {
1218-
r++
1219-
continue
1220-
}
12211217
rr, size := utf8.DecodeRune(s[r:])
12221218
if rr == utf8.RuneError && size == 1 {
12231219
break

src/fmt/format.go

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte {
346346
if n < 0 {
347347
return b[:i]
348348
}
349-
wid := 1
350-
if b[i] >= utf8.RuneSelf {
351-
_, wid = utf8.DecodeRune(b[i:])
352-
}
349+
_, wid := utf8.DecodeRune(b[i:])
353350
i += wid
354351
}
355352
}

src/fmt/print.go

Lines changed: 1 addition & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1145,10 +1145,7 @@ formatLoop:
11451145
break
11461146
}
11471147

1148-
verb, size := rune(format[i]), 1
1149-
if verb >= utf8.RuneSelf {
1150-
verb, size = utf8.DecodeRuneInString(format[i:])
1151-
}
1148+
verb, size := utf8.DecodeRuneInString(format[i:])
11521149
i += size
11531150

11541151
switch {

src/regexp/regexp.go

Lines changed: 4 additions & 24 deletions
Original file line number | Diff line number | Diff line change
@@ -384,10 +384,6 @@ type inputString struct {
384384

385385
func (i *inputString) step(pos int) (rune, int) {
386386
if pos < len(i.str) {
387-
c := i.str[pos]
388-
if c < utf8.RuneSelf {
389-
return rune(c), 1
390-
}
391387
return utf8.DecodeRuneInString(i.str[pos:])
392388
}
393389
return endOfText, 0
@@ -409,17 +405,11 @@ func (i *inputString) context(pos int) lazyFlag {
409405
r1, r2 := endOfText, endOfText
410406
// 0 < pos && pos <= len(i.str)
411407
if uint(pos-1) < uint(len(i.str)) {
412-
r1 = rune(i.str[pos-1])
413-
if r1 >= utf8.RuneSelf {
414-
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
415-
}
408+
r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
416409
}
417410
// 0 <= pos && pos < len(i.str)
418411
if uint(pos) < uint(len(i.str)) {
419-
r2 = rune(i.str[pos])
420-
if r2 >= utf8.RuneSelf {
421-
r2, _ = utf8.DecodeRuneInString(i.str[pos:])
422-
}
412+
r2, _ = utf8.DecodeRuneInString(i.str[pos:])
423413
}
424414
return newLazyFlag(r1, r2)
425415
}
@@ -431,10 +421,6 @@ type inputBytes struct {
431421

432422
func (i *inputBytes) step(pos int) (rune, int) {
433423
if pos < len(i.str) {
434-
c := i.str[pos]
435-
if c < utf8.RuneSelf {
436-
return rune(c), 1
437-
}
438424
return utf8.DecodeRune(i.str[pos:])
439425
}
440426
return endOfText, 0
@@ -456,17 +442,11 @@ func (i *inputBytes) context(pos int) lazyFlag {
456442
r1, r2 := endOfText, endOfText
457443
// 0 < pos && pos <= len(i.str)
458444
if uint(pos-1) < uint(len(i.str)) {
459-
r1 = rune(i.str[pos-1])
460-
if r1 >= utf8.RuneSelf {
461-
r1, _ = utf8.DecodeLastRune(i.str[:pos])
462-
}
445+
r1, _ = utf8.DecodeLastRune(i.str[:pos])
463446
}
464447
// 0 <= pos && pos < len(i.str)
465448
if uint(pos) < uint(len(i.str)) {
466-
r2 = rune(i.str[pos])
467-
if r2 >= utf8.RuneSelf {
468-
r2, _ = utf8.DecodeRune(i.str[pos:])
469-
}
449+
r2, _ = utf8.DecodeRune(i.str[pos:])
470450
}
471451
return newLazyFlag(r1, r2)
472452
}

src/strconv/quote.go

Lines changed: 2 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b
3737
buf = nBuf
3838
}
3939
buf = append(buf, quote)
40-
for width := 0; len(s) > 0; s = s[width:] {
41-
r := rune(s[0])
42-
width = 1
43-
if r >= utf8.RuneSelf {
44-
r, width = utf8.DecodeRuneInString(s)
45-
}
40+
for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
41+
r, width = utf8.DecodeRuneInString(s)
4642
if width == 1 && r == utf8.RuneError {
4743
buf = append(buf, `\x`...)
4844
buf = append(buf, lowerhex[s[0]>>4])

src/strings/iter.go

Lines changed: 1 addition & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] {
117117
return func(yield func(string) bool) {
118118
start := -1
119119
for i := 0; i < len(s); {
120-
size := 1
121-
r := rune(s[i])
122-
if r >= utf8.RuneSelf {
123-
r, size = utf8.DecodeRuneInString(s[i:])
124-
}
120+
r, size := utf8.DecodeRuneInString(s[i:])
125121
if f(r) {
126122
if start >= 0 {
127123
if !yield(s[start:i]) {

0 commit comments

Comments
 (0)