diff --git a/api/next/61642.txt b/api/next/61642.txt new file mode 100644 index 00000000000000..dd67874ab9fba4 --- /dev/null +++ b/api/next/61642.txt @@ -0,0 +1 @@ +pkg net/netip, method (Prefix) Compare(Prefix) int #61642 diff --git a/doc/next/6-stdlib/99-minor/net/netip/61642.md b/doc/next/6-stdlib/99-minor/net/netip/61642.md new file mode 100644 index 00000000000000..3d79f2e76aee04 --- /dev/null +++ b/doc/next/6-stdlib/99-minor/net/netip/61642.md @@ -0,0 +1 @@ +The new [Prefix.Compare] method compares two prefixes. diff --git a/src/bufio/bufio.go b/src/bufio/bufio.go index 5244ce2e0ca943..141a9a1a2a2305 100644 --- a/src/bufio/bufio.go +++ b/src/bufio/bufio.go @@ -311,10 +311,7 @@ func (b *Reader) ReadRune() (r rune, size int, err error) { if b.r == b.w { return 0, 0, b.readErr() } - r, size = rune(b.buf[b.r]), 1 - if r >= utf8.RuneSelf { - r, size = utf8.DecodeRune(b.buf[b.r:b.w]) - } + r, size = utf8.DecodeRune(b.buf[b.r:b.w]) b.r += size b.lastByte = int(b.buf[b.r-1]) b.lastRuneSize = size diff --git a/src/bytes/bytes.go b/src/bytes/bytes.go index ce2e0049102234..9a7f4ee3c93afb 100644 --- a/src/bytes/bytes.go +++ b/src/bytes/bytes.go @@ -528,11 +528,7 @@ func FieldsFunc(s []byte, f func(rune) bool) [][]byte { // more efficient, possibly due to cache effects. start := -1 // valid span start if >= 0 for i := 0; i < len(s); { - size := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, size = utf8.DecodeRune(s[i:]) - } + r, size := utf8.DecodeRune(s[i:]) if f(r) { if start >= 0 { spans = append(spans, span{start, i}) @@ -614,11 +610,7 @@ func Map(mapping func(r rune) rune, s []byte) []byte { // fine. It could also shrink but that falls out naturally. b := make([]byte, 0, len(s)) for i := 0; i < len(s); { - wid := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRune(s[i:]) - } + r, wid := utf8.DecodeRune(s[i:]) r = mapping(r) if r >= 0 { b = utf8.AppendRune(b, r) @@ -917,11 +909,7 @@ func LastIndexFunc(s []byte, f func(r rune) bool) int { func indexFunc(s []byte, f func(r rune) bool, truth bool) int { start := 0 for start < len(s) { - wid := 1 - r := rune(s[start]) - if r >= utf8.RuneSelf { - r, wid = utf8.DecodeRune(s[start:]) - } + r, wid := utf8.DecodeRune(s[start:]) if f(r) == truth { return start } @@ -1052,10 +1040,7 @@ func trimLeftASCII(s []byte, as *asciiSet) []byte { func trimLeftUnicode(s []byte, cutset string) []byte { for len(s) > 0 { - r, n := rune(s[0]), 1 - if r >= utf8.RuneSelf { - r, n = utf8.DecodeRune(s) - } + r, n := utf8.DecodeRune(s) if !containsRune(cutset, r) { break } @@ -1251,19 +1236,10 @@ hasUnicode: t = t[i:] for len(s) != 0 && len(t) != 0 { // Extract first rune from each. - var sr, tr rune - if s[0] < utf8.RuneSelf { - sr, s = rune(s[0]), s[1:] - } else { - r, size := utf8.DecodeRune(s) - sr, s = r, s[size:] - } - if t[0] < utf8.RuneSelf { - tr, t = rune(t[0]), t[1:] - } else { - r, size := utf8.DecodeRune(t) - tr, t = r, t[size:] - } + sr, size := utf8.DecodeRune(s) + s = s[size:] + tr, size := utf8.DecodeRune(t) + t = t[size:] // If they match, keep going; if not, return false. 
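As a quick usage illustration of the new netip.Prefix.Compare method added above (a minimal sketch, not part of the CL; it relies only on the ordering documented later in this change: masked address first, then prefix length, then unmasked address):

	package main

	import (
		"fmt"
		"net/netip"
		"slices"
	)

	func main() {
		prefixes := []netip.Prefix{
			netip.MustParsePrefix("1.2.3.0/28"),
			netip.MustParsePrefix("fe80::/64"),
			netip.MustParsePrefix("1.2.3.0/24"),
		}
		// The method expression is usable directly as a slices.SortFunc comparison.
		slices.SortFunc(prefixes, netip.Prefix.Compare)
		fmt.Println(prefixes) // [1.2.3.0/24 1.2.3.0/28 fe80::/64]
	}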
diff --git a/src/bytes/iter.go b/src/bytes/iter.go index b2abb2c9ba3dc6..a4ece881d20fa1 100644 --- a/src/bytes/iter.go +++ b/src/bytes/iter.go @@ -117,11 +117,7 @@ func FieldsFuncSeq(s []byte, f func(rune) bool) iter.Seq[[]byte] { return func(yield func([]byte) bool) { start := -1 for i := 0; i < len(s); { - size := 1 - r := rune(s[i]) - if r >= utf8.RuneSelf { - r, size = utf8.DecodeRune(s[i:]) - } + r, size := utf8.DecodeRune(s[i:]) if f(r) { if start >= 0 { if !yield(s[start:i:i]) { diff --git a/src/cmd/asm/internal/asm/testdata/loong64enc1.s b/src/cmd/asm/internal/asm/testdata/loong64enc1.s index 72e65734666c2a..63676cc785967c 100644 --- a/src/cmd/asm/internal/asm/testdata/loong64enc1.s +++ b/src/cmd/asm/internal/asm/testdata/loong64enc1.s @@ -260,6 +260,28 @@ lable2: MOVV FCC0, R4 // 04dc1401 MOVV R4, FCC0 // 80d81401 + // LDPTR.{W/D} and STPTR.{W/D} instructions + MOVWP R5, -32768(R4) // 85008025 + MOVWP R5, 32764(R4) // 85fc7f25 + MOVWP R5, 32(R4) // 85200025 + MOVWP R5, 4(R4) // 85040025 + MOVWP R5, (R4) // 85000025 + MOVVP R5, -32768(R4) // 85008027 + MOVVP R5, 32764(R4) // 85fc7f27 + MOVVP R5, 32(R4) // 85200027 + MOVVP R5, 4(R4) // 85040027 + MOVVP R5, (R4) // 85000027 + MOVWP -32768(R5), R4 // a4008024 + MOVWP 32764(R5), R4 // a4fc7f24 + MOVWP 32(R5), R4 // a4200024 + MOVWP 4(R5), R4 // a4040024 + MOVWP (R5), R4 // a4000024 + MOVVP -32768(R5), R4 // a4008026 + MOVVP 32764(R5), R4 // a4fc7f26 + MOVVP 32(R5), R4 // a4200026 + MOVVP 4(R5), R4 // a4040026 + MOVVP (R5), R4 // a4000026 + // Loong64 atomic memory access instructions AMSWAPB R14, (R13), R12 // ac395c38 AMSWAPH R14, (R13), R12 // acb95c38 diff --git a/src/cmd/compile/internal/dwarfgen/dwarf.go b/src/cmd/compile/internal/dwarfgen/dwarf.go index 6ab39d2aaad1cf..9d975e0bc1ac7d 100644 --- a/src/cmd/compile/internal/dwarfgen/dwarf.go +++ b/src/cmd/compile/internal/dwarfgen/dwarf.go @@ -128,14 +128,29 @@ func Info(ctxt *obj.Link, fnsym *obj.LSym, infosym *obj.LSym, curfn obj.Func) (s // already referenced by a dwarf var, attach an R_USETYPE relocation to // the function symbol to insure that the type included in DWARF // processing during linking. + // Do the same with R_USEIFACE relocations from the function symbol for the + // same reason. + // All these R_USETYPE relocations are only looked at if the function + // survives deadcode elimination in the linker. 
typesyms := []*obj.LSym{} for t := range fnsym.Func().Autot { typesyms = append(typesyms, t) } + for i := range fnsym.R { + if fnsym.R[i].Type == objabi.R_USEIFACE && !strings.HasPrefix(fnsym.R[i].Sym.Name, "go:itab.") { + // Types referenced through itab will be referenced from somewhere else + typesyms = append(typesyms, fnsym.R[i].Sym) + } + } slices.SortFunc(typesyms, func(a, b *obj.LSym) int { return strings.Compare(a.Name, b.Name) }) + var lastsym *obj.LSym for _, sym := range typesyms { + if sym == lastsym { + continue + } + lastsym = sym infosym.AddRel(ctxt, obj.Reloc{Type: objabi.R_USETYPE, Sym: sym}) } fnsym.Func().Autot = nil diff --git a/src/cmd/compile/internal/loong64/ssa.go b/src/cmd/compile/internal/loong64/ssa.go index 895eadd07261d6..3959f8a7c11eb9 100644 --- a/src/cmd/compile/internal/loong64/ssa.go +++ b/src/cmd/compile/internal/loong64/ssa.go @@ -185,7 +185,7 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { ssa.OpLOONG64MULD, ssa.OpLOONG64DIVF, ssa.OpLOONG64DIVD, - ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU, + ssa.OpLOONG64MULV, ssa.OpLOONG64MULHV, ssa.OpLOONG64MULHVU, ssa.OpLOONG64MULH, ssa.OpLOONG64MULHU, ssa.OpLOONG64DIVV, ssa.OpLOONG64REMV, ssa.OpLOONG64DIVVU, ssa.OpLOONG64REMVU, ssa.OpLOONG64FCOPYSGD: p := s.Prog(v.Op.Asm()) @@ -560,28 +560,97 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Sym = ir.Syms.Duffzero p.To.Offset = v.AuxInt case ssa.OpLOONG64LoweredZero: - // MOVx R0, (Rarg0) - // ADDV $sz, Rarg0 - // BGEU Rarg1, Rarg0, -2(PC) - mov, sz := largestMove(v.AuxInt) - p := s.Prog(mov) - p.From.Type = obj.TYPE_REG - p.From.Reg = loong64.REGZERO - p.To.Type = obj.TYPE_MEM - p.To.Reg = v.Args[0].Reg() + ptrReg := v.Args[0].Reg() + n := v.AuxInt + if n < 16 { + v.Fatalf("Zero too small %d", n) + } - p2 := s.Prog(loong64.AADDVU) - p2.From.Type = obj.TYPE_CONST - p2.From.Offset = sz - p2.To.Type = obj.TYPE_REG - p2.To.Reg = v.Args[0].Reg() + // Generate zeroing instructions. + var off int64 + for n >= 8 { + // MOVV ZR, off(ptrReg) + zero8(s, ptrReg, off) + off += 8 + n -= 8 + } + if n != 0 { + // MOVV ZR, off+n-8(ptrReg) + zero8(s, ptrReg, off+n-8) + } + case ssa.OpLOONG64LoweredZeroLoop: + ptrReg := v.Args[0].Reg() + countReg := v.RegTmp() + var off int64 + n := v.AuxInt + loopSize := int64(64) + if n < 3*loopSize { + // - a loop count of 0 won't work. + // - a loop count of 1 is useless. + // - a loop count of 2 is a code size ~tie + // 4 instructions to implement the loop + // 8 instructions in the loop body + // vs + // 16 instructions in the straightline code + // Might as well use straightline code. + v.Fatalf("ZeroLoop size too small %d", n) + } - p3 := s.Prog(loong64.ABGEU) - p3.From.Type = obj.TYPE_REG - p3.From.Reg = v.Args[1].Reg() - p3.Reg = v.Args[0].Reg() - p3.To.Type = obj.TYPE_BRANCH - p3.To.SetTarget(p) + // Put iteration count in a register. + // MOVV $n/loopSize, countReg + p := s.Prog(loong64.AMOVV) + p.From.Type = obj.TYPE_CONST + p.From.Offset = n / loopSize + p.To.Type = obj.TYPE_REG + p.To.Reg = countReg + cntInit := p + + // Zero loopSize bytes starting at ptrReg. + for range loopSize / 8 { + // MOVV ZR, off(ptrReg) + zero8(s, ptrReg, off) + off += 8 + } + + // Increment ptrReg by loopSize. + // ADDV $loopSize, ptrReg + p = s.Prog(loong64.AADDV) + p.From.Type = obj.TYPE_CONST + p.From.Offset = loopSize + p.To.Type = obj.TYPE_REG + p.To.Reg = ptrReg + + // Decrement loop count.
+ // SUBV $1, countReg + p = s.Prog(loong64.ASUBV) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 1 + p.To.Type = obj.TYPE_REG + p.To.Reg = countReg + + // Jump to loop header if we're not done yet. + // BNE countReg, loop header + p = s.Prog(loong64.ABNE) + p.From.Type = obj.TYPE_REG + p.From.Reg = countReg + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(cntInit.Link) + + // Multiples of the loop size are now done. + n %= loopSize + + off = 0 + // Write any fractional portion. + for n >= 8 { + // MOVV ZR, off(ptrReg) + zero8(s, ptrReg, off) + off += 8 + n -= 8 + } + + if n != 0 { + zero8(s, ptrReg, off+n-8) + } case ssa.OpLOONG64DUFFCOPY: p := s.Prog(obj.ADUFFCOPY) @@ -590,42 +659,119 @@ func ssaGenValue(s *ssagen.State, v *ssa.Value) { p.To.Sym = ir.Syms.Duffcopy p.To.Offset = v.AuxInt case ssa.OpLOONG64LoweredMove: - // MOVx (Rarg1), Rtmp - // MOVx Rtmp, (Rarg0) - // ADDV $sz, Rarg1 - // ADDV $sz, Rarg0 - // BGEU Rarg2, Rarg0, -4(PC) - mov, sz := largestMove(v.AuxInt) - p := s.Prog(mov) - p.From.Type = obj.TYPE_MEM - p.From.Reg = v.Args[1].Reg() + dstReg := v.Args[0].Reg() + srcReg := v.Args[1].Reg() + if dstReg == srcReg { + break + } + tmpReg := int16(loong64.REG_R20) + n := v.AuxInt + if n < 16 { + v.Fatalf("Move too small %d", n) + } + + var off int64 + for n >= 8 { + // MOVV off(srcReg), tmpReg + // MOVV tmpReg, off(dstReg) + move8(s, srcReg, dstReg, tmpReg, off) + off += 8 + n -= 8 + } + + if n != 0 { + // MOVV off+n-8(srcReg), tmpReg + // MOVV tmpReg, off+n-8(dstReg) + move8(s, srcReg, dstReg, tmpReg, off+n-8) + } + case ssa.OpLOONG64LoweredMoveLoop: + dstReg := v.Args[0].Reg() + srcReg := v.Args[1].Reg() + if dstReg == srcReg { + break + } + countReg := int16(loong64.REG_R20) + tmpReg := int16(loong64.REG_R21) + var off int64 + n := v.AuxInt + loopSize := int64(64) + if n < 3*loopSize { + // - a loop count of 0 won't work. + // - a loop count of 1 is useless. + // - a loop count of 2 is a code size ~tie + // 4 instructions to implement the loop + // 8 instructions in the loop body + // vs + // 16 instructions in the straightline code + // Might as well use straightline code. + v.Fatalf("MoveLoop size too small %d", n) + } + + // Put iteration count in a register. + // MOVV $n/loopSize, countReg + p := s.Prog(loong64.AMOVV) + p.From.Type = obj.TYPE_CONST + p.From.Offset = n / loopSize p.To.Type = obj.TYPE_REG - p.To.Reg = loong64.REGTMP + p.To.Reg = countReg + cntInit := p + + // Move loopSize bytes starting at srcReg to dstReg. + for range loopSize / 8 { + // MOVV off(srcReg), tmpReg + // MOVV tmpReg, off(dstReg) + move8(s, srcReg, dstReg, tmpReg, off) + off += 8 + } - p2 := s.Prog(mov) - p2.From.Type = obj.TYPE_REG - p2.From.Reg = loong64.REGTMP - p2.To.Type = obj.TYPE_MEM - p2.To.Reg = v.Args[0].Reg() + // Increment srcReg and dstReg by loopSize. + // ADDV $loopSize, srcReg + p = s.Prog(loong64.AADDV) + p.From.Type = obj.TYPE_CONST + p.From.Offset = loopSize + p.To.Type = obj.TYPE_REG + p.To.Reg = srcReg + // ADDV $loopSize, dstReg + p = s.Prog(loong64.AADDV) + p.From.Type = obj.TYPE_CONST + p.From.Offset = loopSize + p.To.Type = obj.TYPE_REG + p.To.Reg = dstReg - p3 := s.Prog(loong64.AADDVU) - p3.From.Type = obj.TYPE_CONST - p3.From.Offset = sz - p3.To.Type = obj.TYPE_REG - p3.To.Reg = v.Args[1].Reg() + // Decrement loop count.
+ // SUBV $1, countReg + p = s.Prog(loong64.ASUBV) + p.From.Type = obj.TYPE_CONST + p.From.Offset = 1 + p.To.Type = obj.TYPE_REG + p.To.Reg = countReg - p4 := s.Prog(loong64.AADDVU) - p4.From.Type = obj.TYPE_CONST - p4.From.Offset = sz - p4.To.Type = obj.TYPE_REG - p4.To.Reg = v.Args[0].Reg() + // Jump to loop header if we're not done yet. + // BNE countReg, loop header + p = s.Prog(loong64.ABNE) + p.From.Type = obj.TYPE_REG + p.From.Reg = countReg + p.To.Type = obj.TYPE_BRANCH + p.To.SetTarget(cntInit.Link) + + // Multiples of the loop size are now done. + n %= loopSize + + off = 0 + // Copy any fractional portion. + for n >= 8 { + // MOVV off(srcReg), tmpReg + // MOVV tmpReg, off(dstReg) + move8(s, srcReg, dstReg, tmpReg, off) + off += 8 + n -= 8 + } - p5 := s.Prog(loong64.ABGEU) - p5.From.Type = obj.TYPE_REG - p5.From.Reg = v.Args[2].Reg() - p5.Reg = v.Args[1].Reg() - p5.To.Type = obj.TYPE_BRANCH - p5.To.SetTarget(p) + if n != 0 { + // MOVV off+n-8(srcReg), tmpReg + // MOVV tmpReg, off+n-8(dstReg) + move8(s, srcReg, dstReg, tmpReg, off+n-8) + } case ssa.OpLOONG64CALLstatic, ssa.OpLOONG64CALLclosure, ssa.OpLOONG64CALLinter: s.Call(v) @@ -1155,3 +1301,32 @@ func spillArgReg(pp *objw.Progs, p *obj.Prog, f *ssa.Func, t *types.Type, reg in p.Pos = p.Pos.WithNotStmt() return p } + +// move8 copies 8 bytes at src+off to dst+off. +func move8(s *ssagen.State, src, dst, tmp int16, off int64) { + // MOVV off(src), tmp + ld := s.Prog(loong64.AMOVV) + ld.From.Type = obj.TYPE_MEM + ld.From.Reg = src + ld.From.Offset = off + ld.To.Type = obj.TYPE_REG + ld.To.Reg = tmp + // MOVV tmp, off(dst) + st := s.Prog(loong64.AMOVV) + st.From.Type = obj.TYPE_REG + st.From.Reg = tmp + st.To.Type = obj.TYPE_MEM + st.To.Reg = dst + st.To.Offset = off +} + +// zero8 zeroes 8 bytes at reg+off. +func zero8(s *ssagen.State, reg int16, off int64) { + // MOVV ZR, off(reg) + p := s.Prog(loong64.AMOVV) + p.From.Type = obj.TYPE_REG + p.From.Reg = loong64.REGZERO + p.To.Type = obj.TYPE_MEM + p.To.Reg = reg + p.To.Offset = off +} diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules index ca04bdcd42307d..3fa4f363f65515 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64.rules +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64.rules @@ -17,8 +17,8 @@ (Hmul64 ...) => (MULHV ...) (Hmul64u ...) => (MULHVU ...) -(Hmul32 x y) => (SRAVconst (MULV (SignExt32to64 x) (SignExt32to64 y)) [32]) -(Hmul32u x y) => (SRLVconst (MULV (ZeroExt32to64 x) (ZeroExt32to64 y)) [32]) +(Hmul32 ...) => (MULH ...) +(Hmul32u ...) => (MULHU ...) (Div64 x y) => (DIVV x y) (Div64u ...) => (DIVVU ...)
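The straight-line Zero and Move lowerings above finish any remainder that is not a multiple of 8 with a single overlapping 8-byte access at off+n-8 instead of narrower operations. A standalone sketch of the offsets this produces (the zeroOffsets helper is hypothetical, for illustration only, not part of the CL):

	package main

	import "fmt"

	// zeroOffsets mirrors the straight-line LoweredZero case above:
	// 8-byte stores at increasing offsets, then one overlapping store
	// for any tail smaller than 8 bytes.
	func zeroOffsets(n int64) (offs []int64) {
		var off int64
		for n >= 8 {
			offs = append(offs, off)
			off += 8
			n -= 8
		}
		if n != 0 {
			offs = append(offs, off+n-8) // overlaps the previous store
		}
		return offs
	}

	func main() {
		fmt.Println(zeroOffsets(20)) // [0 8 12]: bytes 12-15 are written twice
	}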
@@ -373,24 +373,8 @@ (MOVVstore [8] ptr (MOVVconst [0]) (MOVVstore ptr (MOVVconst [0]) mem)) -// strip off fractional word zeroing -(Zero [s] ptr mem) && s%8 != 0 && s > 16 => - (Zero [s%8] - (OffPtr ptr [s-s%8]) - (Zero [s-s%8] ptr mem)) - -// medium zeroing uses a duff device -(Zero [s] ptr mem) - && s%8 == 0 && s > 16 && s <= 8*128 => - (DUFFZERO [8 * (128 - s/8)] ptr mem) - -// large zeroing uses a loop -(Zero [s] ptr mem) - && s%8 == 0 && s > 8*128 => - (LoweredZero - ptr - (ADDVconst ptr [s-8]) - mem) +(Zero [s] ptr mem) && s > 16 && s < 192 => (LoweredZero [s] ptr mem) +(Zero [s] ptr mem) && s >= 192 => (LoweredZeroLoop [s] ptr mem) // moves (Move [0] _ _ mem) => mem @@ -435,34 +419,8 @@ (MOVVstore [8] dst (MOVVload [8] src mem) (MOVVstore dst (MOVVload src mem) mem)) -// strip off fractional word move -(Move [s] dst src mem) && s%8 != 0 && s > 16 => - (Move [s%8] - (OffPtr dst [s-s%8]) - (OffPtr src [s-s%8]) - (Move [s-s%8] dst src mem)) - -// medium move uses a duff device -(Move [s] dst src mem) - && s%8 == 0 && s > 16 && s <= 8*128 - && logLargeCopy(v, s) => - (DUFFCOPY [16 * (128 - s/8)] dst src mem) -// 16 and 128 are magic constants. 16 is the number of bytes to encode: -// MOVV (R20), R30 -// ADDV $8, R20 -// MOVV R30, (R21) -// ADDV $8, R21 -// and 128 is the number of such blocks. See runtime/duff_loong64.s:duffcopy. - -// large move uses a loop -(Move [s] dst src mem) - && s%8 == 0 && s > 1024 && logLargeCopy(v, s) => - (LoweredMove - dst - src - (ADDVconst src [s-8]) - mem) - +(Move [s] dst src mem) && s > 16 && s < 192 && logLargeCopy(v, s) => (LoweredMove [s] dst src mem) +(Move [s] dst src mem) && s >= 192 && logLargeCopy(v, s) => (LoweredMoveLoop [s] dst src mem) // float <=> int register moves, with no conversion. // These come up when compiling math.{Float64bits, Float64frombits, Float32bits, Float32frombits}. @@ -471,6 +429,10 @@ (MOVWUload [off] {sym} ptr (MOVFstore [off] {sym} ptr val _)) => (ZeroExt32to64 (MOVWfpgp val)) (MOVFload [off] {sym} ptr (MOVWstore [off] {sym} ptr val _)) => (MOVWgpfp val) +// If the memory load and store operations use the same ptr, they are combined into a direct move operation between registers. +(MOV(V|W|H|B)load [off] {sym} ptr (MOV(V|W|H|B)store [off] {sym} ptr x _)) => (MOV(V|W|H|B)reg x) +(MOV(W|H|B)Uload [off] {sym} ptr (MOV(W|H|B)store [off] {sym} ptr x _)) => (MOV(W|H|B)Ureg x) + // Similarly for stores, if we see a store after FPR <=> GPR move, then redirect store to use the other register set. 
(MOVVstore [off] {sym} ptr (MOVVfpgp val) mem) => (MOVDstore [off] {sym} ptr val mem) (MOVDstore [off] {sym} ptr (MOVVgpfp val) mem) => (MOVVstore [off] {sym} ptr val mem) diff --git a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go index ccd9721498232a..cc6ae8fb8e65de 100644 --- a/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go +++ b/src/cmd/compile/internal/ssa/_gen/LOONG64Ops.go @@ -197,6 +197,8 @@ func init() { {name: "MULV", argLength: 2, reg: gp21, asm: "MULV", commutative: true, typ: "Int64"}, // arg0 * arg1 {name: "MULHV", argLength: 2, reg: gp21, asm: "MULHV", commutative: true, typ: "Int64"}, // (arg0 * arg1) >> 64, signed {name: "MULHVU", argLength: 2, reg: gp21, asm: "MULHVU", commutative: true, typ: "UInt64"}, // (arg0 * arg1) >> 64, unsigned + {name: "MULH", argLength: 2, reg: gp21, asm: "MULH", commutative: true, typ: "Int32"}, // (arg0 * arg1) >> 32, signed + {name: "MULHU", argLength: 2, reg: gp21, asm: "MULHU", commutative: true, typ: "UInt32"}, // (arg0 * arg1) >> 32, unsigned {name: "DIVV", argLength: 2, reg: gp21, asm: "DIVV", typ: "Int64"}, // arg0 / arg1, signed {name: "DIVVU", argLength: 2, reg: gp21, asm: "DIVVU", typ: "UInt64"}, // arg0 / arg1, unsigned {name: "REMV", argLength: 2, reg: gp21, asm: "REMV", typ: "Int64"}, // arg0 / arg1, signed @@ -376,6 +378,21 @@ func init() { faultOnNilArg0: true, }, + // medium zeroing + // arg0 = address of memory to zero + // arg1 = mem + // auxint = number of bytes to zero + // returns mem + { + name: "LoweredZero", + aux: "Int64", + argLength: 2, + reg: regInfo{ + inputs: []regMask{gp}, + }, + faultOnNilArg0: true, + }, + // duffcopy // arg0 = address of dst memory (in R21, changed as side effect) // arg1 = address of src memory (in R20, changed as side effect) @@ -395,48 +412,57 @@ func init() { faultOnNilArg1: true, }, - // large or unaligned zeroing - // arg0 = address of memory to zero (in R20, changed as side effect) - // arg1 = address of the last element to zero + // large zeroing + // arg0 = address of memory to zero + // arg1 = mem + // auxint = number of bytes to zero + // returns mem + { + name: "LoweredZeroLoop", + aux: "Int64", + argLength: 2, + reg: regInfo{ + inputs: []regMask{gp}, + clobbersArg0: true, + }, + faultOnNilArg0: true, + needIntTemp: true, + }, + + // medium copying + // arg0 = address of dst memory + // arg1 = address of src memory // arg2 = mem - // auxint = alignment + // auxint = number of bytes to copy // returns mem - // MOVx R0, (R20) - // ADDV $sz, R20 - // BGEU Rarg1, R20, -2(PC) { - name: "LoweredZero", + name: "LoweredMove", aux: "Int64", argLength: 3, reg: regInfo{ - inputs: []regMask{buildReg("R20"), gp}, + inputs: []regMask{gp &^ buildReg("R20"), gp &^ buildReg("R20")}, clobbers: buildReg("R20"), }, - typ: "Mem", faultOnNilArg0: true, + faultOnNilArg1: true, }, - // large or unaligned move - // arg0 = address of dst memory (in R21, changed as side effect) - // arg1 = address of src memory (in R20, changed as side effect) - // arg2 = address of the last element of src - // arg3 = mem - // auxint = alignment + // large copying + // arg0 = address of dst memory + // arg1 = address of src memory + // arg2 = mem + // auxint = number of bytes to copy // returns mem - // MOVx (R20), Rtmp - // MOVx Rtmp, (R21) - // ADDV $sz, R20 - // ADDV $sz, R21 - // BGEU Rarg2, R20, -4(PC) { - name: "LoweredMove", + name: "LoweredMoveLoop", aux: "Int64", - argLength: 4, + argLength: 3, reg: regInfo{ - inputs: []regMask{buildReg("R21"), buildReg("R20"), 
gp}, - clobbers: buildReg("R20 R21"), + inputs: []regMask{gp &^ buildReg("R20 R21"), gp &^ buildReg("R20 R21")}, + clobbers: buildReg("R20 R21"), + clobbersArg0: true, + clobbersArg1: true, }, - typ: "Mem", faultOnNilArg0: true, faultOnNilArg1: true, }, diff --git a/src/cmd/compile/internal/ssa/opGen.go b/src/cmd/compile/internal/ssa/opGen.go index 126682b9866849..f42d64228fae3a 100644 --- a/src/cmd/compile/internal/ssa/opGen.go +++ b/src/cmd/compile/internal/ssa/opGen.go @@ -1795,6 +1795,8 @@ const ( OpLOONG64MULV OpLOONG64MULHV OpLOONG64MULHVU + OpLOONG64MULH + OpLOONG64MULHU OpLOONG64DIVV OpLOONG64DIVVU OpLOONG64REMV @@ -1923,9 +1925,11 @@ const ( OpLOONG64CALLclosure OpLOONG64CALLinter OpLOONG64DUFFZERO - OpLOONG64DUFFCOPY OpLOONG64LoweredZero + OpLOONG64DUFFCOPY + OpLOONG64LoweredZeroLoop OpLOONG64LoweredMove + OpLOONG64LoweredMoveLoop OpLOONG64LoweredAtomicLoad8 OpLOONG64LoweredAtomicLoad32 OpLOONG64LoweredAtomicLoad64 @@ -24138,6 +24142,36 @@ var opcodeTable = [...]opInfo{ }, }, }, + { + name: "MULH", + argLen: 2, + commutative: true, + asm: loong64.AMULH, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741817}, // ZERO R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, + { + name: "MULHU", + argLen: 2, + commutative: true, + asm: loong64.AMULHU, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1073741816}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1073741817}, // ZERO R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 g R23 R24 R25 R26 R27 R28 R29 R31 + }, + outputs: []outputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "DIVV", argLen: 2, @@ -25912,6 +25946,17 @@ var opcodeTable = [...]opInfo{ clobbers: 524290, // R1 R20 }, }, + { + name: "LoweredZero", + auxType: auxInt64, + argLen: 2, + faultOnNilArg0: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + }, + }, { name: "DUFFCOPY", auxType: auxInt64, @@ -25927,31 +25972,46 @@ var opcodeTable = [...]opInfo{ }, }, { - name: "LoweredZero", + name: "LoweredZeroLoop", + auxType: auxInt64, + argLen: 2, + needIntTemp: true, + faultOnNilArg0: true, + reg: regInfo{ + inputs: []inputInfo{ + {0, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + }, + clobbersArg0: true, + }, + }, + { + name: "LoweredMove", auxType: auxInt64, argLen: 3, faultOnNilArg0: true, + faultOnNilArg1: true, reg: regInfo{ inputs: []inputInfo{ - {0, 524288}, // R20 - {1, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + {0, 1071120376}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R21 R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1071120376}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R21 R23 R24 R25 R26 R27 R28 R29 R31 }, clobbers: 524288, // R20 }, }, { - name: "LoweredMove", + name: "LoweredMoveLoop", auxType: auxInt64, - argLen: 4, + argLen: 3, faultOnNilArg0: true, faultOnNilArg1: 
true, reg: regInfo{ inputs: []inputInfo{ - {0, 1048576}, // R21 - {1, 524288}, // R20 - {2, 1071644664}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R20 R21 R23 R24 R25 R26 R27 R28 R29 R31 + {0, 1070071800}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R23 R24 R25 R26 R27 R28 R29 R31 + {1, 1070071800}, // R4 R5 R6 R7 R8 R9 R10 R11 R12 R13 R14 R15 R16 R17 R18 R19 R23 R24 R25 R26 R27 R28 R29 R31 }, - clobbers: 1572864, // R20 R21 + clobbers: 1572864, // R20 R21 + clobbersArg0: true, + clobbersArg1: true, }, }, { diff --git a/src/cmd/compile/internal/ssa/rewriteLOONG64.go b/src/cmd/compile/internal/ssa/rewriteLOONG64.go index eb134789f74131..5890fe050a222b 100644 --- a/src/cmd/compile/internal/ssa/rewriteLOONG64.go +++ b/src/cmd/compile/internal/ssa/rewriteLOONG64.go @@ -296,9 +296,11 @@ func rewriteValueLOONG64(v *Value) bool { v.Op = OpLOONG64LoweredGetClosurePtr return true case OpHmul32: - return rewriteValueLOONG64_OpHmul32(v) + v.Op = OpLOONG64MULH + return true case OpHmul32u: - return rewriteValueLOONG64_OpHmul32u(v) + v.Op = OpLOONG64MULHU + return true case OpHmul64: v.Op = OpLOONG64MULHV return true @@ -1576,50 +1578,6 @@ func rewriteValueLOONG64_OpEqPtr(v *Value) bool { return true } } -func rewriteValueLOONG64_OpHmul32(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Hmul32 x y) - // result: (SRAVconst (MULV (SignExt32to64 x) (SignExt32to64 y)) [32]) - for { - x := v_0 - y := v_1 - v.reset(OpLOONG64SRAVconst) - v.AuxInt = int64ToAuxInt(32) - v0 := b.NewValue0(v.Pos, OpLOONG64MULV, typ.Int64) - v1 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpSignExt32to64, typ.Int64) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) - return true - } -} -func rewriteValueLOONG64_OpHmul32u(v *Value) bool { - v_1 := v.Args[1] - v_0 := v.Args[0] - b := v.Block - typ := &b.Func.Config.Types - // match: (Hmul32u x y) - // result: (SRLVconst (MULV (ZeroExt32to64 x) (ZeroExt32to64 y)) [32]) - for { - x := v_0 - y := v_1 - v.reset(OpLOONG64SRLVconst) - v.AuxInt = int64ToAuxInt(32) - v0 := b.NewValue0(v.Pos, OpLOONG64MULV, typ.Int64) - v1 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v1.AddArg(x) - v2 := b.NewValue0(v.Pos, OpZeroExt32to64, typ.UInt64) - v2.AddArg(y) - v0.AddArg2(v1, v2) - v.AddArg(v0) - return true - } -} func rewriteValueLOONG64_OpIsInBounds(v *Value) bool { v_1 := v.Args[1] v_0 := v.Args[0] @@ -2371,6 +2329,23 @@ func rewriteValueLOONG64_OpLOONG64MOVBUload(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + // match: (MOVBUload [off] {sym} ptr (MOVBstore [off] {sym} ptr x _)) + // result: (MOVBUreg x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpLOONG64MOVBstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpLOONG64MOVBUreg) + v.AddArg(x) + return true + } // match: (MOVBUload [off1] {sym} (ADDVconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVBUload [off1+int32(off2)] {sym} ptr mem) @@ -2648,6 +2623,23 @@ func rewriteValueLOONG64_OpLOONG64MOVBload(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + // match: (MOVBload [off] {sym} ptr (MOVBstore [off] {sym} ptr x _)) + // result: (MOVBreg x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if 
v_1.Op != OpLOONG64MOVBstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpLOONG64MOVBreg) + v.AddArg(x) + return true + } // match: (MOVBload [off1] {sym} (ADDVconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVBload [off1+int32(off2)] {sym} ptr mem) @@ -3568,6 +3560,23 @@ func rewriteValueLOONG64_OpLOONG64MOVHUload(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + // match: (MOVHUload [off] {sym} ptr (MOVHstore [off] {sym} ptr x _)) + // result: (MOVHUreg x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpLOONG64MOVHstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpLOONG64MOVHUreg) + v.AddArg(x) + return true + } // match: (MOVHUload [off1] {sym} (ADDVconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVHUload [off1+int32(off2)] {sym} ptr mem) @@ -3807,6 +3816,23 @@ func rewriteValueLOONG64_OpLOONG64MOVHload(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + // match: (MOVHload [off] {sym} ptr (MOVHstore [off] {sym} ptr x _)) + // result: (MOVHreg x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpLOONG64MOVHstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpLOONG64MOVHreg) + v.AddArg(x) + return true + } // match: (MOVHload [off1] {sym} (ADDVconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVHload [off1+int32(off2)] {sym} ptr mem) @@ -4250,6 +4276,23 @@ func rewriteValueLOONG64_OpLOONG64MOVVload(v *Value) bool { v.AddArg(val) return true } + // match: (MOVVload [off] {sym} ptr (MOVVstore [off] {sym} ptr x _)) + // result: (MOVVreg x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpLOONG64MOVVstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpLOONG64MOVVreg) + v.AddArg(x) + return true + } // match: (MOVVload [off1] {sym} (ADDVconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVVload [off1+int32(off2)] {sym} ptr mem) @@ -4558,6 +4601,23 @@ func rewriteValueLOONG64_OpLOONG64MOVWUload(v *Value) bool { v.AddArg(v0) return true } + // match: (MOVWUload [off] {sym} ptr (MOVWstore [off] {sym} ptr x _)) + // result: (MOVWUreg x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpLOONG64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpLOONG64MOVWUreg) + v.AddArg(x) + return true + } // match: (MOVWUload [off1] {sym} (ADDVconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVWUload [off1+int32(off2)] {sym} ptr mem) @@ -4830,6 +4890,23 @@ func rewriteValueLOONG64_OpLOONG64MOVWload(v *Value) bool { v_0 := v.Args[0] b := v.Block config := b.Func.Config + // match: (MOVWload [off] {sym} ptr (MOVWstore [off] {sym} ptr x _)) 
+ // result: (MOVWreg x) + for { + off := auxIntToInt32(v.AuxInt) + sym := auxToSym(v.Aux) + ptr := v_0 + if v_1.Op != OpLOONG64MOVWstore || auxIntToInt32(v_1.AuxInt) != off || auxToSym(v_1.Aux) != sym { + break + } + x := v_1.Args[1] + if ptr != v_1.Args[0] { + break + } + v.reset(OpLOONG64MOVWreg) + v.AddArg(x) + return true + } // match: (MOVWload [off1] {sym} (ADDVconst [off2] ptr) mem) // cond: is32Bit(int64(off1)+off2) && (ptr.Op != OpSB || !config.ctxt.Flag_dynlink) // result: (MOVWload [off1+int32(off2)] {sym} ptr mem) @@ -9056,62 +9133,35 @@ func rewriteValueLOONG64_OpMove(v *Value) bool { return true } // match: (Move [s] dst src mem) - // cond: s%8 != 0 && s > 16 - // result: (Move [s%8] (OffPtr dst [s-s%8]) (OffPtr src [s-s%8]) (Move [s-s%8] dst src mem)) - for { - s := auxIntToInt64(v.AuxInt) - dst := v_0 - src := v_1 - mem := v_2 - if !(s%8 != 0 && s > 16) { - break - } - v.reset(OpMove) - v.AuxInt = int64ToAuxInt(s % 8) - v0 := b.NewValue0(v.Pos, OpOffPtr, dst.Type) - v0.AuxInt = int64ToAuxInt(s - s%8) - v0.AddArg(dst) - v1 := b.NewValue0(v.Pos, OpOffPtr, src.Type) - v1.AuxInt = int64ToAuxInt(s - s%8) - v1.AddArg(src) - v2 := b.NewValue0(v.Pos, OpMove, types.TypeMem) - v2.AuxInt = int64ToAuxInt(s - s%8) - v2.AddArg3(dst, src, mem) - v.AddArg3(v0, v1, v2) - return true - } - // match: (Move [s] dst src mem) - // cond: s%8 == 0 && s > 16 && s <= 8*128 && logLargeCopy(v, s) - // result: (DUFFCOPY [16 * (128 - s/8)] dst src mem) + // cond: s > 16 && s < 192 && logLargeCopy(v, s) + // result: (LoweredMove [s] dst src mem) for { s := auxIntToInt64(v.AuxInt) dst := v_0 src := v_1 mem := v_2 - if !(s%8 == 0 && s > 16 && s <= 8*128 && logLargeCopy(v, s)) { + if !(s > 16 && s < 192 && logLargeCopy(v, s)) { break } - v.reset(OpLOONG64DUFFCOPY) - v.AuxInt = int64ToAuxInt(16 * (128 - s/8)) + v.reset(OpLOONG64LoweredMove) + v.AuxInt = int64ToAuxInt(s) v.AddArg3(dst, src, mem) return true } // match: (Move [s] dst src mem) - // cond: s%8 == 0 && s > 1024 && logLargeCopy(v, s) - // result: (LoweredMove dst src (ADDVconst src [s-8]) mem) + // cond: s >= 192 && logLargeCopy(v, s) + // result: (LoweredMoveLoop [s] dst src mem) for { s := auxIntToInt64(v.AuxInt) dst := v_0 src := v_1 mem := v_2 - if !(s%8 == 0 && s > 1024 && logLargeCopy(v, s)) { + if !(s >= 192 && logLargeCopy(v, s)) { break } - v.reset(OpLOONG64LoweredMove) - v0 := b.NewValue0(v.Pos, OpLOONG64ADDVconst, src.Type) - v0.AuxInt = int64ToAuxInt(s - 8) - v0.AddArg(src) - v.AddArg4(dst, src, v0, mem) + v.reset(OpLOONG64LoweredMoveLoop) + v.AuxInt = int64ToAuxInt(s) + v.AddArg3(dst, src, mem) return true } return false @@ -11497,56 +11547,33 @@ func rewriteValueLOONG64_OpZero(v *Value) bool { return true } // match: (Zero [s] ptr mem) - // cond: s%8 != 0 && s > 16 - // result: (Zero [s%8] (OffPtr ptr [s-s%8]) (Zero [s-s%8] ptr mem)) + // cond: s > 16 && s < 192 + // result: (LoweredZero [s] ptr mem) for { s := auxIntToInt64(v.AuxInt) ptr := v_0 mem := v_1 - if !(s%8 != 0 && s > 16) { + if !(s > 16 && s < 192) { break } - v.reset(OpZero) - v.AuxInt = int64ToAuxInt(s % 8) - v0 := b.NewValue0(v.Pos, OpOffPtr, ptr.Type) - v0.AuxInt = int64ToAuxInt(s - s%8) - v0.AddArg(ptr) - v1 := b.NewValue0(v.Pos, OpZero, types.TypeMem) - v1.AuxInt = int64ToAuxInt(s - s%8) - v1.AddArg2(ptr, mem) - v.AddArg2(v0, v1) - return true - } - // match: (Zero [s] ptr mem) - // cond: s%8 == 0 && s > 16 && s <= 8*128 - // result: (DUFFZERO [8 * (128 - s/8)] ptr mem) - for { - s := auxIntToInt64(v.AuxInt) - ptr := v_0 - mem := v_1 - if !(s%8 == 0 && s > 16 && s 
<= 8*128) { - break - } - v.reset(OpLOONG64DUFFZERO) - v.AuxInt = int64ToAuxInt(8 * (128 - s/8)) + v.reset(OpLOONG64LoweredZero) + v.AuxInt = int64ToAuxInt(s) v.AddArg2(ptr, mem) return true } // match: (Zero [s] ptr mem) - // cond: s%8 == 0 && s > 8*128 - // result: (LoweredZero ptr (ADDVconst ptr [s-8]) mem) + // cond: s >= 192 + // result: (LoweredZeroLoop [s] ptr mem) for { s := auxIntToInt64(v.AuxInt) ptr := v_0 mem := v_1 - if !(s%8 == 0 && s > 8*128) { + if !(s >= 192) { break } - v.reset(OpLOONG64LoweredZero) - v0 := b.NewValue0(v.Pos, OpLOONG64ADDVconst, ptr.Type) - v0.AuxInt = int64ToAuxInt(s - 8) - v0.AddArg(ptr) - v.AddArg3(ptr, v0, mem) + v.reset(OpLOONG64LoweredZeroLoop) + v.AuxInt = int64ToAuxInt(s) + v.AddArg2(ptr, mem) return true } return false diff --git a/src/cmd/compile/internal/test/inl_test.go b/src/cmd/compile/internal/test/inl_test.go index eda6084b48e7cc..a49cd767db43d8 100644 --- a/src/cmd/compile/internal/test/inl_test.go +++ b/src/cmd/compile/internal/test/inl_test.go @@ -125,6 +125,8 @@ func TestIntendedInlining(t *testing.T) { "assemble64", }, "unicode/utf8": { + "DecodeRune", + "DecodeRuneInString", "FullRune", "FullRuneInString", "RuneLen", diff --git a/src/cmd/dist/test.go b/src/cmd/dist/test.go index dfa4ffb522fa65..91e3716f07b1c5 100644 --- a/src/cmd/dist/test.go +++ b/src/cmd/dist/test.go @@ -677,7 +677,7 @@ func (t *tester) registerTests() { } t.registerStdTest(pkg) } - if t.race { + if t.race && !t.short { for _, pkg := range pkgs { if t.packageHasBenchmarks(pkg) { t.registerRaceBenchTest(pkg) diff --git a/src/cmd/gofmt/gofmt.go b/src/cmd/gofmt/gofmt.go index d91a75b1050e20..bbb8b4fd15c2f7 100644 --- a/src/cmd/gofmt/gofmt.go +++ b/src/cmd/gofmt/gofmt.go @@ -87,10 +87,8 @@ func initParserMode() { } } -func isGoFile(f fs.DirEntry) bool { - // ignore non-Go files - name := f.Name() - return !strings.HasPrefix(name, ".") && strings.HasSuffix(name, ".go") && !f.IsDir() +func isGoFilename(name string) bool { + return !strings.HasPrefix(name, ".") && strings.HasSuffix(name, ".go") } // A sequencer performs concurrent tasks that may write output, but emits that @@ -411,34 +409,30 @@ func gofmtMain(s *sequencer) { } for _, arg := range args { - switch info, err := os.Stat(arg); { - case err != nil: - s.AddReport(err) - case !info.IsDir(): - // Non-directory arguments are always formatted. - arg := arg - s.Add(fileWeight(arg, info), func(r *reporter) error { - return processFile(arg, info, nil, r) - }) - default: - // Directories are walked, ignoring non-Go files. - err := filepath.WalkDir(arg, func(path string, f fs.DirEntry, err error) error { - if err != nil || !isGoFile(f) { - return err - } - info, err := f.Info() - if err != nil { - s.AddReport(err) - return nil - } - s.Add(fileWeight(path, info), func(r *reporter) error { - return processFile(path, info, nil, r) - }) - return nil - }) + // Walk each given argument as a directory tree. + // If the argument is not a directory, it's always formatted as a Go file. + // If the argument is a directory, we walk it, ignoring non-Go files. 
+ if err := filepath.WalkDir(arg, func(path string, d fs.DirEntry, err error) error { + switch { + case err != nil: + return err + case d.IsDir(): + return nil // simply recurse into directories + case path == arg: + // non-directories given as explicit arguments are always formatted + case !isGoFilename(d.Name()): + return nil // skip walked non-Go files + } + info, err := d.Info() if err != nil { - s.AddReport(err) + return err } + s.Add(fileWeight(path, info), func(r *reporter) error { + return processFile(path, info, nil, r) + }) + return nil + }); err != nil { + s.AddReport(err) } } } diff --git a/src/cmd/gofmt/long_test.go b/src/cmd/gofmt/long_test.go index 21a01196cf6cc2..372e324387843d 100644 --- a/src/cmd/gofmt/long_test.go +++ b/src/cmd/gofmt/long_test.go @@ -115,7 +115,7 @@ func genFilenames(t *testing.T, filenames chan<- string) { return nil } // don't descend into testdata directories - if isGoFile(d) && !strings.Contains(filepath.ToSlash(filename), "/testdata/") { + if !d.IsDir() && isGoFilename(d.Name()) && !strings.Contains(filepath.ToSlash(filename), "/testdata/") { filenames <- filename nfiles++ } diff --git a/src/cmd/internal/obj/loong64/a.out.go b/src/cmd/internal/obj/loong64/a.out.go index f5d20cfabe76d5..8e651cdfef0e21 100644 --- a/src/cmd/internal/obj/loong64/a.out.go +++ b/src/cmd/internal/obj/loong64/a.out.go @@ -666,6 +666,10 @@ const ( ABSTRPICKW ABSTRPICKV + // 2.2.5.3 + AMOVWP + AMOVVP + // 2.2.5.4. Prefetch Instructions APRELD APRELDX diff --git a/src/cmd/internal/obj/loong64/anames.go b/src/cmd/internal/obj/loong64/anames.go index 67b5f2fc809927..c629553d5598af 100644 --- a/src/cmd/internal/obj/loong64/anames.go +++ b/src/cmd/internal/obj/loong64/anames.go @@ -202,6 +202,8 @@ var Anames = []string{ "BSTRINSV", "BSTRPICKW", "BSTRPICKV", + "MOVWP", + "MOVVP", "PRELD", "PRELDX", "CRCWBW", diff --git a/src/cmd/internal/obj/loong64/asm.go b/src/cmd/internal/obj/loong64/asm.go index 5d85585ebec11e..1b982f6c86fa53 100644 --- a/src/cmd/internal/obj/loong64/asm.go +++ b/src/cmd/internal/obj/loong64/asm.go @@ -212,6 +212,8 @@ var optab = []Optab{ {AMOVV, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, {AMOVB, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, {AMOVBU, C_REG, C_NONE, C_NONE, C_TLS_LE, C_NONE, 53, 16, 0, 0}, + {AMOVWP, C_REG, C_NONE, C_NONE, C_SOREG, C_NONE, 73, 4, 0, 0}, + {AMOVWP, C_REG, C_NONE, C_NONE, C_LOREG, C_NONE, 73, 4, 0, 0}, {AMOVW, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, {AMOVWU, C_LAUTO, C_NONE, C_NONE, C_REG, C_NONE, 36, 12, REGSP, 0}, @@ -233,6 +235,8 @@ var optab = []Optab{ {AMOVV, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, {AMOVB, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, {AMOVBU, C_TLS_LE, C_NONE, C_NONE, C_REG, C_NONE, 54, 16, 0, 0}, + {AMOVWP, C_SOREG, C_NONE, C_NONE, C_REG, C_NONE, 74, 4, 0, 0}, + {AMOVWP, C_LOREG, C_NONE, C_NONE, C_REG, C_NONE, 74, 4, 0, 0}, {AMOVW, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0}, {AMOVV, C_SACON, C_NONE, C_NONE, C_REG, C_NONE, 3, 4, REGSP, 0}, @@ -1437,6 +1441,9 @@ func buildop(ctxt *obj.Link) { case AMOVBU: opset(AMOVHU, r0) + case AMOVWP: + opset(AMOVVP, r0) + case AMUL: opset(AMULU, r0) opset(AMULH, r0) @@ -1964,6 +1971,10 @@ func OP_16IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { return op | (i&0xFFFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 } +func OP_14IRR(op uint32, i uint32, r2 uint32, r3 uint32) uint32 { + return op | (i&0x3FFF)<<10 | (r2&0x1F)<<5 | (r3&0x1F)<<0 +} + func OP_12IR_5I(op uint32, i1 uint32, r2 uint32, i2 
uint32) uint32 { return op | (i1&0xFFF)<<10 | (r2&0x1F)<<5 | (i2&0x1F)<<0 } @@ -2893,6 +2904,20 @@ func (c *ctxt0) asmout(p *obj.Prog, o *Optab, out []uint32) { o3 = OP_12IRR(c.opirr(ALU52ID), uint32(v>>52), uint32(REGTMP), uint32(REGTMP)) } o4 = OP_RRR(c.oprrr(p.As), uint32(REGTMP), uint32(r), uint32(p.To.Reg)) + + case 73: + v := c.regoff(&p.To) + if v&3 != 0 { + c.ctxt.Diag("%v: offset must be a multiple of 4.\n", p) + } + o1 = OP_14IRR(c.opirr(p.As), uint32(v>>2), uint32(p.To.Reg), uint32(p.From.Reg)) + + case 74: + v := c.regoff(&p.From) + if v&3 != 0 { + c.ctxt.Diag("%v: offset must be a multiple of 4.\n", p) + } + o1 = OP_14IRR(c.opirr(-p.As), uint32(v>>2), uint32(p.From.Reg), uint32(p.To.Reg)) } out[0] = o1 @@ -4026,6 +4051,10 @@ func (c *ctxt0) opirr(a obj.As) uint32 { return 0x0ad << 22 case AMOVD: return 0x0af << 22 + case AMOVVP: + return 0x27 << 24 // stptr.d + case AMOVWP: + return 0x25 << 24 // stptr.w case -AMOVB: return 0x0a0 << 22 case -AMOVBU: @@ -4044,6 +4073,10 @@ func (c *ctxt0) opirr(a obj.As) uint32 { return 0x0ac << 22 case -AMOVD: return 0x0ae << 22 + case -AMOVVP: + return 0x26 << 24 // ldptr.d + case -AMOVWP: + return 0x24 << 24 // ldptr.w case -AVMOVQ: return 0x0b0 << 22 // vld case -AXVMOVQ: diff --git a/src/cmd/internal/obj/loong64/doc.go b/src/cmd/internal/obj/loong64/doc.go index 64bb41ae5a2219..6c8f2618a2cb73 100644 --- a/src/cmd/internal/obj/loong64/doc.go +++ b/src/cmd/internal/obj/loong64/doc.go @@ -289,6 +289,34 @@ Note: In the following sections 3.1 to 3.6, "ui4" (4-bit unsigned int immediate) Go assembly | instruction Encoding ALSLV $4, r4, r5, R6 | 002d9486 + +5. Notes on special memory access instructions + Instruction format: + MOVWP offset(Rj), Rd + MOVVP offset(Rj), Rd + MOVWP Rd, offset(Rj) + MOVVP Rd, offset(Rj) + + Mapping between Go and platform assembly: + Go assembly | platform assembly + MOVWP offset(Rj), Rd | ldptr.w rd, rj, si14 + MOVVP offset(Rj), Rd | ldptr.d rd, rj, si14 + MOVWP Rd, offset(Rj) | stptr.w rd, rj, si14 + MOVVP Rd, offset(Rj) | stptr.d rd, rj, si14 + + Note: In Go assembly, for ease of understanding, offset is a 16-bit immediate number representing + the actual address offset, but in platform assembly it needs a 14-bit immediate number: + si14 = offset>>2 + + The addressing calculation for the above instructions involves logically left-shifting the 14-bit + immediate number si14 by 2 bits, then sign-extending it, and finally adding it to the value in the + general-purpose register rj to form the memory address.
+ + For example: + + Go assembly | platform assembly + MOVWP 8(R4), R5 | ldptr.w r5, r4, $2 + */ package loong64 diff --git a/src/encoding/json/decode.go b/src/encoding/json/decode.go index 70885a517e1876..fc29296c0f464f 100644 --- a/src/encoding/json/decode.go +++ b/src/encoding/json/decode.go @@ -1214,10 +1214,6 @@ func unquoteBytes(s []byte) (t []byte, ok bool) { if c == '\\' || c == '"' || c < ' ' { break } - if c < utf8.RuneSelf { - r++ - continue - } rr, size := utf8.DecodeRune(s[r:]) if rr == utf8.RuneError && size == 1 { break diff --git a/src/fmt/format.go b/src/fmt/format.go index 90e18cd696375f..334a94e2983e63 100644 --- a/src/fmt/format.go +++ b/src/fmt/format.go @@ -346,10 +346,7 @@ func (f *fmt) truncate(b []byte) []byte { if n < 0 { return b[:i] } - wid := 1 - if b[i] >= utf8.RuneSelf { - _, wid = utf8.DecodeRune(b[i:]) - } + _, wid := utf8.DecodeRune(b[i:]) i += wid } } diff --git a/src/fmt/print.go b/src/fmt/print.go index 155218046f47ce..01cfa1a1c7d7b4 100644 --- a/src/fmt/print.go +++ b/src/fmt/print.go @@ -1145,10 +1145,7 @@ formatLoop: break } - verb, size := rune(format[i]), 1 - if verb >= utf8.RuneSelf { - verb, size = utf8.DecodeRuneInString(format[i:]) - } + verb, size := utf8.DecodeRuneInString(format[i:]) i += size switch { diff --git a/src/go/doc/comment_test.go b/src/go/doc/comment_test.go index 004ae9d13d6de6..0e7de3eb78f38f 100644 --- a/src/go/doc/comment_test.go +++ b/src/go/doc/comment_test.go @@ -24,12 +24,12 @@ func TestComment(t *testing.T) { pkg := New(pkgs["pkgdoc"], "testdata/pkgdoc", 0) var ( - input = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n" - wantHTML = `
<p><a href="#T">T</a> and <a href="#U">U</a> are types, and <a href="#T.M">T.M</a> is a method, but [V] is a broken link. <a href="/math/rand#Int">rand.Int</a> and <a href="/crypto/rand#Reader">crand.Reader</a> are things. <a href="#G.M1">G.M1</a> and <a href="#G.M2">G.M2</a> are generic methods.` + "\n" - wantOldHTML = "<p>
[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n" - wantMarkdown = "[T](#T) and [U](#U) are types, and [T.M](#T.M) is a method, but \\[V] is a broken link. [rand.Int](/math/rand#Int) and [crand.Reader](/crypto/rand#Reader) are things. [G.M1](#G.M1) and [G.M2](#G.M2) are generic methods.\n" - wantText = "T and U are types, and T.M is a method, but [V] is a broken link. rand.Int and\ncrand.Reader are things. G.M1 and G.M2 are generic methods.\n" - wantOldText = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link.\n[rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n" + input = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n" + wantHTML = `
<p><a href="#T">T</a> and <a href="#U">U</a> are types, and <a href="#T.M">T.M</a> is a method, but [V] is a broken link. <a href="/math/rand#Int">rand.Int</a> and <a href="/crypto/rand#Reader">crand.Reader</a> are things. <a href="#G.M1">G.M1</a> and <a href="#G.M2">G.M2</a> are generic methods. <a href="#I.F">I.F</a> is an interface method and [I.V] is a broken link.` + "\n" + wantOldHTML = "<p>
[T] and [U] are types, and [T.M] is a method, but [V] is a broken link. [rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods. [I.F] is an interface method and [I.V] is a broken link.\n" + wantMarkdown = "[T](#T) and [U](#U) are types, and [T.M](#T.M) is a method, but \\[V] is a broken link. [rand.Int](/math/rand#Int) and [crand.Reader](/crypto/rand#Reader) are things. [G.M1](#G.M1) and [G.M2](#G.M2) are generic methods. [I.F](#I.F) is an interface method and \\[I.V] is a broken link.\n" + wantText = "T and U are types, and T.M is a method, but [V] is a broken link. rand.Int and\ncrand.Reader are things. G.M1 and G.M2 are generic methods. I.F is an interface\nmethod and [I.V] is a broken link.\n" + wantOldText = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link.\n[rand.Int] and [crand.Reader] are things. [G.M1] and [G.M2] are generic methods.\n[I.F] is an interface method and [I.V] is a broken link.\n" wantSynopsis = "T and U are types, and T.M is a method, but [V] is a broken link." wantOldSynopsis = "[T] and [U] are types, and [T.M] is a method, but [V] is a broken link." ) diff --git a/src/go/doc/doc.go b/src/go/doc/doc.go index f7e3c1bad8207b..0c23f1a46c87fd 100644 --- a/src/go/doc/doc.go +++ b/src/go/doc/doc.go @@ -167,6 +167,7 @@ func (p *Package) collectTypes(types []*Type) { p.collectValues(t.Vars) p.collectFuncs(t.Funcs) p.collectFuncs(t.Methods) + p.collectInterfaceMethods(t) } } @@ -184,6 +185,33 @@ func (p *Package) collectFuncs(funcs []*Func) { } } +// collectInterfaceMethods adds methods of interface types within t to p.syms. +// Note that t.Methods will contain methods of non-interface types, but not interface types. +// Adding interface methods to t.Methods might make sense, but would cause us to +// include those methods in the documentation index. Adding interface methods to p.syms +// here allows us to linkify references like [io.Reader.Read] without making any other +// changes to the documentation formatting at this time. +// +// If we do start adding interface methods to t.Methods in the future, +// collectInterfaceMethods can be dropped as redundant with collectFuncs(t.Methods). +func (p *Package) collectInterfaceMethods(t *Type) { + for _, s := range t.Decl.Specs { + spec, ok := s.(*ast.TypeSpec) + if !ok { + continue + } + list, isStruct := fields(spec.Type) + if isStruct { + continue + } + for _, field := range list { + for _, name := range field.Names { + p.syms[t.Name+"."+name.Name] = true + } + } + } +} + // NewFromFiles computes documentation for a package. 
// // The package is specified by a list of *ast.Files and corresponding diff --git a/src/go/doc/testdata/pkgdoc/doc.go b/src/go/doc/testdata/pkgdoc/doc.go index 3f822c75546c63..d542dc2cdd0cb6 100644 --- a/src/go/doc/testdata/pkgdoc/doc.go +++ b/src/go/doc/testdata/pkgdoc/doc.go @@ -20,5 +20,9 @@ var _ = crand.Reader type G[T any] struct{ x T } -func (g G[T]) M1() {} +func (g G[T]) M1() {} func (g *G[T]) M2() {} + +type I interface { + F() +} diff --git a/src/internal/runtime/atomic/atomic_loong64.s b/src/internal/runtime/atomic/atomic_loong64.s index 95d3e2bdab8c86..4215af24febaa2 100644 --- a/src/internal/runtime/atomic/atomic_loong64.s +++ b/src/internal/runtime/atomic/atomic_loong64.s @@ -19,7 +19,7 @@ TEXT ·Cas(SB), NOSPLIT, $0-17 MOVW new+12(FP), R6 MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLAMCAS(SB), R8 - BEQ R8, cas_again + BEQ R8, ll_sc MOVV R5, R7 // backup old value AMCASDBW R6, (R4), R5 BNE R7, R5, cas_fail0 @@ -30,6 +30,7 @@ cas_fail0: MOVB R0, ret+16(FP) RET +ll_sc: // Implemented using the ll-sc instruction pair DBAR $0x14 // LoadAcquire barrier cas_again: @@ -60,7 +61,7 @@ TEXT ·Cas64(SB), NOSPLIT, $0-25 MOVV new+16(FP), R6 MOVBU internal∕cpu·Loong64+const_offsetLOONG64HasLAMCAS(SB), R8 - BEQ R8, cas64_again + BEQ R8, ll_sc_64 MOVV R5, R7 // backup old value AMCASDBV R6, (R4), R5 BNE R7, R5, cas64_fail0 @@ -71,6 +72,7 @@ cas64_fail0: MOVB R0, ret+24(FP) RET +ll_sc_64: // Implemented using the ll-sc instruction pair DBAR $0x14 cas64_again: diff --git a/src/math/modf.go b/src/math/modf.go index ab73e2dc36831e..12630958e969b7 100644 --- a/src/math/modf.go +++ b/src/math/modf.go @@ -11,8 +11,8 @@ package math // // Modf(±Inf) = ±Inf, NaN // Modf(NaN) = NaN, NaN -func Modf(f float64) (int float64, frac float64) { - int = Trunc(f) - frac = Copysign(f-int, f) +func Modf(f float64) (integer float64, fractional float64) { + integer = Trunc(f) + fractional = Copysign(f-integer, f) return } diff --git a/src/net/http/server.go b/src/net/http/server.go index cf0bd0a91d7624..6fdcd51c0a6777 100644 --- a/src/net/http/server.go +++ b/src/net/http/server.go @@ -2759,9 +2759,12 @@ func (mux *ServeMux) matchOrRedirect(host, method, path string, u *url.URL) (_ * defer mux.mu.RUnlock() n, matches := mux.tree.match(host, method, path) - // If we have an exact match, or we were asked not to try trailing-slash redirection, - // or the URL already has a trailing slash, then we're done. - if !exactMatch(n, path) && u != nil && !strings.HasSuffix(path, "/") { + // We can terminate here if any of the following is true: + // - We have an exact match already. + // - We were asked not to try trailing slash redirection. + // - The path already has a trailing slash. + // - The path is an empty string. + if !exactMatch(n, path) && u != nil && !strings.HasSuffix(path, "/") && path != "" { // If there is an exact match with a trailing slash, then redirect.
path += "/" n2, _ := mux.tree.match(host, method, path) diff --git a/src/net/http/server_test.go b/src/net/http/server_test.go index f4aafc853bd5d6..832f9688b63d9c 100644 --- a/src/net/http/server_test.go +++ b/src/net/http/server_test.go @@ -97,6 +97,7 @@ func TestFindHandler(t *testing.T) { {"GET", "/foo/x", "&http.handler{i:2}"}, {"GET", "/bar/x", "&http.handler{i:4}"}, {"GET", "/bar", `&http.redirectHandler{url:"/bar/", code:301}`}, + {"CONNECT", "", "(http.HandlerFunc)(.*)"}, {"CONNECT", "/", "&http.handler{i:1}"}, {"CONNECT", "//", "&http.handler{i:1}"}, {"CONNECT", "//foo", "&http.handler{i:5}"}, @@ -112,7 +113,7 @@ func TestFindHandler(t *testing.T) { r.URL = &url.URL{Path: test.path} gotH, _, _, _ := mux.findHandler(&r) got := fmt.Sprintf("%#v", gotH) - if got != test.wantHandler { + if !regexp.MustCompile(test.wantHandler).MatchString(got) { t.Errorf("%s %q: got %q, want %q", test.method, test.path, got, test.wantHandler) } } diff --git a/src/net/netip/export_test.go b/src/net/netip/export_test.go index b2fae1aa47eedc..777a76a6b26401 100644 --- a/src/net/netip/export_test.go +++ b/src/net/netip/export_test.go @@ -34,5 +34,3 @@ var TestAppendToMarshal = testAppendToMarshal func (a Addr) IsZero() bool { return a.isZero() } func (p Prefix) IsZero() bool { return p.isZero() } - -func (p Prefix) Compare(p2 Prefix) int { return p.compare(p2) } diff --git a/src/net/netip/netip.go b/src/net/netip/netip.go index 35abfd3241bc13..b1b15b47287de2 100644 --- a/src/net/netip/netip.go +++ b/src/net/netip/netip.go @@ -1330,21 +1330,23 @@ func (p Prefix) isZero() bool { return p == Prefix{} } // IsSingleIP reports whether p contains exactly one IP. func (p Prefix) IsSingleIP() bool { return p.IsValid() && p.Bits() == p.ip.BitLen() } -// compare returns an integer comparing two prefixes. +// Compare returns an integer comparing two prefixes. // The result will be 0 if p == p2, -1 if p < p2, and +1 if p > p2. // Prefixes sort first by validity (invalid before valid), then -// address family (IPv4 before IPv6), then prefix length, then -// address. -// -// Unexported for Go 1.22 because we may want to compare by p.Addr first. -// See post-acceptance discussion on go.dev/issue/61642. -func (p Prefix) compare(p2 Prefix) int { - if c := cmp.Compare(p.Addr().BitLen(), p2.Addr().BitLen()); c != 0 { +// address family (IPv4 before IPv6), then masked prefix address, then +// prefix length, then unmasked address. +func (p Prefix) Compare(p2 Prefix) int { + // Aside from sorting based on the masked address, this use of + // Addr.Compare also enforces the valid vs. invalid and address + // family ordering for the prefix. 
+ if c := p.Masked().Addr().Compare(p2.Masked().Addr()); c != 0 { return c } + if c := cmp.Compare(p.Bits(), p2.Bits()); c != 0 { return c } + return p.Addr().Compare(p2.Addr()) } diff --git a/src/net/netip/netip_test.go b/src/net/netip/netip_test.go index ea03f9a9e72473..71e39021ca8969 100644 --- a/src/net/netip/netip_test.go +++ b/src/net/netip/netip_test.go @@ -1123,6 +1123,9 @@ func TestPrefixCompare(t *testing.T) { {mustPrefix("fe80::/48"), mustPrefix("fe80::/64"), -1}, {mustPrefix("1.2.3.0/24"), mustPrefix("fe80::/8"), -1}, + + {mustPrefix("1.2.3.0/24"), mustPrefix("1.2.3.4/24"), -1}, + {mustPrefix("1.2.3.0/24"), mustPrefix("1.2.3.0/28"), -1}, } for _, tt := range tests { got := tt.a.Compare(tt.b) @@ -1148,10 +1151,70 @@ func TestPrefixCompare(t *testing.T) { Prefix{}, mustPrefix("fe80::/48"), mustPrefix("1.2.0.0/24"), + mustPrefix("1.2.3.4/24"), + mustPrefix("1.2.3.0/28"), } slices.SortFunc(values, Prefix.Compare) got := fmt.Sprintf("%s", values) - want := `[invalid Prefix 1.2.0.0/16 1.2.0.0/24 1.2.3.0/24 fe80::/48 fe80::/64 fe90::/64]` + want := `[invalid Prefix 1.2.0.0/16 1.2.0.0/24 1.2.3.0/24 1.2.3.4/24 1.2.3.0/28 fe80::/48 fe80::/64 fe90::/64]` + if got != want { + t.Errorf("unexpected sort\n got: %s\nwant: %s\n", got, want) + } + + // Lists from + // https://www.iana.org/assignments/iana-ipv4-special-registry/iana-ipv4-special-registry.xhtml and + // https://www.iana.org/assignments/ipv6-address-space/ipv6-address-space.xhtml, + // to verify that the sort order matches IANA's conventional + // ordering. + values = []Prefix{ + mustPrefix("0.0.0.0/8"), + mustPrefix("127.0.0.0/8"), + mustPrefix("10.0.0.0/8"), + mustPrefix("203.0.113.0/24"), + mustPrefix("169.254.0.0/16"), + mustPrefix("192.0.0.0/24"), + mustPrefix("240.0.0.0/4"), + mustPrefix("192.0.2.0/24"), + mustPrefix("192.0.0.170/32"), + mustPrefix("198.18.0.0/15"), + mustPrefix("192.0.0.8/32"), + mustPrefix("0.0.0.0/32"), + mustPrefix("192.0.0.9/32"), + mustPrefix("198.51.100.0/24"), + mustPrefix("192.168.0.0/16"), + mustPrefix("192.0.0.10/32"), + mustPrefix("192.175.48.0/24"), + mustPrefix("192.52.193.0/24"), + mustPrefix("100.64.0.0/10"), + mustPrefix("255.255.255.255/32"), + mustPrefix("192.31.196.0/24"), + mustPrefix("172.16.0.0/12"), + mustPrefix("192.0.0.0/29"), + mustPrefix("192.88.99.0/24"), + mustPrefix("fec0::/10"), + mustPrefix("6000::/3"), + mustPrefix("fe00::/9"), + mustPrefix("8000::/3"), + mustPrefix("0000::/8"), + mustPrefix("0400::/6"), + mustPrefix("f800::/6"), + mustPrefix("e000::/4"), + mustPrefix("ff00::/8"), + mustPrefix("a000::/3"), + mustPrefix("fc00::/7"), + mustPrefix("1000::/4"), + mustPrefix("0800::/5"), + mustPrefix("4000::/3"), + mustPrefix("0100::/8"), + mustPrefix("c000::/3"), + mustPrefix("fe80::/10"), + mustPrefix("0200::/7"), + mustPrefix("f000::/5"), + mustPrefix("2000::/3"), + } + slices.SortFunc(values, func(a, b Prefix) int { return a.Compare(b) }) + got = fmt.Sprintf("%s", values) + want = `[0.0.0.0/8 0.0.0.0/32 10.0.0.0/8 100.64.0.0/10 127.0.0.0/8 169.254.0.0/16 172.16.0.0/12 192.0.0.0/24 192.0.0.0/29 192.0.0.8/32 192.0.0.9/32 192.0.0.10/32 192.0.0.170/32 192.0.2.0/24 192.31.196.0/24 192.52.193.0/24 192.88.99.0/24 192.168.0.0/16 192.175.48.0/24 198.18.0.0/15 198.51.100.0/24 203.0.113.0/24 240.0.0.0/4 255.255.255.255/32 ::/8 100::/8 200::/7 400::/6 800::/5 1000::/4 2000::/3 4000::/3 6000::/3 8000::/3 a000::/3 c000::/3 e000::/4 f000::/5 f800::/6 fc00::/7 fe00::/9 fe80::/10 fec0::/10 ff00::/8]` if got != want { t.Errorf("unexpected sort\n got: %s\nwant: %s\n", got, want) } diff --git 
diff --git a/src/net/url/url.go b/src/net/url/url.go
index 2a57659460373d..7021f343972ea2 100644
--- a/src/net/url/url.go
+++ b/src/net/url/url.go
@@ -661,6 +661,13 @@ func parseHost(host string) (string, error) {
 			return host1 + host2 + host3, nil
 		}
 	} else if i := strings.LastIndex(host, ":"); i != -1 {
+		if j := strings.LastIndex(host[:i], ":"); j != -1 { // multiple colons
+			if k := strings.LastIndex(host[:j], ":"); k == -1 { // only one other colon
+				if port := host[j:i]; validOptionalPort(port) { // see issue #75223
+					return "", fmt.Errorf("a colon after port %q is not allowed", port)
+				}
+			}
+		}
 		colonPort := host[i:]
 		if !validOptionalPort(colonPort) {
 			return "", fmt.Errorf("invalid port %q after host", colonPort)
diff --git a/src/net/url/url_test.go b/src/net/url/url_test.go
index 16e08b63c6d098..6c16f8fc057933 100644
--- a/src/net/url/url_test.go
+++ b/src/net/url/url_test.go
@@ -707,6 +707,13 @@ var parseRequestURLTests = []struct {
 	// RFC 6874.
 	{"http://[fe80::1%en0]/", false},
 	{"http://[fe80::1%en0]:8080/", false},
+
+	{"http://x:x:", true},             // malformed IPv6 but still accepted
+	{"http://x::", false},             // a colon after empty port is not allowed
+	{"http://x:1:", false},            // a colon after the port is not allowed
+	{"http://x:12:", false},           // a colon after the port is not allowed
+	{"http://x:123:", false},          // a colon after the port is not allowed
+	{"http://127.0.0.1:8080:", false}, // a colon after the port is not allowed
 }
 
 func TestParseRequestURI(t *testing.T) {
@@ -1643,6 +1650,13 @@ func TestParseErrors(t *testing.T) {
 		{"cache_object:foo", true},
 		{"cache_object:foo/bar", true},
 		{"cache_object/:foo/bar", false},
+
+		{"http://x:x:", false},           // malformed IPv6 but still accepted
+		{"http://x::", true},             // a colon after empty port is not allowed
+		{"http://x:1:", true},            // a colon after the port is not allowed
+		{"http://x:12:", true},           // a colon after the port is not allowed
+		{"http://x:123:", true},          // a colon after the port is not allowed
+		{"http://127.0.0.1:8080:", true}, // a colon after the port is not allowed
 	}
 	for _, tt := range tests {
 		u, err := Parse(tt.in)
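To make the parseHost change concrete, here is a minimal sketch of the new behavior, assuming a toolchain that includes this patch; note that url.Parse wraps the underlying message in a *url.Error, so only the cause is shown in the comments:

```go
package main

import (
	"fmt"
	"net/url"
)

func main() {
	// A colon following an otherwise valid port is now rejected,
	// while a normal authority still parses.
	for _, in := range []string{
		"http://127.0.0.1:8080:", // cause: a colon after port ":8080" is not allowed
		"http://x:1:",            // cause: a colon after port ":1" is not allowed
		"http://x:8080/",         // ok: err is nil
	} {
		_, err := url.Parse(in)
		fmt.Printf("%-24s err: %v\n", in, err)
	}
}
```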
diff --git a/src/regexp/regexp.go b/src/regexp/regexp.go
index 253415fb6a44c6..66c73693995a42 100644
--- a/src/regexp/regexp.go
+++ b/src/regexp/regexp.go
@@ -384,10 +384,6 @@ type inputString struct {
 
 func (i *inputString) step(pos int) (rune, int) {
 	if pos < len(i.str) {
-		c := i.str[pos]
-		if c < utf8.RuneSelf {
-			return rune(c), 1
-		}
 		return utf8.DecodeRuneInString(i.str[pos:])
 	}
 	return endOfText, 0
@@ -409,17 +405,11 @@ func (i *inputString) context(pos int) lazyFlag {
 	r1, r2 := endOfText, endOfText
 	// 0 < pos && pos <= len(i.str)
 	if uint(pos-1) < uint(len(i.str)) {
-		r1 = rune(i.str[pos-1])
-		if r1 >= utf8.RuneSelf {
-			r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
-		}
+		r1, _ = utf8.DecodeLastRuneInString(i.str[:pos])
 	}
 	// 0 <= pos && pos < len(i.str)
 	if uint(pos) < uint(len(i.str)) {
-		r2 = rune(i.str[pos])
-		if r2 >= utf8.RuneSelf {
-			r2, _ = utf8.DecodeRuneInString(i.str[pos:])
-		}
+		r2, _ = utf8.DecodeRuneInString(i.str[pos:])
 	}
 	return newLazyFlag(r1, r2)
 }
@@ -431,10 +421,6 @@ type inputBytes struct {
 
 func (i *inputBytes) step(pos int) (rune, int) {
 	if pos < len(i.str) {
-		c := i.str[pos]
-		if c < utf8.RuneSelf {
-			return rune(c), 1
-		}
 		return utf8.DecodeRune(i.str[pos:])
 	}
 	return endOfText, 0
@@ -456,17 +442,11 @@ func (i *inputBytes) context(pos int) lazyFlag {
 	r1, r2 := endOfText, endOfText
 	// 0 < pos && pos <= len(i.str)
 	if uint(pos-1) < uint(len(i.str)) {
-		r1 = rune(i.str[pos-1])
-		if r1 >= utf8.RuneSelf {
-			r1, _ = utf8.DecodeLastRune(i.str[:pos])
-		}
+		r1, _ = utf8.DecodeLastRune(i.str[:pos])
 	}
 	// 0 <= pos && pos < len(i.str)
 	if uint(pos) < uint(len(i.str)) {
-		r2 = rune(i.str[pos])
-		if r2 >= utf8.RuneSelf {
-			r2, _ = utf8.DecodeRune(i.str[pos:])
-		}
+		r2, _ = utf8.DecodeRune(i.str[pos:])
 	}
 	return newLazyFlag(r1, r2)
 }
diff --git a/src/runtime/string.go b/src/runtime/string.go
index 44d586bc53ee7d..3726d9235bfa4b 100644
--- a/src/runtime/string.go
+++ b/src/runtime/string.go
@@ -59,6 +59,9 @@ func concatstrings(buf *tmpBuf, a []string) string {
 	return s
 }
 
+// concatstring2 helps make the callsite smaller (compared to concatstrings),
+// and we think this is currently more valuable than omitting one call in the
+// chain; the same goes for concatstring{3,4,5}.
 func concatstring2(buf *tmpBuf, a0, a1 string) string {
 	return concatstrings(buf, []string{a0, a1})
 }
@@ -108,6 +111,9 @@ func concatbytes(buf *tmpBuf, a []string) []byte {
 	return b
 }
 
+// concatbyte2 helps make the callsite smaller (compared to concatbytes),
+// and we think this is currently more valuable than omitting one call in
+// the chain; the same goes for concatbyte{3,4,5}.
 func concatbyte2(buf *tmpBuf, a0, a1 string) []byte {
 	return concatbytes(buf, []string{a0, a1})
 }
diff --git a/src/runtime/tagptr_64bit.go b/src/runtime/tagptr_64bit.go
index 3d79332e2dcaff..76733cc1d64630 100644
--- a/src/runtime/tagptr_64bit.go
+++ b/src/runtime/tagptr_64bit.go
@@ -22,10 +22,17 @@ const (
 	// On AMD64, virtual addresses are 48-bit (or 57-bit) sign-extended.
 	// Other archs are 48-bit zero-extended.
 	//
+	// We use one extra bit to placate systems which simulate amd64 binaries on
+	// an arm64 host. Allocated arm64 addresses could be as high as 1<<48-1,
+	// which would be invalid if we assumed 48-bit sign-extended addresses.
+	// See issue 69255.
+	// (Note that this does not help the other way around, simulating arm64
+	// on amd64, but we don't have that problem at the moment.)
+	//
 	// On s390x, virtual addresses are 64-bit. There's not much we
 	// can do about this, so we just hope that the kernel doesn't
 	// get to really high addresses and panic if it does.
-	defaultAddrBits = 48
+	defaultAddrBits = 48 + 1
 
 	// On AIX, 64-bit addresses are split into 36-bit segment number and 28-bit
 	// offset in segment. Segment numbers in the range 0x0A0000000-0x0AFFFFFFF(LSA)
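For intuition about the defaultAddrBits change: one extra address bit leaves 64 - 49 = 15 high bits of tag space, before counting the alignment bits the runtime reclaims elsewhere. The toy program below uses made-up names and a simplified zero-extended layout, not the runtime's actual packing, just to show the arithmetic:

```go
package main

import "fmt"

// Hypothetical stand-ins for the runtime's constants: 48 address
// bits plus the one guard bit motivated by issue 69255.
const (
	addrBits = 48 + 1
	tagBits  = 64 - addrBits // 15 bits remain for the tag
)

// pack stores a zero-extended address in the low addrBits and a
// small counter tag in the high tagBits.
func pack(addr, tag uint64) uint64 {
	return addr<<tagBits>>tagBits | tag<<addrBits
}

func addrOf(t uint64) uint64 { return t << tagBits >> tagBits }
func tagOf(t uint64) uint64  { return t >> addrBits }

func main() {
	t := pack(0x0000_ffff_dead_beef, 1234)
	fmt.Printf("addr=%#x tag=%d (max tag %d)\n",
		addrOf(t), tagOf(t), uint64(1)<<tagBits-1)
	// addr=0xffffdeadbeef tag=1234 (max tag 32767)
}
```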
diff --git a/src/strconv/quote.go b/src/strconv/quote.go
index 99c292a8ed5884..da2325647d3817 100644
--- a/src/strconv/quote.go
+++ b/src/strconv/quote.go
@@ -37,12 +37,8 @@ func appendQuotedWith(buf []byte, s string, quote byte, ASCIIonly, graphicOnly b
 		buf = nBuf
 	}
 	buf = append(buf, quote)
-	for width := 0; len(s) > 0; s = s[width:] {
-		r := rune(s[0])
-		width = 1
-		if r >= utf8.RuneSelf {
-			r, width = utf8.DecodeRuneInString(s)
-		}
+	for r, width := rune(0), 0; len(s) > 0; s = s[width:] {
+		r, width = utf8.DecodeRuneInString(s)
 		if width == 1 && r == utf8.RuneError {
 			buf = append(buf, `\x`...)
 			buf = append(buf, lowerhex[s[0]>>4])
diff --git a/src/strings/iter.go b/src/strings/iter.go
index 69fe031739628c..84e763a8343df4 100644
--- a/src/strings/iter.go
+++ b/src/strings/iter.go
@@ -117,11 +117,7 @@ func FieldsFuncSeq(s string, f func(rune) bool) iter.Seq[string] {
 	return func(yield func(string) bool) {
 		start := -1
 		for i := 0; i < len(s); {
-			size := 1
-			r := rune(s[i])
-			if r >= utf8.RuneSelf {
-				r, size = utf8.DecodeRuneInString(s[i:])
-			}
+			r, size := utf8.DecodeRuneInString(s[i:])
 			if f(r) {
 				if start >= 0 {
 					if !yield(s[start:i]) {
diff --git a/src/strings/reader.go b/src/strings/reader.go
index 497ffb7a39c635..f12c9b18b36d43 100644
--- a/src/strings/reader.go
+++ b/src/strings/reader.go
@@ -90,10 +90,6 @@ func (r *Reader) ReadRune() (ch rune, size int, err error) {
 		return 0, 0, io.EOF
 	}
 	r.prevRune = int(r.i)
-	if c := r.s[r.i]; c < utf8.RuneSelf {
-		r.i++
-		return rune(c), 1, nil
-	}
 	ch, size = utf8.DecodeRuneInString(r.s[r.i:])
 	r.i += int64(size)
 	return
diff --git a/src/strings/strings.go b/src/strings/strings.go
index 74007977d911f0..3cc3e79f982248 100644
--- a/src/strings/strings.go
+++ b/src/strings/strings.go
@@ -896,7 +896,7 @@ func TrimLeftFunc(s string, f func(rune) bool) string {
 // Unicode code points c satisfying f(c) removed.
 func TrimRightFunc(s string, f func(rune) bool) string {
 	i := lastIndexFunc(s, f, false)
-	if i >= 0 && s[i] >= utf8.RuneSelf {
+	if i >= 0 {
 		_, wid := utf8.DecodeRuneInString(s[i:])
 		i += wid
 	} else {
@@ -1028,10 +1028,7 @@ func trimLeftASCII(s string, as *asciiSet) string {
 
 func trimLeftUnicode(s, cutset string) string {
 	for len(s) > 0 {
-		r, n := rune(s[0]), 1
-		if r >= utf8.RuneSelf {
-			r, n = utf8.DecodeRuneInString(s)
-		}
+		r, n := utf8.DecodeRuneInString(s)
 		if !ContainsRune(cutset, r) {
 			break
 		}
@@ -1224,13 +1221,8 @@ hasUnicode:
 		}
 		// Extract first rune from second string.
-		var tr rune
-		if t[0] < utf8.RuneSelf {
-			tr, t = rune(t[0]), t[1:]
-		} else {
-			r, size := utf8.DecodeRuneInString(t)
-			tr, t = r, t[size:]
-		}
+		tr, size := utf8.DecodeRuneInString(t)
+		t = t[size:]
 
 		// If they match, keep going; if not, return false.
diff --git a/src/unicode/utf8/utf8.go b/src/unicode/utf8/utf8.go
index 01cad1cc81f880..68283341d92ace 100644
--- a/src/unicode/utf8/utf8.go
+++ b/src/unicode/utf8/utf8.go
@@ -155,6 +155,20 @@ func FullRuneInString(s string) bool {
 // out of range, or is not the shortest possible UTF-8 encoding for the
 // value. No other validation is performed.
 func DecodeRune(p []byte) (r rune, size int) {
+	// Inlineable fast path for ASCII characters; see #48195.
+	// This implementation is weird but effective at rendering the
+	// function inlineable.
+	for _, b := range p {
+		if b < RuneSelf {
+			return rune(b), 1
+		}
+		break
+	}
+	r, size = decodeRuneSlow(p)
+	return
+}
+
+func decodeRuneSlow(p []byte) (r rune, size int) {
 	n := len(p)
 	if n < 1 {
 		return RuneError, 0
@@ -203,6 +217,18 @@ func DecodeRune(p []byte) (r rune, size int) {
 // out of range, or is not the shortest possible UTF-8 encoding for the
 // value. No other validation is performed.
 func DecodeRuneInString(s string) (r rune, size int) {
+	// Inlineable fast path for ASCII characters; see #48195.
+	// This implementation is a bit weird but effective at rendering the
+	// function inlineable.
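+	// (An early return around a call to the slow path would read more
+	// naturally, but this shape apparently scores better with the
+	// inliner's cost model; see the discussion on #48195.)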
+ if s != "" && s[0] < RuneSelf { + return rune(s[0]), 1 + } else { + r, size = decodeRuneInStringSlow(s) + } + return +} + +func decodeRuneInStringSlow(s string) (rune, int) { n := len(s) if n < 1 { return RuneError, 0 diff --git a/src/unicode/utf8/utf8_test.go b/src/unicode/utf8/utf8_test.go index aece0fab731f41..bf4f074ffd0f5f 100644 --- a/src/unicode/utf8/utf8_test.go +++ b/src/unicode/utf8/utf8_test.go @@ -747,18 +747,37 @@ func BenchmarkAppendInvalidRuneNegative(b *testing.B) { func BenchmarkDecodeASCIIRune(b *testing.B) { a := []byte{'a'} - for i := 0; i < b.N; i++ { - DecodeRune(a) + for range b.N { + runeSink, sizeSink = DecodeRune(a) } } func BenchmarkDecodeJapaneseRune(b *testing.B) { nihon := []byte("本") - for i := 0; i < b.N; i++ { - DecodeRune(nihon) + for range b.N { + runeSink, sizeSink = DecodeRune(nihon) + } +} + +func BenchmarkDecodeASCIIRuneInString(b *testing.B) { + a := "a" + for range b.N { + runeSink, sizeSink = DecodeRuneInString(a) } } +func BenchmarkDecodeJapaneseRuneInString(b *testing.B) { + nihon := "本" + for range b.N { + runeSink, sizeSink = DecodeRuneInString(nihon) + } +} + +var ( + runeSink rune + sizeSink int +) + // boolSink is used to reference the return value of benchmarked // functions to avoid dead code elimination. var boolSink bool