我没想到我的程序的瓶颈竟然来自标准库,但这似乎是一个例外。
net/textproto库中的DotReader/DotWriter对每个输入/输出字节都调用一次bufio的ReadByte/WriteByte,在处理相同数据量时,它们比直接使用bufio的Read/Write慢300多倍。这严重影响了使用这些接口的应用程序的数据吞吐量。
package my_test
import (
"bufio"
"io"
"net/textproto"
"testing"
)
// FillReader is a byte source whose Read method produces an endless run of a
// single byte value: the value of the FillReader itself.
type FillReader byte

// Read fills all of b with the reader's byte value and reports len(b) with a
// nil error. An empty b yields (0, nil); no end-of-stream error is ever
// returned.
func (r FillReader) Read(b []byte) (n int, err error) {
	if len(b) == 0 {
		return 0, nil
	}
	// Seed the first byte, then keep copying the already-filled prefix onto
	// the remainder so the filled region doubles on each pass.
	b[0] = byte(r)
	for filled := 1; filled < len(b); {
		filled += copy(b[filled:], b[:filled])
	}
	return len(b), nil
}
// BenchmarkIONull is the baseline measurement: every iteration streams 4 MiB
// of '.' bytes from a FillReader straight into io.Discard, with no buffering
// layer in between. The result of io.Copy is discarded.
func BenchmarkIONull(b *testing.B) {
	const payload = 4 << 20 // bytes copied per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		src := io.LimitReader(FillReader('.'), payload)
		io.Copy(io.Discard, src)
	}
}
// BenchmarkIOBufioWrite measures bulk writes through a bufio.Writer: every
// iteration copies 4 MiB of '.' bytes from a FillReader into a bufio.Writer
// that wraps io.Discard, then flushes it.
//
// The benchmark fails if the copy or the flush reports an error.
func BenchmarkIOBufioWrite(b *testing.B) {
	size := int64(1024 * 1024 * 4)
	b.SetBytes(size)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r := io.LimitReader(FillReader('.'), size)
		w := bufio.NewWriter(io.Discard)
		// Expose only the Write method. *bufio.Writer also implements
		// io.ReaderFrom, which io.Copy would otherwise prefer, and the
		// benchmark would then not exercise bufio's Write path at all.
		if _, err := io.Copy(struct{ io.Writer }{w}, r); err != nil {
			b.Fatal(err)
		}
		if err := w.Flush(); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkIOBufioWriteByte measures byte-at-a-time output: every iteration
// creates a bufio.Writer over io.Discard and calls WriteByte 4 Mi times with a
// zero byte. WriteByte results are discarded and no Flush is issued.
func BenchmarkIOBufioWriteByte(b *testing.B) {
	const payload int64 = 4 << 20 // bytes written per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		out := bufio.NewWriter(io.Discard)
		for remaining := payload; remaining > 0; remaining-- {
			out.WriteByte(0)
		}
	}
}
// BenchmarkIODotWriter measures writing 4 MiB of '.' bytes per iteration
// through a textproto DotWriter layered on a bufio.Writer over io.Discard.
//
// The DotWriter is closed after every copy so that each iteration produces a
// complete dot-encoded message, as the DotWriter contract requires. The
// benchmark fails if the copy or the close reports an error.
func BenchmarkIODotWriter(b *testing.B) {
	size := int64(1024 * 1024 * 4)
	b.SetBytes(size)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r := io.LimitReader(FillReader('.'), size)
		w := textproto.NewWriter(bufio.NewWriter(io.Discard)).DotWriter()
		if _, err := io.Copy(w, r); err != nil {
			b.Fatal(err)
		}
		// Close finishes the dot encoding; without it the message is left
		// unterminated.
		if err := w.Close(); err != nil {
			b.Fatal(err)
		}
	}
}
// BenchmarkIOBufioRead measures bulk reads through a bufio.Reader: every
// iteration wraps a 4 MiB stream of '.' bytes in a bufio.Reader and copies it
// into io.Discard. The result of io.Copy is discarded.
func BenchmarkIOBufioRead(b *testing.B) {
	const payload = 4 << 20 // bytes read per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		limited := io.LimitReader(FillReader('.'), payload)
		io.Copy(io.Discard, bufio.NewReader(limited))
	}
}
// BenchmarkIOBufioReadByte measures byte-at-a-time input: every iteration
// wraps a 4 MiB stream of '.' bytes in a bufio.Reader and calls ReadByte once
// per byte. ReadByte results are discarded.
func BenchmarkIOBufioReadByte(b *testing.B) {
	const payload int64 = 4 << 20 // bytes read per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		in := bufio.NewReader(io.LimitReader(FillReader('.'), payload))
		for remaining := payload; remaining > 0; remaining-- {
			in.ReadByte()
		}
	}
}
// BenchmarkIODotReader measures reading through a textproto DotReader: every
// iteration layers a DotReader on a bufio.Reader over a 4 MiB stream of '.'
// bytes and copies it into io.Discard. The result of io.Copy, including any
// error, is discarded.
func BenchmarkIODotReader(b *testing.B) {
	const payload = 4 << 20 // bytes supplied per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		limited := io.LimitReader(FillReader('.'), payload)
		tp := textproto.NewReader(bufio.NewReader(limited))
		io.Copy(io.Discard, tp.DotReader())
	}
}
❯ go test -cpuprofile cpu.prof -memprofile mem.prof -benchmem -bench IO
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIONull-12 13660 91626 ns/op 45776.23 MB/s 24 B/op 1 allocs/op
BenchmarkIOBufioWrite-12 19095 55199 ns/op 75984.94 MB/s 24 B/op 1 allocs/op
BenchmarkIOBufioWriteByte-12 99 10311620 ns/op 406.76 MB/s 4178 B/op 1 allocs/op
BenchmarkIODotWriter-12 67 16815748 ns/op 249.43 MB/s 36984 B/op 6 allocs/op
BenchmarkIOBufioRead-12 20808 58743 ns/op 71401.44 MB/s 4223 B/op 3 allocs/op
BenchmarkIOBufioReadByte-12 99 10507806 ns/op 399.16 MB/s 4202 B/op 2 allocs/op
BenchmarkIODotReader-12 60 19456187 ns/op 215.58 MB/s 4305 B/op 5 allocs/op
PASS
ok mytest 11.788s
4条答案
按热度 按时间
9bfwbjaz1#
你能运行 go tool pprof 并上传性能分析数据吗?谢谢~
r9f1avp52#
❯ go test -cpuprofile cpu.prof -memprofile mem.prof -benchmem -bench IO
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIONull-12 13660 91626 ns/op 45776.23 MB/s 24 B/op 1 allocs/op
BenchmarkIOBufioWrite-12 19095 55199 ns/op 75984.94 MB/s 24 B/op 1 allocs/op
BenchmarkIOBufioWriteByte-12 99 10311620 ns/op 406.76 MB/s 4178 B/op 1 allocs/op
BenchmarkIODotWriter-12 67 16815748 ns/op 249.43 MB/s 36984 B/op 6 allocs/op
BenchmarkIOBufioRead-12 20808 58743 ns/op 71401.44 MB/s 4223 B/op 3 allocs/op
BenchmarkIOBufioReadByte-12 99 10507806 ns/op 399.16 MB/s 4202 B/op 2 allocs/op
BenchmarkIODotReader-12 60 19456187 ns/op 215.58 MB/s 4305 B/op 5 allocs/op
PASS
ok mytest 11.788s
❯ go test -cpuprofile dotreader_cpu.prof -memprofile dotreader_mem.prof -benchmem -bench DotReader
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIODotReader-12 56 20478373 ns/op 204.82 MB/s 4770 B/op 5 allocs/op
PASS
ok mytest 1.306s
❯ go test -cpuprofile dotwriter_cpu.prof -memprofile dotwriter_mem.prof -benchmem -bench DotWriter
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIODotWriter-12 75 17587541 ns/op 238.48 MB/s 37218 B/op 6 allocs/op
PASS
ok mytest 1.511s
q9rjltbz3#
我在这里优化了这些例程:https://github.com/go-textproto/textproto
但是我不确定它是否适合提交给Go的标准库。我使用了一种技巧来“倒回”已经从`bufio.Reader`读取的任意数量的字节,并且我不确定如何更安全地做到这一点。不过,如果我们使用`ReaderFrom`和`WriterTo`接口,我可以编写一个更安全、更优化的版本。无论如何,这表明标准库中的实现还有很大的性能改进空间。
t98cgbkg4#
我们鼓励开发者为Go提交高质量的CL(即PR)。如果你找到了改进标准库的方法,并且该改动可以被接受,欢迎按照贡献教程(tutorial)提交一个CL。