go net/textproto: DotReader/DotWriter 非常慢

4bbkushb  于 5个月前  发布在  Go
关注(0)|答案(4)|浏览(68)

我没想到我的程序的瓶颈竟然来自标准库,但这似乎是一个例外。
net/textproto库中的DotReader/DotWriter在每个输入/输出字节上使用bufio.ReadByte/WriteByte,这比相同数据量的bufio.Read/Write函数慢300多倍。这对使用这些接口的应用程序的数据吞吐量产生了重大影响。

package my_test

import (
	"bufio"
	"io"
	"net/textproto"
	"testing"
)

// FillReader is an endless io.Reader whose output consists solely of the
// byte value it holds.
type FillReader byte

// Read fills all of b with the reader's byte and reports len(b) together
// with a nil error; it never signals end of input. An empty b yields (0, nil).
func (r FillReader) Read(b []byte) (n int, err error) {
	if len(b) == 0 {
		return 0, nil
	}
	// Seed the first byte, then repeatedly copy the already-filled prefix
	// onto the remainder; the filled region doubles on every pass.
	b[0] = byte(r)
	for filled := 1; filled < len(b); filled <<= 1 {
		copy(b[filled:], b[:filled])
	}
	return len(b), nil
}

// BenchmarkIONull is the baseline measurement: each iteration streams 4 MiB
// of '.' bytes from a FillReader straight into io.Discard, with no buffering
// layer in between.
func BenchmarkIONull(b *testing.B) {
	const payload int64 = 4 << 20 // bytes copied per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		src := io.LimitReader(FillReader('.'), payload)
		io.Copy(io.Discard, src)
	}
}

// BenchmarkIOBufioWrite measures bulk copying of 4 MiB of '.' bytes per
// iteration into a bufio.Writer that wraps io.Discard. It is the bulk-write
// counterpart to BenchmarkIOBufioWriteByte.
func BenchmarkIOBufioWrite(b *testing.B) {
	size := int64(1024 * 1024 * 4)
	b.SetBytes(size)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r := io.LimitReader(FillReader('.'), size)
		// Route the copy through a bufio.Writer; without it this benchmark
		// would be identical to BenchmarkIONull.
		w := bufio.NewWriter(io.Discard)
		if _, err := io.Copy(w, r); err != nil {
			b.Fatal(err)
		}
		// Flush so any bytes still buffered are delivered and a write error
		// recorded by the writer is reported.
		if err := w.Flush(); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkIOBufioWriteByte measures writing 4 MiB per iteration to a
// bufio.Writer wrapping io.Discard, one WriteByte call per byte.
func BenchmarkIOBufioWriteByte(b *testing.B) {
	size := int64(1024 * 1024 * 4)
	b.SetBytes(size)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		w := bufio.NewWriter(io.Discard)
		for j := int64(0); j < size; j++ {
			// The per-byte error is not checked here: bufio.Writer keeps the
			// first write error and returns it from Flush below.
			w.WriteByte(0)
		}
		// Flush so the final buffered bytes reach the underlying writer and
		// any earlier write error is surfaced instead of silently dropped.
		if err := w.Flush(); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkIODotWriter measures copying 4 MiB of '.' bytes per iteration
// through a textproto DotWriter layered on a bufio.Writer that wraps
// io.Discard.
func BenchmarkIODotWriter(b *testing.B) {
	size := int64(1024 * 1024 * 4)
	b.SetBytes(size)
	b.ResetTimer()
	for i := 0; i < b.N; i++ {
		r := io.LimitReader(FillReader('.'), size)
		w := textproto.NewWriter(bufio.NewWriter(io.Discard)).DotWriter()
		if _, err := io.Copy(w, r); err != nil {
			b.Fatal(err)
		}
		// A DotWriter must be closed to finish the dot-encoded body; Close
		// also reports any error from the underlying buffered writer.
		if err := w.Close(); err != nil {
			b.Fatal(err)
		}
	}
}

// BenchmarkIOBufioRead measures bulk reads: each iteration drains 4 MiB of
// '.' bytes from a bufio.Reader into io.Discard via io.Copy.
func BenchmarkIOBufioRead(b *testing.B) {
	const payload int64 = 4 << 20 // bytes read per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		limited := io.LimitReader(FillReader('.'), payload)
		io.Copy(io.Discard, bufio.NewReader(limited))
	}
}

// BenchmarkIOBufioReadByte measures per-byte reads: each iteration pulls
// 4 MiB out of a bufio.Reader with one ReadByte call per byte. The returned
// bytes and errors are discarded.
func BenchmarkIOBufioReadByte(b *testing.B) {
	const payload int64 = 4 << 20 // bytes read per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		br := bufio.NewReader(io.LimitReader(FillReader('.'), payload))
		for remaining := payload; remaining > 0; remaining-- {
			br.ReadByte()
		}
	}
}

// BenchmarkIODotReader measures draining a textproto DotReader into
// io.Discard. The input is 4 MiB of '.' bytes with no line breaks, fed
// through a bufio.Reader; the result of io.Copy is discarded.
func BenchmarkIODotReader(b *testing.B) {
	const payload int64 = 4 << 20 // input bytes per iteration (4 MiB)
	b.SetBytes(payload)
	b.ResetTimer()
	for iter := 0; iter < b.N; iter++ {
		limited := io.LimitReader(FillReader('.'), payload)
		tp := textproto.NewReader(bufio.NewReader(limited))
		io.Copy(io.Discard, tp.DotReader())
	}
}
❯ go test -cpuprofile cpu.prof -memprofile mem.prof -benchmem -bench IO
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIONull-12                         13660             91626 ns/op        45776.23 MB/s         24 B/op          1 allocs/op
BenchmarkIOBufioWrite-12                   19095             55199 ns/op        75984.94 MB/s         24 B/op          1 allocs/op
BenchmarkIOBufioWriteByte-12                  99          10311620 ns/op         406.76 MB/s        4178 B/op          1 allocs/op
BenchmarkIODotWriter-12                       67          16815748 ns/op         249.43 MB/s       36984 B/op          6 allocs/op
BenchmarkIOBufioRead-12                    20808             58743 ns/op        71401.44 MB/s       4223 B/op          3 allocs/op
BenchmarkIOBufioReadByte-12                   99          10507806 ns/op         399.16 MB/s        4202 B/op          2 allocs/op
BenchmarkIODotReader-12                       60          19456187 ns/op         215.58 MB/s        4305 B/op          5 allocs/op
PASS
ok      mytest  11.788s
9bfwbjaz

9bfwbjaz1#

你能运行go tool pprof并上传性能分析数据吗?谢谢~

r9f1avp5

r9f1avp52#

❯ go test -cpuprofile cpu.prof -memprofile mem.prof -benchmem -bench IO
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIONull-12 13660 91626 ns/op 45776.23 MB/s 24 B/op 1 allocs/op
BenchmarkIOBufioWrite-12 19095 55199 ns/op 75984.94 MB/s 24 B/op 1 allocs/op
BenchmarkIOBufioWriteByte-12 99 10311620 ns/op 406.76 MB/s 4178 B/op 1 allocs/op
BenchmarkIODotWriter-12 67 16815748 ns/op 249.43 MB/s 36984 B/op 6 allocs/op
BenchmarkIOBufioRead-12 20808 58743 ns/op 71401.44 MB/s 4223 B/op 3 allocs/op
BenchmarkIOBufioReadByte-12 99 10507806 ns/op 399.16 MB/s 4202 B/op 2 allocs/op
BenchmarkIODotReader-12 60 19456187 ns/op 215.58 MB/s 4305 B/op 5 allocs/op
PASS
ok mytest 11.788s


[all_perf.zip](https://github.com/golang/go/files/9712743/all_perf.zip)

❯ go test -cpuprofile dotreader_cpu.prof -memprofile dotreader_mem.prof -benchmem -bench DotReader
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIODotReader-12 56 20478373 ns/op 204.82 MB/s 4770 B/op 5 allocs/op
PASS
ok mytest 1.306s


[dotreader_perf.zip](https://github.com/golang/go/files/9712746/dotreader_perf.zip)

❯ go test -cpuprofile dotwriter_cpu.prof -memprofile dotwriter_mem.prof -benchmem -bench DotWriter
goos: windows
goarch: amd64
pkg: mytest
cpu: Intel(R) Core(TM) i7-8086K CPU @ 4.00GHz
BenchmarkIODotWriter-12 75 17587541 ns/op 238.48 MB/s 37218 B/op 6 allocs/op
PASS
ok mytest 1.511s


[dotwriter_perf.zip](https://github.com/golang/go/files/9712751/dotwriter_perf.zip)
q9rjltbz

q9rjltbz3#

我在这里优化了这些例程:https://github.com/go-textproto/textproto

BenchmarkDotReader/Legacy-12                  62          21610894 ns/op         194.08 MB/s
BenchmarkDotReader/Optimized-12             1486            821764 ns/op        5104.02 MB/s
BenchmarkDotWriter/Legacy-12                  56          19386652 ns/op         216.35 MB/s
BenchmarkDotWriter/Optimized-12             1694            700181 ns/op        5990.31 MB/s

但是我不确定它是否适合提交给Golang的标准库。我使用了一种技巧来“倒回”从bufio.Reader读取的任意字节,并且我不确定如何更安全地做到这一点。
然而,如果我们使用io.ReaderFrom/io.WriterTo接口,我可以编写一个更安全、更优化的版本。
不过,无论如何,这表明标准库中的实现在性能改进方面有很大的空间。

t98cgbkg

t98cgbkg4#

我们鼓励开发者为Go语言提交高质量的CL(PR)。如果你找到了改进标准库的方法,并且该方案可以被接受,请随时按照贡献教程(tutorial)提交一个CL。

相关问题