Go语言 如何使用bufio.ScanWords

cdmah0mi  于 2022-12-31  发布在  Go
关注(0)|答案(4)|浏览(172)

如何使用bufio.ScanWords和bufio.ScanLines函数来计算单词数和行数?
我试过:

// Calls the split function directly instead of through a bufio.Scanner.
// Println prints its three results in order: advance, token, err.
fmt.Println(bufio.ScanWords([]byte("Good day everyone"), false))

输出:

5 [103 111 111 100] <nil>

不知道那是什么意思?

eni9jsuy

eni9jsuy1#

要计算单词数:

// Sample text to tokenize; the scanner reads it through a strings.Reader.
input := "Spicy jalapeno pastrami ut ham turducken.\n Lorem sed ullamco, leberkas sint short loin strip steak ut shoulder shankle porchetta venison prosciutto turducken swine.\n Deserunt kevin frankfurter tongue aliqua incididunt tri-tip shank nostrud.\n"
scanner := bufio.NewScanner(strings.NewReader(input))
// Replace the scanner's split function so that each token is one word.
scanner.Split(bufio.ScanWords)
// Every successful Scan yields one token, so counting iterations counts words.
count := 0
for scanner.Scan() {
    count++
}
// The loop ends when Scan returns false; report any error recorded by the scanner.
if err := scanner.Err(); err != nil {
    fmt.Fprintln(os.Stderr, "reading input:", err)
}
fmt.Printf("%d\n", count)

要计算行数:

// Sample text to tokenize; the scanner reads it through a strings.Reader.
input := "Spicy jalapeno pastrami ut ham turducken.\n Lorem sed ullamco, leberkas sint short loin strip steak ut shoulder shankle porchetta venison prosciutto turducken swine.\n Deserunt kevin frankfurter tongue aliqua incididunt tri-tip shank nostrud.\n"

scanner := bufio.NewScanner(strings.NewReader(input))
// Set the split function explicitly so that each token is one line.
scanner.Split(bufio.ScanLines)
// Every successful Scan yields one token, so counting iterations counts lines.
count := 0
for scanner.Scan() {
    count++
}
// The loop ends when Scan returns false; report any error recorded by the scanner.
if err := scanner.Err(); err != nil {
    fmt.Fprintln(os.Stderr, "reading input:", err)
}
fmt.Printf("%d\n", count)
6l7fqoea

6l7fqoea2#

这是《Go程序设计语言》(The Go Programming Language)一书中的练习7.1。
这是@repler解决方案的扩展:

package main

import (
    "bufio"
    "fmt"
    "os"
    "strings"
)

type byteCounter int
type wordCounter int
type lineCounter int

func main() {
    var c byteCounter
    c.Write([]byte("Hello This is a line"))
    fmt.Println("Byte Counter ", c)

    var w wordCounter
    w.Write([]byte("Hello This is a line"))
    fmt.Println("Word Counter ", w)

    var l lineCounter
    l.Write([]byte("Hello \nThis \n is \na line\n.\n.\n"))
    fmt.Println("Length ", l)

}

func (c *byteCounter) Write(p []byte) (int, error) {
    *c += byteCounter(len(p))
    return len(p), nil
}

func (w *wordCounter) Write(p []byte) (int, error) {
    count := retCount(p, bufio.ScanWords)
    *w += wordCounter(count)
    return count, nil
}

func (l *lineCounter) Write(p []byte) (int, error) {
    count := retCount(p, bufio.ScanLines)
    *l += lineCounter(count)
    return count, nil
}

func retCount(p []byte, fn bufio.SplitFunc) (count int) {
    s := string(p)
    scanner := bufio.NewScanner(strings.NewReader(s))
    scanner.Split(fn)
    count = 0
    for scanner.Scan() {
        count++
    }
    if err := scanner.Err(); err != nil {
        fmt.Fprintln(os.Stderr, "reading input:", err)
    }
    return
}
nzrxty8p

nzrxty8p3#

这是《Go程序设计语言》(The Go Programming Language)一书中的练习7.1。
这是我的解决方案:

package main

import (
    "bufio"
    "fmt"
)

// WordCounter count words
type WordCounter int

// LineCounter count Lines
type LineCounter int

type scanFunc func(p []byte, EOF bool) (advance int, token []byte, err error)

func scanBytes(p []byte, fn scanFunc) (cnt int) {
    for true {
        advance, token, _ := fn(p, true)
        if len(token) == 0 {
            break
        }
        p = p[advance:]
        cnt++
    }
    return cnt
}

func (c *WordCounter) Write(p []byte) (int, error) {
    cnt := scanBytes(p, bufio.ScanWords)
    *c += WordCounter(cnt)
    return cnt, nil
}

func (c WordCounter) String() string {
    return fmt.Sprintf("contains %d words", c)
}

func (c *LineCounter) Write(p []byte) (int, error) {
    cnt := scanBytes(p, bufio.ScanLines)
    *c += LineCounter(cnt)
    return cnt, nil
}

func (c LineCounter) String() string {
    return fmt.Sprintf("contains %d lines", c)
}

func main() {
    var c WordCounter
    fmt.Println(c)

    fmt.Fprintf(&c, "This is an sentence.")
    fmt.Println(c)

    c = 0
    fmt.Fprintf(&c, "This")
    fmt.Println(c)

    var l LineCounter
    fmt.Println(l)

    fmt.Fprintf(&l, `This is another
line`)
    fmt.Println(l)

    l = 0
    fmt.Fprintf(&l, "This is another\nline")
    fmt.Println(l)

    fmt.Fprintf(&l, "This is one line")
    fmt.Println(l)
}
8hhllhi2

8hhllhi24#

bufio.ScanWords和bufio.ScanLines(以及bufio.ScanBytes和bufio.ScanRunes)是*拆分函数*(split function):它们为bufio.Scanner提供了将其输入数据标记化的策略,即扫描过程应如何拆分数据。bufio.Scanner的拆分函数默认为bufio.ScanLines,但可以通过bufio.Scanner.Split方法进行更改。
这些拆分函数的类型为SplitFunc:

// SplitFunc is the signature of a split function: given the data not yet
// processed and whether EOF has been reached, it returns how many bytes the
// caller must advance, the token produced, and an error.
type SplitFunc func(data []byte, atEOF bool) (advance int, token []byte, err error)

通常,您不需要直接调用这些函数中的任何一个;但是,您可能需要创建自己的split函数来实现自定义标记化策略,因此,让我们看看它的参数:

  • data:尚未处理的剩余数据。
  • atEOF:调用方是否已经到达EOF,因此在下一次调用中没有更多的新数据要提供。
  • advance:调用方在下一次调用之前必须将输入数据向前推进的字节数。
  • token:作为执行拆分的结果返回给调用者的令牌。

为了进一步理解,让我们来看一下bufio.ScanBytes的实现:

// ScanBytes is a split function that returns each byte of the input as a token.
func ScanBytes(data []byte, atEOF bool) (advance int, token []byte, err error) {
    // At EOF with no data left there is nothing to return and nothing to skip.
    if atEOF && len(data) == 0 {
        return 0, nil, nil
    }
    // Otherwise the token is the first byte, and the caller advances by one byte.
    return 1, data[0:1], nil
}

只要data不为空,它就向调用者返回一个令牌字节(data[0:1]),并告诉调用者将输入数据向前推进一个字节。

相关问题