登录
首页 >  Golang >  Go问答

难以理解数据竞争(data race)

来源:stackoverflow

时间:2024-03-03 16:21:26 106浏览 收藏

在Golang实战开发的过程中,我们经常会遇到一些这样那样的问题,然后要卡好半天,等问题解决了才发现原来一些细节知识点还是没有掌握好。今天golang学习网就整理分享《难以理解数据竞争》,聊聊这个问题,希望可以帮助到正在努力赚钱的你。

问题内容

动机:我有一个巨大的 json 文件,我打算解析它并用它做一些事情。

现在,我确信会有一些库可以执行此操作,但我想自己执行此操作以更好地理解 go 的并发结构。

所以我的目标是使用 scanner 读取文件并将数据泵入 []byte chan,如下所示:

// not the actual code.
    for scanner.scan() {
       input <- []byte(scanner.text())
    }

我需要多个 goroutine 从 input chan 接收数据,对 json 进行反序列化(unmarshal),并返回结果(无论反序列化是否成功),同时显示进度条

// not the actual code.
     for {
        bytes := <- input
        if err := json.unmarshal(bytes); err != nil {
          errorchan <- true
        } else {
           successchan <- true
        }
        progress <-  size_of_byte(bytes)
     }
    
      // now have other go-routine to handle errorchan, successchan and progress thing.

所有这些在纸面上看起来都合乎逻辑,但是当我把代码组装起来(如下所示)之后,却出现了数据竞争。我尽力去弄清这个数据竞争是如何发生的,但仍然无法理解(此前代码中存在的其他一些数据竞争,我已经消除了)。

workers 0xc0000c2000
 completed 0.000000==================
warning: data race
read at 0x00c0000c2048 by goroutine 8:
  mongo_import/race-d.readfile()
      /users/admin/documents/goproject/src/mongo_import/race-d/main.go:197 +0x6ff
  mongo_import/race-d.testreadjson()
      /users/admin/documents/goproject/src/mongo_import/race-d/main_test.go:8 +0x47
  testing.trunner()
      /usr/local/cellar/go/1.13.7/libexec/src/testing/testing.go:909 +0x199

previous write at 0x00c0000c2048 by goroutine 12:
  mongo_import/race-d.(*worker).tracksuccess()
      /users/admin/documents/goproject/src/mongo_import/race-d/main.go:103 +0xc0

goroutine 8 (running) created at:
  testing.(*t).run()
      /usr/local/cellar/go/1.13.7/libexec/src/testing/testing.go:960 +0x651
  testing.runtests.func1()
      /usr/local/cellar/go/1.13.7/libexec/src/testing/testing.go:1202 +0xa6
  testing.trunner()
      /usr/local/cellar/go/1.13.7/libexec/src/testing/testing.go:909 +0x199
  testing.runtests()
      /usr/local/cellar/go/1.13.7/libexec/src/testing/testing.go:1200 +0x521
  testing.(*m).run()
      /usr/local/cellar/go/1.13.7/libexec/src/testing/testing.go:1117 +0x2ff
  main.main()
      _testmain.go:44 +0x223

goroutine 12 (running) created at:
  mongo_import/race-d.(*worker).start()
      /users/admin/documents/goproject/src/mongo_import/race-d/main.go:72 +0x15f
==================
--- fail: testreadjson (1.18s)
    testing.go:853: race detected during execution of test
fail
fail    mongo_import/race-d 1.192s
fail
  • 测试包中的数据竞争对我来说是新事物。

  • 但我无法理解为什么这会导致数据竞争(这对我来说毫无意义)

    previous write at 0x00c0000c2048 by goroutine 12:
    mongo_import/race-d.(*worker).tracksuccess()
        /users/admin/documents/goproject/src/mongo_import/race-d/main.go:103 +0xc0
    
     goroutine 12 (running) created at:
    mongo_import/race-d.(*worker).start()
        /users/admin/documents/goproject/src/mongo_import/race-d/main.go:72 +0x15f

代码:

代码如下

package main
    
    import (
        "bufio"
        "encoding/binary"
        "encoding/json"
        "fmt"
        "log"
        "os"
        "sync"
        "time"
    )
    
    // thread is one JSON-decoding goroutine. It holds a pointer to the worker
    // whose channels it reads from and reports to.
    type thread struct {
        w *worker
    }

    // run receives raw lines from w.input until that channel is closed. For
    // each line it reports on errorchan or successchan whether json.Unmarshal
    // succeeded, then sends the size of the line on w.progress.
    func (thread thread) run() {
        for {
            bytes, ok := <-thread.w.input
            if !ok {
                return
            }

            var data map[string]interface{}
            if err := json.Unmarshal(bytes, &data); err != nil {
                thread.w.errorchan <- true
            } else {
                thread.w.successchan <- true
            }

            thread.w.progress <- int64(binary.Size(bytes))
            // do other thing
            // like insert in db etc.
        }
    }

    // worker owns the channels shared by the decoding threads and the
    // bookkeeping goroutines, together with the counters those goroutines
    // maintain.
    type worker struct {
        errmutex      sync.Mutex     // held by trackerror while it updates errorcount
        succmutex     sync.Mutex     // held by tracksuccess while it updates successcount
        progmutex     sync.Mutex     // held by trackprogress and display around completedbyte
        wg            sync.WaitGroup // counts the goroutines launched by start
        done          bool           // set by trackprogress once progress is closed
        workers       int            // number of threads spawn launches
        filesize      int64          // total input size, used by display
        completedbyte int64          // sum of the values received on progress
        errorcount    int            // values received on errorchan
        successcount  int            // values received on successchan
        input         chan []byte    // raw lines handed to the threads
        progress      chan int64     // per-line byte counts from the threads
        errorchan     chan bool      // one value per line that failed to unmarshal
        successchan   chan bool      // one value per line that unmarshalled
    }

    // newworker returns a worker configured to run count threads. Only the
    // workers field is set; the channels are left nil.
    func newworker(count int) *worker {
        return &worker{workers: count}
    }

    // start launches the display, progress, success, error and spawn
    // goroutines and blocks until each of them has called wg.Done.
    func (w *worker) start() {
        fmt.Printf("workers %p\n", w)
        w.wg.Add(1)
        go w.display()
        w.wg.Add(1)
        go w.trackprogress()
        w.wg.Add(1)
        go w.tracksuccess()
        w.wg.Add(1)
        go w.trackerror()
        w.wg.Add(1)
        go w.spawn()
        w.wg.Wait()
    }

    // trackerror increments errorcount for every value received on errorchan
    // and returns when the channel is closed.
    func (w *worker) trackerror() {
        // Done is called on entry rather than deferred, so start's Wait does
        // not wait for this loop to finish.
        w.wg.Done()
        for {
            _, ok := <-w.errorchan
            if !ok {
                return
            }
            w.errmutex.Lock()
            w.errorcount = w.errorcount + 1
            w.errmutex.Unlock()
        }
    }

    // tracksuccess increments successcount for every value received on
    // successchan and returns when the channel is closed.
    func (w *worker) tracksuccess() {
        defer w.wg.Done()
        for {
            _, ok := <-w.successchan
            if !ok {
                return
            }
            w.succmutex.Lock()
            w.successcount += 1
            w.succmutex.Unlock()
        }
    }

    // spawn starts w.workers threads, waits for all of them to return and then
    // closes the result channels through the deferred clean.
    func (w *worker) spawn() {
        defer w.wg.Done()
        defer w.clean()
        var wg sync.WaitGroup
        for i := 0; i < w.workers; i++ {
            wg.Add(1)
            go func() {
                defer wg.Done()
                thread{w: w}.run()
            }()
        }

        wg.Wait()

    }

    // clean closes the three result channels, which lets trackerror,
    // tracksuccess and trackprogress leave their receive loops.
    func (w *worker) clean() {
        close(w.errorchan)
        close(w.successchan)
        close(w.progress)
    }

    // Done closes the input channel so that the threads stop once the buffered
    // lines are drained. The name is capitalised because the struct already
    // has a field called done, and Go does not allow a field and a method of
    // the same type to share a name.
    func (w *worker) Done() {
        close(w.input)
    }

    // trackprogress adds every value received on progress to completedbyte.
    // When the channel is closed it sets w.done and returns.
    func (w *worker) trackprogress() {
        defer w.wg.Done()
        for {
            read, ok := <-w.progress
            if !ok {
                // NOTE: written without a lock while display polls the same
                // field.
                w.done = true
                return
            }
            w.progmutex.Lock()
            w.completedbyte += read
            w.progmutex.Unlock()

        }
    }

    // display prints the completed percentage, then sleeps five seconds, and
    // repeats until w.done is observed to be true.
    func (w *worker) display() {
        defer w.wg.Done()
        for !w.done {
            w.progmutex.Lock()
            percentage := (float64(w.completedbyte) / float64(w.filesize)) * 100
            w.progmutex.Unlock()
            fmt.Printf("\r completed %f", percentage)
            time.Sleep(5 * time.Second)
        }
    }

    // readfile opens path, feeds it line by line to a two-thread worker and
    // returns the error and success counters under the keys "error" and
    // "success". It terminates the program through log.Fatal if the file
    // cannot be opened, stat'ed or scanned.
    func readfile(path string) map[string]int {
        handler, err := os.Open(path)
        if err != nil {
            log.Fatal(err)
        }
        defer handler.Close()
        worker := &worker{workers: 2}
        worker.input = make(chan []byte, 2)
        worker.progress = make(chan int64, 1)
        worker.errorchan = make(chan bool, 1)
        worker.successchan = make(chan bool, 1)

        if fi, err := handler.Stat(); err != nil {
            log.Fatal(err)
        } else {
            worker.filesize = fi.Size()
        }

        scanner := bufio.NewScanner(handler)
        go worker.start()
        for scanner.Scan() {
            worker.input <- []byte(scanner.Text())
        }

        worker.Done()
        if err := scanner.Err(); err != nil {
            log.Fatal(err)
            return nil
        }

        // NOTE: start runs in its own goroutine and nothing here waits for it.
        // The two counters below are therefore read, without holding errmutex
        // or succmutex, while trackerror and tracksuccess may still be writing
        // them. This is the read the race detector reports.
        return map[string]int{
            "error":   worker.errorcount,
            "success": worker.successcount,
        }
    }

    // main runs readfile on dump.json and discards the result.
    func main() {
        readfile("dump.json")
    }

和测试代码

package main // main_test.go
    
    import (
        "testing"
    )
    
    func testreadjson(t *testing.t) {
        data := readfile("dump2.json")
        if data == nil {
            t.error("we got a nil data")
        }
    }

这里是示例 dump2.json 数据

{"name": "tutorialspoint10"}
{"name":"tutorialspoint2", "age": 15}
{"name":"tutorialspoint3", "age": 25}
{"name":"tutorialspoint4", "age": 28}
{"name":"tutorialspoint5", "age": 40}
{"name": "tutorialspoint6"}
{"name":"tutorialspoint8", "age": 7}
{"name":"tutorialspoint4", "age": 55}
{"name":"tutorialspoint1","age":4}
{"name":"tutorialspoint2"}

最后,我知道这里发布的代码必须是简约的,但我尽力保持代码简约(从原始项目中提取)。我不确定如何(或目前有能力)进一步最小化它。


解决方案


您需要在 main.go:197 行添加读锁

"success": worker.successcount,

正如日志所说:您在读取这个字段的同时,另一个 goroutine 正在写入它。 /Users/admin/Documents/goProject/src/mongo_import/race-d/main.go:197

简短说明:

https://dev.to/wagslane/golang-mutexes-what-is-rwmutex-for-57a0

在这种情况下,使用原子操作(sync/atomic 包)来维护计数器可能会更好。 https://gobyexample.com/atomic-counters

理论要掌握,实操不能落!以上关于《难以理解数据竞争》的详细介绍,大家都掌握了吧!如果想要继续提升自己的能力,那么就来关注golang学习网公众号吧!

声明:本文转载于:stackoverflow 如有侵犯,请联系study_golang@163.com删除
相关阅读
更多>
最新阅读
更多>
课程推荐
更多>