加线程delay
This commit is contained in:
+20
-17
@@ -623,26 +623,29 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
|
|||||||
c.updateCrawlStatus(func(cs *CrawlStatus) {
|
c.updateCrawlStatus(func(cs *CrawlStatus) {
|
||||||
cs.IsRunning = false
|
cs.IsRunning = false
|
||||||
})
|
})
|
||||||
// 空循环等 normalChildCh,新数据到达后立即从 epoch 0 重新开始
|
// 空循环等 normalChildCh,新数据到达后立即从 epoch 0 重新开始
|
||||||
for {
|
// 加 1000ms 睡眠避免 CPU 空转轮询
|
||||||
select {
|
for {
|
||||||
case gc, ok := <-c.normalChildCh:
|
select {
|
||||||
if !ok {
|
case gc, ok := <-c.normalChildCh:
|
||||||
// channel 已关闭(正常情况或 stop 时关闭)
|
if !ok {
|
||||||
return
|
// channel 已关闭(正常情况或 stop 时关闭)
|
||||||
}
|
|
||||||
newLinks = append(newLinks, gc)
|
|
||||||
log.Printf("[crawler] new URLs detected, restarting from epoch 0 (%d in pool)", len(newLinks))
|
|
||||||
c.updateCrawlStatus(func(cs *CrawlStatus) {
|
|
||||||
cs.IsRunning = true
|
|
||||||
})
|
|
||||||
ep = -1 // continue 后 ep++ 变成 0
|
|
||||||
goto restartEpochLoop // 退出空循环,进入正常队列处理
|
|
||||||
case <-c.stopCh:
|
|
||||||
// 收到停止信号
|
|
||||||
return
|
return
|
||||||
}
|
}
|
||||||
|
newLinks = append(newLinks, gc)
|
||||||
|
log.Printf("[crawler] new URLs detected, restarting from epoch 0 (%d in pool)", len(newLinks))
|
||||||
|
c.updateCrawlStatus(func(cs *CrawlStatus) {
|
||||||
|
cs.IsRunning = true
|
||||||
|
})
|
||||||
|
ep = -1 // continue 后 ep++ 变成 0
|
||||||
|
goto restartEpochLoop // 退出空循环,进入正常队列处理
|
||||||
|
case <-c.stopCh:
|
||||||
|
// 收到停止信号
|
||||||
|
return
|
||||||
|
case <-time.After(1000 * time.Millisecond):
|
||||||
|
// 每 1000ms 检查一次,降低 CPU 占用
|
||||||
}
|
}
|
||||||
|
}
|
||||||
restartEpochLoop:
|
restartEpochLoop:
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
+1
-1
Submodule sese-engine-ui updated: 52c1b9de99...60e897f110
Reference in New Issue
Block a user