加线程delay

This commit is contained in:
2026-04-13 23:02:10 +08:00
parent ae001b82e6
commit c79192e2ce
2 changed files with 21 additions and 18 deletions
+20 -17
View File
@@ -623,26 +623,29 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
c.updateCrawlStatus(func(cs *CrawlStatus) { c.updateCrawlStatus(func(cs *CrawlStatus) {
cs.IsRunning = false cs.IsRunning = false
}) })
// 空循环等 normalChildCh,新数据到达后立即从 epoch 0 重新开始 // 空循环等 normalChildCh,新数据到达后立即从 epoch 0 重新开始
for { // 加 1000ms 睡眠避免 CPU 空转轮询
select { for {
case gc, ok := <-c.normalChildCh: select {
if !ok { case gc, ok := <-c.normalChildCh:
// channel 已关闭(正常情况或 stop 时关闭) if !ok {
return // channel 已关闭(正常情况或 stop 时关闭)
}
newLinks = append(newLinks, gc)
log.Printf("[crawler] new URLs detected, restarting from epoch 0 (%d in pool)", len(newLinks))
c.updateCrawlStatus(func(cs *CrawlStatus) {
cs.IsRunning = true
})
ep = -1 // continue 后 ep++ 变成 0
goto restartEpochLoop // 退出空循环,进入正常队列处理
case <-c.stopCh:
// 收到停止信号
return return
} }
newLinks = append(newLinks, gc)
log.Printf("[crawler] new URLs detected, restarting from epoch 0 (%d in pool)", len(newLinks))
c.updateCrawlStatus(func(cs *CrawlStatus) {
cs.IsRunning = true
})
ep = -1 // continue 后 ep++ 变成 0
goto restartEpochLoop // 退出空循环,进入正常队列处理
case <-c.stopCh:
// 收到停止信号
return
case <-time.After(1000 * time.Millisecond):
// 每 1000ms 检查一次,降低 CPU 占用
} }
}
restartEpochLoop: restartEpochLoop:
} }