From c79192e2ce06066b7439127e3b03fc09a5954091 Mon Sep 17 00:00:00 2001 From: kevin Date: Mon, 13 Apr 2026 23:02:10 +0800 Subject: [PATCH] =?UTF-8?q?=E5=8A=A0=E7=BA=BF=E7=A8=8Bdelay?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- crawler/crawler.go | 37 ++++++++++++++++++++----------------- sese-engine-ui | 2 +- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/crawler/crawler.go b/crawler/crawler.go index 782c389..16f588e 100644 --- a/crawler/crawler.go +++ b/crawler/crawler.go @@ -623,26 +623,29 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) { c.updateCrawlStatus(func(cs *CrawlStatus) { cs.IsRunning = false }) - // 空循环等 normalChildCh,新数据到达后立即从 epoch 0 重新开始 - for { - select { - case gc, ok := <-c.normalChildCh: - if !ok { - // channel 已关闭(正常情况或 stop 时关闭) - return - } - newLinks = append(newLinks, gc) - log.Printf("[crawler] new URLs detected, restarting from epoch 0 (%d in pool)", len(newLinks)) - c.updateCrawlStatus(func(cs *CrawlStatus) { - cs.IsRunning = true - }) - ep = -1 // continue 后 ep++ 变成 0 - goto restartEpochLoop // 退出空循环,进入正常队列处理 - case <-c.stopCh: - // 收到停止信号 + // 空循环等 normalChildCh,新数据到达后立即从 epoch 0 重新开始 + // 加 1000ms 睡眠避免 CPU 空转轮询 + for { + select { + case gc, ok := <-c.normalChildCh: + if !ok { + // channel 已关闭(正常情况或 stop 时关闭) return } + newLinks = append(newLinks, gc) + log.Printf("[crawler] new URLs detected, restarting from epoch 0 (%d in pool)", len(newLinks)) + c.updateCrawlStatus(func(cs *CrawlStatus) { + cs.IsRunning = true + }) + ep = -1 // continue 后 ep++ 变成 0 + goto restartEpochLoop // 退出空循环,进入正常队列处理 + case <-c.stopCh: + // 收到停止信号 + return + case <-time.After(1000 * time.Millisecond): + // 每 1000ms 检查一次,降低 CPU 占用 } + } restartEpochLoop: } diff --git a/sese-engine-ui b/sese-engine-ui index 52c1b9d..60e897f 160000 --- a/sese-engine-ui +++ b/sese-engine-ui @@ -1 +1 @@ -Subproject commit 52c1b9de996d12e63bc2774f502dad3cec08bc6c +Subproject commit 60e897f110866903ab30d74eb16d8e61214aea47