动态修改线程数量

This commit is contained in:
2026-04-09 12:52:33 +08:00
parent ab9c005e3d
commit 2e5876004b
3 changed files with 70 additions and 4 deletions
+7 -4
View File
@@ -104,12 +104,15 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
queue := []string{entryURL} // 当前轮次的待抓取队列
for ep := 0; ep < maxEpoch; ep++ {
// 每轮 epoch 从 config 读取最新 workers 值,支持运行时动态调整
workers := config.CrawlerWorkers()
// 每轮开始前:拉取 priority URLs,插入队列前端
priorityAdded := c.fetchAndApplyPriorityURLs(visited, &queue)
if priorityAdded > 0 {
log.Printf("[crawler] epoch %d/%d queue=%d (+%d priority)", ep+1, maxEpoch, len(queue), priorityAdded)
log.Printf("[crawler] epoch %d/%d queue=%d (+%d priority) workers=%d", ep+1, maxEpoch, len(queue), priorityAdded, workers)
} else {
log.Printf("[crawler] epoch %d/%d queue=%d", ep+1, maxEpoch, len(queue))
log.Printf("[crawler] epoch %d/%d queue=%d workers=%d", ep+1, maxEpoch, len(queue), workers)
}
// 将本轮所有 URL 标记为已访问(防止下一轮重复入队)
for _, u := range queue {
@@ -123,8 +126,8 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
wg sync.WaitGroup
)
// 信号量:限制同时并发数不超过配置的工作线程数
sem := make(chan struct{}, config.CrawlerWorkers())
// 信号量:限制同时并发数(使用上方读取的 workers 值)
sem := make(chan struct{}, workers)
for _, u := range queue {
wg.Add(1)
sem <- struct{}{} // 获取一个令牌(阻塞直到有空闲槽位)