动态修改线程数量:每轮 epoch 从 config 重新读取 workers 值,支持运行时调整并发数
This commit is contained in:
+7
-4
@@ -104,12 +104,15 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
|
||||
queue := []string{entryURL} // 当前轮次的待抓取队列
|
||||
|
||||
for ep := 0; ep < maxEpoch; ep++ {
|
||||
// 每轮 epoch 从 config 读取最新 workers 值,支持运行时动态调整
|
||||
workers := config.CrawlerWorkers()
|
||||
|
||||
// 每轮开始前:拉取 priority URLs,插入队列前端
|
||||
priorityAdded := c.fetchAndApplyPriorityURLs(visited, &queue)
|
||||
if priorityAdded > 0 {
|
||||
log.Printf("[crawler] epoch %d/%d queue=%d (+%d priority)", ep+1, maxEpoch, len(queue), priorityAdded)
|
||||
log.Printf("[crawler] epoch %d/%d queue=%d (+%d priority) workers=%d", ep+1, maxEpoch, len(queue), priorityAdded, workers)
|
||||
} else {
|
||||
log.Printf("[crawler] epoch %d/%d queue=%d", ep+1, maxEpoch, len(queue))
|
||||
log.Printf("[crawler] epoch %d/%d queue=%d workers=%d", ep+1, maxEpoch, len(queue), workers)
|
||||
}
|
||||
// 将本轮所有 URL 标记为已访问(防止下一轮重复入队)
|
||||
for _, u := range queue {
|
||||
@@ -123,8 +126,8 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
|
||||
wg sync.WaitGroup
|
||||
)
|
||||
|
||||
// 信号量:限制同时并发数不超过配置的工作线程数
|
||||
sem := make(chan struct{}, config.CrawlerWorkers())
|
||||
// 信号量:限制同时并发数(使用上方读取的 workers 值)
|
||||
sem := make(chan struct{}, workers)
|
||||
for _, u := range queue {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{} // 获取一个令牌(阻塞直到有空闲槽位)
|
||||
|
||||
Reference in New Issue
Block a user