可修改线程

This commit is contained in:
2026-04-09 13:16:12 +08:00
parent 2e5876004b
commit b59c0f6763
14 changed files with 32 additions and 47 deletions
+21
View File
@@ -54,6 +54,23 @@ type Crawler struct {
circuitState int32 // circuitClosed | circuitOpen | circuitHalfOpen
circuitFailures int32 // 连续失败计数(atomic)
circuitExpiry int64 // 熔断/半开截止 Unix 时间戳(秒)
// 运行时活跃线程计数(atomic;每轮 epoch 结束时自动归零,下一轮重新开始计数)
activeWorkers int64
}
// globalActiveWorkers is the package-wide count of running crawl goroutines.
// It is updated with atomic adds alongside each Crawler's own counter and is
// exposed to other packages through GlobalActiveWorkers, so readers do not
// need to hold a Crawler reference.
var globalActiveWorkers int64
// ActiveWorkers reports how many crawl goroutines belonging to this Crawler
// are running right now.
//
// The counter is read with an atomic load, matching the atomic adds that
// maintain it. External packages that have no Crawler at hand can use the
// package-level GlobalActiveWorkers instead.
func (c *Crawler) ActiveWorkers() int64 {
	running := atomic.LoadInt64(&c.activeWorkers)
	return running
}
// GlobalActiveWorkers reports the number of crawl goroutines currently running
// across all Crawler instances in this package.
//
// It is a package-level function, so callers in other packages can query it
// directly without holding a Crawler reference. The value is read atomically.
func GlobalActiveWorkers() int64 {
	total := atomic.LoadInt64(&globalActiveWorkers)
	return total
}
// New 创建一个 Crawler 实例。
@@ -131,9 +148,13 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
for _, u := range queue {
wg.Add(1)
sem <- struct{}{} // 获取一个令牌(阻塞直到有空闲槽位)
atomic.AddInt64(&c.activeWorkers, 1)
atomic.AddInt64(&globalActiveWorkers, 1)
go func(rawURL string) {
defer wg.Done()
defer func() { <-sem }() // 释放令牌
defer atomic.AddInt64(&c.activeWorkers, -1)
defer atomic.AddInt64(&globalActiveWorkers, -1)
// 抓取单个 URL,返回发现的子链接
hrefs := c.visitURL(rawURL)