可修改线程
This commit is contained in:
@@ -54,6 +54,23 @@ type Crawler struct {
|
||||
circuitState int32 // circuitClosed | circuitOpen | circuitHalfOpen
|
||||
circuitFailures int32 // 连续失败计数(atomic)
|
||||
circuitExpiry int64 // 熔断/半开截止 Unix 时间戳(秒)
|
||||
|
||||
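// activeWorkers is the number of crawl goroutines currently in flight; access
// it only through sync/atomic. Run adds 1 before launching each worker and the
// worker subtracts 1 on exit, so the count drops back to zero once all of an
// epoch's workers have finished.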
|
||||
activeWorkers int64
|
||||
}
|
||||
|
||||
// globalActiveWorkers is the package-wide count of in-flight crawl goroutines.
// It is unexported, so other packages read it through GlobalActiveWorkers,
// which needs no Crawler reference. Access it only through sync/atomic.
|
||||
var globalActiveWorkers int64
|
||||
|
||||
// ActiveWorkers 返回当前正在运行的爬虫 goroutine 数量。
|
||||
// 也可通过包级函数 GlobalActiveWorkers() 读取(供 search 等外部包使用)。
|
||||
func (c *Crawler) ActiveWorkers() int64 {
|
||||
return atomic.LoadInt64(&c.activeWorkers)
|
||||
}
|
||||
|
||||
// GlobalActiveWorkers 返回当前全局活跃爬虫 goroutine 数量(包级,外部包可直接调用)。
|
||||
func GlobalActiveWorkers() int64 {
|
||||
return atomic.LoadInt64(&globalActiveWorkers)
|
||||
}
|
||||
|
||||
// New 创建一个 Crawler 实例。
|
||||
@@ -131,9 +148,13 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
|
||||
for _, u := range queue {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{} // 获取一个令牌(阻塞直到有空闲槽位)
|
||||
atomic.AddInt64(&c.activeWorkers, 1)
|
||||
atomic.AddInt64(&globalActiveWorkers, 1)
|
||||
go func(rawURL string) {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }() // 释放令牌
|
||||
defer atomic.AddInt64(&c.activeWorkers, -1)
|
||||
defer atomic.AddInt64(&globalActiveWorkers, -1)
|
||||
|
||||
// 抓取单个 URL,返回发现的子链接
|
||||
hrefs := c.visitURL(rawURL)
|
||||
|
||||
Reference in New Issue
Block a user