可修改线程
This commit is contained in:
@@ -54,6 +54,23 @@ type Crawler struct {
|
||||
circuitState int32 // circuitClosed | circuitOpen | circuitHalfOpen
|
||||
circuitFailures int32 // 连续失败计数(atomic)
|
||||
circuitExpiry int64 // 熔断/半开截止 Unix 时间戳(秒)
|
||||
|
||||
// 运行时活跃线程计数(atomic,每轮 epoch 自动归零前重新开始计数)
|
||||
activeWorkers int64
|
||||
}
|
||||
|
||||
// 全局活跃线程计数器(跨包可读,无需持有 Crawler 引用)
|
||||
var globalActiveWorkers int64
|
||||
|
||||
// ActiveWorkers 返回当前正在运行的爬虫 goroutine 数量。
|
||||
// 也可通过包级函数 GlobalActiveWorkers() 读取(供 search 等外部包使用)。
|
||||
func (c *Crawler) ActiveWorkers() int64 {
|
||||
return atomic.LoadInt64(&c.activeWorkers)
|
||||
}
|
||||
|
||||
// GlobalActiveWorkers 返回当前全局活跃爬虫 goroutine 数量(包级,外部包可直接调用)。
|
||||
func GlobalActiveWorkers() int64 {
|
||||
return atomic.LoadInt64(&globalActiveWorkers)
|
||||
}
|
||||
|
||||
// New 创建一个 Crawler 实例。
|
||||
@@ -131,9 +148,13 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
|
||||
for _, u := range queue {
|
||||
wg.Add(1)
|
||||
sem <- struct{}{} // 获取一个令牌(阻塞直到有空闲槽位)
|
||||
atomic.AddInt64(&c.activeWorkers, 1)
|
||||
atomic.AddInt64(&globalActiveWorkers, 1)
|
||||
go func(rawURL string) {
|
||||
defer wg.Done()
|
||||
defer func() { <-sem }() // 释放令牌
|
||||
defer atomic.AddInt64(&c.activeWorkers, -1)
|
||||
defer atomic.AddInt64(&globalActiveWorkers, -1)
|
||||
|
||||
// 抓取单个 URL,返回发现的子链接
|
||||
hrefs := c.visitURL(rawURL)
|
||||
|
||||
Vendored
+2
File diff suppressed because one or more lines are too long
Vendored
-2
File diff suppressed because one or more lines are too long
Vendored
-6
File diff suppressed because one or more lines are too long
Vendored
-6
File diff suppressed because one or more lines are too long
Vendored
-2
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Vendored
-6
File diff suppressed because one or more lines are too long
Vendored
-6
File diff suppressed because one or more lines are too long
Vendored
-6
File diff suppressed because one or more lines are too long
Vendored
-2
File diff suppressed because one or more lines are too long
Vendored
-6
File diff suppressed because one or more lines are too long
Vendored
+2
-2
@@ -5,8 +5,8 @@
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>SESE 爬取管理</title>
|
||||
<script type="module" crossorigin src="/assets/index-BomiJv32.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-Bj4UMEhQ.css">
|
||||
<script type="module" crossorigin src="/assets/index-DXbMW8tX.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-Bbt6GbEM.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
|
||||
+6
-2
@@ -23,6 +23,7 @@ import (
|
||||
|
||||
"sese-engine/analyzer" // 分词和语言检测
|
||||
"sese-engine/config" // 排序权重配置
|
||||
"sese-engine/crawler" // 爬虫(读取活跃线程数)
|
||||
"sese-engine/info" // info 服务
|
||||
"sese-engine/parser" // HTML 解析(在线摘要)
|
||||
"sese-engine/storage" // 持久化存储
|
||||
@@ -410,7 +411,7 @@ func (s *Server) handleAdminPending(w http.ResponseWriter, r *http.Request) {
|
||||
}
|
||||
|
||||
// handleAdminWorkers 查看和动态调整爬虫并发线程数。
|
||||
// GET 返回当前 workers 值
|
||||
// GET 返回 configured(设定值)和 active(实际运行中的 goroutine 数)
|
||||
// POST {"workers": N} 动态修改(范围 1~500),下一轮 epoch 立即生效
|
||||
func (s *Server) handleAdminWorkers(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Access-Control-Allow-Origin", "*")
|
||||
@@ -425,7 +426,10 @@ func (s *Server) handleAdminWorkers(w http.ResponseWriter, r *http.Request) {
|
||||
return
|
||||
|
||||
case http.MethodGet:
|
||||
json.NewEncoder(w).Encode(map[string]int{"workers": config.CrawlerWorkers()})
|
||||
json.NewEncoder(w).Encode(map[string]int64{
|
||||
"configured": int64(config.CrawlerWorkers()),
|
||||
"active": crawler.GlobalActiveWorkers(),
|
||||
})
|
||||
|
||||
case http.MethodPost:
|
||||
var body struct {
|
||||
|
||||
Reference in New Issue
Block a user