可修改线程

This commit is contained in:
2026-04-09 13:16:12 +08:00
parent 2e5876004b
commit b59c0f6763
14 changed files with 32 additions and 47 deletions
+21
View File
@@ -54,6 +54,23 @@ type Crawler struct {
circuitState int32 // circuitClosed | circuitOpen | circuitHalfOpen circuitState int32 // circuitClosed | circuitOpen | circuitHalfOpen
circuitFailures int32 // 连续失败计数(atomic circuitFailures int32 // 连续失败计数(atomic
circuitExpiry int64 // 熔断/半开截止 Unix 时间戳(秒) circuitExpiry int64 // 熔断/半开截止 Unix 时间戳(秒)
// 运行时活跃线程计数(atomic,每轮 epoch 自动归零前重新开始计数)
activeWorkers int64
}
// 全局活跃线程计数器(跨包可读,无需持有 Crawler 引用)
var globalActiveWorkers int64
// ActiveWorkers 返回当前正在运行的爬虫 goroutine 数量。
// 也可通过包级函数 GlobalActiveWorkers() 读取(供 search 等外部包使用)。
func (c *Crawler) ActiveWorkers() int64 {
return atomic.LoadInt64(&c.activeWorkers)
}
// GlobalActiveWorkers 返回当前全局活跃爬虫 goroutine 数量(包级,外部包可直接调用)。
func GlobalActiveWorkers() int64 {
return atomic.LoadInt64(&globalActiveWorkers)
} }
// New 创建一个 Crawler 实例。 // New 创建一个 Crawler 实例。
@@ -131,9 +148,13 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
for _, u := range queue { for _, u := range queue {
wg.Add(1) wg.Add(1)
sem <- struct{}{} // 获取一个令牌(阻塞直到有空闲槽位) sem <- struct{}{} // 获取一个令牌(阻塞直到有空闲槽位)
atomic.AddInt64(&c.activeWorkers, 1)
atomic.AddInt64(&globalActiveWorkers, 1)
go func(rawURL string) { go func(rawURL string) {
defer wg.Done() defer wg.Done()
defer func() { <-sem }() // 释放令牌 defer func() { <-sem }() // 释放令牌
defer atomic.AddInt64(&c.activeWorkers, -1)
defer atomic.AddInt64(&globalActiveWorkers, -1)
// 抓取单个 URL,返回发现的子链接 // 抓取单个 URL,返回发现的子链接
hrefs := c.visitURL(rawURL) hrefs := c.visitURL(rawURL)
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
-6
View File
File diff suppressed because one or more lines are too long
-6
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
-6
View File
File diff suppressed because one or more lines are too long
-6
View File
File diff suppressed because one or more lines are too long
-6
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
-6
View File
File diff suppressed because one or more lines are too long
+2 -2
View File
@@ -5,8 +5,8 @@
<link rel="icon" type="image/svg+xml" href="/vite.svg" /> <link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>SESE 爬取管理</title> <title>SESE 爬取管理</title>
<script type="module" crossorigin src="/assets/index-BomiJv32.js"></script> <script type="module" crossorigin src="/assets/index-DXbMW8tX.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-Bj4UMEhQ.css"> <link rel="stylesheet" crossorigin href="/assets/index-Bbt6GbEM.css">
</head> </head>
<body> <body>
<div id="app"></div> <div id="app"></div>
+6 -2
View File
@@ -23,6 +23,7 @@ import (
"sese-engine/analyzer" // 分词和语言检测 "sese-engine/analyzer" // 分词和语言检测
"sese-engine/config" // 排序权重配置 "sese-engine/config" // 排序权重配置
"sese-engine/crawler" // 爬虫(读取活跃线程数)
"sese-engine/info" // info 服务 "sese-engine/info" // info 服务
"sese-engine/parser" // HTML 解析(在线摘要) "sese-engine/parser" // HTML 解析(在线摘要)
"sese-engine/storage" // 持久化存储 "sese-engine/storage" // 持久化存储
@@ -410,7 +411,7 @@ func (s *Server) handleAdminPending(w http.ResponseWriter, r *http.Request) {
} }
// handleAdminWorkers 查看和动态调整爬虫并发线程数。 // handleAdminWorkers 查看和动态调整爬虫并发线程数。
// GET 返回当前 workers 值 // GET 返回 configured(设定值)和 active(实际运行中的 goroutine 数)
// POST {"workers": N} 动态修改(范围 1~500),下一轮 epoch 立即生效 // POST {"workers": N} 动态修改(范围 1~500),下一轮 epoch 立即生效
func (s *Server) handleAdminWorkers(w http.ResponseWriter, r *http.Request) { func (s *Server) handleAdminWorkers(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Access-Control-Allow-Origin", "*")
@@ -425,7 +426,10 @@ func (s *Server) handleAdminWorkers(w http.ResponseWriter, r *http.Request) {
return return
case http.MethodGet: case http.MethodGet:
json.NewEncoder(w).Encode(map[string]int{"workers": config.CrawlerWorkers()}) json.NewEncoder(w).Encode(map[string]int64{
"configured": int64(config.CrawlerWorkers()),
"active": crawler.GlobalActiveWorkers(),
})
case http.MethodPost: case http.MethodPost:
var body struct { var body struct {