优先爬取的队列立即执行

This commit is contained in:
2026-04-10 13:14:12 +08:00
parent c5da660c7f
commit 5b8b256b35
8 changed files with 150 additions and 6 deletions
+37
View File
@@ -61,6 +61,11 @@ type Server struct {
Status() map[string]interface{}
RunNow() error
}
// crawler 爬虫实例(用于 Priority URL 添加时立即触发爬取)
crawler interface {
TriggerPriorityCrawl(url string)
}
}
// New 创建一个 search Server(内嵌收获服务,统一在同一端口)。
@@ -89,6 +94,13 @@ func (s *Server) SetBacklinkRunner(r interface {
s.backlinkRunner = r
}
// SetCrawler injects the crawler instance that is used to trigger a crawl
// immediately when a Priority URL is added.
//
// The value is stored on the Server as-is. handleAdminPriority only calls
// TriggerPriorityCrawl when the stored value is non-nil, so leaving it unset
// disables the immediate trigger.
//
// NOTE(review): a typed nil pointer passed here yields a non-nil interface
// value, which that nil check would not catch — confirm callers pass either a
// live instance or nothing at all.
func (s *Server) SetCrawler(c interface {
TriggerPriorityCrawl(url string)
}) {
s.crawler = c
}
// runPeriodicFlush 每隔 FlushIntervalSeconds 秒触发一次刷盘。
func (s *Server) runPeriodicFlush() {
ticker := time.NewTicker(time.Duration(config.FlushIntervalSeconds()) * time.Second)
@@ -113,6 +125,7 @@ func (s *Server) Handler() http.Handler {
mux.HandleFunc("/admin/recent", s.handleAdminRecent)
mux.HandleFunc("/admin/stats", s.handleAdminStats)
mux.HandleFunc("/admin/priority", s.handleAdminPriority)
mux.HandleFunc("/admin/priority/status", s.handleAdminPriorityStatus)
mux.HandleFunc("/admin/flush", s.handleAdminFlush)
mux.HandleFunc("/admin/pending", s.handleAdminPending)
mux.HandleFunc("/admin/workers", s.handleAdminWorkers)
@@ -513,6 +526,12 @@ func (s *Server) handleAdminPriority(w http.ResponseWriter, r *http.Request) {
http.Error(w, `{"error":"`+err.Error()+`"}`, 500)
return
}
// 关键:立即触发 priority crawl(突破 workers 上限)
if s.crawler != nil {
s.crawler.TriggerPriorityCrawl(entry.URL)
}
json.NewEncoder(w).Encode(map[string]string{"status": "added", "url": entry.URL})
case http.MethodDelete:
@@ -533,6 +552,24 @@ func (s *Server) handleAdminPriority(w http.ResponseWriter, r *http.Request) {
}
}
// handleAdminPriorityStatus reports the live state of the Priority Worker.
//
// Supported methods:
//   - GET: responds with the JSON encoding of crawler.GlobalPriorityStatus(),
//     followed by a newline. Per the original note this covers pending
//     (waiting), active (in progress) and max_workers (independent limit);
//     the exact shape is defined by the crawler package.
//   - OPTIONS: responds 204 with no body (CORS preflight).
//
// Any other method gets 405 together with an Allow header listing the
// accepted methods. If the status value cannot be encoded, the handler
// responds 500 instead of silently sending an empty 200.
func (s *Server) handleAdminPriorityStatus(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Access-Control-Allow-Origin", "*")
	w.Header().Set("Content-Type", "application/json; charset=utf-8")

	switch r.Method {
	case http.MethodGet:
		// Served below.
	case http.MethodOptions:
		w.WriteHeader(http.StatusNoContent)
		return
	default:
		// A 405 response must tell the client which methods are accepted.
		w.Header().Set("Allow", "GET, OPTIONS")
		http.Error(w, `{"error":"method not allowed"}`, http.StatusMethodNotAllowed)
		return
	}

	// Marshal before touching the response so that an encoding failure can
	// still be reported with a proper status code.
	payload, err := json.Marshal(crawler.GlobalPriorityStatus())
	if err != nil {
		http.Error(w, `{"error":"failed to encode priority status"}`, http.StatusInternalServerError)
		return
	}

	// The trailing newline keeps the body identical to what
	// json.Encoder.Encode emits. A write error means the client went away;
	// there is nothing useful left to do with it here.
	_, _ = w.Write(append(payload, '\n'))
}
// handleAdminFlush 强制刷盘。
func (s *Server) handleAdminFlush(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*")