增加爬取状态api

This commit is contained in:
2026-04-10 18:40:40 +08:00
parent 65e6547d54
commit fd827cbde3
7 changed files with 104 additions and 7 deletions
+34 -1
View File
@@ -66,6 +66,7 @@ type Server struct {
// crawler 爬虫实例(用于 Priority URL 添加时立即触发爬取)
crawler interface {
TriggerPriorityCrawl(url string)
GetCrawlStatus() crawler.CrawlStatus
}
}
@@ -95,9 +96,10 @@ func (s *Server) SetBacklinkRunner(r interface {
s.backlinkRunner = r
}
// SetCrawler injects the crawler instance, used to trigger an immediate crawl
// when a Priority URL is added and to answer crawl-status queries.
//
// The value is stored as-is in s.crawler; it only needs to provide the two
// methods listed in the parameter's interface.
// NOTE(review): a typed nil pointer passed here produces a non-nil interface
// value, so a later `s.crawler == nil` check would not catch it — confirm
// callers pass either a real crawler or nothing at all.
func (s *Server) SetCrawler(c interface {
TriggerPriorityCrawl(url string)
GetCrawlStatus() crawler.CrawlStatus
}) {
s.crawler = c
}
@@ -132,6 +134,7 @@ func (s *Server) Handler() http.Handler {
mux.HandleFunc("/admin/pending", s.handleAdminPending)
mux.HandleFunc("/admin/workers", s.handleAdminWorkers)
mux.HandleFunc("/admin/backlink", s.handleAdminBacklink)
mux.HandleFunc("/admin/crawl/status", s.handleAdminCrawlStatus)
// 静态文件(SPA fallback)
mux.Handle("/", spaHandler{dist: "dist"})
return mux
@@ -705,6 +708,36 @@ func (s *Server) handleAdminBacklink(w http.ResponseWriter, r *http.Request) {
}
}
// handleAdminCrawlStatus serves the crawler's crawl status as JSON.
//
// Supported methods:
//   - GET: responds with a crawler.CrawlStatus document (current epoch, max
//     epoch, queue length, completed count, total visited count and whether the
//     crawler is running). If no crawler has been injected, the zero-valued
//     status is returned instead.
//   - OPTIONS: responds with 204 No Content and an empty body.
//   - anything else: responds with 405 and a JSON error body, advertising the
//     supported methods in the Allow header.
//
// Every response carries Access-Control-Allow-Origin: * and a JSON
// Content-Type.
// NOTE(review): this is an admin endpoint readable from any origin — confirm
// that the wildcard CORS policy is intended.
func (s *Server) handleAdminCrawlStatus(w http.ResponseWriter, r *http.Request) {
	hdr := w.Header()
	hdr.Set("Access-Control-Allow-Origin", "*")
	hdr.Set("Content-Type", "application/json; charset=utf-8")

	// writeError sends a JSON error body by hand. http.Error is deliberately
	// avoided: it overwrites Content-Type with text/plain, which would
	// mislabel the JSON payload. The trailing newline and the nosniff header
	// mirror what http.Error emits, so only the Content-Type differs.
	writeError := func(code int, body string) {
		hdr.Set("X-Content-Type-Options", "nosniff")
		w.WriteHeader(code)
		// A failed write means the client has gone away; nothing to recover.
		_, _ = w.Write([]byte(body + "\n"))
	}

	switch r.Method {
	case http.MethodGet:
		// Handled below.
	case http.MethodOptions:
		w.WriteHeader(http.StatusNoContent)
		return
	default:
		// A 405 response must list the methods the resource supports.
		hdr.Set("Allow", "GET, OPTIONS")
		writeError(http.StatusMethodNotAllowed, `{"error":"method not allowed"}`)
		return
	}

	// The zero value stands in for "no crawler configured": all counters are 0
	// and the running flag is false.
	var status crawler.CrawlStatus
	if s.crawler != nil {
		status = s.crawler.GetCrawlStatus()
	}

	// Marshal before touching the response so an encoding failure can still be
	// reported with a proper status code instead of an empty 200.
	payload, err := json.Marshal(status)
	if err != nil {
		writeError(http.StatusInternalServerError, `{"error":"failed to encode crawl status"}`)
		return
	}
	// json.Encoder terminates each document with a newline; keep that framing.
	// A failed write means the client has gone away; nothing to recover.
	_, _ = w.Write(append(payload, '\n'))
}
// ---- 搜索处理器 ----
// searchResponse 是搜索 API 的 JSON 响应结构。