修复一个卡死问题
This commit is contained in:
@@ -11,6 +11,7 @@ import (
|
||||
"net/url" // URL 解析
|
||||
"regexp" // 正则表达式(site: 过滤语法)
|
||||
"sort" // 排序
|
||||
"strconv" // 字符串转整数
|
||||
"strings" // 字符串操作
|
||||
"sync" // 互斥锁(保护并发切片写入)
|
||||
"time" // 时间戳
|
||||
@@ -46,6 +47,8 @@ func New(db *storage.DB, infoSvc *info.Service, a *analyzer.Analyzer) *Server {
|
||||
func (s *Server) Handler() http.Handler {
|
||||
mux := http.NewServeMux()
|
||||
mux.HandleFunc("/search", s.handleSearch)
|
||||
mux.HandleFunc("/admin/recent", s.handleAdminRecent)
|
||||
mux.HandleFunc("/admin/stats", s.handleAdminStats)
|
||||
return mux
|
||||
}
|
||||
|
||||
@@ -55,6 +58,149 @@ func (s *Server) ListenAndServe(addr string) error {
|
||||
return http.ListenAndServe(addr, s.Handler())
|
||||
}
|
||||
|
||||
// ---- Admin 接口 ----
|
||||
|
||||
// recentItem is a single record in the JSON response of /admin/recent.
type recentItem struct {
	// URL is the address of the crawled page.
	URL string `json:"url"`
	// Title is the page title taken from the stored snippet.
	Title string `json:"title"`
	// Description is the snippet description, shortened by the handler.
	Description string `json:"description"`
	// Domain is the network location derived from URL.
	Domain string `json:"domain"`
	// Language maps a language name to its ratio for the page's site.
	Language map[string]float64 `json:"language"`
	// WordCount is the length of the stored snippet text.
	WordCount int `json:"word_count"`
	// CrawledAt is the snippet's crawl timestamp.
	CrawledAt int64 `json:"crawled_at"`
}
|
||||
|
||||
// handleAdminRecent 返回最近爬取的条目列表,按爬取时间倒序。
|
||||
// 参数:limit(默认50,最大200)。
|
||||
func (s *Server) handleAdminRecent(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Access-Control-Allow-Origin", "*")
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
|
||||
limit := 50
|
||||
if l := r.URL.Query().Get("limit"); l != "" {
|
||||
if v, err := strconv.Atoi(l); err == nil && v > 0 {
|
||||
limit = v
|
||||
}
|
||||
}
|
||||
if limit > 200 {
|
||||
limit = 200
|
||||
}
|
||||
|
||||
type entry struct {
|
||||
url string
|
||||
snippet *storage.SnippetEntry
|
||||
siteInfo *storage.SiteInfo
|
||||
}
|
||||
|
||||
var items []entry
|
||||
s.db.ForEachSnippet(func(url string, snippet *storage.SnippetEntry) error {
|
||||
siteInfo, _ := s.db.GetSiteInfo(netloc(url))
|
||||
items = append(items, entry{url, snippet, siteInfo})
|
||||
return nil
|
||||
})
|
||||
|
||||
// 按时间倒序
|
||||
sort.Slice(items, func(i, j int) bool {
|
||||
return items[i].snippet.Timestamp > items[j].snippet.Timestamp
|
||||
})
|
||||
|
||||
if len(items) > limit {
|
||||
items = items[:limit]
|
||||
}
|
||||
|
||||
result := make([]recentItem, 0, len(items))
|
||||
for _, e := range items {
|
||||
lang := e.siteInfo.Languages
|
||||
if lang == nil {
|
||||
lang = make(map[string]float64)
|
||||
}
|
||||
desc := e.snippet.Description
|
||||
if len(desc) > 200 {
|
||||
desc = desc[:200]
|
||||
}
|
||||
result = append(result, recentItem{
|
||||
URL: e.url,
|
||||
Title: e.snippet.Title,
|
||||
Description: desc,
|
||||
Domain: netloc(e.url),
|
||||
Language: lang,
|
||||
WordCount: len(e.snippet.Text),
|
||||
CrawledAt: e.snippet.Timestamp,
|
||||
})
|
||||
}
|
||||
|
||||
resp := map[string]any{
|
||||
"items": result,
|
||||
"total": len(items),
|
||||
}
|
||||
json.NewEncoder(w).Encode(resp)
|
||||
}
|
||||
|
||||
// handleAdminStats 返回全局统计:域名分布、语种分布、总 URL 数、总词数。
|
||||
func (s *Server) handleAdminStats(w http.ResponseWriter, r *http.Request) {
|
||||
w.Header().Set("Access-Control-Allow-Origin", "*")
|
||||
w.Header().Set("Content-Type", "application/json; charset=utf-8")
|
||||
|
||||
domainCount := make(map[string]int)
|
||||
langCount := make(map[string]int)
|
||||
totalWords := 0
|
||||
total := 0
|
||||
|
||||
s.db.ForEachSnippet(func(url string, snippet *storage.SnippetEntry) error {
|
||||
total++
|
||||
domain := netloc(url)
|
||||
domainCount[domain]++
|
||||
totalWords += len(snippet.Text)
|
||||
|
||||
siteInfo, _ := s.db.GetSiteInfo(domain)
|
||||
if siteInfo != nil {
|
||||
for lang, ratio := range siteInfo.Languages {
|
||||
if ratio > 0.1 {
|
||||
langCount[lang]++
|
||||
}
|
||||
}
|
||||
}
|
||||
return nil
|
||||
})
|
||||
|
||||
// 排序取 Top
|
||||
type kv struct{ k string; v int }
|
||||
topDomains := make([]kv, 0, len(domainCount))
|
||||
for k, v := range domainCount {
|
||||
topDomains = append(topDomains, kv{k, v})
|
||||
}
|
||||
sort.Slice(topDomains, func(i, j int) bool { return topDomains[i].v > topDomains[j].v })
|
||||
if len(topDomains) > 20 {
|
||||
topDomains = topDomains[:20]
|
||||
}
|
||||
topLangs := make([]kv, 0, len(langCount))
|
||||
for k, v := range langCount {
|
||||
topLangs = append(topLangs, kv{k, v})
|
||||
}
|
||||
sort.Slice(topLangs, func(i, j int) bool { return topLangs[i].v > topLangs[j].v })
|
||||
if len(topLangs) > 10 {
|
||||
topLangs = topLangs[:10]
|
||||
}
|
||||
|
||||
domainsMap := make(map[string]int)
|
||||
for _, kv := range topDomains {
|
||||
domainsMap[kv.k] = kv.v
|
||||
}
|
||||
langsMap := make(map[string]int)
|
||||
for _, kv := range topLangs {
|
||||
langsMap[kv.k] = kv.v
|
||||
}
|
||||
|
||||
resp := map[string]any{
|
||||
"total_urls": total,
|
||||
"total_words": totalWords,
|
||||
"domains": domainsMap,
|
||||
"languages": langsMap,
|
||||
}
|
||||
json.NewEncoder(w).Encode(resp)
|
||||
}
|
||||
|
||||
// ---- 搜索处理器 ----
|
||||
|
||||
// searchResponse 是搜索 API 的 JSON 响应结构。
|
||||
|
||||
Reference in New Issue
Block a user