前端显示缓存信息

This commit is contained in:
2026-04-11 23:37:52 +08:00
parent 079a4c6291
commit 4eaf134efc
7 changed files with 95 additions and 22 deletions
+7
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
-7
View File
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
+2 -2
View File
@@ -5,8 +5,8 @@
<link rel="icon" type="image/svg+xml" href="/vite.svg" /> <link rel="icon" type="image/svg+xml" href="/vite.svg" />
<meta name="viewport" content="width=device-width, initial-scale=1.0" /> <meta name="viewport" content="width=device-width, initial-scale=1.0" />
<title>SESE 爬取管理</title> <title>SESE 爬取管理</title>
<script type="module" crossorigin src="/assets/index-G5PISGmH.js"></script> <script type="module" crossorigin src="/assets/index-B5dBfm7U.js"></script>
<link rel="stylesheet" crossorigin href="/assets/index-DNzRL3Ws.css"> <link rel="stylesheet" crossorigin href="/assets/index-t-HtfReR.css">
</head> </head>
<body> <body>
<div id="app"></div> <div id="app"></div>
+83 -10
View File
@@ -41,6 +41,8 @@ type urlKeywordsCache struct {
// urlKeywordsEntry LRU 缓存条目 // urlKeywordsEntry LRU 缓存条目
type urlKeywordsEntry struct { type urlKeywordsEntry struct {
URL string // URL(用于删除时从 map 中移除) URL string // URL(用于删除时从 map 中移除)
Title string // 页面标题(从 bbolt Snippet 缓存获取)
Snippet string // 摘要(从 bbolt Snippet 缓存获取)
Keywords []urlKeywordInfo // 关键词列表 Keywords []urlKeywordInfo // 关键词列表
} }
@@ -60,19 +62,22 @@ func newURLKeywordsCache(maxSize int) *urlKeywordsCache {
} }
// Put 写入或更新缓存 // Put 写入或更新缓存
func (c *urlKeywordsCache) Put(url string, keywords []urlKeywordInfo) { func (c *urlKeywordsCache) Put(url string, title, snippet string, keywords []urlKeywordInfo) {
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock() defer c.mu.Unlock()
// 已存在,移到队尾(更新新鲜度) // 已存在,移到队尾(更新新鲜度)
if elem, ok := c.items[url]; ok { if elem, ok := c.items[url]; ok {
c.order.MoveToBack(elem) c.order.MoveToBack(elem)
elem.Value.(*urlKeywordsEntry).Keywords = keywords entry := elem.Value.(*urlKeywordsEntry)
entry.Keywords = keywords
entry.Title = title
entry.Snippet = snippet
return return
} }
// 新增到队尾 // 新增到队尾
entry := &urlKeywordsEntry{URL: url, Keywords: keywords} entry := &urlKeywordsEntry{URL: url, Title: title, Snippet: snippet, Keywords: keywords}
elem := c.order.PushBack(entry) elem := c.order.PushBack(entry)
c.items[url] = elem c.items[url] = elem
@@ -87,13 +92,14 @@ func (c *urlKeywordsCache) Put(url string, keywords []urlKeywordInfo) {
} }
// Get 读取缓存,同时更新新鲜度 // Get 读取缓存,同时更新新鲜度
func (c *urlKeywordsCache) Get(url string) ([]urlKeywordInfo, bool) { func (c *urlKeywordsCache) Get(url string) (*urlKeywordsEntry, bool) {
c.mu.Lock() c.mu.Lock()
defer c.mu.Unlock() defer c.mu.Unlock()
if elem, ok := c.items[url]; ok { if elem, ok := c.items[url]; ok {
c.order.MoveToBack(elem) // 访问过,移到队尾 c.order.MoveToBack(elem) // 访问过,移到队尾
return elem.Value.(*urlKeywordsEntry).Keywords, true entry := elem.Value.(*urlKeywordsEntry)
return entry, true
} }
return nil, false return nil, false
} }
@@ -105,6 +111,17 @@ func (c *urlKeywordsCache) Stats() (size int, maxSize int) {
return len(c.items), c.maxSize return len(c.items), c.maxSize
} }
// ListAll returns a snapshot of every cached entry, ordered from the front of
// the LRU list to the back (least recently used first).
//
// Each returned pointer refers to a copy taken while the lock is held, not to
// the entry stored in the cache. Put overwrites Keywords, Title and Snippet on
// the stored entry in place, so handing out the stored pointers would let a
// caller read those fields while a concurrent Put is writing them.
//
// The copy is shallow: the Keywords slice header is copied and its backing
// array is shared, so callers should treat the keywords as read-only.
func (c *urlKeywordsCache) ListAll() []*urlKeywordsEntry {
	c.mu.RLock()
	defer c.mu.RUnlock()

	entries := make([]*urlKeywordsEntry, 0, len(c.items))
	for elem := c.order.Front(); elem != nil; elem = elem.Next() {
		// Dereference to copy the struct; the address of the copy is what
		// leaves the critical section.
		snapshot := *elem.Value.(*urlKeywordsEntry)
		entries = append(entries, &snapshot)
	}
	return entries
}
// Server 是搜索 HTTP 服务器,同时内嵌收获服务(统一在同一端口)。 // Server 是搜索 HTTP 服务器,同时内嵌收获服务(统一在同一端口)。
type Server struct { type Server struct {
db *storage.DB db *storage.DB
@@ -217,6 +234,7 @@ func (s *Server) Handler() http.Handler {
mux.HandleFunc("/admin/crawl/status", s.handleAdminCrawlStatus) mux.HandleFunc("/admin/crawl/status", s.handleAdminCrawlStatus)
mux.HandleFunc("/admin/url/keywords", s.handleUrlKeywords) mux.HandleFunc("/admin/url/keywords", s.handleUrlKeywords)
mux.HandleFunc("/admin/url/keywords/stats", s.handleUrlKeywordsStats) mux.HandleFunc("/admin/url/keywords/stats", s.handleUrlKeywordsStats)
mux.HandleFunc("/admin/url/keywords/list", s.handleUrlKeywordsList)
// 静态文件（SPA fallback） // 静态文件（SPA fallback）
mux.Handle("/", spaHandler{dist: "dist"}) mux.Handle("/", spaHandler{dist: "dist"})
return mux return mux
@@ -821,7 +839,7 @@ func (s *Server) handleAdminCrawlStatus(w http.ResponseWriter, r *http.Request)
json.NewEncoder(w).Encode(s.crawler.GetCrawlStatus()) json.NewEncoder(w).Encode(s.crawler.GetCrawlStatus())
} }
// handleUrlKeywords 返回指定 URL 的关键词列表（LRU 缓存查询） // handleUrlKeywords 返回指定 URL 的完整缓存信息（LRU 缓存查询）
func (s *Server) handleUrlKeywords(w http.ResponseWriter, r *http.Request) { func (s *Server) handleUrlKeywords(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Access-Control-Allow-Origin", "*") w.Header().Set("Access-Control-Allow-Origin", "*")
w.Header().Set("Content-Type", "application/json; charset=utf-8") w.Header().Set("Content-Type", "application/json; charset=utf-8")
@@ -841,17 +859,23 @@ func (s *Server) handleUrlKeywords(w http.ResponseWriter, r *http.Request) {
return return
} }
keywords, ok := s.urlKeywords.Get(url) entry, ok := s.urlKeywords.Get(url)
resp := struct { resp := struct {
URL string `json:"url"` URL string `json:"url"`
Title string `json:"title"`
Snippet string `json:"snippet"`
Keywords []urlKeywordInfo `json:"keywords"` Keywords []urlKeywordInfo `json:"keywords"`
Cached bool `json:"cached"` Cached bool `json:"cached"`
}{ }{
URL: url, URL: url,
Keywords: keywords,
Cached: ok, Cached: ok,
} }
if ok {
resp.Title = entry.Title
resp.Snippet = entry.Snippet
resp.Keywords = entry.Keywords
}
json.NewEncoder(w).Encode(resp) json.NewEncoder(w).Encode(resp)
} }
@@ -884,6 +908,47 @@ func (s *Server) handleUrlKeywordsStats(w http.ResponseWriter, r *http.Request)
json.NewEncoder(w).Encode(resp) json.NewEncoder(w).Encode(resp)
} }
// handleUrlKeywordsList writes every entry of the URL keyword cache as JSON,
// in the order returned by ListAll (least recently used first).
//
// Methods:
//   - OPTIONS: answered with 204 No Content and no body.
//   - GET:     answered with {"items": [...], "size": n, "max_size": m}.
//   - other:   answered with 405 and a JSON error body.
//
// Each item carries the keys "url", "title", "snippet" and "keywords".
func (s *Server) handleUrlKeywordsList(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Access-Control-Allow-Origin", "*")
	w.Header().Set("Content-Type", "application/json; charset=utf-8")

	switch r.Method {
	case http.MethodOptions:
		w.WriteHeader(http.StatusNoContent)
		return
	case http.MethodGet:
		// handled below
	default:
		http.Error(w, `{"error":"method not allowed"}`, http.StatusMethodNotAllowed)
		return
	}

	entries := s.urlKeywords.ListAll()
	// Only the capacity is taken from Stats. The reported size is derived from
	// the listed entries so that "size" always equals len("items"), even if
	// the cache changes between the two calls.
	_, maxSize := s.urlKeywords.Stats()

	// Convert to the shape the frontend expects.
	items := make([]map[string]any, 0, len(entries))
	for _, e := range entries {
		items = append(items, map[string]any{
			"url":      e.URL,
			"title":    e.Title,
			"snippet":  e.Snippet,
			"keywords": e.Keywords,
		})
	}

	resp := struct {
		Items   []map[string]any `json:"items"`    // cached entries
		Size    int              `json:"size"`     // number of URLs currently cached
		MaxSize int              `json:"max_size"` // cache capacity
	}{
		Items:   items,
		Size:    len(items),
		MaxSize: maxSize,
	}

	// Marshal before writing anything so an encoding failure can still be
	// reported with a 500 instead of an empty 200 response.
	body, err := json.Marshal(resp)
	if err != nil {
		http.Error(w, `{"error":"failed to encode response"}`, http.StatusInternalServerError)
		return
	}
	// json.Encoder.Encode terminates its output with a newline; keep that.
	body = append(body, '\n')
	// A write error here means the client went away; there is nobody left to
	// report it to.
	_, _ = w.Write(body)
}
// ---- 搜索处理器 ---- // ---- 搜索处理器 ----
// searchResponse 是搜索 API 的 JSON 响应结构。 // searchResponse 是搜索 API 的 JSON 响应结构。
@@ -1800,7 +1865,7 @@ func (s *Server) handleIngest(w http.ResponseWriter, r *http.Request) {
go s.Flush() go s.Flush()
} }
// 更新 URL→关键词 LRU 缓存 // 更新 URL→关键词 LRU 缓存(从 bbolt 获取标题和摘要)
keywords := make([]urlKeywordInfo, len(payload.Keywords)) keywords := make([]urlKeywordInfo, len(payload.Keywords))
for i, kw := range payload.Keywords { for i, kw := range payload.Keywords {
keywords[i] = urlKeywordInfo{ keywords[i] = urlKeywordInfo{
@@ -1808,7 +1873,15 @@ func (s *Server) handleIngest(w http.ResponseWriter, r *http.Request) {
Weight: kw.Weight, Weight: kw.Weight,
} }
} }
s.urlKeywords.Put(payload.URL, keywords) title, snippet := "", ""
if snippetEntry, err := s.db.GetSnippet(payload.URL); err == nil {
title = snippetEntry.Title
snippet = snippetEntry.Description
if snippet == "" {
snippet = snippetEntry.Text
}
}
s.urlKeywords.Put(payload.URL, title, snippet, keywords)
w.Write([]byte("ok")) w.Write([]byte("ok"))
} }