This commit is contained in:
2026-04-09 00:14:55 +08:00
parent 223177b1dd
commit 439d0c1cb6
9 changed files with 88 additions and 46 deletions
+22 -7
View File
@@ -290,11 +290,12 @@ func (s *Server) handleAdminStats(w http.ResponseWriter, r *http.Request) {
}
resp := map[string]any{
"total_urls": total,
"total_words": totalWords,
"domains": domainsMap,
"languages": langsMap,
"pending": atomic.LoadInt64(&s.rowCount), // 内存中未刷盘的索引条目数
"total_urls": total,
"total_words": totalWords,
"total_domains": len(domainCount), // 真实的域名总数(非Top 20
"domains": domainsMap,
"languages": langsMap,
"pending": atomic.LoadInt64(&s.rowCount), // 内存中未刷盘的索引条目数
}
json.NewEncoder(w).Encode(resp)
@@ -1232,11 +1233,12 @@ func (s *Server) flush() {
s.mem = make(map[string][]storage.IndexEntry)
atomic.StoreInt64(&s.rowCount, 0)
s.memMu.Unlock()
log.Printf("[harvester] flushing %d keys", len(snapshot))
totalKeys := len(snapshot)
log.Printf("[harvester] flushing %d keys", totalKeys)
items := make([]struct {
key string
entries []storage.IndexEntry
}, 0, len(snapshot))
}, 0, totalKeys)
for k, v := range snapshot {
items = append(items, struct {
key string
@@ -1250,6 +1252,14 @@ func (s *Server) flush() {
}
results := make(chan result, len(items))
sem := make(chan struct{}, 8)
processed := int64(0)
progressInterval := 1000
if totalKeys < 10000 {
progressInterval = totalKeys / 10
}
if progressInterval < 1 {
progressInterval = 1
}
for _, item := range items {
sem <- struct{}{}
go func(k string, newEntries []storage.IndexEntry) {
@@ -1262,6 +1272,11 @@ func (s *Server) flush() {
for range items {
r := <-results
batch[r.key] = r.entries
current := atomic.AddInt64(&processed, 1)
if int(current)%progressInterval == 0 || int(current) == totalKeys {
percent := float64(current) * 100 / float64(totalKeys)
log.Printf("[harvester] flush progress: %d/%d (%.1f%%)", current, totalKeys, percent)
}
}
if err := s.db.BatchSetIndex(batch); err != nil {
log.Printf("[harvester] flush write error: %v", err)