刷盘与搜索都是独立线程

2026-04-09 02:33:31 +08:00
parent 7f2cb12e71
commit 5c90ad246c
2 changed files with 30 additions and 7 deletions
@@ -37,7 +37,7 @@ type Server struct {

 	// 以下为收获服务（harvester）内嵌字段
 	mem       map[string][]storage.IndexEntry // 内存索引聚合器：关键词 → [权重, URL] 条目
-	memMu     sync.Mutex                       // 保护内存索引的并发写入
+	memMu     sync.RWMutex                     // 保护内存索引的读写（读锁之间不互斥，多个读操作可并发）
 	rowCount  int64                            // 内存中累计的索引条目总数（触发刷盘）
 	flushMu   sync.Mutex                       // 确保同一时刻只有一个 flush 在执行
 }
@@ -508,7 +508,7 @@ func (s *Server) query(tokens []string, from, to int, siteFilter string) ([]sear
 		return nil, 0
 	}

-	// 加载每个词对应的倒排索引条目
+	// 加载每个词对应的倒排索引条目（磁盘 + 内存）
 	type tokenIndex struct {
 		token   string
 		entries []storage.IndexEntry
@@ -516,8 +516,29 @@ func (s *Server) query(tokens []string, from, to int, siteFilter string) ([]sear
 	}
 	tokenIndexes := make([]tokenIndex, 0, len(tokens))
 	maxURLsPerKey := config.MaxURLsPerKey()
+
+	// 读锁保护内存索引访问（与刷盘互斥，但多个搜索可并发）
+	s.memMu.RLock()
 	for _, t := range tokens {
-		entries, _ := s.db.GetIndex(t)
+		// 1. 从磁盘加载
+		diskEntries, _ := s.db.GetIndex(t)
+		// 2. 从内存加载（尚未刷盘的数据）
+		memEntries := s.mem[t]
+		// 3. 合并（同一 URL 同时存在于内存和磁盘时，以内存条目为准，因为内存中的数据较新）
+		entries := make([]storage.IndexEntry, 0, len(diskEntries)+len(memEntries))
+		seen := make(map[string]bool, len(diskEntries)+len(memEntries))
+		for _, e := range memEntries {
+			if !seen[e.URL] {
+				entries = append(entries, e)
+				seen[e.URL] = true
+			}
+		}
+		for _, e := range diskEntries {
+			if !seen[e.URL] {
+				entries = append(entries, e)
+				seen[e.URL] = true
+			}
+		}
 		// 计算缺省权重：当条目数达到上限时，权重低于第 MaxURLsPerKey 名的条目使用缺省权重
 		defVal := 1.0 / 10000 * float64(max(100, len(entries))) / float64(maxURLsPerKey)
 		if len(entries) >= maxURLsPerKey {
@@ -530,6 +551,7 @@ func (s *Server) query(tokens []string, from, to int, siteFilter string) ([]sear
 		}
 		tokenIndexes = append(tokenIndexes, tokenIndex{t, entries, defVal})
 	}
+	s.memMu.RUnlock()

 	// 构建 URL → (词 → 权重) 映射，收集所有候选 URL
 	urlWeights := make(map[string]map[string]float64)