up

2026-04-09 00:14:55 +08:00
parent 223177b1dd
commit 439d0c1cb6
9 changed files with 88 additions and 46 deletions
@@ -399,8 +399,7 @@ func (c *Crawler) sendToHarvester(finalURL string, kws []analyzer.Keyword) {
 		if failures >= circuitFailureThreshold {
 			atomic.StoreInt32(&c.circuitState, circuitOpen)
 			atomic.StoreInt64(&c.circuitExpiry, now+int64(circuitCooldownSeconds))
-			log.Printf("[crawler] circuit OPEN: harvest endpoint unreachable (%d failures), cooling for %ds",
+			//log.Printf("[crawler] circuit OPEN: harvest endpoint unreachable (%d failures), cooling for %ds",failures, circuitCooldownSeconds)
-				failures, circuitCooldownSeconds)
 		}
 		return
 	}
@@ -482,7 +481,7 @@ func (c *Crawler) schedule(links []URLWeight) []string {
 	}
 	// 根据目标繁荣占比计算普通 URL 应保留数量
 	expectedProsperRatio := config.ExpectedProsperRatio()
-	n := int(float64(len(prosperURLs)) * (1-expectedProsperRatio) / expectedProsperRatio)
+	n := int(float64(len(prosperURLs)) * (1 - expectedProsperRatio) / expectedProsperRatio)
 	if len(otherURLs) > n {
 		keep := max(len(otherURLs)-len(selected)/10, n)
 		if keep < len(otherURLs) {
@@ -5,8 +5,8 @@
    <link rel="icon" type="image/svg+xml" href="/vite.svg" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>SESE 爬取管理</title>
-    <script type="module" crossorigin src="/assets/index-KhES34ts.js"></script>
+    <script type="module" crossorigin src="/assets/index-BomiJv32.js"></script>
-    <link rel="stylesheet" crossorigin href="/assets/index-CO25_jVn.css">
+    <link rel="stylesheet" crossorigin href="/assets/index-Bj4UMEhQ.css">
  </head>
  <body>
    <div id="app"></div>
@@ -292,6 +292,7 @@ func (s *Server) handleAdminStats(w http.ResponseWriter, r *http.Request) {
 	resp := map[string]any{
 		"total_urls":     total,
 		"total_words":    totalWords,
 		"total_domains":  len(domainCount), // 真实的域名总数（非Top 20）
 		"domains":        domainsMap,
 		"languages":      langsMap,
 		"pending":        atomic.LoadInt64(&s.rowCount), // 内存中未刷盘的索引条目数
@@ -1232,11 +1233,12 @@ func (s *Server) flush() {
 	s.mem = make(map[string][]storage.IndexEntry)
 	atomic.StoreInt64(&s.rowCount, 0)
 	s.memMu.Unlock()
-	log.Printf("[harvester] flushing %d keys", len(snapshot))
+	totalKeys := len(snapshot)
+	log.Printf("[harvester] flushing %d keys", totalKeys)
 	items := make([]struct {
 		key     string
 		entries []storage.IndexEntry
-	}, 0, len(snapshot))
+	}, 0, totalKeys)
 	for k, v := range snapshot {
 		items = append(items, struct {
 			key     string
@@ -1250,6 +1252,14 @@ func (s *Server) flush() {
 	}
 	results := make(chan result, len(items))
 	sem := make(chan struct{}, 8)
+	processed := int64(0)
+	progressInterval := 1000
+	if totalKeys < 10000 {
+		progressInterval = totalKeys / 10
+	}
+	if progressInterval < 1 {
+		progressInterval = 1
+	}
 	for _, item := range items {
 		sem <- struct{}{}
 		go func(k string, newEntries []storage.IndexEntry) {
@@ -1262,6 +1272,11 @@ func (s *Server) flush() {
 	for range items {
 		r := <-results
 		batch[r.key] = r.entries
+		current := atomic.AddInt64(&processed, 1)
+		if int(current)%progressInterval == 0 || int(current) == totalKeys {
+			percent := float64(current) * 100 / float64(totalKeys)
+			log.Printf("[harvester] flush progress: %d/%d (%.1f%%)", current, totalKeys, percent)
+		}
 	}
 	if err := s.db.BatchSetIndex(batch); err != nil {
 		log.Printf("[harvester] flush write error: %v", err)