up
This commit is contained in:
+1
-2
@@ -399,8 +399,7 @@ func (c *Crawler) sendToHarvester(finalURL string, kws []analyzer.Keyword) {
|
||||
if failures >= circuitFailureThreshold {
|
||||
atomic.StoreInt32(&c.circuitState, circuitOpen)
|
||||
atomic.StoreInt64(&c.circuitExpiry, now+int64(circuitCooldownSeconds))
|
||||
log.Printf("[crawler] circuit OPEN: harvest endpoint unreachable (%d failures), cooling for %ds",
|
||||
failures, circuitCooldownSeconds)
|
||||
//log.Printf("[crawler] circuit OPEN: harvest endpoint unreachable (%d failures), cooling for %ds",failures, circuitCooldownSeconds)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
Vendored
+2
File diff suppressed because one or more lines are too long
Vendored
+6
File diff suppressed because one or more lines are too long
Vendored
+6
File diff suppressed because one or more lines are too long
Vendored
+6
File diff suppressed because one or more lines are too long
Vendored
+2
File diff suppressed because one or more lines are too long
Vendored
+6
File diff suppressed because one or more lines are too long
Vendored
+2
-2
@@ -5,8 +5,8 @@
|
||||
<link rel="icon" type="image/svg+xml" href="/vite.svg" />
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" />
|
||||
<title>SESE 爬取管理</title>
|
||||
<script type="module" crossorigin src="/assets/index-KhES34ts.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-CO25_jVn.css">
|
||||
<script type="module" crossorigin src="/assets/index-BomiJv32.js"></script>
|
||||
<link rel="stylesheet" crossorigin href="/assets/index-Bj4UMEhQ.css">
|
||||
</head>
|
||||
<body>
|
||||
<div id="app"></div>
|
||||
|
||||
+17
-2
@@ -292,6 +292,7 @@ func (s *Server) handleAdminStats(w http.ResponseWriter, r *http.Request) {
|
||||
resp := map[string]any{
|
||||
"total_urls": total,
|
||||
"total_words": totalWords,
|
||||
"total_domains": len(domainCount), // 真实的域名总数(非Top 20)
|
||||
"domains": domainsMap,
|
||||
"languages": langsMap,
|
||||
"pending": atomic.LoadInt64(&s.rowCount), // 内存中未刷盘的索引条目数
|
||||
@@ -1232,11 +1233,12 @@ func (s *Server) flush() {
|
||||
s.mem = make(map[string][]storage.IndexEntry)
|
||||
atomic.StoreInt64(&s.rowCount, 0)
|
||||
s.memMu.Unlock()
|
||||
log.Printf("[harvester] flushing %d keys", len(snapshot))
|
||||
totalKeys := len(snapshot)
|
||||
log.Printf("[harvester] flushing %d keys", totalKeys)
|
||||
items := make([]struct {
|
||||
key string
|
||||
entries []storage.IndexEntry
|
||||
}, 0, len(snapshot))
|
||||
}, 0, totalKeys)
|
||||
for k, v := range snapshot {
|
||||
items = append(items, struct {
|
||||
key string
|
||||
@@ -1250,6 +1252,14 @@ func (s *Server) flush() {
|
||||
}
|
||||
results := make(chan result, len(items))
|
||||
sem := make(chan struct{}, 8)
|
||||
processed := int64(0)
|
||||
progressInterval := 1000
|
||||
if totalKeys < 10000 {
|
||||
progressInterval = totalKeys / 10
|
||||
}
|
||||
if progressInterval < 1 {
|
||||
progressInterval = 1
|
||||
}
|
||||
for _, item := range items {
|
||||
sem <- struct{}{}
|
||||
go func(k string, newEntries []storage.IndexEntry) {
|
||||
@@ -1262,6 +1272,11 @@ func (s *Server) flush() {
|
||||
for range items {
|
||||
r := <-results
|
||||
batch[r.key] = r.entries
|
||||
current := atomic.AddInt64(&processed, 1)
|
||||
if int(current)%progressInterval == 0 || int(current) == totalKeys {
|
||||
percent := float64(current) * 100 / float64(totalKeys)
|
||||
log.Printf("[harvester] flush progress: %d/%d (%.1f%%)", current, totalKeys, percent)
|
||||
}
|
||||
}
|
||||
if err := s.db.BatchSetIndex(batch); err != nil {
|
||||
log.Printf("[harvester] flush write error: %v", err)
|
||||
|
||||
Reference in New Issue
Block a user