up

2026-04-10 13:22:35 +08:00
parent 22d0b72ee5
commit 8e4cdaca47
1 changed files with 13 additions and 13 deletions
@@ -45,8 +45,8 @@ const (

 // Priority Worker 配置
 const (
-	priorityMaxWorkers   = 50  // Priority 独立 goroutine 上限（突破主 workers）
-	priorityQueueSize    = 100 // Priority 任务队列缓冲大小
+	priorityMaxWorkers = 50  // Priority 独立 goroutine 上限（突破主 workers）
+	priorityQueueSize  = 100 // Priority 任务队列缓冲大小
 )

 // Crawler 编排整个 BFS 爬取流程。
@@ -70,10 +70,10 @@ type Crawler struct {
 	activeWorkers int64

 	// ---- Priority Worker（独立 goroutine，不受主 workers 限制）----
-	priorityCh   chan string          // Priority URL 任务队列
-	prioritySem   chan struct{}        // Priority 信号量（上限 priorityMaxWorkers）
-	priorityWg   sync.WaitGroup      // 等待所有 Priority goroutine 结束
-	priorityMu   sync.RWMutex        // 保护 priorityStats
+	priorityCh    chan string    // Priority URL 任务队列
+	prioritySem   chan struct{}  // Priority 信号量（上限 priorityMaxWorkers）
+	priorityWg    sync.WaitGroup // 等待所有 Priority goroutine 结束
+	priorityMu    sync.RWMutex   // 保护 priorityStats
 	priorityStats struct {
 		pending int64 // 待处理的 Priority URL 数量（入队但未开始）
 		active  int64 // 正在处理的 Priority URL 数量
@@ -113,12 +113,12 @@ func GlobalPriorityStatus() map[string]interface{} {
 // prosperMap 由 info 模块加载，传入域名繁荣值用于调度优先级计算。
 func New(db *storage.DB, a *analyzer.Analyzer, prosperMap map[string]float64) *Crawler {
 	c := &Crawler{
-		fetcher:    NewFetcher(config.SpiderName(), time.Duration(config.CrawlerCooldown())*time.Second),
-		db:         db,
-		analyzer:   a,
-		prosperMap: prosperMap,
-		visited:    make(map[string]bool),
-		priorityCh: make(chan string, priorityQueueSize),
+		fetcher:     NewFetcher(config.SpiderName(), time.Duration(config.CrawlerCooldown())*time.Second),
+		db:          db,
+		analyzer:    a,
+		prosperMap:  prosperMap,
+		visited:     make(map[string]bool),
+		priorityCh:  make(chan string, priorityQueueSize),
 		prioritySem: make(chan struct{}, priorityMaxWorkers),
 	}
 	// 启动 Priority Worker（独立 goroutine，不受主 workers 限制）
@@ -425,7 +425,7 @@ func (c *Crawler) visitURL(rawURL string) (hrefs []string) {
 	oldEntry, _ := c.db.GetSnippet(res.FinalURL)
 	if oldEntry != nil && oldEntry.ContentHash != "" && oldEntry.ContentHash == contentHash {
 		isRecrawl = true
-		log.Printf("[crawler] unchanged (recrawl skip): %s", res.FinalURL)
+		//log.Printf("[crawler] unchanged (recrawl skip): %s", res.FinalURL)
 	}

 	// 缓存 URL 摘要（仅对短 URL 缓存，防止超长 URL 浪费空间）