子链接不再被 isVisited 过滤

This commit is contained in:
2026-04-10 20:49:49 +08:00
parent 69d3f37555
commit 1b88ca1efb
+2 -5
View File
@@ -258,16 +258,13 @@ func (c *Crawler) runPriorityWorker() {
hrefs := c.visitURLUnlimited(rawURL) hrefs := c.visitURLUnlimited(rawURL)
// 将子链接加入优先队列(保持优先级) // 将子链接加入优先队列(保持优先级)
// 注意:Priority URL 的子链接强制加入队列,即使已访问过也要重新爬取
if len(hrefs) > 0 { if len(hrefs) > 0 {
c.priorityChildrenMu.Lock() c.priorityChildrenMu.Lock()
added := 0
for _, child := range hrefs { for _, child := range hrefs {
if !c.isVisited(child) {
c.priorityChildren = append(c.priorityChildren, child) c.priorityChildren = append(c.priorityChildren, child)
added++
} }
} IncrementPriorityChildren(int64(len(hrefs)))
IncrementPriorityChildren(int64(added))
c.priorityChildrenMu.Unlock() c.priorityChildrenMu.Unlock()
} }