刷盘与搜索都是独立线程
This commit is contained in:
@@ -250,7 +250,7 @@ func (r *Runner) aggregate(filter func(*storage.SiteInfo) bool, stats *siteStats
|
||||
})
|
||||
|
||||
// 向量余弦过滤:去除 Server 类型特征偏离核心向量的域名(可能是噪音/作弊)
|
||||
- d = vectorFilter(d, vectors, desc)
|
||||
+ d = vectorFilter(d, vectors, desc, r.storagePath)
|
||||
|
||||
// 最终清理:分数 ≤ 0.16 的域名不写入(低于此阈值认为不繁荣)
|
||||
for k, v := range d {
|
||||
@@ -313,7 +313,7 @@ func (r *Runner) aggregateWithScores(scores map[string]float64, stats *siteStats
|
||||
return nil
|
||||
})
|
||||
|
||||
- d = vectorFilter(d, vectors, desc)
|
||||
+ d = vectorFilter(d, vectors, desc, r.storagePath)
|
||||
for k, v := range d {
|
||||
if v <= 0.16 {
|
||||
delete(d, k)
|
||||
@@ -326,7 +326,7 @@ func (r *Runner) aggregateWithScores(scores map[string]float64, stats *siteStats
|
||||
|
||||
// vectorFilter 使用余弦相似度过滤域名分数:保留与核心 Server 类型向量相似的域名。
|
||||
// 与核心方向偏离的域名可能是噪音(如作弊农场、链接买卖)。
|
||||
- func vectorFilter(d map[string]float64, vectors map[string][]float32, desc string) map[string]float64 {
|
||||
+ func vectorFilter(d map[string]float64, vectors map[string][]float32, desc string, storagePath string) map[string]float64 {
|
||||
// 计算全网站的 Server 类型核心向量(所有向量求和)
|
||||
core := make([]float64, 64)
|
||||
for _, vec := range vectors {
|
||||
@@ -370,7 +370,8 @@ func vectorFilter(d map[string]float64, vectors map[string][]float32, desc strin
|
||||
cosMap[k] = dot32_64(vec, core) / (vn * coreNorm)
|
||||
}
|
||||
}
|
||||
- _ = writeJSON(desc+"_cos.json", cosMap)
|
||||
+ cosPath := filepath.Join(storagePath, desc+"_cos.json")
|
||||
+ _ = writeJSON(cosPath, cosMap)
|
||||
|
||||
return newD
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user