up

2026-04-08 23:35:50 +08:00
parent 422a69397a
commit 7abcca6836
6 changed files with 257 additions and 85 deletions
@@ -2,56 +2,225 @@
 // config 包存放 sese-engine 的所有全局配置参数。
 package config
-// Index / storage limits
+import (
-// 索引 / 存储相关限制常量
+	"fmt"
-const (
+	"os"
-	MaxURLsPerKey       = 11000   // 每个索引词最多保存的 URL 数量上限
+	"path/filepath"
-	MaxSameDomainPerKey = 20      // 同一域名在每个索引词下最多出现的次数
+
-	BigCleanThreshold   = 10000000 // 内存中累计多少条索引后触发一次刷盘清理
+	"gopkg.in/yaml.v3"
 	MaxNewURLsPerKey    = 10000   // 每次刷盘时，每个索引词最多写入的新 URL 数量上限
 	MinURLsForNewKey    = 3       // 新索引词如果 URL 数少于该值则丢弃，不写入磁盘
 )
-// Crawler settings
+// Config 是完整的配置结构体
-// 爬虫行为相关配置
+type Config struct {
-const (
+	Index      IndexConfig      `yaml:"index"`
-	SpiderName      = "loli_spider"   // HTTP 请求的 User-Agent 标识
+	Crawler    CrawlerConfig    `yaml:"crawler"`
-	CrawlerCooldown = 3               // 同一主机相邻两次请求的最小间隔（秒），用于遵守 robots.txt 和避免被封
+	Search     SearchConfig     `yaml:"search"`
-	CrawlerWorkers  = 22              // 爬虫并发 goroutine 数量
+	Backlink   BacklinkConfig   `yaml:"backlink"`
-	CrawlFocus      = 0.7             // 域名集中度因子，越大越倾向在少量域名内深挖，越小越分散
+	Storage    StorageConfig    `yaml:"storage"`
-	MaxKeywordsPerPage = 250           // 单个页面最多提取的关键词数量
+	Prometheus PrometheusConfig `yaml:"prometheus"`
-	MaxEpoch        = 100             // BFS 爬取的最大轮次上限
+}
 	ExpectedProsperRatio = 0.6         // 队列中预期"繁荣"域名（高反向链接）的占比，用于调度决策
 	EntryURL        = "https://zh.wikipedia.org/" // BFS 爬取的起始入口 URL
 )
-// Search / ranking weights
+// IndexConfig 索引/存储相关限制
-// 搜索结果排序权重配置
+type IndexConfig struct {
-const (
+	MaxURLsPerKey       int `yaml:"max_urls_per_key"`
-	UseOnlineSnippet      = true       // 是否在线抓取摘要（搜索时实时抓取页面补充摘要）
+	MaxSameDomainPerKey int `yaml:"max_same_domain_per_key"`
-	OnlineSnippetTimeout  = 3          // 在线抓取摘要的超时时间（秒）
+	BigCleanThreshold   int `yaml:"big_clean_threshold"`
-	WeightDailyDecay      = 0.996      // 页面年龄的时间衰减因子（每天乘以此系数）
+	MaxNewURLsPerKey    int `yaml:"max_new_urls_per_key"`
-	LanguageWeight        = 0.5        // 语种匹配权重：与查询语种一致时加分
+	MinURLsForNewKey    int `yaml:"min_urls_for_new_key"`
-	ConsecutiveKeyWeight  = 1.3        // 连续关键词命中权重：多词连续出现时加分
+}
 	BacklinkWeight        = 1.0        // 反向链接权重：指向该 URL 的链接越多得分越高
 	SearchServerPort      = 80         // 搜索服务和收获服务的统一 HTTP 监听端口
 	FlushIntervalSeconds  = 60         // 定期刷盘间隔（秒）：将内存索引批量写入磁盘
 )
-// Backlink computation
+// CrawlerConfig 爬虫行为相关配置
-// 反向链接（PageRank 类）计算相关常量
+type CrawlerConfig struct {
-const (
+	SpiderName           string  `yaml:"spider_name"`
-	BacklinkBaseline = 200000 // 反向链接得分归一化的除数（用于将原始链接数映射到 [0,1] 区间）
+	Cooldown             int     `yaml:"cooldown"`
-)
+	Workers              int     `yaml:"workers"`
 	CrawlFocus           float64 `yaml:"crawl_focus"`
 	MaxKeywordsPerPage   int     `yaml:"max_keywords_per_page"`
 	MaxEpoch             int     `yaml:"max_epoch"`
 	ExpectedProsperRatio float64 `yaml:"expected_prosper_ratio"`
 	EntryURL             string  `yaml:"entry_url"`
 }
-// Storage path (relative to process working directory)
+// SearchConfig 搜索结果排序权重配置
-// 存储根目录路径，相对于进程启动时的工作目录
+type SearchConfig struct {
 	UseOnlineSnippet     bool    `yaml:"use_online_snippet"`
 	OnlineSnippetTimeout int     `yaml:"online_snippet_timeout"`
 	WeightDailyDecay     float64 `yaml:"weight_daily_decay"`
 	LanguageWeight       float64 `yaml:"language_weight"`
 	ConsecutiveKeyWeight float64 `yaml:"consecutive_key_weight"`
 	BacklinkWeight       float64 `yaml:"backlink_weight"`
 	ServerPort           int     `yaml:"server_port"`
 	FlushIntervalSeconds int     `yaml:"flush_interval_seconds"`
 }
 // BacklinkConfig holds the settings used by the backlink computation.
 type BacklinkConfig struct {
 	// Baseline is read from the YAML key "baseline"; GetDefaultConfig sets it to 200000.
 	Baseline int `yaml:"baseline"`
 }
 // StorageConfig holds the storage settings.
 type StorageConfig struct {
 	// Path is read from the YAML key "path"; GetDefaultConfig sets it to "./savedata".
 	Path string `yaml:"path"`
 }
 // PrometheusConfig holds the Prometheus monitoring port of each module.
 // GetDefaultConfig sets the three ports to 14950, 14952 and 14953.
 type PrometheusConfig struct {
 	CrawlerPort  int `yaml:"crawler_port"`
 	BacklinkPort int `yaml:"backlink_port"`
 	SearchPort   int `yaml:"search_port"`
 }
 // Global is the package-wide configuration instance. init fills it with the
 // defaults from GetDefaultConfig and Load replaces it with the loaded values;
 // the accessor functions in this package read from it.
 var Global Config
 // Load reads the YAML file at configPath and installs the result as Global.
 //
 // Decoding starts from GetDefaultConfig instead of a zero Config, so a key
 // that is absent from the file keeps its default rather than silently
 // becoming 0, "" or false. The decoded values are then checked by
 // validateConfig. Global is left untouched whenever an error is returned.
 //
 // Returned errors wrap the underlying cause, so callers can inspect it with
 // errors.Is / errors.As (for example errors.Is(err, os.ErrNotExist)).
 func Load(configPath string) error {
 	data, err := os.ReadFile(configPath)
 	if err != nil {
 		return fmt.Errorf("failed to read config file: %w", err)
 	}
 	// Unmarshal only overwrites the fields that appear in the document.
 	cfg := GetDefaultConfig()
 	if err := yaml.Unmarshal(data, &cfg); err != nil {
 		return fmt.Errorf("failed to parse config file: %w", err)
 	}
 	if err := validateConfig(cfg); err != nil {
 		return fmt.Errorf("invalid config file: %w", err)
 	}
 	Global = cfg
 	return nil
 }
 // validateConfig rejects values that the code reading the configuration
 // cannot run with. It returns nil when cfg is usable.
 func validateConfig(cfg Config) error {
 	switch {
 	case cfg.Index.MaxURLsPerKey < 1:
 		// The search code indexes a slice at MaxURLsPerKey-1.
 		return fmt.Errorf("index.max_urls_per_key must be at least 1, got %d", cfg.Index.MaxURLsPerKey)
 	case cfg.Crawler.Workers < 1:
 		// The crawler uses Workers as the capacity of its semaphore channel.
 		return fmt.Errorf("crawler.workers must be at least 1, got %d", cfg.Crawler.Workers)
 	case !(cfg.Crawler.ExpectedProsperRatio > 0):
 		// The scheduler divides by this ratio; the negated form also rejects NaN.
 		return fmt.Errorf("crawler.expected_prosper_ratio must be greater than 0, got %v", cfg.Crawler.ExpectedProsperRatio)
 	case cfg.Search.FlushIntervalSeconds < 1:
 		// time.NewTicker panics on a non-positive interval.
 		return fmt.Errorf("search.flush_interval_seconds must be at least 1, got %d", cfg.Search.FlushIntervalSeconds)
 	}
 	return nil
 }
 // LoadFromSavedata loads config.yml from the savedata directory. The path is
 // relative, so it is resolved against the process working directory.
 func LoadFromSavedata() error {
 	return Load(filepath.Join("savedata", "config.yml"))
 }
 // GetDefaultConfig returns the built-in default configuration. init installs
 // it as Global, and Load uses it as the base that the config file overrides.
 func GetDefaultConfig() Config {
 	return Config{
 		Index: IndexConfig{
 			MaxURLsPerKey:       11000,
 			MaxSameDomainPerKey: 20,
 			BigCleanThreshold:   10000000,
 			MaxNewURLsPerKey:    10000,
 			MinURLsForNewKey:    3,
 		},
 		Crawler: CrawlerConfig{
 			SpiderName:           "loli_spider",
 			Cooldown:             3,
 			Workers:              22,
 			CrawlFocus:           0.7,
 			MaxKeywordsPerPage:   250,
 			MaxEpoch:             100,
 			ExpectedProsperRatio: 0.6,
 			EntryURL:             "https://zh.wikipedia.org/",
 		},
 		Search: SearchConfig{
 			UseOnlineSnippet:     true,
 			OnlineSnippetTimeout: 3,
 			WeightDailyDecay:     0.996,
 			LanguageWeight:       0.5,
 			ConsecutiveKeyWeight: 1.3,
 			BacklinkWeight:       1.0,
 			ServerPort:           8082,
 			FlushIntervalSeconds: 60,
 		},
 		Backlink: BacklinkConfig{
 			Baseline: 200000,
 		},
 		Storage: StorageConfig{
 			Path: "./savedata",
 		},
 		Prometheus: PrometheusConfig{
 			CrawlerPort:  14950,
 			BacklinkPort: 14952,
 			SearchPort:   14953,
 		},
 	}
 }
 // The accessor functions below keep the names of the former package-level
 // constants for backward compatibility and return the matching field of Global.
 // Once Load has succeeded they return the values read from the config file.
 func init() {
 	// Install the defaults so the accessors return usable values before Load is called.
 	Global = GetDefaultConfig()
 }
 // MaxURLsPerKey returns Global.Index.MaxURLsPerKey, the upper bound on the
 // number of URLs kept for one index key.
 func MaxURLsPerKey() int {
 	return Global.Index.MaxURLsPerKey
 }
 // MaxSameDomainPerKey returns Global.Index.MaxSameDomainPerKey, the limit on
 // how often one domain may appear under a single index key.
 func MaxSameDomainPerKey() int {
 	return Global.Index.MaxSameDomainPerKey
 }
 // BigCleanThreshold returns Global.Index.BigCleanThreshold, the in-memory row
 // count above which the ingest handler starts a flush.
 func BigCleanThreshold() int {
 	return Global.Index.BigCleanThreshold
 }
 // MaxNewURLsPerKey returns Global.Index.MaxNewURLsPerKey, the cap on new URLs
 // written per index key during one flush.
 func MaxNewURLsPerKey() int {
 	return Global.Index.MaxNewURLsPerKey
 }
 // MinURLsForNewKey returns Global.Index.MinURLsForNewKey; a new index key
 // with fewer URLs than this is dropped instead of being written.
 func MinURLsForNewKey() int {
 	return Global.Index.MinURLsForNewKey
 }
 // SpiderName returns Global.Crawler.SpiderName, the value sent as the
 // User-Agent header.
 func SpiderName() string {
 	return Global.Crawler.SpiderName
 }
 // CrawlerCooldown returns Global.Crawler.Cooldown, the per-host request
 // cooldown in seconds.
 func CrawlerCooldown() int {
 	return Global.Crawler.Cooldown
 }
 // CrawlerWorkers returns Global.Crawler.Workers, the number of concurrent
 // crawl workers.
 func CrawlerWorkers() int {
 	return Global.Crawler.Workers
 }
 // CrawlFocus returns Global.Crawler.CrawlFocus, the domain-concentration
 // factor handed to the scheduler's concentration filter.
 func CrawlFocus() float64 {
 	return Global.Crawler.CrawlFocus
 }
 // MaxKeywordsPerPage returns Global.Crawler.MaxKeywordsPerPage, the largest
 // number of keywords kept for one page.
 func MaxKeywordsPerPage() int {
 	return Global.Crawler.MaxKeywordsPerPage
 }
 // MaxEpoch returns Global.Crawler.MaxEpoch, the maximum number of BFS crawl
 // rounds.
 func MaxEpoch() int {
 	return Global.Crawler.MaxEpoch
 }
 // ExpectedProsperRatio returns Global.Crawler.ExpectedProsperRatio, the
 // target share of "prosperous" URLs used by the scheduler.
 func ExpectedProsperRatio() float64 {
 	return Global.Crawler.ExpectedProsperRatio
 }
 // EntryURL returns Global.Crawler.EntryURL, the URL the BFS crawl starts from.
 func EntryURL() string {
 	return Global.Crawler.EntryURL
 }
 // UseOnlineSnippet returns Global.Search.UseOnlineSnippet, which decides
 // whether snippets may be fetched online at search time.
 func UseOnlineSnippet() bool {
 	return Global.Search.UseOnlineSnippet
 }
 // OnlineSnippetTimeout returns Global.Search.OnlineSnippetTimeout, the
 // snippet-fetch timeout in seconds.
 func OnlineSnippetTimeout() int {
 	return Global.Search.OnlineSnippetTimeout
 }
 // WeightDailyDecay returns Global.Search.WeightDailyDecay, the per-day decay
 // factor used by the time multipliers.
 func WeightDailyDecay() float64 {
 	return Global.Search.WeightDailyDecay
 }
 // LanguageWeight returns Global.Search.LanguageWeight, the weight applied by
 // the language multiplier.
 func LanguageWeight() float64 {
 	return Global.Search.LanguageWeight
 }
 // ConsecutiveKeyWeight returns Global.Search.ConsecutiveKeyWeight, the base
 // of the bonus for consecutive keyword hits.
 func ConsecutiveKeyWeight() float64 {
 	return Global.Search.ConsecutiveKeyWeight
 }
 // BacklinkWeight returns Global.Search.BacklinkWeight, the factor applied to
 // a URL's prosper value when scoring.
 func BacklinkWeight() float64 {
 	return Global.Search.BacklinkWeight
 }
 // SearchServerPort returns Global.Search.ServerPort, the HTTP port of the
 // search server.
 func SearchServerPort() int {
 	return Global.Search.ServerPort
 }
 // FlushIntervalSeconds returns Global.Search.FlushIntervalSeconds, the
 // interval in seconds between periodic flushes.
 func FlushIntervalSeconds() int {
 	return Global.Search.FlushIntervalSeconds
 }
 // BacklinkBaseline returns Global.Backlink.Baseline.
 func BacklinkBaseline() int {
 	return Global.Backlink.Baseline
 }
 // PromPortCrawler returns Global.Prometheus.CrawlerPort, the metrics port of
 // the crawler module.
 func PromPortCrawler() int {
 	return Global.Prometheus.CrawlerPort
 }
 // PromPortBacklink returns Global.Prometheus.BacklinkPort, the metrics port
 // of the backlink module.
 func PromPortBacklink() int {
 	return Global.Prometheus.BacklinkPort
 }
 // PromPortSearch returns Global.Prometheus.SearchPort, the metrics port of
 // the search module.
 func PromPortSearch() int {
 	return Global.Prometheus.SearchPort
 }
 // StoragePath is kept as a constant for backward compatibility.
 // NOTE(review): it is a fixed literal and does not follow Global.Storage.Path;
 // confirm that code needing the configured path reads the config value instead.
 const StoragePath = "./savedata"
 // Prometheus ports
 // HTTP ports on which each module exposes its Prometheus metrics.
 // NOTE(review): this view also shows accessor functions named PromPortCrawler,
 // PromPortBacklink and PromPortSearch in the config package. A Go package
 // cannot declare the same identifier twice, so confirm that only one of the
 // two forms remains in the final file.
 const (
 	PromPortCrawler  = 14950 // metrics port of the crawler module
 	PromPortBacklink = 14952 // metrics port of the backlink computation module
 	PromPortSearch   = 14953 // metrics port of the search service (including the harvest function)
 )
@@ -60,7 +60,7 @@ type Crawler struct {
 // prosperMap 由 info 模块加载，传入域名繁荣值用于调度优先级计算。
 func New(db *storage.DB, a *analyzer.Analyzer, prosperMap map[string]float64) *Crawler {
 	return &Crawler{
-		fetcher:    NewFetcher(config.SpiderName, config.CrawlerCooldown*time.Second),
+		fetcher:    NewFetcher(config.SpiderName(), time.Duration(config.CrawlerCooldown())*time.Second),
 		db:         db,
 		analyzer:   a,
 		prosperMap: prosperMap,
@@ -124,7 +124,7 @@ func (c *Crawler) Run(entryURL string, maxEpoch int) {
 		)
 		// 信号量：限制同时并发数不超过配置的工作线程数
-		sem := make(chan struct{}, config.CrawlerWorkers)
+		sem := make(chan struct{}, config.CrawlerWorkers())
 		for _, u := range queue {
 			wg.Add(1)
 			sem <- struct{}{} // 获取一个令牌（阻塞直到有空闲槽位）
@@ -219,8 +219,9 @@ func (c *Crawler) visitURL(rawURL string) (hrefs []string) {
 	kws := c.analyzer.Analyze(title, desc, text)
 	if len(kws) > 0 {
 		// 限制每个页面最多发送的关键词数量
-		if len(kws) > config.MaxKeywordsPerPage {
+		maxKws := config.MaxKeywordsPerPage()
-			kws = kws[:config.MaxKeywordsPerPage]
+		if len(kws) > maxKws {
 			kws = kws[:maxKws]
 		}
 		atomic.AddInt64(&c.stats.KeywordsFetched, int64(len(kws)))
 		// 异步发送到收获服务器写入倒排索引（不阻塞爬取流程）
@@ -383,7 +384,7 @@ func (c *Crawler) sendToHarvester(finalURL string, kws []analyzer.Keyword) {
 	ctx, cancel := context.WithTimeout(context.Background(), 10*time.Second)
 	defer cancel()
-	req, err := http.NewRequestWithContext(ctx, "POST", fmt.Sprintf("http://localhost:%d/l", config.SearchServerPort), bytes.NewReader(data))
+	req, err := http.NewRequestWithContext(ctx, "POST", fmt.Sprintf("http://localhost:%d/l", config.SearchServerPort()), bytes.NewReader(data))
 	if err != nil {
 		return
 	}
@@ -454,7 +455,7 @@ func (c *Crawler) schedule(links []URLWeight) []string {
 	selected := weightedSample(scored_list, k)
 	// 域名集中度过滤：限制每个域名被选中的数量，防止被少数网站垄断
-	selected = concentrationFilter(selected, config.CrawlFocus)
+	selected = concentrationFilter(selected, config.CrawlFocus())
 	// 分离 HTTPS 和 HTTP 链接，HTTP 最多占 HTTPS 的 1/4
 	var httpsURLs, httpURLs []string
@@ -480,7 +481,8 @@ func (c *Crawler) schedule(links []URLWeight) []string {
 		}
 	}
 	// 根据目标繁荣占比计算普通 URL 应保留数量
-	n := int(float64(len(prosperURLs)) * (1-config.ExpectedProsperRatio) / config.ExpectedProsperRatio)
+	expectedProsperRatio := config.ExpectedProsperRatio()
 	n := int(float64(len(prosperURLs)) * (1-expectedProsperRatio) / expectedProsperRatio)
 	if len(otherURLs) > n {
 		keep := max(len(otherURLs)-len(selected)/10, n)
 		if keep < len(otherURLs) {
@@ -8,6 +8,7 @@ require (
 	github.com/yanyiwu/gojieba v1.4.4
 	go.etcd.io/bbolt v1.3.9
 	golang.org/x/net v0.23.0
 	gopkg.in/yaml.v3 v3.0.1
 )
 require (
@@ -32,5 +32,7 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
 google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
 google.golang.org/protobuf v1.31.0 h1:g0LDEJHgrBl9N9r17Ru3sqWhkIx2NB67okBHPwC7hs8=
 google.golang.org/protobuf v1.31.0/go.mod h1:HV8QOd/L58Z+nl8r43ehVNZIU/HEI6OcFqwMG9pJV4I=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405 h1:yhCVgyC4o1eVCa2tZl7eS0r+SDo693bJlVdllGtEeKM=
 gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
 gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
 gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
@@ -103,16 +103,10 @@ prometheus:
 // loadConfig 从 savedata/config.yml 加载配置
 func loadConfig() error {
-	configPath := filepath.Join("savedata", "config.yml")
+	if err := config.LoadFromSavedata(); err != nil {
-	
+		return fmt.Errorf("failed to load config: %v", err)
 	// 检查配置文件是否存在
 	if _, err := os.Stat(configPath); os.IsNotExist(err) {
 		return fmt.Errorf("config file not found: %s", configPath)
 	}
-	
+	log.Printf("Config loaded successfully from savedata/config.yml")
 	// TODO: 解析 YAML 配置文件并应用到 config 包
 	// 这里暂时只是检查文件存在，后续可以添加 YAML 解析逻辑
 	log.Printf("Loading config from: %s", configPath)
 	return nil
 }
@@ -130,8 +124,8 @@ func main() {
 	// ---- 命令行参数 ----
 	// --storage：存储根目录路径，默认使用 config.StoragePath
 	storageDir := flag.String("storage", config.StoragePath, "path to savedata directory")
-	// --entry：BFS 爬取的起始 URL，默认使用 config.EntryURL（维基百科中文首页）
+	// --entry：BFS 爬取的起始 URL，默认使用 config.EntryURL()（维基百科中文首页）
-	entryURL := flag.String("entry", config.EntryURL, "BFS crawl entry URL")
+	entryURL := flag.String("entry", config.EntryURL(), "BFS crawl entry URL")
 	// --stopwords：屏蔽词 JSON 文件路径
 	stopWords := flag.String("stopwords", "../data/标点符号.json", "path to stop-words JSON")
 	flag.Parse()
@@ -161,7 +155,7 @@ func main() {
 	// ---- 4. 搜索服务器（默认 :80）：对外提供搜索 API，同时内嵌收获服务（统一端口）
 	searchSrv := search.New(db, infoSvc, anal)
 	go func() {
-		addr := fmt.Sprintf(":%d", config.SearchServerPort)
+		addr := fmt.Sprintf(":%d", config.SearchServerPort())
 		if err := searchSrv.ListenAndServe(addr); err != nil {
 			log.Fatalf("[search] fatal: %v", err)
 		}
@@ -175,7 +169,7 @@ func main() {
 	// 从 info 服务获取繁荣表快照，用于调度优先级决策
 	prosperMap := infoSvc.ProsperMap()
 	crawl := crawler.New(db, anal, prosperMap)
-	go crawl.Run(*entryURL, config.MaxEpoch)
+	go crawl.Run(*entryURL, config.MaxEpoch())
 	log.Println("all modules started — press Ctrl-C to stop")
@@ -50,7 +50,7 @@ func New(db *storage.DB, infoSvc *info.Service, a *analyzer.Analyzer) *Server {
 		analyzer: a,
 		mem:      make(map[string][]storage.IndexEntry),
 		httpCli: &http.Client{
-			Timeout: time.Duration(config.OnlineSnippetTimeout) * time.Second,
+			Timeout: time.Duration(config.OnlineSnippetTimeout()) * time.Second,
 		},
 	}
 	// 启动定期刷盘 goroutine
@@ -60,7 +60,7 @@ func New(db *storage.DB, infoSvc *info.Service, a *analyzer.Analyzer) *Server {
 // runPeriodicFlush 每隔 FlushIntervalSeconds 秒触发一次刷盘。
 func (s *Server) runPeriodicFlush() {
-	ticker := time.NewTicker(time.Duration(config.FlushIntervalSeconds) * time.Second)
+	ticker := time.NewTicker(time.Duration(config.FlushIntervalSeconds()) * time.Second)
 	defer ticker.Stop()
 	for range ticker.C {
 		s.Flush()
@@ -514,17 +514,18 @@ func (s *Server) query(tokens []string, from, to int, siteFilter string) ([]sear
 		defVal  float64 // 缺省权重（词在索引中条目已满时使用）
 	}
 	tokenIndexes := make([]tokenIndex, 0, len(tokens))
 	maxURLsPerKey := config.MaxURLsPerKey()
 	for _, t := range tokens {
 		entries, _ := s.db.GetIndex(t)
 		// 计算缺省权重：当条目数达到上限时，权重低于第 MaxURLsPerKey 名的条目使用缺省权重
-		defVal := 1.0 / 10000 * float64(max(100, len(entries))) / float64(config.MaxURLsPerKey)
+		defVal := 1.0 / 10000 * float64(max(100, len(entries))) / float64(maxURLsPerKey)
-		if len(entries) >= config.MaxURLsPerKey {
+		if len(entries) >= maxURLsPerKey {
 			weights := make([]float64, len(entries))
 			for i, e := range entries {
 				weights[i] = float64(e.Weight)
 			}
 			sort.Sort(sort.Reverse(sort.Float64Slice(weights)))
-			defVal = math.Max(1.0/10000, weights[config.MaxURLsPerKey-1]/2)
+			defVal = math.Max(1.0/10000, weights[maxURLsPerKey-1]/2)
 		}
 		tokenIndexes = append(tokenIndexes, tokenIndex{t, entries, defVal})
 	}
@@ -576,7 +577,7 @@ func (s *Server) query(tokens []string, from, to int, siteFilter string) ([]sear
 			rel *= vp
 		}
 		// 反向链接繁荣加分
-		prosper := 1 + s.infoSvc.Prosper(u)*config.BacklinkWeight
+		prosper := 1 + s.infoSvc.Prosper(u)*config.BacklinkWeight()
 		bad := badURL(u)
 		adjust := s.infoSvc.Adjust(netloc(u))
 		// 基础分数 = 相关性 × 繁荣值 × URL质量 × 人工调整
@@ -659,7 +660,7 @@ func (s *Server) query(tokens []string, from, to int, siteFilter string) ([]sear
 			repMul = 1 - (h - 0.5)
 		}
 		// 连续词出现越多，乘以 config.ConsecutiveKeyWeight（>1）加成
-		consMul := math.Pow(config.ConsecutiveKeyWeight, float64(consecutive))
+		consMul := math.Pow(config.ConsecutiveKeyWeight(), float64(consecutive))
 		candidates[i].scoreVec[0] *= repMul * consMul
 		candidates[i].scoreVec[5] = repMul
 		candidates[i].scoreVec[8] = consMul
@@ -729,7 +730,7 @@ func (s *Server) getSnippet(rawURL string) *snippetInfo {
 		snip := buildSnippet(entry)
 		return snip
 	}
-	if !config.UseOnlineSnippet {
+	if !config.UseOnlineSnippet() {
 		return nil
 	}
 	// 在线抓取（不使用 robots.txt，适用于搜索摘要场景）
@@ -737,7 +738,7 @@ func (s *Server) getSnippet(rawURL string) *snippetInfo {
 	if err != nil {
 		return nil
 	}
-	req.Header.Set("User-Agent", config.SpiderName)
+	req.Header.Set("User-Agent", config.SpiderName())
 	resp, err := s.httpCli.Do(req)
 	if err != nil || resp.StatusCode != 200 {
 		return nil
@@ -785,7 +786,8 @@ func languageMultiplier(si *storage.SiteInfo) float64 {
 	}
 	chinese := si.Languages["zh"] / total
 	weird := (total - si.Languages["zh"] - si.Languages["en"] - si.Languages["ja"]) / total
-	return 1 + chinese*config.LanguageWeight - weird*config.LanguageWeight
+	languageWeight := config.LanguageWeight()
 	return 1 + chinese*languageWeight - weird*languageWeight
 }
 // timeMul 根据网站最后访问时间计算时间衰减倍数（越久远衰减越多）。
@@ -807,7 +809,7 @@ func timeMul(si *storage.SiteInfo, now int64) float64 {
 	if days > 0 {
 		days-- // 跳过第一天
 	}
-	return math.Pow(config.WeightDailyDecay, float64(days))
+	return math.Pow(config.WeightDailyDecay(), float64(days))
 }
 // urlTimeMul 根据该 URL 的摘要抓取时间计算时间衰减倍数（30 天内不衰减）。
@@ -820,7 +822,7 @@ func urlTimeMul(db *storage.DB, rawURL string, now int64) float64 {
 	if days <= 30 {
 		return 1.0
 	}
-	return math.Pow((2+config.WeightDailyDecay)/3, float64(days))
+	return math.Pow((2+config.WeightDailyDecay())/3, float64(days))
 }
 // badURL 返回 URL 的"劣质"评分（0~0.9）。
@@ -1193,7 +1195,7 @@ func (s *Server) handleIngest(w http.ResponseWriter, r *http.Request) {
 		atomic.AddInt64(&s.rowCount, 1)
 	}
 	s.memMu.Unlock()
-	if atomic.LoadInt64(&s.rowCount) > int64(config.BigCleanThreshold) {
+	if atomic.LoadInt64(&s.rowCount) > int64(config.BigCleanThreshold()) {
 		go s.Flush()
 	}
 	w.Write([]byte("ok"))
@@ -1208,14 +1210,15 @@ func (s *Server) handleFlush(w http.ResponseWriter, r *http.Request) {
 // lowThreshold 返回某关键词在已有大量条目时，新条目所需的最低权重阈值。
 func (s *Server) lowThreshold(key string) float64 {
 	existing, _ := s.db.GetIndex(key)
-	if len(existing) < config.MaxURLsPerKey {
+	maxURLsPerKey := config.MaxURLsPerKey()
 	if len(existing) < maxURLsPerKey {
 		return -1
 	}
 	weights := make([]float64, len(existing))
 	for i, e := range existing {
 		weights[i] = float64(e.Weight)
 	}
-	return nthLargest(weights, config.MaxURLsPerKey-1) * 0.05
+	return nthLargest(weights, maxURLsPerKey-1) * 0.05
 }
 // flush 将内存中的索引批量合并写入磁盘，然后清空内存。
@@ -1269,15 +1272,16 @@ func (s *Server) flush() {
 // mergeKey 将新条目和磁盘已有条目合并后返回最终列表。
 func (s *Server) mergeKey(key string, newEntries []storage.IndexEntry) []storage.IndexEntry {
 	existing, _ := s.db.GetIndex(key)
-	if len(existing) == 0 && len(newEntries) < config.MinURLsForNewKey {
+	if len(existing) == 0 && len(newEntries) < config.MinURLsForNewKey() {
 		return nil
 	}
 	merged := dedup(append(newEntries, existing...))
 	if rand.Float64() < 0.02 {
 		merged = dedupNormalised(merged)
 	}
-	if float64(len(merged)) > float64(config.MaxURLsPerKey)*1.1 || rand.Float64() < 0.02 {
+	maxURLsPerKey := config.MaxURLsPerKey()
-		merged = trim(merged, s.infoSvc, config.MaxURLsPerKey, config.MaxSameDomainPerKey)
+	if float64(len(merged)) > float64(maxURLsPerKey)*1.1 || rand.Float64() < 0.02 {
 		merged = trim(merged, s.infoSvc, maxURLsPerKey, config.MaxSameDomainPerKey())
 	}
 	return merged
 }