54 lines
1.6 KiB
Go
54 lines
1.6 KiB
Go
// Package config holds all global configuration parameters for sese-engine.
|
|
package config
|
|
|
|
// Index / storage limits
|
|
const (
	// MaxURLsPerKey is the maximum number of URLs stored per index key.
	MaxURLsPerKey = 11000

	// MaxSameDomainPerKey is the maximum number of URLs from the same domain
	// stored under a single key.
	MaxSameDomainPerKey = 20

	// BigCleanThreshold is the number of rows after which the in-memory
	// index is flushed.
	BigCleanThreshold = 10000000

	// MaxNewURLsPerKey caps the number of new URLs added per key in a
	// single flush.
	MaxNewURLsPerKey = 10000

	// MinURLsForNewKey is the minimum URL count a new key needs to be kept;
	// new keys with fewer URLs are discarded.
	MinURLsForNewKey = 3
)
|
|
|
|
// Crawler settings
|
|
const (
	// SpiderName is the crawler's name string.
	// NOTE(review): presumably sent as the crawler's identity (e.g. in the
	// User-Agent) — confirm against the crawler code.
	SpiderName = "loli_spider"

	// CrawlerCooldown is the number of seconds to wait between requests to
	// the same host.
	CrawlerCooldown = 3

	// CrawlerWorkers is the goroutine pool size used for crawling.
	CrawlerWorkers = 22

	// CrawlFocus is a concentration factor: a higher value keeps the crawl
	// more focused on a single domain.
	CrawlFocus = 0.7

	// MaxKeywordsPerPage is a per-page keyword limit.
	// NOTE(review): presumably caps how many keywords are kept from one
	// page — confirm against the indexing code.
	MaxKeywordsPerPage = 250

	// MaxEpoch is an upper bound on epochs.
	// NOTE(review): what constitutes an "epoch" is not visible here —
	// confirm against the caller.
	MaxEpoch = 100

	// ExpectedProsperRatio is the fraction of the queue that should consist
	// of "prosperous" (high backlink) domains.
	ExpectedProsperRatio = 0.6

	// EntryURL is the entry URL.
	// NOTE(review): presumably the seed URL where crawling starts — confirm.
	EntryURL = "https://zh.wikipedia.org/"
)
|
|
|
|
// Search / ranking weights
|
|
const (
	// UseOnlineSnippet toggles the "online snippet" feature.
	// NOTE(review): presumably enables fetching result snippets live at
	// query time — confirm against the search server.
	UseOnlineSnippet = true

	// OnlineSnippetTimeout is the online-snippet timeout, in seconds.
	OnlineSnippetTimeout = 3

	// WeightDailyDecay is a daily decay factor applied to weights.
	// NOTE(review): exactly which weights decay is not visible here — confirm.
	WeightDailyDecay = 0.996

	// LanguageWeight is the ranking weight associated with language.
	// NOTE(review): how it enters the score is not visible here — confirm.
	LanguageWeight = 0.5

	// ConsecutiveKeyWeight is the ranking weight associated with consecutive
	// keys.
	// NOTE(review): presumably a boost for adjacent query keywords — confirm.
	ConsecutiveKeyWeight = 1.3

	// BacklinkWeight is the ranking weight associated with backlinks.
	BacklinkWeight = 1.0

	// SearchServerPort is the port number for the search server.
	SearchServerPort = 80
)
|
|
|
|
// Backlink computation
|
|
const (
	// BacklinkBaseline is the normalization divisor for backlink scores.
	BacklinkBaseline = 200000
)
|
|
|
|
// Storage path (relative to process working directory)
|
|
const (
	// StoragePath is the storage location, expressed relative to the
	// process working directory.
	StoragePath = "./savedata"
)
|
|
|
|
// Prometheus ports
|
|
const (
	// PromPortCrawler is the Prometheus port for the crawler.
	PromPortCrawler = 14950

	// PromPortHarvester is the Prometheus port for the harvester.
	PromPortHarvester = 14951

	// PromPortBacklink is the Prometheus port for the backlink component.
	PromPortBacklink = 14952

	// PromPortSearch is the Prometheus port for the search component.
	PromPortSearch = 14953
)
|
|
|
|
// Harvester HTTP endpoint
|
|
const (
	// HarvesterAddr is the base address of the harvester's HTTP endpoint.
	HarvesterAddr = "http://127.0.0.1:5000"
)
|