// Source path: sese-engine-go/config/config.go

// Package config holds all global configuration parameters for sese-engine.
package config
// Limits that bound the size of the index and its storage.
const (
	// MaxURLsPerKey is the maximum number of URLs stored under one index key.
	MaxURLsPerKey = 11000

	// MaxSameDomainPerKey is the maximum number of URLs from a single domain
	// that may be kept under one key.
	MaxSameDomainPerKey = 20

	// BigCleanThreshold is the row count after which the in-memory index is
	// flushed.
	BigCleanThreshold = 10000000

	// MaxNewURLsPerKey caps how many new URLs are added to a key in one flush.
	MaxNewURLsPerKey = 10000

	// MinURLsForNewKey is the minimum URL count a new key needs; new keys
	// with fewer URLs than this are discarded.
	MinURLsForNewKey = 3
)
// Settings consumed by the crawler.
const (
	// SpiderName is the crawler's name string.
	// NOTE(review): presumably used to identify the crawler to remote hosts
	// (e.g. in the User-Agent) — confirm against the crawler code.
	SpiderName = "loli_spider"

	// CrawlerCooldown is the delay, in seconds, between requests to the same
	// host.
	CrawlerCooldown = 3

	// CrawlerWorkers is the size of the goroutine pool used for crawling.
	CrawlerWorkers = 22

	// CrawlFocus is the concentration factor: a higher value keeps the crawl
	// more focused on a single domain.
	CrawlFocus = 0.7

	// MaxKeywordsPerPage is a per-page keyword limit.
	// NOTE(review): presumably the cap on keywords extracted from one crawled
	// page — confirm where it is enforced.
	MaxKeywordsPerPage = 250

	// MaxEpoch is an upper bound on epochs.
	// NOTE(review): what constitutes an "epoch" is defined by the consumer —
	// confirm.
	MaxEpoch = 100

	// ExpectedProsperRatio is the fraction of the queue that should consist
	// of "prosperous" (high backlink) domains.
	ExpectedProsperRatio = 0.6

	// EntryURL is a URL constant pointing at Chinese Wikipedia.
	// NOTE(review): presumably the seed URL the crawl starts from — confirm.
	EntryURL = "https://zh.wikipedia.org/"
)
// Settings for the search server and its ranking weights.
const (
	// UseOnlineSnippet toggles the "online snippet" feature (enabled).
	// NOTE(review): presumably fetches the live page to build result
	// snippets — confirm against the search code.
	UseOnlineSnippet = true

	// OnlineSnippetTimeout is the online-snippet timeout, in seconds.
	OnlineSnippetTimeout = 3

	// WeightDailyDecay is a decay factor.
	// NOTE(review): the name suggests it is applied to a weight once per
	// day — confirm.
	WeightDailyDecay = 0.996

	// LanguageWeight is a ranking weight.
	// NOTE(review): presumably tied to page/query language — confirm.
	LanguageWeight = 0.5

	// ConsecutiveKeyWeight is a ranking weight.
	// NOTE(review): presumably a boost for consecutive keyword matches —
	// confirm.
	ConsecutiveKeyWeight = 1.3

	// BacklinkWeight is a ranking weight.
	// NOTE(review): presumably scales the backlink score's contribution —
	// confirm.
	BacklinkWeight = 1.0

	// SearchServerPort is the port number for the search server.
	SearchServerPort = 80
)
// BacklinkBaseline is the normalization divisor applied to backlink scores
// during backlink computation.
const BacklinkBaseline = 200000
// StoragePath is the storage directory. The path is relative, so it is
// resolved against the process working directory.
const StoragePath = "./savedata"
// Prometheus port numbers, one per component.
const (
	// PromPortCrawler is the Prometheus port for the crawler.
	PromPortCrawler = 14950

	// PromPortHarvester is the Prometheus port for the harvester.
	PromPortHarvester = 14951

	// PromPortBacklink is the Prometheus port for the backlink component.
	PromPortBacklink = 14952

	// PromPortSearch is the Prometheus port for the search component.
	PromPortSearch = 14953
)
// HarvesterAddr is the base URL of the harvester's HTTP endpoint; it points
// at port 5000 on the loopback interface.
const HarvesterAddr = "http://127.0.0.1:5000"