加上中文注释
This commit is contained in:
@@ -1,60 +1,65 @@
|
||||
// sese-engine — Go rewrite
|
||||
// Go 版 sese-engine:个人搜索引擎的主入口文件。
|
||||
//
|
||||
// All modules (harvester, search server, crawler, backlink calculator) are
|
||||
// launched as goroutines from this single binary. The binary blocks until
|
||||
// interrupted (Ctrl-C / SIGTERM).
|
||||
// 所有模块(爬虫、收获服务器、搜索服务器、反向链接计算)均作为 goroutine 在同一进程中启动。
|
||||
// 主 goroutine 阻塞等待系统信号(Ctrl-C / SIGTERM),收到后优雅退出。
|
||||
//
|
||||
// Usage:
|
||||
// 运行方式:
|
||||
//
|
||||
// cd golang && go run . [--storage ./savedata] [--entry https://zh.wikipedia.org/]
|
||||
package main
|
||||
|
||||
import (
|
||||
"flag"
|
||||
"fmt"
|
||||
"log"
|
||||
"os"
|
||||
"os/signal"
|
||||
"syscall"
|
||||
"flag" // 命令行参数解析
|
||||
"fmt" // 格式化(搜索服务端口)
|
||||
"log" // 日志输出
|
||||
"os" // 操作系统信号
|
||||
"os/signal" // 信号捕获
|
||||
"syscall" // 系统调用(SIGTERM)
|
||||
|
||||
"sese-engine/analyzer"
|
||||
"sese-engine/backlink"
|
||||
"sese-engine/config"
|
||||
"sese-engine/crawler"
|
||||
"sese-engine/harvester"
|
||||
"sese-engine/info"
|
||||
"sese-engine/search"
|
||||
"sese-engine/storage"
|
||||
"sese-engine/analyzer" // 文本分析和关键词提取
|
||||
"sese-engine/backlink" // 反向链接(繁荣值)计算
|
||||
"sese-engine/config" // 全局配置
|
||||
"sese-engine/crawler" // BFS 爬虫
|
||||
"sese-engine/harvester" // 收获服务器(索引写入)
|
||||
"sese-engine/info" // info 服务(繁荣表、调整表、屏蔽词)
|
||||
"sese-engine/search" // 搜索服务器
|
||||
"sese-engine/storage" // 持久化存储
|
||||
)
|
||||
|
||||
func main() {
|
||||
// ---- 命令行参数 ----
|
||||
// --storage:存储根目录路径,默认使用 config.StoragePath
|
||||
storageDir := flag.String("storage", config.StoragePath, "path to savedata directory")
|
||||
entryURL := flag.String("entry", config.EntryURL, "BFS crawl entry URL")
|
||||
stopWords := flag.String("stopwords", "../data/标点符号.json", "path to stop-words JSON")
|
||||
// --entry:BFS 爬取的起始 URL,默认使用 config.EntryURL(维基百科中文首页)
|
||||
entryURL := flag.String("entry", config.EntryURL, "BFS crawl entry URL")
|
||||
// --stopwords:屏蔽词 JSON 文件路径
|
||||
stopWords := flag.String("stopwords", "../data/标点符号.json", "path to stop-words JSON")
|
||||
flag.Parse()
|
||||
|
||||
// 设置日志格式:时间戳 + 短文件名
|
||||
log.SetFlags(log.LstdFlags | log.Lshortfile)
|
||||
log.Printf("sese-engine starting storage=%s entry=%s", *storageDir, *entryURL)
|
||||
|
||||
// ---- 1. Storage ----
|
||||
// ---- 1. 存储层:打开 bbolt 数据库 ----
|
||||
db, err := storage.Open(*storageDir)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to open storage: %v", err)
|
||||
}
|
||||
defer db.Close()
|
||||
|
||||
// ---- 2. Info service ----
|
||||
// ---- 2. Info 服务:加载繁荣表、调整表和屏蔽词 ----
|
||||
infoSvc := info.New(*storageDir)
|
||||
|
||||
// ---- 3. Analyzer ----
|
||||
// modelPath is unused (lingua-go uses built-in language models, no external file needed)
|
||||
// ---- 3. Analyzer:初始化分词器和语言检测器 ----
|
||||
// modelPath 参数未使用,此处传入空字符串(lingua-go 使用内置语言模型,无需外部文件)
|
||||
anal, err := analyzer.New("", *stopWords)
|
||||
if err != nil {
|
||||
log.Fatalf("failed to init analyzer: %v", err)
|
||||
}
|
||||
defer anal.Close()
|
||||
|
||||
// ---- 4. Harvester (index write server on :5000) ----
|
||||
// ---- 4. 收获服务器(:5000):接收爬虫发来的索引数据 ----
|
||||
harvSrv := harvester.New(db, infoSvc)
|
||||
go func() {
|
||||
if err := harvSrv.ListenAndServe(":5000"); err != nil {
|
||||
@@ -62,7 +67,7 @@ func main() {
|
||||
}
|
||||
}()
|
||||
|
||||
// ---- 5. Search server ----
|
||||
// ---- 5. 搜索服务器(默认 :80):对外提供搜索 API ----
|
||||
searchSrv := search.New(db, infoSvc, anal)
|
||||
go func() {
|
||||
addr := fmt.Sprintf(":%d", config.SearchServerPort)
|
||||
@@ -71,18 +76,20 @@ func main() {
|
||||
}
|
||||
}()
|
||||
|
||||
// ---- 6. Backlink calculator (runs every 48 h) ----
|
||||
// ---- 6. 反向链接计算器:每 48 小时运行一次 ----
|
||||
bl := backlink.New(db, *storageDir)
|
||||
go bl.Run()
|
||||
|
||||
// ---- 7. Crawler ----
|
||||
// ---- 7. 爬虫:从入口 URL 开始 BFS 爬取 ----
|
||||
// 从 info 服务获取繁荣表快照,用于调度优先级决策
|
||||
prosperMap := infoSvc.ProsperMap()
|
||||
crawl := crawler.New(db, anal, prosperMap)
|
||||
go crawl.Run(*entryURL, config.MaxEpoch)
|
||||
|
||||
log.Println("all modules started — press Ctrl-C to stop")
|
||||
|
||||
// ---- Graceful shutdown ----
|
||||
// ---- 优雅退出 ----
|
||||
// 阻塞等待 SIGINT(Ctrl-C)或 SIGTERM 信号
|
||||
quit := make(chan os.Signal, 1)
|
||||
signal.Notify(quit, os.Interrupt, syscall.SIGTERM)
|
||||
<-quit
|
||||
|
||||
Reference in New Issue
Block a user