// Package info loads and serves auxiliary data: backlink scores, adjustment // table, and blocked query words. package info import ( "encoding/json" "math" "os" "path/filepath" "strings" "sync" ) // Service loads the prosperity map, adjustment table, and blocked words. type Service struct { mu sync.RWMutex prosperMap map[string]float64 // normalised backlink scores adjustTable map[string]float64 // per-domain manual weight adjustments blockedWords map[string]bool storagePath string } // New creates and loads the info service from storagePath. func New(storagePath string) *Service { s := &Service{storagePath: storagePath} s.Reload() return s } // Reload re-reads all data files from disk. func (s *Service) Reload() { s.mu.Lock() defer s.mu.Unlock() s.prosperMap = loadProsperMap(s.storagePath) s.adjustTable = loadAdjustTable() s.blockedWords = loadBlockedWords() } // Prosper returns the backlink score for a URL (sum of its path components). func (s *Service) Prosper(rawURL string) float64 { s.mu.RLock() defer s.mu.RUnlock() return prosperFor(rawURL, s.prosperMap) } // ProsperMap returns the full prosperity map (read-only snapshot). func (s *Service) ProsperMap() map[string]float64 { s.mu.RLock() defer s.mu.RUnlock() out := make(map[string]float64, len(s.prosperMap)) for k, v := range s.prosperMap { out[k] = v } return out } // Adjust returns the manual weight multiplier for a hostname (default 1.0). func (s *Service) Adjust(host string) float64 { s.mu.RLock() defer s.mu.RUnlock() if v, ok := s.adjustTable[host]; ok { return v } return 1.0 } // IsBlocked returns true if the word is in the blocked list. 
func (s *Service) IsBlocked(word string) bool { s.mu.RLock() defer s.mu.RUnlock() return s.blockedWords[word] } // ---- loaders ---- const backlinkBaseline = 200000.0 func loadProsperMap(storagePath string) map[string]float64 { path := filepath.Join(storagePath, "prosper.json") f, err := os.Open(path) if err != nil { return map[string]float64{} } defer f.Close() var raw map[string]float64 if err := json.NewDecoder(f).Decode(&raw); err != nil { return map[string]float64{} } return normalise(raw) } func normalise(d map[string]float64) map[string]float64 { total := 0.0 for k, v := range d { if !strings.Contains(k, "/") { total += v } } if total == 0 { return d } factor := backlinkBaseline / total out := make(map[string]float64, len(d)) for k, v := range d { out[k] = v * factor } // Propagate max score up the domain tree for k, v := range out { now := k for { idx := strings.Index(now, ".") if idx < 0 { break } now = now[idx+1:] if cur, ok := out[now]; ok && cur < v { out[now] = v } else if !ok { break } } } return out } func loadAdjustTable() map[string]float64 { // Try loading from data/adjust.json — fallback if absent f, err := os.Open(filepath.Join("data", "adjust.json")) if err != nil { return map[string]float64{} } defer f.Close() var m map[string]float64 json.NewDecoder(f).Decode(&m) return m } func loadBlockedWords() map[string]bool { f, err := os.Open(filepath.Join("data", "blocked_words.json")) if err != nil { return map[string]bool{} } defer f.Close() var words []string json.NewDecoder(f).Decode(&words) m := make(map[string]bool, len(words)) for _, w := range words { m[w] = true } return m } // prosperFor computes the prosperity score for a URL by decomposing it. 
//
// The segments produced by decomposeURL are visited from the bare host
// outwards. For each segment its score t in pm (0 when absent) is compressed to
// l = log2(2+2t) - 1, or l = 0 when t is not positive. While the running score
// s is still 0, a segment with l == 0 ends the computation with 0; otherwise s
// becomes l. Every later segment is folded in as s = l + ln((s-l)/2 + 1).
// A positive final s is returned as 0.1 + s; anything else returns 0.
//
// When a segment's l exceeds s by 2 or more, the logarithm's argument is not
// positive, s becomes NaN or -Inf, and the final result is 0.
func prosperFor(rawURL string, pm map[string]float64) float64 {
	score := 0.0
	for _, seg := range decomposeURL(rawURL) {
		level := 0.0
		// A missing key reads as 0, so no separate presence check is needed.
		if t := pm[seg]; t > 0 {
			level = math.Log2(2+t*2) - 1
		}
		if score == 0 {
			if level == 0 {
				return 0
			}
			score = level
			continue
		}
		score = level + math.Log((score-level)/2+1)
	}
	if score > 0 {
		return 0.1 + score
	}
	return 0
}

// decomposeURL yields "domain.tld", "domain.tld/path", "domain.tld/path/sub", ...
//
// The URL is lower-cased and must start with "http://" or "https://"; any other
// input returns nil. "?" and "#" are treated as path separators, and trailing
// slashes are dropped. nil is also returned when nothing remains after the
// scheme or when the remainder starts with '/', '%' or ' '.
func decomposeURL(rawURL string) []string {
	u := strings.ToLower(rawURL)
	switch {
	case strings.HasPrefix(u, "https://"):
		u = u[len("https://"):]
	case strings.HasPrefix(u, "http://"):
		u = u[len("http://"):]
	default:
		return nil
	}

	u = strings.ReplaceAll(u, "?", "/")
	u = strings.ReplaceAll(u, "#", "/")
	u = strings.TrimRight(u, "/")
	if u == "" || u[0] == '/' || u[0] == '%' || u[0] == ' ' {
		return nil
	}

	// Every segment is a prefix of u ending just before a "/", so slice u
	// instead of rebuilding each prefix by concatenation.
	out := make([]string, 0, strings.Count(u, "/")+1)
	for i := 0; i < len(u); i++ {
		if u[i] == '/' {
			out = append(out, u[:i])
		}
	}
	return append(out, u)
}