Basis for reading news

 Able to read WordPress sites

By the way, the package from 10 years ago still works properly, which is amazing.
This means the WordPress API has not changed much, and Go's backward
compatibility is amazing!
This commit is contained in:
2025-01-25 22:05:38 +08:00
parent e055ef27e5
commit ab0a87106b
11 changed files with 185 additions and 19 deletions

View File

@@ -12,10 +12,11 @@ import (
"github.com/gocolly/colly"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"github.com/spf13/viper"
)
// We have to set the User-Agent to this so the sites will respond with opengraph data
const ScrapLinkUserAgent = "facebookexternalhit/1.1"
const ScrapLinkDefaultUA = "facebookexternalhit/1.1"
func GetLinkMetaFromCache(target string) (models.LinkMeta, error) {
hash := md5.Sum([]byte(target))
@@ -40,8 +41,13 @@ func ScrapLink(target string) (*models.LinkMeta, error) {
return &cache, nil
}
ua := viper.GetString("scraper.expand_ua")
if len(ua) == 0 {
ua = ScrapLinkDefaultUA
}
c := colly.NewCollector(
colly.UserAgent(ScrapLinkUserAgent),
colly.UserAgent(ua),
colly.MaxDepth(3),
)