🐛 Use a const User-Agent instead of config when scraping links

LittleSheep 2024-12-14 12:48:29 +08:00
parent 690e74e006
commit e5a1741109

@@ -12,9 +12,11 @@ import (
 	"github.com/gocolly/colly"
 	"github.com/rs/zerolog/log"
 	"github.com/samber/lo"
-	"github.com/spf13/viper"
 )
 
+// We have to set the User-Agent to this so the sites will respond with opengraph data
+const ScrapLinkUserAgent = "FacebookExternalHit/1.1"
+
 func GetLinkMetaFromCache(target string) (models.LinkMeta, error) {
 	hash := md5.Sum([]byte(target))
 	entry := hex.EncodeToString(hash[:])
@@ -39,7 +41,7 @@ func ScrapLink(target string) (*models.LinkMeta, error) {
 	}
 
 	c := colly.NewCollector(
-		colly.UserAgent(viper.GetString("scraper.user-agent")),
+		colly.UserAgent(ScrapLinkUserAgent),
		colly.MaxDepth(3),
	)
 
@@ -93,23 +95,23 @@ func ScrapLink(target string) (*models.LinkMeta, error) {
 	})
 
 	c.OnRequest(func(r *colly.Request) {
-		log.Debug().Str("url", target).Msg("Expanding link... requesting")
+		log.Debug().Str("url", target).Msg("Scraping link... requesting")
 	})
 	c.RedirectHandler = func(req *http.Request, via []*http.Request) error {
-		log.Debug().Str("url", req.URL.String()).Msg("Expanding link... redirecting")
+		log.Debug().Str("url", req.URL.String()).Msg("Scraping link... redirecting")
 		return nil
 	}
 	c.OnResponse(func(r *colly.Response) {
-		log.Debug().Str("url", target).Msg("Expanding link... analyzing")
+		log.Debug().Str("url", target).Msg("Scraping link... analyzing")
 	})
 	c.OnError(func(r *colly.Response, err error) {
-		log.Warn().Err(err).Str("url", target).Msg("Expanding link... failed")
+		log.Warn().Err(err).Str("url", target).Msg("Scraping link... failed")
 	})
 	c.OnScraped(func(r *colly.Response) {
 		_ = SaveLinkMetaToCache(target, *meta)
-		log.Debug().Str("url", target).Msg("Expanding link... finished")
+		log.Debug().Str("url", target).Msg("Scraping link... finished")
 	})
 
 	return meta, c.Visit(target)
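
For context, the pattern this commit settles on can be shown as a minimal standalone sketch: a colly (v1) collector built with the hard-coded preview-bot User-Agent instead of a viper config value, then used to read OpenGraph tags. The og:* selector, the map of tags, and the example.com target are illustrative assumptions for this sketch, not code from this repository.

package main

import (
	"fmt"

	"github.com/gocolly/colly"
)

// Same idea as the commit: send the User-Agent of a well-known link-preview bot
// so sites respond with their OpenGraph markup rather than a JS-only shell.
const ScrapLinkUserAgent = "FacebookExternalHit/1.1"

func main() {
	c := colly.NewCollector(
		colly.UserAgent(ScrapLinkUserAgent),
		colly.MaxDepth(3),
	)

	// Collect every og:* meta tag from the page head (illustrative selector).
	og := map[string]string{}
	c.OnHTML(`meta[property^="og:"]`, func(e *colly.HTMLElement) {
		og[e.Attr("property")] = e.Attr("content")
	})

	// example.com is a placeholder target.
	if err := c.Visit("https://example.com"); err != nil {
		fmt.Println("visit failed:", err)
		return
	}
	fmt.Printf("og:title=%q og:image=%q\n", og["og:title"], og["og:image"])
}

As the new code comment notes, many sites only emit OpenGraph data for recognized preview bots, so baking the value in as a constant makes scraping behave the same in every deployment, whereas the old "scraper.user-agent" viper key left it to per-instance configuration.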