🎉 Initial Commit
This commit is contained in:
24
pkg/internal/services/cleaner.go
Normal file
24
pkg/internal/services/cleaner.go
Normal file
@ -0,0 +1,24 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
database2 "git.solsynth.dev/hypernet/reader/pkg/internal/database"
|
||||
"time"
|
||||
|
||||
"github.com/rs/zerolog/log"
|
||||
)
|
||||
|
||||
func DoAutoDatabaseCleanup() {
|
||||
deadline := time.Now().Add(60 * time.Minute)
|
||||
log.Debug().Time("deadline", deadline).Msg("Now cleaning up entire database...")
|
||||
|
||||
var count int64
|
||||
for _, model := range database2.AutoMaintainRange {
|
||||
tx := database2.C.Unscoped().Delete(model, "deleted_at >= ?", deadline)
|
||||
if tx.Error != nil {
|
||||
log.Error().Err(tx.Error).Msg("An error occurred when running auth context cleanup...")
|
||||
}
|
||||
count += tx.RowsAffected
|
||||
}
|
||||
|
||||
log.Debug().Int64("affected", count).Msg("Clean up entire database accomplished.")
|
||||
}
|
116
pkg/internal/services/scraper.go
Normal file
116
pkg/internal/services/scraper.go
Normal file
@ -0,0 +1,116 @@
|
||||
package services
|
||||
|
||||
import (
|
||||
"crypto/md5"
|
||||
"encoding/hex"
|
||||
"net"
|
||||
"net/http"
|
||||
"time"
|
||||
|
||||
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
|
||||
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
|
||||
"github.com/gocolly/colly"
|
||||
"github.com/rs/zerolog/log"
|
||||
"github.com/samber/lo"
|
||||
"github.com/spf13/viper"
|
||||
)
|
||||
|
||||
func GetLinkMetaFromCache(target string) (models.LinkMeta, error) {
|
||||
hash := md5.Sum([]byte(target))
|
||||
entry := hex.EncodeToString(hash[:])
|
||||
var meta models.LinkMeta
|
||||
if err := database.C.Where("entry = ?", entry).First(&meta).Error; err != nil {
|
||||
return meta, err
|
||||
}
|
||||
return meta, nil
|
||||
}
|
||||
|
||||
func SaveLinkMetaToCache(target string, meta models.LinkMeta) error {
|
||||
hash := md5.Sum([]byte(target))
|
||||
entry := hex.EncodeToString(hash[:])
|
||||
meta.Entry = entry
|
||||
return database.C.Save(&meta).Error
|
||||
}
|
||||
|
||||
func ScrapLink(target string) (*models.LinkMeta, error) {
|
||||
if cache, err := GetLinkMetaFromCache(target); err == nil {
|
||||
log.Debug().Str("url", target).Msg("Expanding link... hit cache")
|
||||
return &cache, nil
|
||||
}
|
||||
|
||||
c := colly.NewCollector(
|
||||
colly.UserAgent(viper.GetString("scraper.user-agent")),
|
||||
colly.MaxDepth(3),
|
||||
)
|
||||
|
||||
c.WithTransport(&http.Transport{
|
||||
Proxy: http.ProxyFromEnvironment,
|
||||
DialContext: (&net.Dialer{
|
||||
Timeout: 60 * time.Second,
|
||||
KeepAlive: 360 * time.Second,
|
||||
}).DialContext,
|
||||
MaxIdleConns: 100,
|
||||
IdleConnTimeout: 90 * time.Second,
|
||||
TLSHandshakeTimeout: 10 * time.Second,
|
||||
ExpectContinueTimeout: 1 * time.Second,
|
||||
})
|
||||
|
||||
meta := &models.LinkMeta{
|
||||
URL: target,
|
||||
}
|
||||
|
||||
c.OnHTML("title", func(e *colly.HTMLElement) {
|
||||
meta.Title = &e.Text
|
||||
})
|
||||
c.OnHTML("meta[name]", func(e *colly.HTMLElement) {
|
||||
switch e.Attr("name") {
|
||||
case "description":
|
||||
meta.Description = lo.ToPtr(e.Attr("content"))
|
||||
}
|
||||
})
|
||||
c.OnHTML("meta[property]", func(e *colly.HTMLElement) {
|
||||
switch e.Attr("property") {
|
||||
case "og:title":
|
||||
meta.Title = lo.ToPtr(e.Attr("content"))
|
||||
case "og:description":
|
||||
meta.Description = lo.ToPtr(e.Attr("content"))
|
||||
case "og:image":
|
||||
meta.Image = lo.ToPtr(e.Attr("content"))
|
||||
case "og:video":
|
||||
meta.Video = lo.ToPtr(e.Attr("content"))
|
||||
case "og:audio":
|
||||
meta.Audio = lo.ToPtr(e.Attr("content"))
|
||||
case "og:site_name":
|
||||
meta.SiteName = lo.ToPtr(e.Attr("content"))
|
||||
case "og:type":
|
||||
meta.Type = lo.ToPtr(e.Attr("content"))
|
||||
}
|
||||
})
|
||||
c.OnHTML("link[rel]", func(e *colly.HTMLElement) {
|
||||
if e.Attr("rel") == "icon" {
|
||||
meta.Icon = e.Request.AbsoluteURL(e.Attr("href"))
|
||||
}
|
||||
})
|
||||
|
||||
c.OnRequest(func(r *colly.Request) {
|
||||
log.Debug().Str("url", target).Msg("Expanding link... requesting")
|
||||
})
|
||||
c.RedirectHandler = func(req *http.Request, via []*http.Request) error {
|
||||
log.Debug().Str("url", req.URL.String()).Msg("Expanding link... redirecting")
|
||||
return nil
|
||||
}
|
||||
|
||||
c.OnResponse(func(r *colly.Response) {
|
||||
log.Debug().Str("url", target).Msg("Expanding link... analyzing")
|
||||
})
|
||||
c.OnError(func(r *colly.Response, err error) {
|
||||
log.Warn().Err(err).Str("url", target).Msg("Expanding link... failed")
|
||||
})
|
||||
|
||||
c.OnScraped(func(r *colly.Response) {
|
||||
_ = SaveLinkMetaToCache(target, *meta)
|
||||
log.Debug().Str("url", target).Msg("Expanding link... finished")
|
||||
})
|
||||
|
||||
return meta, c.Visit(target)
|
||||
}
|
Reference in New Issue
Block a user