✨ Read news basis
✨ Able to read WordPress sites
btw the package from 10 years ago still works properly, amazing...
means the WordPress API hasn't changed much and Go's backward
compatibility is amazing!
@@ -7,6 +7,7 @@ import (
 var AutoMaintainRange = []any{
 	&models.LinkMeta{},
+	&models.NewsArticle{},
 }

 func RunMigration(source *gorm.DB) error {
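Aside: the hunk cuts off before RunMigration's body, so the following is only a plausible sketch of how the AutoMaintainRange slice feeds GORM's migrator, assuming the usual AutoMigrate pattern (not confirmed by this diff):

package database

import (
	"gorm.io/gorm"

	"git.solsynth.dev/hypernet/reader/pkg/internal/models"
)

var AutoMaintainRange = []any{
	&models.LinkMeta{},
	&models.NewsArticle{},
}

// Hypothetical body: gorm's AutoMigrate takes a variadic ...interface{},
// so the []any range can be expanded into it directly.
func RunMigration(source *gorm.DB) error {
	return source.AutoMigrate(AutoMaintainRange...)
}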
pkg/internal/models/new.go (new file, 32 lines)
@@ -0,0 +1,32 @@
package models

import (
	"crypto/md5"
	"encoding/hex"

	"git.solsynth.dev/hypernet/nexus/pkg/nex/cruda"
	"github.com/google/uuid"
)

type NewsArticle struct {
	cruda.BaseModel

	Thumbnail   string `json:"thumbnail"`
	Title       string `json:"title"`
	Description string `json:"description"`
	Content     string `json:"content"`
	URL         string `json:"url"`
	Hash        string `json:"hash" gorm:"uniqueIndex"`
	Source      string `json:"source"`
}

func (v *NewsArticle) GenHash() *NewsArticle {
	if len(v.URL) == 0 {
		v.Hash = uuid.NewString()
		return v
	}

	hash := md5.Sum([]byte(v.URL))
	v.Hash = hex.EncodeToString(hash[:])
	return v
}
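Aside: the URL-derived MD5 plus the uniqueIndex on Hash is what makes repeated scans of the same source safe: the same post URL always yields the same Hash, so the unique index blocks duplicate rows. A standalone sketch of the same hashing scheme (names here are illustrative, not from the repo):

package main

import (
	"crypto/md5"
	"encoding/hex"
	"fmt"
)

// hashURL mirrors GenHash's logic for articles that have a URL:
// the MD5 of the URL, hex-encoded, is stable across scans.
func hashURL(url string) string {
	sum := md5.Sum([]byte(url))
	return hex.EncodeToString(sum[:])
}

func main() {
	a := hashURL("https://example.com/post/1")
	b := hashURL("https://example.com/post/1")
	// true: rescanning the same post yields the same Hash, so the
	// gorm uniqueIndex on Hash rejects a second insert of it.
	fmt.Println(a == b)
}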
pkg/internal/models/source.go (new file, 8 lines)
@@ -0,0 +1,8 @@
package models

type NewsSource struct {
	ID     string `json:"id"`
	Label  string `json:"label"`
	Type   string `json:"type"`
	Source string `json:"source"`
}
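Aside: these fields line up with the `sources` key that LoadNewsSources (in reader.go below) unmarshals via viper. A minimal sketch of such a configuration, assuming YAML and purely illustrative values (the real config file is not part of this commit):

package main

import (
	"bytes"
	"fmt"

	"github.com/spf13/viper"
)

// Local copy of the NewsSource shape for a self-contained example.
type NewsSource struct {
	ID     string `json:"id"`
	Label  string `json:"label"`
	Type   string `json:"type"`
	Source string `json:"source"`
}

// Illustrative config; only the shape matters, not the values.
const exampleConfig = `
sources:
  - id: example
    label: Example Blog
    type: wordpress
    source: https://example.com/wp-json/wp/v2
`

func main() {
	v := viper.New()
	v.SetConfigType("yaml")
	if err := v.ReadConfig(bytes.NewBufferString(exampleConfig)); err != nil {
		panic(err)
	}

	// mapstructure matches keys to field names case-insensitively,
	// so no extra tags are needed for this shape.
	var sources []NewsSource
	if err := v.UnmarshalKey("sources", &sources); err != nil {
		panic(err)
	}
	fmt.Printf("%+v\n", sources)
}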
@@ -7,6 +7,7 @@ import (
 func MapAPIs(app *fiber.App, baseURL string) {
 	api := app.Group(baseURL).Name("API")
 	{
+		api.Get("/well-known/sources", getNewsSources)
 		api.Get("/link/*", getLinkMeta)
 	}
 }
pkg/internal/server/api/well_known_api.go (new file, 10 lines)
@@ -0,0 +1,10 @@
package api

import (
	"git.solsynth.dev/hypernet/reader/pkg/internal/services"
	"github.com/gofiber/fiber/v2"
)

func getNewsSources(c *fiber.Ctx) error {
	return c.JSON(services.NewsSources)
}
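Aside: the endpoint just serializes the in-memory source list. A self-contained sketch of the route wiring, exercised in-process with Fiber's Test helper (the types and values are stand-ins, not the repo's):

package main

import (
	"fmt"
	"io"
	"net/http/httptest"

	"github.com/gofiber/fiber/v2"
)

// Stand-in for models.NewsSource / services.NewsSources.
type newsSource struct {
	ID    string `json:"id"`
	Label string `json:"label"`
}

var newsSources = []newsSource{{ID: "example", Label: "Example Blog"}}

func main() {
	app := fiber.New()
	api := app.Group("/api").Name("API")
	api.Get("/well-known/sources", func(c *fiber.Ctx) error {
		return c.JSON(newsSources)
	})

	// Fiber's Test helper dispatches the request without opening a socket.
	resp, err := app.Test(httptest.NewRequest("GET", "/api/well-known/sources", nil))
	if err != nil {
		panic(err)
	}
	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // [{"id":"example","label":"Example Blog"}]
}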
pkg/internal/services/reader.go (new file, 71 lines)
@@ -0,0 +1,71 @@
package services

import (
	"fmt"

	"git.solsynth.dev/hypernet/reader/pkg/internal/database"
	"git.solsynth.dev/hypernet/reader/pkg/internal/models"
	"github.com/rs/zerolog/log"
	"github.com/sogko/go-wordpress"
	"github.com/spf13/viper"
)

var NewsSources []models.NewsSource

func LoadNewsSources() error {
	if err := viper.UnmarshalKey("sources", &NewsSources); err != nil {
		return err
	}
	log.Info().Int("count", len(NewsSources)).Msg("Loaded news sources configuration.")
	return nil
}

func ScanNewsSources() {
	var results []models.NewsArticle
	for _, src := range NewsSources {
		log.Debug().Str("source", src.ID).Msg("Scanning news source...")
		result, err := NewsSourceRead(src)
		if err != nil {
			log.Warn().Err(err).Str("source", src.ID).Msg("Failed to scan a news source.")
		}
		results = append(results, result...)
		log.Info().Str("source", src.ID).Int("count", len(result)).Msg("Scanned a news source.")
	}
	log.Info().Int("count", len(results)).Msg("Scanned all news sources.")
	database.C.Save(&results)
}

func NewsSourceRead(src models.NewsSource) ([]models.NewsArticle, error) {
	switch src.Type {
	case "wordpress":
		return newsSourceReadWordpress(src)
	default:
		return nil, fmt.Errorf("unsupported news source type: %s", src.Type)
	}
}

func newsSourceReadWordpress(src models.NewsSource) ([]models.NewsArticle, error) {
	client := wordpress.NewClient(&wordpress.Options{
		BaseAPIURL: src.Source,
	})

	posts, _, _, err := client.Posts().List(nil)
	if err != nil {
		return nil, err
	}

	var result []models.NewsArticle
	for _, post := range posts {
		article := &models.NewsArticle{
			Title:       post.Title.Rendered,
			Description: post.Excerpt.Rendered,
			Content:     post.Content.Rendered,
			URL:         post.Link,
			Source:      src.ID,
		}
		article.GenHash()
		result = append(result, *article)
	}

	return result, nil
}
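Aside: this commit doesn't show a caller for LoadNewsSources/ScanNewsSources. A minimal sketch of plausible wiring, assuming a plain ticker loop inside the same module (the real service may use a proper scheduler and a configurable interval):

package main

import (
	"time"

	"github.com/rs/zerolog/log"

	"git.solsynth.dev/hypernet/reader/pkg/internal/services"
)

func main() {
	// Load the configured sources once at startup...
	if err := services.LoadNewsSources(); err != nil {
		log.Fatal().Err(err).Msg("Failed to load news sources.")
	}

	// ...then rescan periodically. The interval is an assumption; the
	// commit does not show how often the real service scans.
	for range time.Tick(30 * time.Minute) {
		services.ScanNewsSources()
	}
}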
@@ -12,10 +12,11 @@ import (
 	"github.com/gocolly/colly"
 	"github.com/rs/zerolog/log"
 	"github.com/samber/lo"
+	"github.com/spf13/viper"
 )

 // We have to set the User-Agent to this so the sites will respond with opengraph data
-const ScrapLinkUserAgent = "facebookexternalhit/1.1"
+const ScrapLinkDefaultUA = "facebookexternalhit/1.1"

 func GetLinkMetaFromCache(target string) (models.LinkMeta, error) {
 	hash := md5.Sum([]byte(target))
@@ -40,8 +41,13 @@ func ScrapLink(target string) (*models.LinkMeta, error) {
 		return &cache, nil
 	}

+	ua := viper.GetString("scraper.expand_ua")
+	if len(ua) == 0 {
+		ua = ScrapLinkDefaultUA
+	}
+
 	c := colly.NewCollector(
-		colly.UserAgent(ScrapLinkUserAgent),
+		colly.UserAgent(ua),
 		colly.MaxDepth(3),
 	)
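Aside on the User-Agent trick: many sites only emit OpenGraph tags to known link-preview bots, so the scraper masquerades as Facebook's crawler by default and now lets scraper.expand_ua override that. A minimal colly sketch of fetching OpenGraph tags with that UA (the selector and handler are illustrative; the repo's actual extraction code is not shown in this hunk):

package main

import (
	"fmt"
	"strings"

	"github.com/gocolly/colly"
)

func main() {
	c := colly.NewCollector(
		colly.UserAgent("facebookexternalhit/1.1"), // default UA from the diff
		colly.MaxDepth(3),
	)

	// OpenGraph metadata lives in <meta property="og:..."> tags.
	c.OnHTML(`meta[property]`, func(e *colly.HTMLElement) {
		if prop := e.Attr("property"); strings.HasPrefix(prop, "og:") {
			fmt.Printf("%s = %s\n", prop, e.Attr("content"))
		}
	})

	if err := c.Visit("https://example.com"); err != nil {
		fmt.Println("visit failed:", err)
	}
}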