Able to read news across pages

This commit is contained in:
LittleSheep 2025-01-25 22:48:14 +08:00
parent d91192a11a
commit 48a04b09a4
4 changed files with 51 additions and 17 deletions

View File

@ -5,4 +5,5 @@ type NewsSource struct {
Label string `json:"label"`
Type string `json:"type"`
Source string `json:"source"`
Depth int `json:"depth"`
}

View File

@ -2,6 +2,7 @@ package api
import (
"git.solsynth.dev/hypernet/nexus/pkg/nex/sec"
"git.solsynth.dev/hypernet/reader/pkg/internal/server/exts"
"git.solsynth.dev/hypernet/reader/pkg/internal/services"
"github.com/gofiber/fiber/v2"
)
@ -11,6 +12,14 @@ func adminTriggerScanTask(c *fiber.Ctx) error {
return err
}
go services.ScanNewsSources()
var data struct {
Eager bool `json:"eager"`
}
if err := exts.BindAndValidate(c, &data); err != nil {
return err
}
go services.ScanNewsSources(data.Eager)
return c.SendStatus(fiber.StatusOK)
}

View File

@ -2,6 +2,7 @@ package services
import (
"fmt"
"strconv"
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
@ -20,7 +21,11 @@ func LoadNewsSources() error {
return nil
}
func ScanNewsSources() {
// ScanNewsSourcesNoEager runs ScanNewsSources with the eager flag explicitly
// turned off. It takes no arguments, so it can be handed to callers that
// expect a plain func().
func ScanNewsSourcesNoEager() {
	const eager = false
	ScanNewsSources(eager)
}
func ScanNewsSources(eager ...bool) {
var results []models.NewsArticle
for _, src := range NewsSources {
log.Debug().Str("source", src.ID).Msg("Scanning news source...")
@ -35,7 +40,7 @@ func ScanNewsSources() {
database.C.Save(&results)
}
func NewsSourceRead(src models.NewsSource) ([]models.NewsArticle, error) {
func NewsSourceRead(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) {
switch src.Type {
case "wordpress":
// Forward the optional eager flag; without it the WordPress reader never
// sees the caller's request and always stops after the first page.
return newsSourceReadWordpress(src, eager...)
@ -44,18 +49,8 @@ func NewsSourceRead(src models.NewsSource) ([]models.NewsArticle, error) {
}
}
func newsSourceReadWordpress(src models.NewsSource) ([]models.NewsArticle, error) {
client := wordpress.NewClient(&wordpress.Options{
BaseAPIURL: src.Source,
})
posts, _, _, err := client.Posts().List(nil)
if err != nil {
return nil, err
}
var result []models.NewsArticle
for _, post := range posts {
func newsSourceReadWordpress(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) {
wpConvert := func(post wordpress.Post) models.NewsArticle {
article := &models.NewsArticle{
Title: post.Title.Rendered,
Description: post.Excerpt.Rendered,
@ -64,7 +59,36 @@ func newsSourceReadWordpress(src models.NewsSource) ([]models.NewsArticle, error
Source: src.ID,
}
article.GenHash()
result = append(result, *article)
return *article
}
client := wordpress.NewClient(&wordpress.Options{
BaseAPIURL: src.Source,
})
posts, resp, _, err := client.Posts().List(nil)
if err != nil {
return nil, err
}
var result []models.NewsArticle
for _, post := range posts {
result = append(result, wpConvert(post))
}
if len(eager) > 0 && eager[0] {
	// Eager mode: continue past the first page, bounded by both the page
	// count reported in the X-WP-TotalPages header and the source's Depth.
	// A missing or malformed header makes Atoi yield 0, so the loop below
	// simply does not run in that case.
	totalPagesRaw := resp.Header.Get("X-WP-TotalPages")
	totalPages, _ := strconv.Atoi(totalPagesRaw)
	depth := min(totalPages, src.Depth)
	for page := 2; page <= depth; page++ {
		// Ask for this specific page. Passing nil sends no page parameter,
		// so every iteration would repeat the default listing and only
		// append duplicates.
		// NOTE(review): assumes List forwards its argument as the request
		// query string — confirm against the go-wordpress client.
		posts, _, _, err := client.Posts().List(fmt.Sprintf("page=%d", page))
		if err != nil {
			// Best effort: keep the articles collected so far, but record
			// why the scan stopped early instead of dropping the error.
			log.Warn().Err(err).Str("source", src.ID).Int("page", page).Msg("Failed to read news page, returning partial result...")
			return result, nil
		}
		for _, post := range posts {
			result = append(result, wpConvert(post))
		}
	}
}
return result, nil

View File

@ -80,7 +80,7 @@ func main() {
// Configure timed tasks
quartz := cron.New(cron.WithLogger(cron.VerbosePrintfLogger(&log.Logger)))
quartz.AddFunc("@every 60m", services.DoAutoDatabaseCleanup)
quartz.AddFunc("@midnight", services.ScanNewsSources)
quartz.AddFunc("@midnight", services.ScanNewsSourcesNoEager)
quartz.Start()
// Server