From 48a04b09a46cd4bd4f9d1fd010d75ce25386df14 Mon Sep 17 00:00:00 2001 From: LittleSheep Date: Sat, 25 Jan 2025 22:48:14 +0800 Subject: [PATCH] :sparkles: Able to read news across pages --- pkg/internal/models/source.go | 1 + pkg/internal/server/api/admin_api.go | 11 +++++- pkg/internal/services/reader.go | 54 ++++++++++++++++++++-------- pkg/main.go | 2 +- 4 files changed, 51 insertions(+), 17 deletions(-) diff --git a/pkg/internal/models/source.go b/pkg/internal/models/source.go index 7edcc79..79cd471 100644 --- a/pkg/internal/models/source.go +++ b/pkg/internal/models/source.go @@ -5,4 +5,5 @@ type NewsSource struct { Label string `json:"label"` Type string `json:"type"` Source string `json:"source"` + Depth int `json:"depth"` } diff --git a/pkg/internal/server/api/admin_api.go b/pkg/internal/server/api/admin_api.go index cc6189b..38d706b 100644 --- a/pkg/internal/server/api/admin_api.go +++ b/pkg/internal/server/api/admin_api.go @@ -2,6 +2,7 @@ package api import ( "git.solsynth.dev/hypernet/nexus/pkg/nex/sec" + "git.solsynth.dev/hypernet/reader/pkg/internal/server/exts" "git.solsynth.dev/hypernet/reader/pkg/internal/services" "github.com/gofiber/fiber/v2" ) @@ -11,6 +12,14 @@ func adminTriggerScanTask(c *fiber.Ctx) error { return err } - go services.ScanNewsSources() + var data struct { + Eager bool `json:"eager"` + } + + if err := exts.BindAndValidate(c, &data); err != nil { + return err + } + + go services.ScanNewsSources(data.Eager) return c.SendStatus(fiber.StatusOK) } diff --git a/pkg/internal/services/reader.go b/pkg/internal/services/reader.go index dd9aa9d..7a62b03 100644 --- a/pkg/internal/services/reader.go +++ b/pkg/internal/services/reader.go @@ -2,6 +2,7 @@ package services import ( "fmt" + "strconv" "git.solsynth.dev/hypernet/reader/pkg/internal/database" "git.solsynth.dev/hypernet/reader/pkg/internal/models" @@ -20,7 +21,11 @@ func LoadNewsSources() error { return nil } -func ScanNewsSources() { +func ScanNewsSourcesNoEager() { + ScanNewsSources(false) +} + +func ScanNewsSources(eager ...bool) { var results []models.NewsArticle for _, src := range NewsSources { log.Debug().Str("source", src.ID).Msg("Scanning news source...") @@ -35,7 +40,7 @@ func ScanNewsSources() { database.C.Save(&results) } -func NewsSourceRead(src models.NewsSource) ([]models.NewsArticle, error) { +func NewsSourceRead(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) { switch src.Type { case "wordpress": return newsSourceReadWordpress(src) @@ -44,18 +49,8 @@ func NewsSourceRead(src models.NewsSource) ([]models.NewsArticle, error) { } } -func newsSourceReadWordpress(src models.NewsSource) ([]models.NewsArticle, error) { - client := wordpress.NewClient(&wordpress.Options{ - BaseAPIURL: src.Source, - }) - - posts, _, _, err := client.Posts().List(nil) - if err != nil { - return nil, err - } - - var result []models.NewsArticle - for _, post := range posts { +func newsSourceReadWordpress(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) { + wpConvert := func(post wordpress.Post) models.NewsArticle { article := &models.NewsArticle{ Title: post.Title.Rendered, Description: post.Excerpt.Rendered, @@ -64,7 +59,36 @@ func newsSourceReadWordpress(src models.NewsSource) ([]models.NewsArticle, error Source: src.ID, } article.GenHash() - result = append(result, *article) + return *article + } + + client := wordpress.NewClient(&wordpress.Options{ + BaseAPIURL: src.Source, + }) + + posts, resp, _, err := client.Posts().List(nil) + if err != nil { + return nil, err + } + + var result []models.NewsArticle + for _, post := range posts { + result = append(result, wpConvert(post)) + } + + if len(eager) > 0 && eager[0] { + totalPagesRaw := resp.Header.Get("X-WP-TotalPages") + totalPages, _ := strconv.Atoi(totalPagesRaw) + depth := min(totalPages, src.Depth) + for page := 2; page <= depth; page++ { + posts, _, _, err := client.Posts().List(nil) + if err != nil { + return result, nil + } + for _, post := range posts { + result = append(result, wpConvert(post)) + } + } } return result, nil diff --git a/pkg/main.go b/pkg/main.go index 90b06a2..075ff6d 100644 --- a/pkg/main.go +++ b/pkg/main.go @@ -80,7 +80,7 @@ func main() { // Configure timed tasks quartz := cron.New(cron.WithLogger(cron.VerbosePrintfLogger(&log.Logger))) quartz.AddFunc("@every 60m", services.DoAutoDatabaseCleanup) - quartz.AddFunc("@midnight", services.ScanNewsSources) + quartz.AddFunc("@midnight", services.ScanNewsSourcesNoEager) quartz.Start() // Server