Compare commits

...

9 Commits

Author SHA1 Message Date
eba8977107 🐛 Trying to fix scraping 2025-05-08 01:47:21 +08:00
6cfa6e8285 💄 Optimized the feed 2025-04-06 14:15:54 +08:00
e34f248cfa 🐛 Fix API pagination 2025-04-06 13:44:59 +08:00
131778780c 🐛 Fix API stacking routing issue 2025-04-06 13:32:29 +08:00
fd9761f328 Feed the full content flag to reduce web requests 2025-04-06 13:27:57 +08:00
fd0d3699e4 Get feed item now will preload feed 2025-04-06 13:23:07 +08:00
c812359f8b Able to get feed full content 2025-04-06 13:21:36 +08:00
0c28766336 🐛 Fix items' published at 2025-04-06 13:14:44 +08:00
1f27667b7e Reduce the delay between two fetch feed timed task 2025-04-06 13:13:03 +08:00
10 changed files with 101 additions and 21 deletions

View File

@@ -2,6 +2,7 @@ package grpc
import (
"context"
"time"
iproto "git.solsynth.dev/hypernet/interactive/pkg/proto"
"git.solsynth.dev/hypernet/nexus/pkg/nex"
@@ -14,7 +15,11 @@ import (
func (v *Server) GetFeed(_ context.Context, in *iproto.GetFeedRequest) (*iproto.GetFeedResponse, error) {
limit := int(in.GetLimit())
articles, err := services.GetTodayFeedRandomly(limit)
var cursor *time.Time
if in.Cursor != nil {
cursor = lo.ToPtr(time.UnixMilli(int64(in.GetCursor())))
}
articles, err := services.GetTodayFeedRandomly(limit, cursor)
if err != nil {
return nil, status.Error(codes.Internal, err.Error())
}

View File

@@ -12,7 +12,8 @@ type SubscriptionFeed struct {
cruda.BaseModel
URL string `json:"url"`
IsEnabled bool `json:"enabled"`
IsEnabled bool `json:"is_enabled"`
IsFullContent bool `json:"is_full_content"`
PullInterval int `json:"pull_interval"`
Adapter string `json:"adapter"`
AccountID *uint `json:"account_id"`

View File

@@ -13,15 +13,14 @@ func adminTriggerScanTask(c *fiber.Ctx) error {
}
var data struct {
Eager bool `json:"eager"`
Sources []string `json:"sources"`
Eager bool `json:"eager"`
}
if err := exts.BindAndValidate(c, &data); err != nil {
return err
}
go services.FetchFeedTimed()
go services.FetchFeed(data.Eager)
return c.SendStatus(fiber.StatusOK)
}

View File

@@ -0,0 +1,42 @@
package api
import (
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"github.com/gofiber/fiber/v2"
)
// listFeedItem returns a paginated page of subscription items, newest first,
// together with the total item count. The heavy Content column is omitted and
// the owning Feed is preloaded for each item.
//
// Query parameters: take (page size, default 10), offset (default 0).
func listFeedItem(c *fiber.Ctx) error {
	take := c.QueryInt("take", 10)
	offset := c.QueryInt("offset", 0)

	// Total count so clients can render pagination controls.
	var count int64
	if err := database.C.Model(&models.SubscriptionItem{}).Count(&count).Error; err != nil {
		// A failing database query is a server-side problem, not a malformed
		// request — report 500, consistent with listFeedSubscriptions.
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	var items []models.SubscriptionItem
	if err := database.C.
		Order("published_at DESC").
		Omit("Content").
		Preload("Feed").
		Limit(take).Offset(offset).Find(&items).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	return c.JSON(fiber.Map{
		"count": count,
		"data":  items,
	})
}
// getFeedItem looks up a single subscription item by its path ID and returns
// it as JSON with the owning Feed preloaded. Responds 404 when the lookup
// fails (including when no row matches).
func getFeedItem(c *fiber.Ctx) error {
	itemID, _ := c.ParamsInt("id", 0)

	var record models.SubscriptionItem
	err := database.C.
		Where("id = ?", itemID).
		Preload("Feed").
		First(&record).Error
	if err != nil {
		return fiber.NewError(fiber.StatusNotFound, err.Error())
	}

	return c.JSON(record)
}

View File

@@ -17,7 +17,7 @@ func listFeedSubscriptions(c *fiber.Ctx) error {
return fiber.NewError(fiber.StatusInternalServerError, err.Error())
}
var feeds []models.SubscriptionFeed
if err := database.C.Take(take).Offset(offset).Find(&feeds).Error; err != nil {
if err := database.C.Limit(take).Offset(offset).Find(&feeds).Error; err != nil {
return fiber.NewError(fiber.StatusInternalServerError, err.Error())
}

View File

@@ -27,6 +27,9 @@ func MapAPIs(app *fiber.App, baseURL string) {
feed.Post("/:id/toggle", toggleFeedSubscription)
feed.Delete("/:id", deleteFeedSubscription)
}
subscription.Get("/", listFeedItem)
subscription.Get("/:id", getFeedItem)
}
}
}

View File

@@ -3,13 +3,20 @@ package services
import (
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"time"
)
func GetTodayFeedRandomly(limit int) ([]models.SubscriptionItem, error) {
func GetTodayFeedRandomly(limit int, cursor *time.Time) ([]models.SubscriptionItem, error) {
tx := database.C
if cursor != nil {
tx = tx.Where("published_at < ?", *cursor)
}
var articles []models.SubscriptionItem
if err := database.C.Limit(limit).
Where("DATE(created_at) = CURRENT_DATE"). // Created in today
Order("RANDOM()").
if err := tx.Limit(limit).
Order("published_at DESC").
Omit("Content").
Preload("Feed").
Find(&articles).Error; err != nil {
return articles, err
}

View File

@@ -22,9 +22,18 @@ func FetchFeedTimed() {
func FetchFeed(eager ...bool) {
var feeds []models.SubscriptionFeed
if err := database.C.Where("is_enabled = ?", true).Find(&feeds).Error; err != nil {
log.Warn().Err(err).Msg("An error occurred when fetching feeds.")
return
if len(eager) > 0 && eager[0] {
if err := database.C.Where("is_enabled = ?", true).Find(&feeds).Error; err != nil {
log.Warn().Err(err).Msg("An error occurred when fetching feeds.")
return
}
} else {
if err := database.C.
Where("last_fetched_at IS NULL OR NOW() >= last_fetched_at + (pull_interval || ' hours')::interval").
Find(&feeds).Error; err != nil {
log.Warn().Err(err).Msg("An error occurred when fetching due feeds.")
return
}
}
log.Info().Int("count", len(feeds)).Msg("Ready to fetch feeds...")
@@ -56,7 +65,10 @@ func FetchFeed(eager ...bool) {
count += len(result)
}
database.C.Where("id IN ?", scannedFeed).Update("last_fetched_at", time.Now())
database.C.
Model(&models.SubscriptionFeed{}).
Where("id IN ?", scannedFeed).
Update("last_fetched_at", time.Now())
log.Info().Int("count", count).Msg("Scanned all feeds.")
}
@@ -86,6 +98,8 @@ func feedReadWordpress(src models.SubscriptionFeed, eager ...bool) ([]models.Sub
date, err := time.Parse("2006-01-02T15:04:05", post.DateGMT)
if err == nil {
article.PublishedAt = date
} else {
article.PublishedAt = time.Now()
}
article.GenHash()
return *article
@@ -162,17 +176,25 @@ func feedReadGuidedFeed(src models.SubscriptionFeed, eager ...bool) ([]models.Su
}
if item.PublishedParsed != nil {
parent.PublishedAt = *item.PublishedParsed
} else {
parent.PublishedAt = time.Now()
}
if item.Image != nil {
parent.Thumbnail = item.Image.URL
}
article, err := ScrapSubscriptionItem(item.Link, parent)
if err != nil {
log.Warn().Err(err).Str("url", item.Link).Msg("Failed to scrap a news article...")
continue
// When the source has the full-content flag enabled,
// the feed already carries the complete article body, so we skip scraping it
if src.IsFullContent {
result = append(result, pgConvert(parent))
} else {
article, err := ScrapSubscriptionItem(item.Link, parent)
if err != nil {
log.Warn().Err(err).Str("url", item.Link).Msg("Failed to scrap a news article...")
continue
}
result = append(result, pgConvert(*article))
}
result = append(result, pgConvert(*article))
log.Debug().Str("url", item.Link).Msg("Scraped a news article...")
}
@@ -185,6 +207,7 @@ func feedReadWebpage(src models.SubscriptionFeed, eager ...bool) ([]models.Subsc
art := &article
art.GenHash()
art.FeedID = src.ID
art.PublishedAt = time.Now()
article = *art
return article
}

View File

@@ -19,7 +19,7 @@ import (
)
// We have to set the User-Agent to this so the sites will respond with opengraph data
const ScrapLinkDefaultUA = "FacebookExternalHit/1.1"
const ScrapLinkDefaultUA = "facebookexternalhit/1.1"
func GetLinkMetaFromCache(target string) (models.LinkMeta, error) {
hash := md5.Sum([]byte(target))

View File

@@ -75,7 +75,7 @@ func main() {
// Configure timed tasks
quartz := cron.New(cron.WithLogger(cron.VerbosePrintfLogger(&log.Logger)))
quartz.AddFunc("@every 60m", services.DoAutoDatabaseCleanup)
quartz.AddFunc("@midnight", services.FetchFeedTimed)
quartz.AddFunc("@every 60m", services.FetchFeedTimed)
quartz.Start()
// Server