Compare commits

7 Commits

SHA1 Message Date
eba8977107 🐛 Trying to fix scraping 2025-05-08 01:47:21 +08:00
6cfa6e8285 💄 Optimized the feed 2025-04-06 14:15:54 +08:00
e34f248cfa 🐛 Fix API pagination 2025-04-06 13:44:59 +08:00
131778780c 🐛 Fix API stacking routing issue 2025-04-06 13:32:29 +08:00
fd9761f328 Feed the full content flag to reduce web requests 2025-04-06 13:27:57 +08:00
fd0d3699e4 Get feed item now will preload feed 2025-04-06 13:23:07 +08:00
c812359f8b Able to get feed full content 2025-04-06 13:21:36 +08:00
8 changed files with 77 additions and 13 deletions

View File

@@ -2,6 +2,7 @@ package grpc

 import (
 	"context"
+	"time"

 	iproto "git.solsynth.dev/hypernet/interactive/pkg/proto"
 	"git.solsynth.dev/hypernet/nexus/pkg/nex"
@@ -14,7 +15,11 @@ import (
 func (v *Server) GetFeed(_ context.Context, in *iproto.GetFeedRequest) (*iproto.GetFeedResponse, error) {
 	limit := int(in.GetLimit())
-	articles, err := services.GetTodayFeedRandomly(limit)
+	var cursor *time.Time
+	if in.Cursor != nil {
+		cursor = lo.ToPtr(time.UnixMilli(int64(in.GetCursor())))
+	}
+	articles, err := services.GetTodayFeedRandomly(limit, cursor)
 	if err != nil {
 		return nil, status.Error(codes.Internal, err.Error())
 	}
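The new Cursor field drives keyset pagination: the server reads it as Unix milliseconds and (per the service change further down) returns only items published strictly before it. A minimal client-side sketch of deriving the next cursor; the uint64 type and the PublishedAt field name are assumptions, inferred from the int64 cast above and the published_at ordering below:

```go
package main

import (
	"fmt"
	"time"
)

// nextCursor turns the oldest item's publish time into the value a client
// would set on the next GetFeedRequest.Cursor to fetch the following page.
func nextCursor(oldestPublishedAt time.Time) uint64 {
	return uint64(oldestPublishedAt.UnixMilli())
}

func main() {
	oldest := time.Date(2025, 4, 6, 13, 21, 36, 0, time.UTC)
	fmt.Println(nextCursor(oldest)) // 1743945696000
}
```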

View File

@@ -12,7 +12,8 @@ type SubscriptionFeed struct {
 	cruda.BaseModel

 	URL           string `json:"url"`
-	IsEnabled     bool   `json:"enabled"`
+	IsEnabled     bool   `json:"is_enabled"`
+	IsFullContent bool   `json:"is_full_content"`
 	PullInterval  int    `json:"pull_interval"`
 	Adapter       string `json:"adapter"`
 	AccountID     *uint  `json:"account_id"`
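Two things change here: the IsEnabled tag is corrected from enabled to is_enabled (a breaking rename for any API consumer reading the old key), and the new IsFullContent flag records that a source ships complete article bodies. A trimmed stand-in for the struct, only to show the serialized shape; cruda.BaseModel and the remaining fields are omitted:

```go
package main

import (
	"encoding/json"
	"fmt"
)

// Trimmed copy of models.SubscriptionFeed, limited to the fields in the
// hunk above.
type SubscriptionFeed struct {
	URL           string `json:"url"`
	IsEnabled     bool   `json:"is_enabled"`
	IsFullContent bool   `json:"is_full_content"`
	PullInterval  int    `json:"pull_interval"`
}

func main() {
	out, _ := json.Marshal(SubscriptionFeed{URL: "https://example.com/rss", IsEnabled: true, PullInterval: 30})
	fmt.Println(string(out))
	// {"url":"https://example.com/rss","is_enabled":true,"is_full_content":false,"pull_interval":30}
}
```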

View File

@@ -0,0 +1,42 @@
+package api
+
+import (
+	"git.solsynth.dev/hypernet/reader/pkg/internal/database"
+	"git.solsynth.dev/hypernet/reader/pkg/internal/models"
+	"github.com/gofiber/fiber/v2"
+)
+
+func listFeedItem(c *fiber.Ctx) error {
+	take := c.QueryInt("take", 10)
+	offset := c.QueryInt("offset", 0)
+
+	var count int64
+	if err := database.C.Model(&models.SubscriptionItem{}).Count(&count).Error; err != nil {
+		return fiber.NewError(fiber.StatusBadRequest, err.Error())
+	}
+
+	var items []models.SubscriptionItem
+	if err := database.C.
+		Order("published_at DESC").
+		Omit("Content").
+		Preload("Feed").
+		Limit(take).Offset(offset).Find(&items).Error; err != nil {
+		return fiber.NewError(fiber.StatusBadRequest, err.Error())
+	}
+
+	return c.JSON(fiber.Map{
+		"count": count,
+		"data":  items,
+	})
+}
+
+func getFeedItem(c *fiber.Ctx) error {
+	id, _ := c.ParamsInt("id", 0)
+
+	var item models.SubscriptionItem
+	if err := database.C.Where("id = ?", id).Preload("Feed").First(&item).Error; err != nil {
+		return fiber.NewError(fiber.StatusNotFound, err.Error())
+	}
+
+	return c.JSON(item)
+}
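These two new handlers expose offset pagination over stored feed items: listFeedItem returns a count/data envelope driven by take and offset query parameters (defaulting to 10 and 0) and omits the heavy Content column, while getFeedItem fetches a single item by ID with its content intact. A hedged client sketch; the host and the /api/subscriptions prefix are guesses, since the real path depends on the baseURL passed to MapAPIs:

```go
package main

import (
	"fmt"
	"io"
	"net/http"
)

func main() {
	// Path prefix is hypothetical; substitute the actual baseURL and
	// subscription group path wired up in MapAPIs.
	resp, err := http.Get("http://localhost:8001/api/subscriptions?take=10&offset=0")
	if err != nil {
		panic(err)
	}
	defer resp.Body.Close()

	body, _ := io.ReadAll(resp.Body)
	fmt.Println(string(body)) // {"count":...,"data":[...]} envelope from listFeedItem
}
```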

View File

@@ -17,7 +17,7 @@ func listFeedSubscriptions(c *fiber.Ctx) error {
 		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
 	}

 	var feeds []models.SubscriptionFeed
-	if err := database.C.Take(take).Offset(offset).Find(&feeds).Error; err != nil {
+	if err := database.C.Limit(take).Offset(offset).Find(&feeds).Error; err != nil {
 		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
 	}
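The one-word fix matters in GORM: Take is a finisher that fetches a single record into its argument, so database.C.Take(take) passed the integer take as a destination and never applied a row limit, while Limit(take) is the chainable method that becomes SQL LIMIT. A sketch of the two calls used correctly (db stands for a *gorm.DB such as database.C; error handling elided):

```go
// Take: a finisher that loads exactly one record into its argument.
var one models.SubscriptionFeed
db.Take(&one) // SELECT * FROM subscription_feeds LIMIT 1

// Limit: a chain method that caps the row count of a later Find.
var feeds []models.SubscriptionFeed
db.Limit(take).Offset(offset).Find(&feeds) // ... LIMIT ? OFFSET ?
```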

View File

@@ -27,6 +27,9 @@ func MapAPIs(app *fiber.App, baseURL string) {
 			feed.Post("/:id/toggle", toggleFeedSubscription)
 			feed.Delete("/:id", deleteFeedSubscription)
 		}
+
+		subscription.Get("/", listFeedItem)
+		subscription.Get("/:id", getFeedItem)
 	}
 }

View File

@@ -3,13 +3,20 @@ package services
 import (
 	"git.solsynth.dev/hypernet/reader/pkg/internal/database"
 	"git.solsynth.dev/hypernet/reader/pkg/internal/models"
+	"time"
 )

-func GetTodayFeedRandomly(limit int) ([]models.SubscriptionItem, error) {
+func GetTodayFeedRandomly(limit int, cursor *time.Time) ([]models.SubscriptionItem, error) {
+	tx := database.C
+	if cursor != nil {
+		tx = tx.Where("published_at < ?", *cursor)
+	}
+
 	var articles []models.SubscriptionItem
-	if err := database.C.Limit(limit).
-		Where("DATE(created_at) = CURRENT_DATE"). // Created in today
-		Order("RANDOM()").
+	if err := tx.Limit(limit).
+		Order("published_at DESC").
+		Omit("Content").
+		Preload("Feed").
 		Find(&articles).Error; err != nil {
 		return articles, err
 	}
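Despite keeping its name, GetTodayFeedRandomly no longer samples today's rows at random: it now walks published_at in descending order, applies the cursor as a keyset predicate, and skips the heavy Content column. Roughly the SQL the new chain produces (a sketch; the subscription_items table name follows GORM's default pluralization):

```go
// SELECT <all columns except content>
// FROM subscription_items
// WHERE published_at < $cursor   -- predicate added only when cursor != nil
// ORDER BY published_at DESC
// LIMIT $limit;
```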

View File

@@ -183,12 +183,18 @@ func feedReadGuidedFeed(src models.SubscriptionFeed, eager ...bool) ([]models.Su
 			parent.Thumbnail = item.Image.URL
 		}

-		article, err := ScrapSubscriptionItem(item.Link, parent)
-		if err != nil {
-			log.Warn().Err(err).Str("url", item.Link).Msg("Failed to scrap a news article...")
-			continue
+		// When the source enabled the full content,
+		// It means the feed contains all the content, and we're not going to scrap it
+		if src.IsFullContent {
+			result = append(result, pgConvert(parent))
+		} else {
+			article, err := ScrapSubscriptionItem(item.Link, parent)
+			if err != nil {
+				log.Warn().Err(err).Str("url", item.Link).Msg("Failed to scrap a news article...")
+				continue
+			}
+			result = append(result, pgConvert(*article))
 		}
-		result = append(result, pgConvert(*article))

 		log.Debug().Str("url", item.Link).Msg("Scraped a news article...")
 	}
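This branch ties back to the IsFullContent flag added to the model: a feed whose source already ships complete bodies is converted straight from the parsed entry, saving one HTTP scrape per item. A sketch of flipping the flag for a feed, assuming the database and models packages from this repository (the helper name is ours, not code from this PR):

```go
// markFullContent marks a feed so feedReadGuidedFeed trusts the
// feed-provided content instead of scraping each item's link.
func markFullContent(feedID uint) error {
	return database.C.Model(&models.SubscriptionFeed{}).
		Where("id = ?", feedID).
		Update("is_full_content", true).Error
}
```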

View File

@@ -19,7 +19,7 @@ import (
 )

 // We have to set the User-Agent to this so the sites will respond with opengraph data
-const ScrapLinkDefaultUA = "FacebookExternalHit/1.1"
+const ScrapLinkDefaultUA = "facebookexternalhit/1.1"

 func GetLinkMetaFromCache(target string) (models.LinkMeta, error) {
 	hash := md5.Sum([]byte(target))
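The constant is lowercased to match the token the Facebook crawler actually sends (facebookexternalhit/1.1); some sites compare the User-Agent string case-sensitively before serving their OpenGraph markup, which is a plausible cause of the scraping failures this commit chases. How the constant is meant to be used, as an illustrative net/http helper rather than this repository's actual request code:

```go
// fetchWithScraperUA issues a GET with the scraper User-Agent so the
// target site responds with its OpenGraph metadata. Sketch only.
func fetchWithScraperUA(target string) (*http.Response, error) {
	req, err := http.NewRequest(http.MethodGet, target, nil)
	if err != nil {
		return nil, err
	}
	req.Header.Set("User-Agent", ScrapLinkDefaultUA)
	return http.DefaultClient.Do(req)
}
```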