♻️ Refactored feed module
This commit is contained in:
		| @@ -7,7 +7,8 @@ import ( | ||||
|  | ||||
| var AutoMaintainRange = []any{ | ||||
| 	&models.LinkMeta{}, | ||||
| 	&models.NewsArticle{}, | ||||
| 	&models.SubscriptionFeed{}, | ||||
| 	&models.SubscriptionItem{}, | ||||
| } | ||||
|  | ||||
| func RunMigration(source *gorm.DB) error { | ||||
|   | ||||
| @@ -14,15 +14,15 @@ import ( | ||||
|  | ||||
| func (v *Server) GetFeed(_ context.Context, in *iproto.GetFeedRequest) (*iproto.GetFeedResponse, error) { | ||||
| 	limit := int(in.GetLimit()) | ||||
| 	articles, err := services.GetTodayNewsRandomly(limit, false) | ||||
| 	articles, err := services.GetTodayFeedRandomly(limit) | ||||
| 	if err != nil { | ||||
| 		return nil, status.Error(codes.Internal, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return &iproto.GetFeedResponse{ | ||||
| 		Items: lo.Map(articles, func(item models.NewsArticle, _ int) *iproto.FeedItem { | ||||
| 		Items: lo.Map(articles, func(item models.SubscriptionItem, _ int) *iproto.FeedItem { | ||||
| 			return &iproto.FeedItem{ | ||||
| 				Type:      "reader.news", | ||||
| 				Type:      "reader.feed", | ||||
| 				Content:   nex.EncodeMap(item), | ||||
| 				CreatedAt: uint64(item.CreatedAt.Unix()), | ||||
| 			} | ||||
|   | ||||
							
								
								
									
										46
									
								
								pkg/internal/models/feed.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										46
									
								
								pkg/internal/models/feed.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,46 @@ | ||||
| package models | ||||
|  | ||||
| import ( | ||||
| 	"crypto/md5" | ||||
| 	"encoding/hex" | ||||
| 	"git.solsynth.dev/hypernet/nexus/pkg/nex/cruda" | ||||
| 	"github.com/google/uuid" | ||||
| 	"time" | ||||
| ) | ||||
|  | ||||
| type SubscriptionFeed struct { | ||||
| 	cruda.BaseModel | ||||
|  | ||||
| 	URL           string     `json:"url"` | ||||
| 	IsEnabled     bool       `json:"enabled"` | ||||
| 	PullInterval  int        `json:"pull_interval"` | ||||
| 	Adapter       string     `json:"adapter"` | ||||
| 	AccountID     *uint      `json:"account_id"` | ||||
| 	LastFetchedAt *time.Time `json:"last_fetched_at"` | ||||
| } | ||||
|  | ||||
| type SubscriptionItem struct { | ||||
| 	cruda.BaseModel | ||||
|  | ||||
| 	FeedID      uint             `json:"feed_id"` | ||||
| 	Feed        SubscriptionFeed `json:"feed"` | ||||
| 	Thumbnail   string           `json:"thumbnail"` | ||||
| 	Title       string           `json:"title"` | ||||
| 	Description string           `json:"description"` | ||||
| 	Content     string           `json:"content"` | ||||
| 	URL         string           `json:"url"` | ||||
| 	Hash        string           `json:"hash" gorm:"uniqueIndex"` | ||||
|  | ||||
| 	// PublishedAt is the time when the article is published, when the feed adapter didn't provide this default to creation date | ||||
| 	PublishedAt time.Time `json:"published_at"` | ||||
| } | ||||
|  | ||||
| func (v *SubscriptionItem) GenHash() { | ||||
| 	if len(v.URL) == 0 { | ||||
| 		v.URL = uuid.NewString() | ||||
| 		return | ||||
| 	} | ||||
|  | ||||
| 	hash := md5.Sum([]byte(v.URL)) | ||||
| 	v.Hash = hex.EncodeToString(hash[:]) | ||||
| } | ||||
| @@ -1,34 +0,0 @@ | ||||
| package models | ||||
|  | ||||
| import ( | ||||
| 	"crypto/md5" | ||||
| 	"encoding/hex" | ||||
| 	"time" | ||||
|  | ||||
| 	"git.solsynth.dev/hypernet/nexus/pkg/nex/cruda" | ||||
| 	"github.com/google/uuid" | ||||
| ) | ||||
|  | ||||
| type NewsArticle struct { | ||||
| 	cruda.BaseModel | ||||
|  | ||||
| 	Thumbnail   string     `json:"thumbnail"` | ||||
| 	Title       string     `json:"title"` | ||||
| 	Description string     `json:"description"` | ||||
| 	Content     string     `json:"content"` | ||||
| 	URL         string     `json:"url"` | ||||
| 	Hash        string     `json:"hash" gorm:"uniqueIndex"` | ||||
| 	Source      string     `json:"source"` | ||||
| 	PublishedAt *time.Time `json:"published_at"` | ||||
| } | ||||
|  | ||||
| func (v *NewsArticle) GenHash() *NewsArticle { | ||||
| 	if len(v.URL) == 0 { | ||||
| 		v.Hash = uuid.NewString() | ||||
| 		return v | ||||
| 	} | ||||
|  | ||||
| 	hash := md5.Sum([]byte(v.URL)) | ||||
| 	v.Hash = hex.EncodeToString(hash[:]) | ||||
| 	return v | ||||
| } | ||||
| @@ -1,11 +0,0 @@ | ||||
| package models | ||||
|  | ||||
| type NewsSource struct { | ||||
| 	ID       string `json:"id"` | ||||
| 	Label    string `json:"label"` | ||||
| 	Type     string `json:"type"` | ||||
| 	Source   string `json:"source"` | ||||
| 	Depth    int    `json:"depth"` | ||||
| 	Enabled  bool   `json:"enabled"` | ||||
| 	Advanced bool   `json:"advanced"` | ||||
| } | ||||
| @@ -2,14 +2,9 @@ package api | ||||
|  | ||||
| import ( | ||||
| 	"git.solsynth.dev/hypernet/nexus/pkg/nex/sec" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/database" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/models" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/server/exts" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/services" | ||||
| 	"github.com/gofiber/fiber/v2" | ||||
| 	"github.com/rs/zerolog/log" | ||||
| 	"github.com/samber/lo" | ||||
| 	"gorm.io/gorm/clause" | ||||
| ) | ||||
|  | ||||
| func adminTriggerScanTask(c *fiber.Ctx) error { | ||||
| @@ -26,36 +21,7 @@ func adminTriggerScanTask(c *fiber.Ctx) error { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	go func() { | ||||
| 		count := 0 | ||||
| 		for _, src := range services.GetNewsSources() { | ||||
| 			if !src.Enabled { | ||||
| 				continue | ||||
| 			} | ||||
| 			if len(data.Sources) > 0 && !lo.Contains(data.Sources, src.ID) { | ||||
| 				continue | ||||
| 			} | ||||
|  | ||||
| 			log.Debug().Str("source", src.ID).Msg("Scanning news source...") | ||||
| 			result, err := services.NewsSourceRead(src, data.Eager) | ||||
| 			if err != nil { | ||||
| 				log.Warn().Err(err).Str("source", src.ID).Msg("Failed to scan a news source.") | ||||
| 			} | ||||
|  | ||||
| 			result = lo.UniqBy(result, func(item models.NewsArticle) string { | ||||
| 				return item.Hash | ||||
| 			}) | ||||
| 			database.C.Clauses(clause.OnConflict{ | ||||
| 				Columns:   []clause.Column{{Name: "hash"}}, | ||||
| 				DoUpdates: clause.AssignmentColumns([]string{"thumbnail", "title", "content", "description", "published_at"}), | ||||
| 			}).Create(&result) | ||||
|  | ||||
| 			log.Info().Str("source", src.ID).Int("count", len(result)).Msg("Scanned a news sources.") | ||||
| 			count += len(result) | ||||
| 		} | ||||
|  | ||||
| 		log.Info().Int("count", count).Msg("Scanned all news sources.") | ||||
| 	}() | ||||
| 	go services.FetchFeedTimed() | ||||
|  | ||||
| 	return c.SendStatus(fiber.StatusOK) | ||||
| } | ||||
|   | ||||
							
								
								
									
										170
									
								
								pkg/internal/server/api/feed_subscriptions_api.go
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										170
									
								
								pkg/internal/server/api/feed_subscriptions_api.go
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,170 @@ | ||||
| package api | ||||
|  | ||||
| import ( | ||||
| 	"git.solsynth.dev/hypernet/nexus/pkg/nex/sec" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/database" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/models" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/server/exts" | ||||
| 	"github.com/gofiber/fiber/v2" | ||||
| ) | ||||
|  | ||||
| func listFeedSubscriptions(c *fiber.Ctx) error { | ||||
| 	take := c.QueryInt("take", 10) | ||||
| 	offset := c.QueryInt("offset", 0) | ||||
|  | ||||
| 	var count int64 | ||||
| 	if err := database.C.Model(&models.SubscriptionFeed{}).Count(&count).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
| 	var feeds []models.SubscriptionFeed | ||||
| 	if err := database.C.Take(take).Offset(offset).Find(&feeds).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(fiber.Map{ | ||||
| 		"count": count, | ||||
| 		"data":  feeds, | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func listCreatedFeedSubscriptions(c *fiber.Ctx) error { | ||||
| 	if err := sec.EnsureAuthenticated(c); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	user := c.Locals("nex_user").(*sec.UserInfo) | ||||
|  | ||||
| 	take := c.QueryInt("take", 10) | ||||
| 	offset := c.QueryInt("offset", 0) | ||||
|  | ||||
| 	tx := database.C.Where("account_id = ?", user.ID) | ||||
|  | ||||
| 	var count int64 | ||||
| 	countTx := tx | ||||
| 	if err := countTx.Model(&models.SubscriptionFeed{}).Count(&count).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
| 	var feeds []models.SubscriptionFeed | ||||
| 	if err := tx.Take(take).Offset(offset).Find(&feeds).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(fiber.Map{ | ||||
| 		"count": count, | ||||
| 		"data":  feeds, | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func getFeedSubscription(c *fiber.Ctx) error { | ||||
| 	id, _ := c.ParamsInt("id", 0) | ||||
|  | ||||
| 	var feed models.SubscriptionFeed | ||||
| 	if err := database.C.Where("id = ?", id).First(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusNotFound, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(feed) | ||||
| } | ||||
|  | ||||
| func createFeedSubscription(c *fiber.Ctx) error { | ||||
| 	if err := sec.EnsureGrantedPerm(c, "CreateFeedSubscription", true); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	user := c.Locals("nex_user").(*sec.UserInfo) | ||||
|  | ||||
| 	var data struct { | ||||
| 		URL          string `json:"url" validate:"required,url"` | ||||
| 		PullInterval int    `json:"pull_interval" validate:"required,min=6,max=720"` | ||||
| 		Adapter      string `json:"adapter"` | ||||
| 	} | ||||
| 	if err := exts.BindAndValidate(c, &data); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	feed := models.SubscriptionFeed{ | ||||
| 		URL:          data.URL, | ||||
| 		PullInterval: data.PullInterval, | ||||
| 		Adapter:      data.Adapter, | ||||
| 		AccountID:    &user.ID, | ||||
| 	} | ||||
|  | ||||
| 	if err := database.C.Create(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(feed) | ||||
| } | ||||
|  | ||||
| func updateFeedSubscription(c *fiber.Ctx) error { | ||||
| 	if err := sec.EnsureAuthenticated(c); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	user := c.Locals("nex_user").(*sec.UserInfo) | ||||
|  | ||||
| 	id, _ := c.ParamsInt("id", 0) | ||||
|  | ||||
| 	var data struct { | ||||
| 		URL          string `json:"url" validate:"required,url"` | ||||
| 		PullInterval int    `json:"pull_interval" validate:"required,min=6,max=720"` | ||||
| 		Adapter      string `json:"adapter"` | ||||
| 	} | ||||
| 	if err := exts.BindAndValidate(c, &data); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	var feed models.SubscriptionFeed | ||||
| 	if err := database.C.Where("account_id = ? AND id = ?", user.ID, id).First(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusNotFound, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	feed.URL = data.URL | ||||
| 	feed.PullInterval = data.PullInterval | ||||
| 	feed.Adapter = data.Adapter | ||||
|  | ||||
| 	if err := database.C.Save(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(feed) | ||||
| } | ||||
|  | ||||
| func toggleFeedSubscription(c *fiber.Ctx) error { | ||||
| 	if err := sec.EnsureAuthenticated(c); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	user := c.Locals("nex_user").(*sec.UserInfo) | ||||
|  | ||||
| 	id, _ := c.ParamsInt("id", 0) | ||||
|  | ||||
| 	var feed models.SubscriptionFeed | ||||
| 	if err := database.C.Where("account_id = ? AND id = ?", user.ID, id).First(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusNotFound, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	feed.IsEnabled = !feed.IsEnabled | ||||
|  | ||||
| 	if err := database.C.Save(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(feed) | ||||
| } | ||||
|  | ||||
| func deleteFeedSubscription(c *fiber.Ctx) error { | ||||
| 	if err := sec.EnsureAuthenticated(c); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
| 	user := c.Locals("nex_user").(*sec.UserInfo) | ||||
|  | ||||
| 	id, _ := c.ParamsInt("id", 0) | ||||
|  | ||||
| 	var feed models.SubscriptionFeed | ||||
| 	if err := database.C.Where("account_id = ? AND id = ?", user.ID, id).First(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusNotFound, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	if err := database.C.Delete(&feed).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.SendStatus(fiber.StatusOK) | ||||
| } | ||||
| @@ -8,8 +8,6 @@ import ( | ||||
| func MapAPIs(app *fiber.App, baseURL string) { | ||||
| 	api := app.Group(baseURL).Name("API") | ||||
| 	{ | ||||
| 		api.Get("/well-known/sources", getNewsSources) | ||||
|  | ||||
| 		admin := api.Group("/admin").Name("Admin") | ||||
| 		{ | ||||
| 			admin.Post("/scan", sec.ValidatorMiddleware, adminTriggerScanTask) | ||||
| @@ -17,11 +15,18 @@ func MapAPIs(app *fiber.App, baseURL string) { | ||||
|  | ||||
| 		api.Get("/link/*", getLinkMeta) | ||||
|  | ||||
| 		news := api.Group("/news").Name("News") | ||||
| 		subscription := api.Group("/subscriptions").Name("Subscriptions") | ||||
| 		{ | ||||
| 			news.Get("/today", getTodayNews) | ||||
| 			news.Get("/", listNewsArticles) | ||||
| 			news.Get("/:hash", getNewsArticle) | ||||
| 			feed := subscription.Group("/feed").Name("Feed") | ||||
| 			{ | ||||
| 				feed.Get("/", listFeedSubscriptions) | ||||
| 				feed.Get("/me", listCreatedFeedSubscriptions) | ||||
| 				feed.Get("/:id", getFeedSubscription) | ||||
| 				feed.Post("/", createFeedSubscription) | ||||
| 				feed.Put("/:id", updateFeedSubscription) | ||||
| 				feed.Post("/:id/toggle", toggleFeedSubscription) | ||||
| 				feed.Delete("/:id", deleteFeedSubscription) | ||||
| 			} | ||||
| 		} | ||||
| 	} | ||||
| } | ||||
|   | ||||
| @@ -1,96 +0,0 @@ | ||||
| package api | ||||
|  | ||||
| import ( | ||||
| 	"time" | ||||
|  | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/services" | ||||
|  | ||||
| 	"git.solsynth.dev/hypernet/nexus/pkg/nex/sec" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/database" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/models" | ||||
| 	"github.com/gofiber/fiber/v2" | ||||
| ) | ||||
|  | ||||
| func getTodayNews(c *fiber.Ctx) error { | ||||
| 	tx := database.C | ||||
| 	today := time.Now().Format("2006-01-02") | ||||
| 	tx = tx.Where("DATE(COALESCE(published_at, created_at)) = ?", today) | ||||
|  | ||||
| 	var count int64 | ||||
| 	countTx := tx | ||||
| 	if err := countTx.Model(&models.NewsArticle{}).Count(&count).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	var article models.NewsArticle | ||||
| 	if err := tx. | ||||
| 		Omit("Content").Order("COALESCE(published_at, created_at) DESC"). | ||||
| 		First(&article).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(fiber.Map{ | ||||
| 		"count": count, | ||||
| 		"data":  article, | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func listNewsArticles(c *fiber.Ctx) error { | ||||
| 	if err := sec.EnsureGrantedPerm(c, "ListNews", true); err != nil { | ||||
| 		return err | ||||
| 	} | ||||
|  | ||||
| 	take := c.QueryInt("take", 0) | ||||
| 	offset := c.QueryInt("offset", 0) | ||||
| 	source := c.Query("source") | ||||
|  | ||||
| 	tx := database.C | ||||
|  | ||||
| 	if len(source) > 0 { | ||||
| 		tx = tx.Where("source = ?", source) | ||||
| 	} | ||||
|  | ||||
| 	isAdvanced := false | ||||
| 	if err := sec.EnsureGrantedPerm(c, "ListNewsAdvanced", true); err == nil { | ||||
| 		isAdvanced = true | ||||
| 	} | ||||
|  | ||||
| 	var sources []string | ||||
| 	for _, srv := range services.GetNewsSources() { | ||||
| 		if !isAdvanced && srv.Advanced { | ||||
| 			continue | ||||
| 		} | ||||
| 		sources = append(sources, srv.ID) | ||||
| 	} | ||||
|  | ||||
| 	tx = tx.Where("source IN ?", sources) | ||||
|  | ||||
| 	var count int64 | ||||
| 	countTx := tx | ||||
| 	if err := countTx.Model(&models.NewsArticle{}).Count(&count).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	var articles []models.NewsArticle | ||||
| 	if err := tx.Limit(take).Offset(offset). | ||||
| 		Omit("Content").Order("COALESCE(published_at, created_at) DESC"). | ||||
| 		Find(&articles).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusInternalServerError, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(fiber.Map{ | ||||
| 		"count": count, | ||||
| 		"data":  articles, | ||||
| 	}) | ||||
| } | ||||
|  | ||||
| func getNewsArticle(c *fiber.Ctx) error { | ||||
| 	hash := c.Params("hash") | ||||
|  | ||||
| 	var article models.NewsArticle | ||||
| 	if err := database.C.Where("hash = ?", hash).First(&article).Error; err != nil { | ||||
| 		return fiber.NewError(fiber.StatusNotFound, err.Error()) | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(article) | ||||
| } | ||||
| @@ -1,23 +0,0 @@ | ||||
| package api | ||||
|  | ||||
| import ( | ||||
| 	"git.solsynth.dev/hypernet/nexus/pkg/nex/sec" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/models" | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/services" | ||||
| 	"github.com/gofiber/fiber/v2" | ||||
| 	"github.com/samber/lo" | ||||
| ) | ||||
|  | ||||
| func getNewsSources(c *fiber.Ctx) error { | ||||
| 	isAdvanced := false | ||||
| 	if err := sec.EnsureGrantedPerm(c, "ListNewsAdvanced", true); err == nil { | ||||
| 		isAdvanced = true | ||||
| 	} | ||||
|  | ||||
| 	return c.JSON(lo.Filter(services.GetNewsSources(), func(item models.NewsSource, index int) bool { | ||||
| 		if !isAdvanced && item.Advanced { | ||||
| 			return false | ||||
| 		} | ||||
| 		return item.Enabled | ||||
| 	})) | ||||
| } | ||||
| @@ -5,18 +5,9 @@ import ( | ||||
| 	"git.solsynth.dev/hypernet/reader/pkg/internal/models" | ||||
| ) | ||||
|  | ||||
| func GetTodayNewsRandomly(limit int, isAdvanced bool) ([]models.NewsArticle, error) { | ||||
| 	var sources []string | ||||
| 	for _, srv := range GetNewsSources() { | ||||
| 		if !isAdvanced && srv.Advanced { | ||||
| 			continue | ||||
| 		} | ||||
| 		sources = append(sources, srv.ID) | ||||
| 	} | ||||
|  | ||||
| 	var articles []models.NewsArticle | ||||
| func GetTodayFeedRandomly(limit int) ([]models.SubscriptionItem, error) { | ||||
| 	var articles []models.SubscriptionItem | ||||
| 	if err := database.C.Limit(limit). | ||||
| 		Where("source IN ?", sources). | ||||
| 		Where("DATE(created_at) = CURRENT_DATE"). // Created in today | ||||
| 		Order("RANDOM()"). | ||||
| 		Find(&articles).Error; err != nil { | ||||
|   | ||||
| @@ -13,42 +13,38 @@ import ( | ||||
| 	"github.com/rs/zerolog/log" | ||||
| 	"github.com/samber/lo" | ||||
| 	"github.com/sogko/go-wordpress" | ||||
| 	"github.com/spf13/viper" | ||||
| 	"gorm.io/gorm/clause" | ||||
| ) | ||||
| 
 | ||||
| var newsSources []models.NewsSource | ||||
| 
 | ||||
| func GetNewsSources() []models.NewsSource { | ||||
| 	return newsSources | ||||
| func FetchFeedTimed() { | ||||
| 	FetchFeed(false) | ||||
| } | ||||
| 
 | ||||
| func LoadNewsSources() error { | ||||
| 	if err := viper.UnmarshalKey("sources", &newsSources); err != nil { | ||||
| 		return err | ||||
| func FetchFeed(eager ...bool) { | ||||
| 	var feeds []models.SubscriptionFeed | ||||
| 	if err := database.C.Where("is_enabled = ?", true).Find(&feeds).Error; err != nil { | ||||
| 		log.Warn().Err(err).Msg("An error occurred when fetching feeds.") | ||||
| 		return | ||||
| 	} | ||||
| 	log.Info().Int("count", len(newsSources)).Msg("Loaded news sources configuration.") | ||||
| 	return nil | ||||
| } | ||||
| 
 | ||||
| func ScanNewsSourcesNoEager() { | ||||
| 	ScanNewsSources(false) | ||||
| } | ||||
| 	log.Info().Int("count", len(feeds)).Msg("Ready to fetch feeds...") | ||||
| 
 | ||||
| func ScanNewsSources(eager ...bool) { | ||||
| 	count := 0 | ||||
| 	for _, src := range newsSources { | ||||
| 		if !src.Enabled { | ||||
| 	var scannedFeed []uint | ||||
| 	for _, src := range feeds { | ||||
| 		if !src.IsEnabled { | ||||
| 			continue | ||||
| 		} | ||||
| 
 | ||||
| 		log.Debug().Str("source", src.ID).Msg("Scanning news source...") | ||||
| 		result, err := NewsSourceRead(src, eager...) | ||||
| 		log.Debug().Uint("source", src.ID).Msg("Scanning feed...") | ||||
| 		result, err := SubscriptionFeedRead(src, eager...) | ||||
| 		if err != nil { | ||||
| 			log.Warn().Err(err).Str("source", src.ID).Msg("Failed to scan a news source.") | ||||
| 			log.Warn().Err(err).Uint("source", src.ID).Msg("Failed to scan a feed.") | ||||
| 		} else { | ||||
| 			scannedFeed = append(scannedFeed, src.ID) | ||||
| 		} | ||||
| 
 | ||||
| 		result = lo.UniqBy(result, func(item models.NewsArticle) string { | ||||
| 		result = lo.UniqBy(result, func(item models.SubscriptionItem) string { | ||||
| 			return item.Hash | ||||
| 		}) | ||||
| 		database.C.Clauses(clause.OnConflict{ | ||||
| @@ -56,45 +52,47 @@ func ScanNewsSources(eager ...bool) { | ||||
| 			DoUpdates: clause.AssignmentColumns([]string{"thumbnail", "title", "content", "description", "published_at"}), | ||||
| 		}).Create(&result) | ||||
| 
 | ||||
| 		log.Info().Str("source", src.ID).Int("count", len(result)).Msg("Scanned a news sources.") | ||||
| 		log.Info().Uint("source", src.ID).Int("count", len(result)).Msg("Scanned a feed.") | ||||
| 		count += len(result) | ||||
| 	} | ||||
| 
 | ||||
| 	log.Info().Int("count", count).Msg("Scanned all news sources.") | ||||
| 	database.C.Where("id IN ?", scannedFeed).Update("last_fetched_at", time.Now()) | ||||
| 
 | ||||
| 	log.Info().Int("count", count).Msg("Scanned all feeds.") | ||||
| } | ||||
| 
 | ||||
| func NewsSourceRead(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) { | ||||
| 	switch src.Type { | ||||
| func SubscriptionFeedRead(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) { | ||||
| 	switch src.Adapter { | ||||
| 	case "wordpress": | ||||
| 		return newsSourceReadWordpress(src, eager...) | ||||
| 	case "scrap": | ||||
| 		return newsSourceReadScrap(src, eager...) | ||||
| 		return feedReadWordpress(src, eager...) | ||||
| 	case "webpage": | ||||
| 		return feedReadWebpage(src, eager...) | ||||
| 	case "feed": | ||||
| 		return newsSourceReadFeed(src, eager...) | ||||
| 		return feedReadGuidedFeed(src, eager...) | ||||
| 	default: | ||||
| 		return nil, fmt.Errorf("unsupported news source type: %s", src.Type) | ||||
| 		return nil, fmt.Errorf("unsupported feed source type: %s", src.Adapter) | ||||
| 	} | ||||
| } | ||||
| 
 | ||||
| func newsSourceReadWordpress(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) { | ||||
| 	wpConvert := func(post wordpress.Post) models.NewsArticle { | ||||
| 		article := &models.NewsArticle{ | ||||
| func feedReadWordpress(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) { | ||||
| 	wpConvert := func(post wordpress.Post) models.SubscriptionItem { | ||||
| 		article := &models.SubscriptionItem{ | ||||
| 			Title:       post.Title.Rendered, | ||||
| 			Description: post.Excerpt.Rendered, | ||||
| 			Content:     post.Content.Rendered, | ||||
| 			URL:         post.Link, | ||||
| 			Source:      src.ID, | ||||
| 			FeedID:      src.ID, | ||||
| 		} | ||||
| 		time, err := time.Parse("2006-01-02T15:04:05", post.DateGMT) | ||||
| 		date, err := time.Parse("2006-01-02T15:04:05", post.DateGMT) | ||||
| 		if err == nil { | ||||
| 			article.PublishedAt = &time | ||||
| 			article.PublishedAt = date | ||||
| 		} | ||||
| 		article.GenHash() | ||||
| 		return *article | ||||
| 	} | ||||
| 
 | ||||
| 	client := wordpress.NewClient(&wordpress.Options{ | ||||
| 		BaseAPIURL: src.Source, | ||||
| 		BaseAPIURL: src.URL, | ||||
| 	}) | ||||
| 
 | ||||
| 	posts, resp, _, err := client.Posts().List(nil) | ||||
| @@ -102,7 +100,7 @@ func newsSourceReadWordpress(src models.NewsSource, eager ...bool) ([]models.New | ||||
| 		return nil, err | ||||
| 	} | ||||
| 
 | ||||
| 	var result []models.NewsArticle | ||||
| 	var result []models.SubscriptionItem | ||||
| 	for _, post := range posts { | ||||
| 		result = append(result, wpConvert(post)) | ||||
| 	} | ||||
| @@ -110,7 +108,7 @@ func newsSourceReadWordpress(src models.NewsSource, eager ...bool) ([]models.New | ||||
| 	if len(eager) > 0 && eager[0] { | ||||
| 		totalPagesRaw := resp.Header.Get("X-WP-TotalPages") | ||||
| 		totalPages, _ := strconv.Atoi(totalPagesRaw) | ||||
| 		depth := min(totalPages, src.Depth) | ||||
| 		depth := min(totalPages, 10) | ||||
| 		for page := 2; page <= depth; page++ { | ||||
| 			posts, _, _, err := client.Posts().List(fiber.Map{ | ||||
| 				"page": page, | ||||
| @@ -127,11 +125,11 @@ func newsSourceReadWordpress(src models.NewsSource, eager ...bool) ([]models.New | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func newsSourceReadFeed(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) { | ||||
| 	pgConvert := func(article models.NewsArticle) models.NewsArticle { | ||||
| func feedReadGuidedFeed(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) { | ||||
| 	pgConvert := func(article models.SubscriptionItem) models.SubscriptionItem { | ||||
| 		art := &article | ||||
| 		art.GenHash() | ||||
| 		art.Source = src.ID | ||||
| 		art.FeedID = src.ID | ||||
| 		article = *art | ||||
| 		return article | ||||
| 	} | ||||
| @@ -139,7 +137,7 @@ func newsSourceReadFeed(src models.NewsSource, eager ...bool) ([]models.NewsArti | ||||
| 	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second) | ||||
| 	defer cancel() | ||||
| 	fp := gofeed.NewParser() | ||||
| 	feed, _ := fp.ParseURLWithContext(src.Source, ctx) | ||||
| 	feed, _ := fp.ParseURLWithContext(src.URL, ctx) | ||||
| 
 | ||||
| 	maxPages := lo.TernaryF(len(eager) > 0 && eager[0], func() int { | ||||
| 		if feed.Items == nil { | ||||
| @@ -147,29 +145,29 @@ func newsSourceReadFeed(src models.NewsSource, eager ...bool) ([]models.NewsArti | ||||
| 		} | ||||
| 		return len(feed.Items) | ||||
| 	}, func() int { | ||||
| 		return src.Depth | ||||
| 		return 10 * 10 | ||||
| 	}) | ||||
| 
 | ||||
| 	var result []models.NewsArticle | ||||
| 	var result []models.SubscriptionItem | ||||
| 	for _, item := range feed.Items { | ||||
| 		if maxPages <= 0 { | ||||
| 			break | ||||
| 		} | ||||
| 
 | ||||
| 		maxPages-- | ||||
| 		parent := models.NewsArticle{ | ||||
| 		parent := models.SubscriptionItem{ | ||||
| 			URL:         item.Link, | ||||
| 			Title:       item.Title, | ||||
| 			Description: item.Description, | ||||
| 		} | ||||
| 		if item.PublishedParsed != nil { | ||||
| 			parent.PublishedAt = item.PublishedParsed | ||||
| 			parent.PublishedAt = *item.PublishedParsed | ||||
| 		} | ||||
| 		if item.Image != nil { | ||||
| 			parent.Thumbnail = item.Image.URL | ||||
| 		} | ||||
| 
 | ||||
| 		article, err := ScrapNews(item.Link, parent) | ||||
| 		article, err := ScrapSubscriptionItem(item.Link, parent) | ||||
| 		if err != nil { | ||||
| 			log.Warn().Err(err).Str("url", item.Link).Msg("Failed to scrap a news article...") | ||||
| 			continue | ||||
| @@ -182,17 +180,17 @@ func newsSourceReadFeed(src models.NewsSource, eager ...bool) ([]models.NewsArti | ||||
| 	return result, nil | ||||
| } | ||||
| 
 | ||||
| func newsSourceReadScrap(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) { | ||||
| 	pgConvert := func(article models.NewsArticle) models.NewsArticle { | ||||
| func feedReadWebpage(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) { | ||||
| 	pgConvert := func(article models.SubscriptionItem) models.SubscriptionItem { | ||||
| 		art := &article | ||||
| 		art.GenHash() | ||||
| 		art.Source = src.ID | ||||
| 		art.FeedID = src.ID | ||||
| 		article = *art | ||||
| 		return article | ||||
| 	} | ||||
| 
 | ||||
| 	maxPages := lo.Ternary(len(eager) > 0 && eager[0], 0, src.Depth) | ||||
| 	result := ScrapNewsIndex(src.Source, maxPages) | ||||
| 	maxPages := lo.Ternary(len(eager) > 0 && eager[0], 0, 10*10) | ||||
| 	result := ScrapSubscriptionFeed(src.URL, maxPages) | ||||
| 
 | ||||
| 	for idx, page := range result { | ||||
| 		result[idx] = pgConvert(page) | ||||
| @@ -19,7 +19,7 @@ import ( | ||||
| ) | ||||
|  | ||||
| // We have to set the User-Agent to this so the sites will respond with opengraph data | ||||
| const ScrapLinkDefaultUA = "facebookexternalhit/1.1" | ||||
| const ScrapLinkDefaultUA = "FacebookExternalHit/1.1" | ||||
|  | ||||
| func GetLinkMetaFromCache(target string) (models.LinkMeta, error) { | ||||
| 	hash := md5.Sum([]byte(target)) | ||||
| @@ -128,7 +128,7 @@ func ScrapLink(target string) (*models.LinkMeta, error) { | ||||
|  | ||||
| const ScrapNewsDefaultUA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15" | ||||
|  | ||||
| func ScrapNewsIndex(target string, maxPages ...int) []models.NewsArticle { | ||||
| func ScrapSubscriptionFeed(target string, maxPages ...int) []models.SubscriptionItem { | ||||
| 	parsedTarget, err := url.Parse(target) | ||||
| 	if err != nil { | ||||
| 		return nil | ||||
| @@ -162,7 +162,7 @@ func ScrapNewsIndex(target string, maxPages ...int) []models.NewsArticle { | ||||
| 		ExpectContinueTimeout: 1 * time.Second, | ||||
| 	}) | ||||
|  | ||||
| 	var result []models.NewsArticle | ||||
| 	var result []models.SubscriptionItem | ||||
|  | ||||
| 	c.OnHTML("main a", func(e *colly.HTMLElement) { | ||||
| 		if limit <= 0 { | ||||
| @@ -178,7 +178,7 @@ func ScrapNewsIndex(target string, maxPages ...int) []models.NewsArticle { | ||||
| 		} | ||||
|  | ||||
| 		limit-- | ||||
| 		article, err := ScrapNews(url) | ||||
| 		article, err := ScrapSubscriptionItem(url) | ||||
| 		if err != nil { | ||||
| 			log.Warn().Err(err).Str("url", url).Msg("Failed to scrap a news article...") | ||||
| 			return | ||||
| @@ -190,12 +190,12 @@ func ScrapNewsIndex(target string, maxPages ...int) []models.NewsArticle { | ||||
| 		} | ||||
| 	}) | ||||
|  | ||||
| 	c.Visit(target) | ||||
| 	_ = c.Visit(target) | ||||
|  | ||||
| 	return result | ||||
| } | ||||
|  | ||||
| func ScrapNews(target string, parent ...models.NewsArticle) (*models.NewsArticle, error) { | ||||
| func ScrapSubscriptionItem(target string, parent ...models.SubscriptionItem) (*models.SubscriptionItem, error) { | ||||
| 	ua := viper.GetString("scraper.news_ua") | ||||
| 	if len(ua) == 0 { | ||||
| 		ua = ScrapNewsDefaultUA | ||||
| @@ -218,7 +218,7 @@ func ScrapNews(target string, parent ...models.NewsArticle) (*models.NewsArticle | ||||
| 		ExpectContinueTimeout: 1 * time.Second, | ||||
| 	}) | ||||
|  | ||||
| 	article := &models.NewsArticle{ | ||||
| 	article := &models.SubscriptionItem{ | ||||
| 		URL: target, | ||||
| 	} | ||||
|  | ||||
|   | ||||
| @@ -72,15 +72,10 @@ func main() { | ||||
| 		log.Fatal().Err(err).Msg("An error occurred when initializing cache.") | ||||
| 	} | ||||
|  | ||||
| 	// Load news sources | ||||
| 	if err := services.LoadNewsSources(); err != nil { | ||||
| 		log.Fatal().Err(err).Msg("An error occurred when loading news sources.") | ||||
| 	} | ||||
|  | ||||
| 	// Configure timed tasks | ||||
| 	quartz := cron.New(cron.WithLogger(cron.VerbosePrintfLogger(&log.Logger))) | ||||
| 	quartz.AddFunc("@every 60m", services.DoAutoDatabaseCleanup) | ||||
| 	quartz.AddFunc("@midnight", services.ScanNewsSourcesNoEager) | ||||
| 	quartz.AddFunc("@midnight", services.FetchFeedTimed) | ||||
| 	quartz.Start() | ||||
|  | ||||
| 	// Server | ||||
|   | ||||
		Reference in New Issue
	
	Block a user