✨ Read news basics
✨ Able to read WordPress sites

btw the package from 10 years ago still works properly, amazing...
it means the WordPress API did not change a lot and Go's backward
compatibility is amazing!
@@ -7,6 +7,7 @@ import (
 
 var AutoMaintainRange = []any{
 	&models.LinkMeta{},
+	&models.NewsArticle{},
 }
 
 func RunMigration(source *gorm.DB) error {
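
The hunk above (the file name did not survive this capture, presumably the database migrator) just registers the new model for auto-migration. A minimal sketch of how RunMigration likely consumes the slice, assuming the database package and guessing the body from the signature alone:

    package database

    import "gorm.io/gorm"

    var AutoMaintainRange = []any{
    	// model pointers registered here get auto-migrated on startup
    }

    // A guess at the body from the signature: gorm's AutoMigrate takes a
    // variadic ...interface{}, so the []any slice spreads straight into it.
    func RunMigration(source *gorm.DB) error {
    	return source.AutoMigrate(AutoMaintainRange...)
    }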

pkg/internal/models/new.go (new file, +32 lines)

@@ -0,0 +1,32 @@
+package models
+
+import (
+	"crypto/md5"
+	"encoding/hex"
+
+	"git.solsynth.dev/hypernet/nexus/pkg/nex/cruda"
+	"github.com/google/uuid"
+)
+
+type NewsArticle struct {
+	cruda.BaseModel
+
+	Thumbnail   string `json:"thumbnail"`
+	Title       string `json:"title"`
+	Description string `json:"description"`
+	Content     string `json:"content"`
+	URL         string `json:"url"`
+	Hash        string `json:"hash" gorm:"uniqueIndex"`
+	Source      string `json:"source"`
+}
+
+func (v *NewsArticle) GenHash() *NewsArticle {
+	if len(v.URL) == 0 {
+		v.Hash = uuid.NewString()
+		return v
+	}
+
+	hash := md5.Sum([]byte(v.URL))
+	v.Hash = hex.EncodeToString(hash[:])
+	return v
+}
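
Why this matters for dedup: Hash carries the uniqueIndex, and hashing the URL is deterministic, so re-reading the same post always maps to the same row; URL-less articles fall back to a random UUID and are never deduplicated. A tiny standalone illustration (the main wrapper is just for demonstration):

    package main

    import (
    	"crypto/md5"
    	"encoding/hex"
    	"fmt"
    )

    func main() {
    	// The same URL always yields the same digest, so the uniqueIndex
    	// on Hash can only ever hold one row per article URL.
    	url := "https://example.com/post/1"
    	sum := md5.Sum([]byte(url))
    	fmt.Println(hex.EncodeToString(sum[:])) // deterministic hex digest
    }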

pkg/internal/models/source.go (new file, +8 lines)

@@ -0,0 +1,8 @@
+package models
+
+type NewsSource struct {
+	ID     string `json:"id"`
+	Label  string `json:"label"`
+	Type   string `json:"type"`
+	Source string `json:"source"`
+}
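
These fields mirror a config entry; LoadNewsSources below unmarshals them from the viper key "sources". A hypothetical settings fragment, assuming a YAML config file, viper's default lowercased field names, and a made-up WordPress endpoint:

    sources:
      - id: example-blog
        label: Example Blog
        type: wordpress
        source: https://blog.example.com/wp-json/wp/v2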
@@ -7,6 +7,7 @@ import (
 func MapAPIs(app *fiber.App, baseURL string) {
 	api := app.Group(baseURL).Name("API")
 	{
+		api.Get("/well-known/sources", getNewsSources)
 		api.Get("/link/*", getLinkMeta)
 	}
 }

pkg/internal/server/api/well_known_api.go (new file, +10 lines)

@@ -0,0 +1,10 @@
+package api
+
+import (
+	"git.solsynth.dev/hypernet/reader/pkg/internal/services"
+	"github.com/gofiber/fiber/v2"
+)
+
+func getNewsSources(c *fiber.Ctx) error {
+	return c.JSON(services.NewsSources)
+}
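
Given the hypothetical config above, GET {baseURL}/well-known/sources would just echo the loaded sources back, roughly:

    [
      {
        "id": "example-blog",
        "label": "Example Blog",
        "type": "wordpress",
        "source": "https://blog.example.com/wp-json/wp/v2"
      }
    ]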

pkg/internal/services/reader.go (new file, +71 lines)

@@ -0,0 +1,71 @@
+package services
+
+import (
+	"fmt"
+
+	"git.solsynth.dev/hypernet/reader/pkg/internal/database"
+	"git.solsynth.dev/hypernet/reader/pkg/internal/models"
+	"github.com/rs/zerolog/log"
+	"github.com/sogko/go-wordpress"
+	"github.com/spf13/viper"
+)
+
+var NewsSources []models.NewsSource
+
+func LoadNewsSources() error {
+	if err := viper.UnmarshalKey("sources", &NewsSources); err != nil {
+		return err
+	}
+	log.Info().Int("count", len(NewsSources)).Msg("Loaded news sources configuration.")
+	return nil
+}
+
+func ScanNewsSources() {
+	var results []models.NewsArticle
+	for _, src := range NewsSources {
+		log.Debug().Str("source", src.ID).Msg("Scanning news source...")
+		result, err := NewsSourceRead(src)
+		if err != nil {
+			log.Warn().Err(err).Str("source", src.ID).Msg("Failed to scan a news source.")
+		}
+		results = append(results, result...)
+		log.Info().Str("source", src.ID).Int("count", len(result)).Msg("Scanned a news source.")
+	}
+	log.Info().Int("count", len(results)).Msg("Scanned all news sources.")
+	database.C.Save(&results)
+}
+
+func NewsSourceRead(src models.NewsSource) ([]models.NewsArticle, error) {
+	switch src.Type {
+	case "wordpress":
+		return newsSourceReadWordpress(src)
+	default:
+		return nil, fmt.Errorf("unsupported news source type: %s", src.Type)
+	}
+}
+
+func newsSourceReadWordpress(src models.NewsSource) ([]models.NewsArticle, error) {
+	client := wordpress.NewClient(&wordpress.Options{
+		BaseAPIURL: src.Source,
+	})
+
+	posts, _, _, err := client.Posts().List(nil)
+	if err != nil {
+		return nil, err
+	}
+
+	var result []models.NewsArticle
+	for _, post := range posts {
+		article := &models.NewsArticle{
+			Title:       post.Title.Rendered,
+			Description: post.Excerpt.Rendered,
+			Content:     post.Content.Rendered,
+			URL:         post.Link,
+			Source:      src.ID,
+		}
+		article.GenHash()
+		result = append(result, *article)
+	}
+
+	return result, nil
+}
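
One caveat with database.C.Save(&results): Hash carries a uniqueIndex, so the nightly re-scan will collide with rows saved the night before, and gorm also rejects an empty slice when every source failed. A hedged sketch of an idempotent alternative (saveArticlesIdempotent is a hypothetical helper, not part of this commit):

    package services

    import (
    	"git.solsynth.dev/hypernet/reader/pkg/internal/database"
    	"git.solsynth.dev/hypernet/reader/pkg/internal/models"
    	"gorm.io/gorm/clause"
    )

    // saveArticlesIdempotent skips rows whose hash already exists instead
    // of letting the unique index abort the whole batch, and avoids gorm's
    // "empty slice found" error when no source produced anything.
    func saveArticlesIdempotent(articles []models.NewsArticle) error {
    	if len(articles) == 0 {
    		return nil
    	}
    	return database.C.Clauses(clause.OnConflict{
    		Columns:   []clause.Column{{Name: "hash"}},
    		DoNothing: true,
    	}).Create(&articles).Error
    }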
@@ -12,10 +12,11 @@ import (
 	"github.com/gocolly/colly"
 	"github.com/rs/zerolog/log"
 	"github.com/samber/lo"
+	"github.com/spf13/viper"
 )
 
 // We have to set the User-Agent to this so the sites will respond with opengraph data
-const ScrapLinkUserAgent = "facebookexternalhit/1.1"
+const ScrapLinkDefaultUA = "facebookexternalhit/1.1"
 
 func GetLinkMetaFromCache(target string) (models.LinkMeta, error) {
 	hash := md5.Sum([]byte(target))
@@ -40,8 +41,13 @@ func ScrapLink(target string) (*models.LinkMeta, error) {
 		return &cache, nil
 	}
 
+	ua := viper.GetString("scraper.expand_ua")
+	if len(ua) == 0 {
+		ua = ScrapLinkDefaultUA
+	}
+
 	c := colly.NewCollector(
-		colly.UserAgent(ScrapLinkUserAgent),
+		colly.UserAgent(ua),
 		colly.MaxDepth(3),
 	)
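
The default stays facebookexternalhit/1.1 so sites keep serving OpenGraph data, while scraper.expand_ua lets an operator override it. In the same assumed YAML config as above, that could look like:

    scraper:
      expand_ua: "Mozilla/5.0 (compatible; ReaderBot/1.0)"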
@@ -72,9 +72,15 @@ func main() {
 		log.Fatal().Err(err).Msg("An error occurred when initializing cache.")
 	}
 
+	// Load news sources
+	if err := services.LoadNewsSources(); err != nil {
+		log.Fatal().Err(err).Msg("An error occurred when loading news sources.")
+	}
+
 	// Configure timed tasks
 	quartz := cron.New(cron.WithLogger(cron.VerbosePrintfLogger(&log.Logger)))
 	quartz.AddFunc("@every 60m", services.DoAutoDatabaseCleanup)
+	quartz.AddFunc("@midnight", services.ScanNewsSources)
 	quartz.Start()
 
 	// Server
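
@midnight means a fresh deployment stays empty until the next midnight tick; a hypothetical follow-up (not in this commit) could fire one background scan right after the scheduler starts:

    	quartz.Start()

    	// Hypothetical: run an initial scan in the background so the
    	// database is populated without waiting for the @midnight tick.
    	go services.ScanNewsSources()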