Compare commits

...

29 Commits

Author SHA1 Message Date
eba8977107 🐛 Trying to fix scraping 2025-05-08 01:47:21 +08:00
6cfa6e8285 💄 Optimized the feed 2025-04-06 14:15:54 +08:00
e34f248cfa 🐛 Fix API pagination 2025-04-06 13:44:59 +08:00
131778780c 🐛 Fix API stacking routing issue 2025-04-06 13:32:29 +08:00
fd9761f328 Feed the full content flag to reduce web requests 2025-04-06 13:27:57 +08:00
fd0d3699e4 Get feed item now will preload feed 2025-04-06 13:23:07 +08:00
c812359f8b Able to get feed full content 2025-04-06 13:21:36 +08:00
0c28766336 🐛 Fix items' published at 2025-04-06 13:14:44 +08:00
1f27667b7e Reduce the delay between two fetch feed timed task 2025-04-06 13:13:03 +08:00
b2f88c18c0 🐛 Fix large JWT causing 413 2025-04-05 23:51:57 +08:00
2fb98edb83 ♻️ Refactored feed module 2025-04-05 00:42:00 +08:00
3a8d85684c 💄 Optimize scraper thumbnail logic 2025-03-15 14:53:14 +08:00
17c280ddf7 Interactive feed provider 2025-03-15 13:37:43 +08:00
ee4e7a58fe 🐛 Fix panic 2025-03-15 13:03:04 +08:00
da46071b2c 🐛 No advance permission user still will see the source 2025-02-15 20:11:59 +08:00
cb61132011 🐛 Bug fix 2025-02-15 18:26:14 +08:00
48b2a8d470 Separate news permission level 2025-02-15 18:09:51 +08:00
a9b8fbf558 🛂 Add permission check for listing news 2025-01-29 19:22:36 +08:00
6abc233f00 📝 Update README 2025-01-26 20:17:59 +08:00
da62afe2d1 🐛 Fix ERROR: column reference "id" is ambiguous (SQLSTATE 42702) 2025-01-26 13:54:34 +08:00
f392f12f76 🐛 Trying to fix duplicate key when inserting 2025-01-26 13:49:52 +08:00
b398311bd2 🐛 Fix wordpress eager loading keep fetching the same page 2025-01-26 13:41:34 +08:00
52bd374eeb Get today's news 2025-01-26 13:20:53 +08:00
690bbd4b43 Admin trigger api now can decide to fetch which source only 2025-01-26 12:49:29 +08:00
1867b34735 👔 Fetch less pages when no eager mode 2025-01-26 12:47:30 +08:00
2a58d185eb ♻️ Save news article to db seprately 2025-01-26 01:00:59 +08:00
fbbe72bd54 👔 Scrap the website according the feed whatever it provide content or not 2025-01-26 00:41:33 +08:00
01b0cbce3e Able to filter news by source 2025-01-26 00:38:35 +08:00
d9fe0c6789 👔 Stop showing disabled news source in well known 2025-01-26 00:38:28 +08:00
22 changed files with 630 additions and 332 deletions

1
.gitignore vendored
View File

@ -3,3 +3,4 @@
/keys
.DS_Store
.idea

View File

@ -2,3 +2,8 @@
Reader is the HyperNet service that handles
link expansion and fetches external metadata
## Features
- Expand links
- Fetch external news

16
go.mod
View File

@ -3,21 +3,24 @@ module git.solsynth.dev/hypernet/reader
go 1.23.2
require (
git.solsynth.dev/hypernet/nexus v0.0.0-20241103165538-c0fec1084611
github.com/dgraph-io/ristretto v0.1.1
git.solsynth.dev/hypernet/interactive v0.0.0-20250315044754-43447a128652
git.solsynth.dev/hypernet/nexus v0.0.0-20241123050605-25ab1371739b
github.com/dgraph-io/ristretto v0.2.0
github.com/eko/gocache/lib/v4 v4.1.6
github.com/eko/gocache/store/ristretto/v4 v4.2.2
github.com/fatih/color v1.18.0
github.com/go-playground/validator/v10 v10.22.1
github.com/gocolly/colly v1.2.0
github.com/gofiber/fiber/v2 v2.52.5
github.com/google/uuid v1.6.0
github.com/json-iterator/go v1.1.12
github.com/mmcdole/gofeed v1.3.0
github.com/robfig/cron/v3 v3.0.1
github.com/rs/zerolog v1.33.0
github.com/samber/lo v1.47.0
github.com/sogko/go-wordpress v0.0.0-20160322054548-0f4f3dc4231f
github.com/spf13/viper v1.19.0
google.golang.org/grpc v1.67.1
google.golang.org/grpc v1.70.0
gorm.io/driver/postgres v1.5.9
gorm.io/gorm v1.25.12
)
@ -41,11 +44,9 @@ require (
github.com/gobwas/glob v0.2.3 // indirect
github.com/goccy/go-json v0.10.3 // indirect
github.com/golang-jwt/jwt/v5 v5.2.1 // indirect
github.com/golang/glog v1.2.2 // indirect
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da // indirect
github.com/golang/mock v1.6.0 // indirect
github.com/golang/protobuf v1.5.4 // indirect
github.com/google/uuid v1.6.0 // indirect
github.com/hashicorp/hcl v1.0.0 // indirect
github.com/jackc/pgpassfile v1.0.0 // indirect
github.com/jackc/pgservicefile v0.0.0-20240606120523-5a60cdf6a761 // indirect
@ -61,7 +62,6 @@ require (
github.com/mattn/go-isatty v0.0.20 // indirect
github.com/mattn/go-runewidth v0.0.16 // indirect
github.com/mitchellh/mapstructure v1.5.0 // indirect
github.com/mmcdole/gofeed v1.3.0 // indirect
github.com/mmcdole/goxpp v1.1.1 // indirect
github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect
github.com/modern-go/reflect2 v1.0.2 // indirect
@ -94,8 +94,8 @@ require (
golang.org/x/sys v0.29.0 // indirect
golang.org/x/text v0.21.0 // indirect
google.golang.org/appengine v1.6.8 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect
google.golang.org/protobuf v1.35.1 // indirect
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287 // indirect
google.golang.org/protobuf v1.36.4 // indirect
gopkg.in/ini.v1 v1.67.0 // indirect
gopkg.in/yaml.v3 v3.0.1 // indirect
gorm.io/datatypes v1.2.4 // indirect

57
go.sum
View File

@ -1,15 +1,15 @@
filippo.io/edwards25519 v1.1.0 h1:FNf4tywRC1HmFuKW5xopWpigGjJKiJSV0Cqo0cJWDaA=
filippo.io/edwards25519 v1.1.0/go.mod h1:BxyFTGdWcka3PhytdK4V28tE5sGfRvvvRV7EaN4VDT4=
git.solsynth.dev/hypernet/nexus v0.0.0-20241103165538-c0fec1084611 h1:ZEzUDsO88X+amOaEKZOpnQHHNYm5iw3hCBdy138sQro=
git.solsynth.dev/hypernet/nexus v0.0.0-20241103165538-c0fec1084611/go.mod h1:PhLCv2lsNoscPVJbkWnxwQnJ141lc4RIEkVffrHwl4s=
github.com/PuerkitoBio/goquery v1.10.0 h1:6fiXdLuUvYs2OJSvNRqlNPoBm6YABE226xrbavY5Wv4=
github.com/PuerkitoBio/goquery v1.10.0/go.mod h1:TjZZl68Q3eGHNBA8CWaxAN7rOU1EbDz3CWuolcO5Yu4=
git.solsynth.dev/hypernet/interactive v0.0.0-20250315044058-6e75f692633b h1:jwnyP1GNhdoKuqnCQ0cKTgzKiGus3vX5fzG7R4Ws6aM=
git.solsynth.dev/hypernet/interactive v0.0.0-20250315044058-6e75f692633b/go.mod h1:/+QaqRjHKOt9jM4Z47yvg94jH2Wb2DPG5l+FrPYmKwk=
git.solsynth.dev/hypernet/interactive v0.0.0-20250315044754-43447a128652 h1:bL4h4toyl+2PdpgJQRCIsp2iQYY2tMqAcrpABSCrWuc=
git.solsynth.dev/hypernet/interactive v0.0.0-20250315044754-43447a128652/go.mod h1:/+QaqRjHKOt9jM4Z47yvg94jH2Wb2DPG5l+FrPYmKwk=
git.solsynth.dev/hypernet/nexus v0.0.0-20241123050605-25ab1371739b h1:8yB9kMwEMY/nIbmDDxrhH5sTypgmK5PIIiIfP5QXx4s=
git.solsynth.dev/hypernet/nexus v0.0.0-20241123050605-25ab1371739b/go.mod h1:PhLCv2lsNoscPVJbkWnxwQnJ141lc4RIEkVffrHwl4s=
github.com/PuerkitoBio/goquery v1.10.1 h1:Y8JGYUkXWTGRB6Ars3+j3kN0xg1YqqlwvdTV8WTFQcU=
github.com/PuerkitoBio/goquery v1.10.1/go.mod h1:IYiHrOMps66ag56LEH7QYDDupKXyo5A8qrjIx3ZtujY=
github.com/andybalholm/brotli v1.1.1 h1:PR2pgnyFznKEugtsUo0xLdDop5SKXd5Qf5ysW+7XdTA=
github.com/andybalholm/brotli v1.1.1/go.mod h1:05ib4cKhjx3OQYUY22hTVd34Bc8upXjOLL2rKwwZBoA=
github.com/andybalholm/cascadia v1.3.2 h1:3Xi6Dw5lHF15JtdcmAHD3i1+T8plmv7BQ/nsViSLyss=
github.com/andybalholm/cascadia v1.3.2/go.mod h1:7gtRlve5FxPPgIgX36uWBX58OdBsSS6lUvCFb+h7KvU=
github.com/andybalholm/cascadia v1.3.3 h1:AG2YHrzJIm4BZ19iwJ/DAua6Btl3IwJX+VI4kktS1LM=
github.com/andybalholm/cascadia v1.3.3/go.mod h1:xNd9bqTn98Ln4DwST8/nG+H0yuB8Hmgu1YHNnWw0GeA=
github.com/antchfx/htmlquery v1.3.3 h1:x6tVzrRhVNfECDaVxnZi1mEGrQg3mjE/rxbH2Pe6dNE=
@ -18,7 +18,6 @@ github.com/antchfx/xmlquery v1.4.2 h1:MZKd9+wblwxfQ1zd1AdrTsqVaMjMCwow3IqkCSe00K
github.com/antchfx/xmlquery v1.4.2/go.mod h1:QXhvf5ldTuGqhd1SHNvvtlhhdQLks4dD0awIVhXIDTA=
github.com/antchfx/xpath v1.3.2 h1:LNjzlsSjinu3bQpw9hWMY9ocB80oLOWuQqFvO6xt51U=
github.com/antchfx/xpath v1.3.2/go.mod h1:i54GszH55fYfBmoZXapTHN8T8tkcHfRgLyVwwqzXNcs=
github.com/cespare/xxhash/v2 v2.1.1/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/cespare/xxhash/v2 v2.3.0 h1:UL815xU9SqsFlibzuggzjXhog7bL6oX9BbNZnL2UFvs=
github.com/cespare/xxhash/v2 v2.3.0/go.mod h1:VGX0DQ3Q6kWi7AoAeZDth3/j3BFtOZR5XLFGgcrjCOs=
github.com/coreos/go-systemd/v22 v22.5.0/go.mod h1:Y58oyj3AT4RCenI/lSvhwexgC+NSVTIJ3seZv2GcEnc=
@ -26,11 +25,10 @@ github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSs
github.com/davecgh/go-spew v1.1.1/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc h1:U9qPSI2PIWSS1VwoXQT9A3Wy9MM3WgvqSxFWenqJduM=
github.com/davecgh/go-spew v1.1.2-0.20180830191138-d8f796af33cc/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
github.com/dgraph-io/ristretto v0.1.1 h1:6CWw5tJNgpegArSHpNHJKldNeq03FQCwYvfMVWajOK8=
github.com/dgraph-io/ristretto v0.1.1/go.mod h1:S1GPSBCYCIhmVNfcth17y2zZtQT6wzkzgwUve0VDWWA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2 h1:tdlZCpZ/P9DhczCTSixgIKmwPv6+wP5DGjqLYw5SUiA=
github.com/dgryski/go-farm v0.0.0-20190423205320-6a90982ecee2/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dustin/go-humanize v1.0.0/go.mod h1:HtrtbFcZ19U5GC7JDqmcUSB87Iq5E25KnS6fMYU6eOk=
github.com/dgraph-io/ristretto v0.2.0 h1:XAfl+7cmoUDWW/2Lx8TGZQjjxIQ2Ley9DSf52dru4WE=
github.com/dgraph-io/ristretto v0.2.0/go.mod h1:8uBHCU/PBV4Ag0CJrP47b9Ofby5dqWNh4FicAdoqFNU=
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13 h1:fAjc9m62+UWV/WAFKLNi6ZS0675eEUC9y3AlwSbQu1Y=
github.com/dgryski/go-farm v0.0.0-20200201041132-a6ae2369ad13/go.mod h1:SqUrOPUnsFjfmXRMNPybcSiG0BgUW2AuFH8PAnS2iTw=
github.com/dustin/go-humanize v1.0.1 h1:GzkhY7T5VNhEkwH0PVJgjz+fX1rhBrR7pRT3mDkpeCY=
github.com/dustin/go-humanize v1.0.1/go.mod h1:Mu1zIs6XwVuF/gI1OepvI0qD18qycQx+mFykh5fBlto=
github.com/eko/gocache/lib/v4 v4.1.6 h1:5WWIGISKhE7mfkyF+SJyWwqa4Dp2mkdX8QsZpnENqJI=
@ -47,6 +45,10 @@ github.com/fsnotify/fsnotify v1.8.0 h1:dAwr6QBTBZIkG8roQaJjGof0pp0EeF+tNV7YBP3F/
github.com/fsnotify/fsnotify v1.8.0/go.mod h1:8jBTzvmWwFyi3Pb8djgCCO5IBqzKJ/Jwo8TRcHyHii0=
github.com/gabriel-vasile/mimetype v1.4.3 h1:in2uUcidCuFcDKtdcBxlR0rJ1+fsokWf+uqxgUFjbI0=
github.com/gabriel-vasile/mimetype v1.4.3/go.mod h1:d8uq/6HKRL6CGdk+aubisF/M5GcPfT7nKyLpA0lbSSk=
github.com/go-logr/logr v1.4.2 h1:6pFjapn8bFcIbiKo3XT4j/BhANplGihG6tvd+8rYgrY=
github.com/go-logr/logr v1.4.2/go.mod h1:9T104GzyrTigFIr8wt5mBrctHMim0Nb2HLGrmQ40KvY=
github.com/go-logr/stdr v1.2.2 h1:hSWxHoqTgW2S2qGc0LTAI563KZ5YKYRhT3MFKZMbjag=
github.com/go-logr/stdr v1.2.2/go.mod h1:mMo/vtBO5dYbehREoey6XUKy/eSumjCCveDpRre4VKE=
github.com/go-playground/assert/v2 v2.2.0 h1:JvknZsQTYeFEAhQwI4qEt9cyV5ONwRHC+lYKSsYSR8s=
github.com/go-playground/assert/v2 v2.2.0/go.mod h1:VDjEfimB/XKnb+ZQfWdccd7VUvScMdVu0Titje2rxJ4=
github.com/go-playground/locales v0.14.1 h1:EWaQ/wswjilfKLTECiXz7Rh+3BjFhfDFKv/oXslEjJA=
@ -73,9 +75,6 @@ github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9 h1:au07oEsX2xN0kt
github.com/golang-sql/civil v0.0.0-20220223132316-b832511892a9/go.mod h1:8vg3r2VgvsThLBIFL93Qb5yWzgyZWhEmBwUJWevAkK0=
github.com/golang-sql/sqlexp v0.1.0 h1:ZCD6MBpcuOVfGVqsEmY5/4FtYiKz6tSyUv9LPEDei6A=
github.com/golang-sql/sqlexp v0.1.0/go.mod h1:J4ad9Vo8ZCWQ2GMrC4UCQy1JpCbwU9m3EOqtpKwwwHI=
github.com/golang/glog v0.0.0-20160126235308-23def4e6c14b/go.mod h1:SBH7ygxi8pfUlaOkMMuAQtPIUF8ecWP5IEl/CR7VP2Q=
github.com/golang/glog v1.2.2 h1:1+mZ9upx1Dh6FmUTFR1naJ77miKiXgALjWOZ3NVFPmY=
github.com/golang/glog v1.2.2/go.mod h1:6AhwSGph0fcJtXVM/PEHPqZlFeoLxhs7/t5UDAwmO+w=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da h1:oI5xCqsCo564l8iNU+DwB5epxmsaqB+rhGL0m5jtYqE=
github.com/golang/groupcache v0.0.0-20210331224755-41bb18bfe9da/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
github.com/golang/mock v1.6.0 h1:ErTB+efbowRARo13NNdxyJji2egdxLGQhRaY+DUumQc=
@ -194,7 +193,6 @@ github.com/spf13/viper v1.19.0 h1:RWq5SEjt8o25SROyN3z2OrDB9l7RPd3lwTWU8EcEdcI=
github.com/spf13/viper v1.19.0/go.mod h1:GQUN9bilAbhU/jgc1bKs99f/suXKeUMct8Adx5+Ntkg=
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4=
github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg=
github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA=
github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY=
@ -214,6 +212,16 @@ github.com/xyproto/randomstring v1.0.5 h1:YtlWPoRdgMu3NZtP45drfy1GKoojuR7hmRcnhZ
github.com/xyproto/randomstring v1.0.5/go.mod h1:rgmS5DeNXLivK7YprL0pY+lTuhNQW3iGxZ18UQApw/E=
github.com/yuin/goldmark v1.3.5/go.mod h1:mwnBkeHKe2W/ZEtQ+71ViKU8L12m81fl3OWwC1Zlc8k=
github.com/yuin/goldmark v1.4.13/go.mod h1:6yULJ656Px+3vBD8DxQVa3kxgyrAnzto9xy5taEt/CY=
go.opentelemetry.io/otel v1.32.0 h1:WnBN+Xjcteh0zdk01SVqV55d/m62NJLJdIyb4y/WO5U=
go.opentelemetry.io/otel v1.32.0/go.mod h1:00DCVSB0RQcnzlwyTfqtxSm+DRr9hpYrHjNGiBHVQIg=
go.opentelemetry.io/otel/metric v1.32.0 h1:xV2umtmNcThh2/a/aCP+h64Xx5wsj8qqnkYZktzNa0M=
go.opentelemetry.io/otel/metric v1.32.0/go.mod h1:jH7CIbbK6SH2V2wE16W05BHCtIDzauciCRLoc/SyMv8=
go.opentelemetry.io/otel/sdk v1.32.0 h1:RNxepc9vK59A8XsgZQouW8ue8Gkb4jpWtJm9ge5lEG4=
go.opentelemetry.io/otel/sdk v1.32.0/go.mod h1:LqgegDBjKMmb2GC6/PrTnteJG39I8/vJCAP9LlJXEjU=
go.opentelemetry.io/otel/sdk/metric v1.32.0 h1:rZvFnvmvawYb0alrYkjraqJq0Z4ZUJAiyYCU9snn1CU=
go.opentelemetry.io/otel/sdk/metric v1.32.0/go.mod h1:PWeZlq0zt9YkYAp3gjKZ0eicRYvOh1Gd+X99x6GHpCQ=
go.opentelemetry.io/otel/trace v1.32.0 h1:WIC9mYrXf8TmY/EXuULKc8hR17vE+Hjv2cssQDe03fM=
go.opentelemetry.io/otel/trace v1.32.0/go.mod h1:+i4rkvCraA+tG6AzwloGaCtkx53Fa+L+V8e9a7YvhT8=
go.uber.org/multierr v1.11.0 h1:blXXJkSxSSfBVBlC76pxqeO+LN3aDfLQo+309xJstO0=
go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN80Y=
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
@ -240,7 +248,6 @@ golang.org/x/net v0.0.0-20210405180319-a5a99cb37ef4/go.mod h1:p54w0d4576C0XHj96b
golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug+ECip1KBveYUHfp+8e9klMJ9c=
golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs=
golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns=
golang.org/x/net v0.10.0/go.mod h1:0qNGK6F8kojg2nk9dLZ2mShWaEBan6FAoqfSigmmuDg=
golang.org/x/net v0.15.0/go.mod h1:idbUs1IY1+zTqbi8yxTbhexhEEk5ur9LInksu6HrEpk=
golang.org/x/net v0.21.0/go.mod h1:bIjVDfnllIU7BJ2DNgfnXvpSvtn8VRwhlsaeUTyUS44=
@ -266,10 +273,8 @@ golang.org/x/sys v0.0.0-20210615035016-665e8c7367d1/go.mod h1:oPkhp1MJrh7nUepCBc
golang.org/x/sys v0.0.0-20220520151302-bc2c85ada10a/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220722155257-8c9f86f7a55f/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20220811171246-fbc7d0a398ab/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.0.0-20221010170243-090e33056c14/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.8.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg=
golang.org/x/sys v0.17.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA=
@ -281,7 +286,6 @@ golang.org/x/telemetry v0.0.0-20240228155512-f48c80bd79b2/go.mod h1:TeRTkGYfJXct
golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo=
golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8=
golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k=
golang.org/x/term v0.7.0/go.mod h1:P32HKFT3hSsZrRxla30E9HqToFYAQPCMs/zFMBUFqPY=
golang.org/x/term v0.8.0/go.mod h1:xPskH00ivmX89bAKVGSKKtLOWNx2+17Eiy94tnKShWo=
golang.org/x/term v0.12.0/go.mod h1:owVbMEjm3cBLCHdkQu9b1opXd4ETQWc3BhuQGKgXgvU=
golang.org/x/term v0.17.0/go.mod h1:lLRBjIVuehSbZlaOtGMbcMncT+aqLLLmKrsjNrUguwk=
@ -311,20 +315,19 @@ golang.org/x/xerrors v0.0.0-20191204190536-9bdfabe68543/go.mod h1:I/5z698sn9Ka8T
golang.org/x/xerrors v0.0.0-20200804184101-5ec99f83aff1/go.mod h1:I/5z698sn9Ka8TeJc9MKroUUfqBBauWjQqLJ2OPfmY0=
google.golang.org/appengine v1.6.8 h1:IhEN5q69dyKagZPYMSdIjS2HqprW324FRQZJcGqPAsM=
google.golang.org/appengine v1.6.8/go.mod h1:1jJ3jBArFh5pcgW8gCtRJnepW8FzD1V44FJffLiz/Ds=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 h1:zciRKQ4kBpFgpfC5QQCVtnnNAcLIqweL7plyZRQHVpI=
google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38/go.mod h1:GX3210XPVPUjJbTUbvwI8f2IpZDMZuPJWDzDuebbviI=
google.golang.org/grpc v1.67.1 h1:zWnc1Vrcno+lHZCOofnIMvycFcc0QRGIzm9dhnDX68E=
google.golang.org/grpc v1.67.1/go.mod h1:1gLDyUQU7CTLJI90u3nXZ9ekeghjeM7pTDZlqFNg2AA=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287 h1:J1H9f+LEdWAfHcez/4cvaVBox7cOYT+IU6rgqj5x++8=
google.golang.org/genproto/googleapis/rpc v0.0.0-20250127172529-29210b9bc287/go.mod h1:8BS3B93F/U1juMFq9+EDk+qOT5CO1R9IzXxG3PTqiRk=
google.golang.org/grpc v1.70.0 h1:pWFv03aZoHzlRKHWicjsZytKAiYCtNS0dHbXnIdq7jQ=
google.golang.org/grpc v1.70.0/go.mod h1:ofIJqVKDXx/JiXrwr2IG4/zwdH9txy3IlF40RmcJSQw=
google.golang.org/protobuf v1.26.0-rc.1/go.mod h1:jlhhOSvTdKEhbULTjvd4ARK9grFBp09yW+WbY/TyQbw=
google.golang.org/protobuf v1.26.0/go.mod h1:9q0QmTI4eRPtz6boOQmLYwt+qCgq0jsYwAQnmE0givc=
google.golang.org/protobuf v1.35.1 h1:m3LfL6/Ca+fqnjnlqQXNpFPABW1UD7mjh8KO2mKFytA=
google.golang.org/protobuf v1.35.1/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
google.golang.org/protobuf v1.36.4 h1:6A3ZDJHn/eNqc1i+IdefRzy/9PokBTPvcqMySR7NNIM=
google.golang.org/protobuf v1.36.4/go.mod h1:9fA7Ob0pmnwhb644+1+CVWFRbNajQ6iRojtC/QF5bRE=
gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405/go.mod h1:Co6ibVJAznAaIkqp8huTwlJQCZ016jof/cbN4VW5Yz0=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c h1:Hei/4ADfdWqJk1ZMxUNpqntNwaWcugrBjAiHlqqRiVk=
gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c/go.mod h1:JHkPIbrfpd72SG/EVd6muEfDQjcINNoR0C8j2r3qZ4Q=
gopkg.in/ini.v1 v1.67.0 h1:Dgnx+6+nfE+IfzjUEISNeydPJh9AXNNsWbGP9KzCsOA=
gopkg.in/ini.v1 v1.67.0/go.mod h1:pNLf8WUiyNEtQjuu5G5vTm06TEv9tsIgeAvK8hOrP4k=
gopkg.in/yaml.v2 v2.2.2/go.mod h1:hI93XBmqTisBFMUTm0b8Fm+jr3Dg1NNxqwp+5A1VGuI=
gopkg.in/yaml.v3 v3.0.0-20200313102051-9f266ea9e77c/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=
gopkg.in/yaml.v3 v3.0.1 h1:fxVm/GzAzEWqLHuvctI91KS9hhNmmWOoWu0XTYJS7CA=
gopkg.in/yaml.v3 v3.0.1/go.mod h1:K4uyk7z7BCEPqu6E+C64Yfv1cQ7kz7rIZviUmN+EgEM=

View File

@ -7,7 +7,8 @@ import (
var AutoMaintainRange = []any{
&models.LinkMeta{},
&models.NewsArticle{},
&models.SubscriptionFeed{},
&models.SubscriptionItem{},
}
func RunMigration(source *gorm.DB) error {

36
pkg/internal/grpc/feed.go Normal file
View File

@ -0,0 +1,36 @@
package grpc
import (
"context"
"time"
iproto "git.solsynth.dev/hypernet/interactive/pkg/proto"
"git.solsynth.dev/hypernet/nexus/pkg/nex"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"git.solsynth.dev/hypernet/reader/pkg/internal/services"
"github.com/samber/lo"
"google.golang.org/grpc/codes"
"google.golang.org/grpc/status"
)
// GetFeed implements the feed service RPC by returning subscription items
// wrapped as feed items of type "reader.feed".
//
// The request limit is forwarded as the maximum number of items. When the
// request carries a cursor it is interpreted as a Unix timestamp in
// milliseconds and forwarded to the service layer; otherwise no cursor is
// applied. Any service error is reported with the gRPC Internal code.
//
// NOTE(review): the cursor is read in milliseconds while CreatedAt below is
// emitted in seconds — confirm which unit the consumer expects.
func (v *Server) GetFeed(_ context.Context, in *iproto.GetFeedRequest) (*iproto.GetFeedResponse, error) {
	var before *time.Time
	if in.Cursor != nil {
		before = lo.ToPtr(time.UnixMilli(int64(in.GetCursor())))
	}

	entries, err := services.GetTodayFeedRandomly(int(in.GetLimit()), before)
	if err != nil {
		return nil, status.Error(codes.Internal, err.Error())
	}

	// Each stored item becomes one feed entry carrying the encoded item as content.
	toFeedItem := func(entry models.SubscriptionItem, _ int) *iproto.FeedItem {
		return &iproto.FeedItem{
			Type:      "reader.feed",
			Content:   nex.EncodeMap(entry),
			CreatedAt: uint64(entry.CreatedAt.Unix()),
		}
	}

	resp := &iproto.GetFeedResponse{
		Items: lo.Map(entries, toFeedItem),
	}
	return resp, nil
}

View File

@ -3,6 +3,7 @@ package grpc
import (
"net"
iproto "git.solsynth.dev/hypernet/interactive/pkg/proto"
"git.solsynth.dev/hypernet/nexus/pkg/proto"
"github.com/spf13/viper"
"google.golang.org/grpc"
@ -12,6 +13,7 @@ import (
type Server struct {
proto.UnimplementedDirectoryServiceServer
iproto.UnimplementedFeedServiceServer
health.UnimplementedHealthServer
srv *grpc.Server
@ -23,6 +25,7 @@ func NewGrpc() *Server {
}
proto.RegisterDirectoryServiceServer(server.srv, server)
iproto.RegisterFeedServiceServer(server.srv, server)
health.RegisterHealthServer(server.srv, server)
reflection.Register(server.srv)

View File

@ -0,0 +1,47 @@
package models
import (
"crypto/md5"
"encoding/hex"
"git.solsynth.dev/hypernet/nexus/pkg/nex/cruda"
"github.com/google/uuid"
"time"
)
// SubscriptionFeed is a persisted feed source that is polled for new items.
type SubscriptionFeed struct {
cruda.BaseModel
// URL is the address of the feed source.
URL string `json:"url"`
// IsEnabled marks whether the feed takes part in fetching; it can be flipped through the toggle endpoint.
IsEnabled bool `json:"is_enabled"`
// IsFullContent presumably signals that the feed already ships full article content — TODO confirm against the adapters.
IsFullContent bool `json:"is_full_content"`
// PullInterval is the polling interval in hours (the due-feed query adds it to last_fetched_at as hours).
PullInterval int `json:"pull_interval"`
// Adapter selects the reader implementation: "wordpress", "webpage" or "feed".
Adapter string `json:"adapter"`
// AccountID is the ID of the account that created the feed, when there is one.
AccountID *uint `json:"account_id"`
// LastFetchedAt is the time of the last successful scan; nil until the feed has been scanned once.
LastFetchedAt *time.Time `json:"last_fetched_at"`
}
// SubscriptionItem is a single article collected from a SubscriptionFeed.
type SubscriptionItem struct {
cruda.BaseModel
// FeedID references the feed this item was collected from.
FeedID uint `json:"feed_id"`
// Feed is the owning feed; the API handlers fill it via Preload("Feed").
Feed SubscriptionFeed `json:"feed"`
Thumbnail string `json:"thumbnail"`
Title string `json:"title"`
Description string `json:"description"`
// Content is the article body; list queries omit this column.
Content string `json:"content"`
URL string `json:"url"`
// Hash identifies the item; it carries a unique index and is the conflict target when items are upserted.
Hash string `json:"hash" gorm:"uniqueIndex"`
// PublishedAt is the time the article was published. When the feed adapter does not provide it, it defaults to the creation date.
PublishedAt time.Time `json:"published_at"`
}
// GenHash fills in Hash, the value backing the item's unique index.
//
// When the item has a URL, the hash is the hex-encoded MD5 digest of that
// URL, so the same URL always yields the same hash. When the URL is empty a
// random UUID is stored in Hash instead, so that URL-less items do not all
// share the empty hash and collide on the unique index. The URL itself is
// never modified.
func (v *SubscriptionItem) GenHash() {
	if len(v.URL) == 0 {
		// No URL to derive a stable hash from: fall back to a random identifier.
		v.Hash = uuid.NewString()
		return
	}

	sum := md5.Sum([]byte(v.URL))
	v.Hash = hex.EncodeToString(sum[:])
}

View File

@ -1,34 +0,0 @@
package models
import (
"crypto/md5"
"encoding/hex"
"time"
"git.solsynth.dev/hypernet/nexus/pkg/nex/cruda"
"github.com/google/uuid"
)
// NewsArticle is a persisted news article.
type NewsArticle struct {
cruda.BaseModel
Thumbnail string `json:"thumbnail"`
Title string `json:"title"`
Description string `json:"description"`
// Content is the article body; the listing handler omits this column.
Content string `json:"content"`
URL string `json:"url"`
// Hash identifies the article; it carries a unique index and is the lookup key of the single-article endpoint.
Hash string `json:"hash" gorm:"uniqueIndex"`
// Source presumably names the news source the article came from — TODO confirm.
Source string `json:"source"`
// PublishedAt is optional and nil when unknown.
PublishedAt *time.Time `json:"published_at"`
}
// GenHash sets Hash on the article and returns the receiver for chaining.
//
// Articles with a URL get the hex-encoded MD5 digest of that URL; articles
// without one get a freshly generated UUID string.
func (v *NewsArticle) GenHash() *NewsArticle {
	if v.URL != "" {
		digest := md5.Sum([]byte(v.URL))
		v.Hash = hex.EncodeToString(digest[:])
		return v
	}

	v.Hash = uuid.NewString()
	return v
}

View File

@ -1,10 +0,0 @@
package models
// NewsSource describes one configured news source.
// NOTE(review): field meanings below are inferred from names only — confirm against the scanner that consumes them.
type NewsSource struct {
ID string `json:"id"`
Label string `json:"label"`
// Type presumably selects how the source is read — TODO confirm.
Type string `json:"type"`
Source string `json:"source"`
// Depth presumably bounds how far the source is crawled — TODO confirm.
Depth int `json:"depth"`
Enabled bool `json:"enabled"`
}

View File

@ -20,6 +20,7 @@ func adminTriggerScanTask(c *fiber.Ctx) error {
return err
}
go services.ScanNewsSources(data.Eager)
go services.FetchFeed(data.Eager)
return c.SendStatus(fiber.StatusOK)
}

View File

@ -0,0 +1,42 @@
package api
import (
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"github.com/gofiber/fiber/v2"
)
// listFeedItem returns a page of subscription items, newest first.
//
// Query parameters:
//   - take:   maximum number of items to return (default 10)
//   - offset: number of items to skip (default 0)
//
// The response is a JSON object with "count" (total number of stored items)
// and "data" (the requested page, with each item's feed preloaded and the
// content column omitted). Database failures are server-side faults and are
// reported as 500 Internal Server Error, matching the other listing handlers.
func listFeedItem(c *fiber.Ctx) error {
	take := c.QueryInt("take", 10)
	offset := c.QueryInt("offset", 0)

	var count int64
	if err := database.C.Model(&models.SubscriptionItem{}).Count(&count).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	var items []models.SubscriptionItem
	if err := database.C.
		Order("published_at DESC").
		Omit("Content").
		Preload("Feed").
		Limit(take).Offset(offset).Find(&items).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	return c.JSON(fiber.Map{
		"count": count,
		"data":  items,
	})
}
// getFeedItem returns the subscription item addressed by the "id" route
// parameter, with its feed preloaded. A lookup failure is reported as
// 404 Not Found carrying the database error message.
func getFeedItem(c *fiber.Ctx) error {
	// A missing or non-numeric id falls back to 0.
	itemID, _ := c.ParamsInt("id", 0)

	var found models.SubscriptionItem
	lookup := database.C.Where("id = ?", itemID).Preload("Feed")
	if err := lookup.First(&found).Error; err != nil {
		return fiber.NewError(fiber.StatusNotFound, err.Error())
	}

	return c.JSON(found)
}

View File

@ -0,0 +1,170 @@
package api
import (
"git.solsynth.dev/hypernet/nexus/pkg/nex/sec"
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"git.solsynth.dev/hypernet/reader/pkg/internal/server/exts"
"github.com/gofiber/fiber/v2"
)
// listFeedSubscriptions returns a page of all feed subscriptions.
//
// The "take" (default 10) and "offset" (default 0) query parameters select
// the page. The JSON response holds the total number of feeds under "count"
// and the page under "data". Database failures yield 500.
func listFeedSubscriptions(c *fiber.Ctx) error {
	pageSize, skip := c.QueryInt("take", 10), c.QueryInt("offset", 0)

	var total int64
	counting := database.C.Model(&models.SubscriptionFeed{}).Count(&total)
	if err := counting.Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	var page []models.SubscriptionFeed
	listing := database.C.Limit(pageSize).Offset(skip).Find(&page)
	if err := listing.Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	payload := fiber.Map{
		"count": total,
		"data":  page,
	}
	return c.JSON(payload)
}
// listCreatedFeedSubscriptions returns a page of the feed subscriptions
// created by the authenticated user.
//
// Query parameters:
//   - take:   maximum number of feeds to return (default 10)
//   - offset: number of feeds to skip (default 0)
//
// The response is a JSON object with "count" (number of feeds owned by the
// user) and "data" (the requested page). Unauthenticated requests are
// rejected by sec.EnsureAuthenticated; database failures yield 500.
func listCreatedFeedSubscriptions(c *fiber.Ctx) error {
	if err := sec.EnsureAuthenticated(c); err != nil {
		return err
	}
	user := c.Locals("nex_user").(*sec.UserInfo)

	take := c.QueryInt("take", 10)
	offset := c.QueryInt("offset", 0)

	// Build the count and the page query independently from database.C so
	// neither statement inherits clauses accumulated by the other.
	var count int64
	if err := database.C.
		Model(&models.SubscriptionFeed{}).
		Where("account_id = ?", user.ID).
		Count(&count).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	// Limit (not Take) caps the page size: Take is a finisher that loads a
	// single record into its argument and must not receive the page size.
	var feeds []models.SubscriptionFeed
	if err := database.C.
		Where("account_id = ?", user.ID).
		Limit(take).Offset(offset).
		Find(&feeds).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	return c.JSON(fiber.Map{
		"count": count,
		"data":  feeds,
	})
}
// getFeedSubscription returns the feed subscription addressed by the "id"
// route parameter, or 404 Not Found when the lookup fails.
func getFeedSubscription(c *fiber.Ctx) error {
	// A missing or non-numeric id falls back to 0.
	feedID, _ := c.ParamsInt("id", 0)

	var found models.SubscriptionFeed
	lookup := database.C.Where("id = ?", feedID).First(&found)
	if err := lookup.Error; err != nil {
		return fiber.NewError(fiber.StatusNotFound, err.Error())
	}

	return c.JSON(found)
}
// createFeedSubscription registers a new feed owned by the calling user.
//
// The caller must hold the "CreateFeedSubscription" permission. The request
// body supplies the feed URL (required, must be a URL), the pull interval
// (required, between 6 and 720) and the adapter name. The created feed is
// returned as JSON; a failed insert yields 500.
func createFeedSubscription(c *fiber.Ctx) error {
	if err := sec.EnsureGrantedPerm(c, "CreateFeedSubscription", true); err != nil {
		return err
	}
	owner := c.Locals("nex_user").(*sec.UserInfo)

	var payload struct {
		URL          string `json:"url" validate:"required,url"`
		PullInterval int    `json:"pull_interval" validate:"required,min=6,max=720"`
		Adapter      string `json:"adapter"`
	}
	if err := exts.BindAndValidate(c, &payload); err != nil {
		return err
	}

	created := models.SubscriptionFeed{
		URL:          payload.URL,
		PullInterval: payload.PullInterval,
		Adapter:      payload.Adapter,
		AccountID:    &owner.ID,
	}
	if err := database.C.Create(&created).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	return c.JSON(created)
}
// updateFeedSubscription overwrites the URL, pull interval and adapter of a
// feed owned by the authenticated user.
//
// The feed is addressed by the "id" route parameter and must belong to the
// caller, otherwise 404 is returned. The body is validated before the feed
// is looked up. The saved feed is returned as JSON; a failed save yields 500.
func updateFeedSubscription(c *fiber.Ctx) error {
	if err := sec.EnsureAuthenticated(c); err != nil {
		return err
	}
	owner := c.Locals("nex_user").(*sec.UserInfo)

	// A missing or non-numeric id falls back to 0.
	feedID, _ := c.ParamsInt("id", 0)

	var payload struct {
		URL          string `json:"url" validate:"required,url"`
		PullInterval int    `json:"pull_interval" validate:"required,min=6,max=720"`
		Adapter      string `json:"adapter"`
	}
	if err := exts.BindAndValidate(c, &payload); err != nil {
		return err
	}

	var target models.SubscriptionFeed
	owned := database.C.Where("account_id = ? AND id = ?", owner.ID, feedID)
	if err := owned.First(&target).Error; err != nil {
		return fiber.NewError(fiber.StatusNotFound, err.Error())
	}

	target.URL, target.PullInterval, target.Adapter = payload.URL, payload.PullInterval, payload.Adapter

	if err := database.C.Save(&target).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	return c.JSON(target)
}
// toggleFeedSubscription flips the enabled flag of a feed owned by the
// authenticated user and returns the saved feed as JSON.
//
// The feed is addressed by the "id" route parameter; a feed that does not
// exist or belongs to someone else yields 404, a failed save yields 500.
func toggleFeedSubscription(c *fiber.Ctx) error {
	if err := sec.EnsureAuthenticated(c); err != nil {
		return err
	}
	owner := c.Locals("nex_user").(*sec.UserInfo)

	// A missing or non-numeric id falls back to 0.
	feedID, _ := c.ParamsInt("id", 0)

	var target models.SubscriptionFeed
	owned := database.C.Where("account_id = ? AND id = ?", owner.ID, feedID)
	if err := owned.First(&target).Error; err != nil {
		return fiber.NewError(fiber.StatusNotFound, err.Error())
	}

	target.IsEnabled = !target.IsEnabled

	if err := database.C.Save(&target).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	return c.JSON(target)
}
// deleteFeedSubscription removes a feed owned by the authenticated user and
// answers with 200 OK.
//
// The feed is addressed by the "id" route parameter; a feed that does not
// exist or belongs to someone else yields 404, a failed delete yields 500.
func deleteFeedSubscription(c *fiber.Ctx) error {
	if err := sec.EnsureAuthenticated(c); err != nil {
		return err
	}
	owner := c.Locals("nex_user").(*sec.UserInfo)

	// A missing or non-numeric id falls back to 0.
	feedID, _ := c.ParamsInt("id", 0)

	var target models.SubscriptionFeed
	owned := database.C.Where("account_id = ? AND id = ?", owner.ID, feedID)
	if err := owned.First(&target).Error; err != nil {
		return fiber.NewError(fiber.StatusNotFound, err.Error())
	}

	if err := database.C.Delete(&target).Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	return c.SendStatus(fiber.StatusOK)
}

View File

@ -8,8 +8,6 @@ import (
func MapAPIs(app *fiber.App, baseURL string) {
api := app.Group(baseURL).Name("API")
{
api.Get("/well-known/sources", getNewsSources)
admin := api.Group("/admin").Name("Admin")
{
admin.Post("/scan", sec.ValidatorMiddleware, adminTriggerScanTask)
@ -17,10 +15,21 @@ func MapAPIs(app *fiber.App, baseURL string) {
api.Get("/link/*", getLinkMeta)
news := api.Group("/news").Name("News")
subscription := api.Group("/subscriptions").Name("Subscriptions")
{
news.Get("/", listNewsArticles)
news.Get("/:hash", getNewsArticle)
feed := subscription.Group("/feed").Name("Feed")
{
feed.Get("/", listFeedSubscriptions)
feed.Get("/me", listCreatedFeedSubscriptions)
feed.Get("/:id", getFeedSubscription)
feed.Post("/", createFeedSubscription)
feed.Put("/:id", updateFeedSubscription)
feed.Post("/:id/toggle", toggleFeedSubscription)
feed.Delete("/:id", deleteFeedSubscription)
}
subscription.Get("/", listFeedItem)
subscription.Get("/:id", getFeedItem)
}
}
}

View File

@ -1,40 +0,0 @@
package api
import (
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"github.com/gofiber/fiber/v2"
)
// listNewsArticles returns a page of news articles ordered by creation time,
// newest first, without the content column.
//
// The "take" and "offset" query parameters both default to 0 and are passed
// to the query as limit and offset. The JSON response holds the total number
// of articles under "count" and the page under "data". Database failures
// yield 500.
func listNewsArticles(c *fiber.Ctx) error {
	pageSize, skip := c.QueryInt("take", 0), c.QueryInt("offset", 0)

	var total int64
	counting := database.C.Model(&models.NewsArticle{}).Count(&total)
	if err := counting.Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	var page []models.NewsArticle
	listing := database.C.
		Limit(pageSize).
		Offset(skip).
		Omit("Content").
		Order("created_at DESC").
		Find(&page)
	if err := listing.Error; err != nil {
		return fiber.NewError(fiber.StatusInternalServerError, err.Error())
	}

	payload := fiber.Map{
		"count": total,
		"data":  page,
	}
	return c.JSON(payload)
}
// getNewsArticle returns the news article whose hash equals the "hash" route
// parameter, or 404 Not Found when the lookup fails.
func getNewsArticle(c *fiber.Ctx) error {
	var found models.NewsArticle

	lookup := database.C.Where("hash = ?", c.Params("hash")).First(&found)
	if err := lookup.Error; err != nil {
		return fiber.NewError(fiber.StatusNotFound, err.Error())
	}

	return c.JSON(found)
}

View File

@ -1,10 +0,0 @@
package api
import (
"git.solsynth.dev/hypernet/reader/pkg/internal/services"
"github.com/gofiber/fiber/v2"
)
// getNewsSources responds with services.NewsSources serialized as JSON.
func getNewsSources(c *fiber.Ctx) error {
	sources := services.NewsSources
	return c.JSON(sources)
}

View File

@ -32,6 +32,7 @@ func NewServer() *App {
JSONEncoder: jsoniter.ConfigCompatibleWithStandardLibrary.Marshal,
JSONDecoder: jsoniter.ConfigCompatibleWithStandardLibrary.Unmarshal,
BodyLimit: 512 * 1024 * 1024 * 1024, // 512 TiB
ReadBufferSize: 5 * 1024 * 1024, // 5MB for large JWT
EnablePrintRoutes: viper.GetBool("debug.print_routes"),
})

View File

@ -0,0 +1,24 @@
package services
import (
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"time"
)
// GetTodayFeedRandomly loads at most limit subscription items ordered by
// publication time, newest first, with each item's feed preloaded and the
// content column omitted.
//
// When cursor is non-nil, only items published strictly before it are
// returned. The slice is returned together with any query error.
//
// NOTE(review): despite the name, the query visible here neither restricts
// results to today nor randomizes their order.
func GetTodayFeedRandomly(limit int, cursor *time.Time) ([]models.SubscriptionItem, error) {
	query := database.C
	if cursor != nil {
		query = query.Where("published_at < ?", *cursor)
	}
	query = query.
		Limit(limit).
		Order("published_at DESC").
		Omit("Content").
		Preload("Feed")

	var items []models.SubscriptionItem
	err := query.Find(&items).Error
	return items, err
}

View File

@ -0,0 +1,223 @@
package services
import (
"context"
"fmt"
"strconv"
"time"
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"github.com/gofiber/fiber/v2"
"github.com/mmcdole/gofeed"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"github.com/sogko/go-wordpress"
"gorm.io/gorm/clause"
)
// FetchFeedTimed runs FetchFeed in non-eager mode. It takes no arguments so
// it can be used where a plain func() is required.
func FetchFeedTimed() {
	const eager = false
	FetchFeed(eager)
}
// FetchFeed scans subscription feeds and upserts the items they yield.
//
// When the first eager value is true, every enabled feed is scanned.
// Otherwise only feeds that are due are selected: those never fetched, or
// whose last fetch is at least pull_interval hours old. Disabled feeds are
// skipped either way. The eager values are forwarded unchanged to
// SubscriptionFeedRead.
//
// For each feed the items are de-duplicated by hash and upserted with the
// hash column as the conflict target. A feed's last_fetched_at is advanced
// only when the feed was read without error and its items were stored, so a
// feed that failed is picked up again on the next run. All failures are
// logged; none of them aborts the remaining feeds.
func FetchFeed(eager ...bool) {
	var feeds []models.SubscriptionFeed
	if len(eager) > 0 && eager[0] {
		if err := database.C.Where("is_enabled = ?", true).Find(&feeds).Error; err != nil {
			log.Warn().Err(err).Msg("An error occurred when fetching feeds.")
			return
		}
	} else {
		if err := database.C.
			Where("last_fetched_at IS NULL OR NOW() >= last_fetched_at + (pull_interval || ' hours')::interval").
			Find(&feeds).Error; err != nil {
			log.Warn().Err(err).Msg("An error occurred when fetching due feeds.")
			return
		}
	}

	log.Info().Int("count", len(feeds)).Msg("Ready to fetch feeds...")

	count := 0
	var scannedFeed []uint
	for _, src := range feeds {
		if !src.IsEnabled {
			continue
		}

		log.Debug().Uint("source", src.ID).Msg("Scanning feed...")
		result, err := SubscriptionFeedRead(src, eager...)
		fetched := err == nil
		if err != nil {
			log.Warn().Err(err).Uint("source", src.ID).Msg("Failed to scan a feed.")
		}

		result = lo.UniqBy(result, func(item models.SubscriptionItem) string {
			return item.Hash
		})

		// Creating from an empty slice is rejected by GORM, so only hit the
		// database when there is something to store.
		if len(result) > 0 {
			if err := database.C.Clauses(clause.OnConflict{
				Columns:   []clause.Column{{Name: "hash"}},
				DoUpdates: clause.AssignmentColumns([]string{"thumbnail", "title", "content", "description", "published_at"}),
			}).Create(&result).Error; err != nil {
				log.Warn().Err(err).Uint("source", src.ID).Msg("Failed to save scanned feed items.")
				continue
			}
		}

		if fetched {
			scannedFeed = append(scannedFeed, src.ID)
		}

		log.Info().Uint("source", src.ID).Int("count", len(result)).Msg("Scanned a feed.")
		count += len(result)
	}

	// Nothing to mark when no feed was scanned successfully.
	if len(scannedFeed) > 0 {
		if err := database.C.
			Model(&models.SubscriptionFeed{}).
			Where("id IN ?", scannedFeed).
			Update("last_fetched_at", time.Now()).Error; err != nil {
			log.Warn().Err(err).Msg("Failed to update the last fetched time of scanned feeds.")
		}
	}

	log.Info().Int("count", count).Msg("Scanned all feeds.")
}
// SubscriptionFeedRead reads the items of src with the reader that matches
// src.Adapter ("wordpress", "webpage" or "feed"), forwarding eager unchanged.
// An unknown adapter yields a nil slice and an error naming the adapter.
func SubscriptionFeedRead(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) {
	type reader func(models.SubscriptionFeed, ...bool) ([]models.SubscriptionItem, error)

	readers := map[string]reader{
		"wordpress": feedReadWordpress,
		"webpage":   feedReadWebpage,
		"feed":      feedReadGuidedFeed,
	}

	read, known := readers[src.Adapter]
	if !known {
		return nil, fmt.Errorf("unsupported feed source type: %s", src.Adapter)
	}
	return read(src, eager...)
}
// feedReadWordpress lists posts from the WordPress API at src.URL and
// converts them into subscription items owned by src.
//
// The first page is always read. When the first eager value is true, the
// following pages are read as well, up to the smaller of the
// X-WP-TotalPages response header and 10. A failure on one of those later
// pages ends pagination and returns the items collected so far with a nil
// error; only a failure on the first page is returned as an error.
//
// A post whose DateGMT cannot be parsed gets the current time as its
// publication time.
func feedReadWordpress(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) {
	toItem := func(post wordpress.Post) models.SubscriptionItem {
		item := models.SubscriptionItem{
			Title:       post.Title.Rendered,
			Description: post.Excerpt.Rendered,
			Content:     post.Content.Rendered,
			URL:         post.Link,
			FeedID:      src.ID,
		}
		if publishedAt, err := time.Parse("2006-01-02T15:04:05", post.DateGMT); err != nil {
			item.PublishedAt = time.Now()
		} else {
			item.PublishedAt = publishedAt
		}
		item.GenHash()
		return item
	}

	client := wordpress.NewClient(&wordpress.Options{BaseAPIURL: src.URL})

	firstPage, resp, _, err := client.Posts().List(nil)
	if err != nil {
		return nil, err
	}

	var items []models.SubscriptionItem
	for _, post := range firstPage {
		items = append(items, toItem(post))
	}

	if len(eager) == 0 || !eager[0] {
		return items, nil
	}

	totalPages, _ := strconv.Atoi(resp.Header.Get("X-WP-TotalPages"))
	for page, last := 2, min(totalPages, 10); page <= last; page++ {
		posts, _, _, err := client.Posts().List(fiber.Map{"page": page})
		if err != nil {
			// Best effort: keep what was already collected.
			return items, nil
		}
		for _, post := range posts {
			items = append(items, toItem(post))
		}
	}

	return items, nil
}
// feedReadGuidedFeed parses the feed at src.URL with gofeed and turns its
// entries into subscription items owned by src.
//
// The feed download is bounded by a 60 second timeout. A download or parse
// failure is returned as an error instead of being ignored, because a failed
// parse leaves no feed to iterate over.
//
// When the first eager value is true every entry is processed; otherwise at
// most the first 100 entries are. An entry without a parsed publication date
// gets the current time.
//
// When src.IsFullContent is set, the entry is stored as provided by the
// feed. Otherwise its link is passed to ScrapSubscriptionItem; entries that
// fail there are logged and skipped.
func feedReadGuidedFeed(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) {
	pgConvert := func(article models.SubscriptionItem) models.SubscriptionItem {
		article.GenHash()
		article.FeedID = src.ID
		return article
	}

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	fp := gofeed.NewParser()
	feed, err := fp.ParseURLWithContext(src.URL, ctx)
	if err != nil {
		return nil, fmt.Errorf("parsing feed %q: %w", src.URL, err)
	}

	// Non-eager runs only look at the first 100 entries, whether or not
	// processing each of them succeeds.
	items := feed.Items
	if isEager := len(eager) > 0 && eager[0]; !isEager && len(items) > 10*10 {
		items = items[:10*10]
	}

	var result []models.SubscriptionItem
	for _, item := range items {
		parent := models.SubscriptionItem{
			URL:         item.Link,
			Title:       item.Title,
			Description: item.Description,
		}
		if item.PublishedParsed != nil {
			parent.PublishedAt = *item.PublishedParsed
		} else {
			parent.PublishedAt = time.Now()
		}
		if item.Image != nil {
			parent.Thumbnail = item.Image.URL
		}

		// A full-content source already carries the whole article in the
		// feed, so the page itself is not scraped.
		// NOTE(review): Content is not copied from the feed entry here;
		// confirm that Description is meant to hold the full text.
		if src.IsFullContent {
			result = append(result, pgConvert(parent))
		} else {
			article, err := ScrapSubscriptionItem(item.Link, parent)
			if err != nil {
				log.Warn().Err(err).Str("url", item.Link).Msg("Failed to scrap a news article...")
				continue
			}
			result = append(result, pgConvert(*article))
		}

		log.Debug().Str("url", item.Link).Msg("Scraped a news article...")
	}

	return result, nil
}
// feedReadWebpage collects items by handing src.URL to ScrapSubscriptionFeed
// and then stamps every returned item with a hash, the owning feed ID and
// the current time as its publication time. It always returns a nil error.
//
// The page limit passed on is 100, or 0 when the first eager value is true.
// NOTE(review): 0 is presumably meant as "no limit", but
// ScrapSubscriptionFeed appears to treat a non-positive limit as "collect
// nothing" - confirm.
func feedReadWebpage(src models.SubscriptionFeed, eager ...bool) ([]models.SubscriptionItem, error) {
	maxPages := 10 * 10
	if len(eager) > 0 && eager[0] {
		maxPages = 0
	}

	items := ScrapSubscriptionFeed(src.URL, maxPages)
	for i := range items {
		item := items[i]
		item.GenHash()
		item.FeedID = src.ID
		item.PublishedAt = time.Now()
		items[i] = item
	}

	return items, nil
}

View File

@ -1,183 +0,0 @@
package services
import (
"context"
"fmt"
"strconv"
"time"
"git.solsynth.dev/hypernet/reader/pkg/internal/database"
"git.solsynth.dev/hypernet/reader/pkg/internal/models"
"github.com/mmcdole/gofeed"
"github.com/rs/zerolog/log"
"github.com/samber/lo"
"github.com/sogko/go-wordpress"
"github.com/spf13/viper"
"gorm.io/gorm/clause"
)
// NewsSources holds the configured news sources. LoadNewsSources fills it
// from the "sources" configuration key.
var NewsSources []models.NewsSource
// LoadNewsSources decodes the "sources" configuration key into NewsSources
// and logs how many sources were loaded. A decoding error is returned as is.
func LoadNewsSources() error {
	err := viper.UnmarshalKey("sources", &NewsSources)
	if err != nil {
		return err
	}

	loaded := len(NewsSources)
	log.Info().Int("count", loaded).Msg("Loaded news sources configuration.")
	return nil
}
// ScanNewsSourcesNoEager runs ScanNewsSources with eager mode switched off.
// It takes no arguments, so it can be handed to a scheduler as a plain func().
func ScanNewsSourcesNoEager() {
	const eager = false
	ScanNewsSources(eager)
}
// ScanNewsSources reads every enabled entry of NewsSources, de-duplicates
// the collected articles by hash and upserts them in a single batch.
//
// The eager values are forwarded to NewsSourceRead so the caller's choice
// actually reaches the source readers. A source that fails to read is logged
// and whatever it returned is still kept. A failure to save is logged.
func ScanNewsSources(eager ...bool) {
	var results []models.NewsArticle
	for _, src := range NewsSources {
		if !src.Enabled {
			continue
		}

		log.Debug().Str("source", src.ID).Msg("Scanning news source...")
		result, err := NewsSourceRead(src, eager...)
		if err != nil {
			log.Warn().Err(err).Str("source", src.ID).Msg("Failed to scan a news source.")
		}
		results = append(results, result...)
		log.Info().Str("source", src.ID).Int("count", len(result)).Msg("Scanned a news sources.")
	}

	log.Info().Int("count", len(results)).Msg("Scanned all news sources.")

	// A single batch must not contain the same article twice.
	results = lo.UniqBy(results, func(item models.NewsArticle) string {
		return item.Hash
	})
	if len(results) == 0 {
		// Nothing to insert; creating from an empty slice only yields an error.
		return
	}

	if err := database.C.Clauses(clause.OnConflict{
		UpdateAll: true,
	}).Create(&results).Error; err != nil {
		log.Warn().Err(err).Msg("Failed to save scanned news articles.")
	}
}
// NewsSourceRead reads the articles of src with the reader that matches
// src.Type ("wordpress", "scrap" or "feed"). Only the wordpress reader
// receives the eager values. An unknown type yields a nil slice and an error
// naming the type.
func NewsSourceRead(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) {
	kind := src.Type
	if kind == "wordpress" {
		return newsSourceReadWordpress(src, eager...)
	}
	if kind == "scrap" {
		return newsSourceReadScrap(src)
	}
	if kind == "feed" {
		return newsSourceReadFeed(src)
	}
	return nil, fmt.Errorf("unsupported news source type: %s", src.Type)
}
// newsSourceReadWordpress lists posts from the WordPress API at src.Source
// and converts them into news articles attributed to src.
//
// The first page is always read. When the first eager value is true, the
// following pages are requested by page number, up to the smaller of the
// X-WP-TotalPages response header and src.Depth. A failure on one of those
// later pages ends pagination and returns the articles collected so far with
// a nil error; only a failure on the first page is returned as an error.
//
// A post whose DateGMT cannot be parsed is left without a publication time.
func newsSourceReadWordpress(src models.NewsSource, eager ...bool) ([]models.NewsArticle, error) {
	wpConvert := func(post wordpress.Post) models.NewsArticle {
		article := models.NewsArticle{
			Title:       post.Title.Rendered,
			Description: post.Excerpt.Rendered,
			Content:     post.Content.Rendered,
			URL:         post.Link,
			Source:      src.ID,
		}
		// Named publishedAt so the time package is not shadowed.
		if publishedAt, err := time.Parse("2006-01-02T15:04:05", post.DateGMT); err == nil {
			article.PublishedAt = &publishedAt
		}
		article.GenHash()
		return article
	}

	client := wordpress.NewClient(&wordpress.Options{
		BaseAPIURL: src.Source,
	})

	posts, resp, _, err := client.Posts().List(nil)
	if err != nil {
		return nil, err
	}

	var result []models.NewsArticle
	for _, post := range posts {
		result = append(result, wpConvert(post))
	}

	if len(eager) > 0 && eager[0] {
		totalPagesRaw := resp.Header.Get("X-WP-TotalPages")
		totalPages, _ := strconv.Atoi(totalPagesRaw)
		depth := min(totalPages, src.Depth)
		for page := 2; page <= depth; page++ {
			// Ask for the specific page; without the parameter the API
			// returns the first page again on every iteration.
			posts, _, _, err := client.Posts().List(map[string]any{
				"page": page,
			})
			if err != nil {
				// Best effort: keep what was already collected.
				return result, nil
			}
			for _, post := range posts {
				result = append(result, wpConvert(post))
			}
		}
	}

	return result, nil
}
// newsSourceReadFeed parses the feed at src.Source with gofeed and turns its
// entries into news articles attributed to src.
//
// The feed download is bounded by a 60 second timeout. A download or parse
// failure is returned as an error instead of being ignored, because a failed
// parse leaves no feed to iterate over.
//
// Entries that carry content are stored as provided. Entries without content
// have their link passed to ScrapNews; entries that fail there are logged
// and skipped.
func newsSourceReadFeed(src models.NewsSource) ([]models.NewsArticle, error) {
	pgConvert := func(article models.NewsArticle) models.NewsArticle {
		article.GenHash()
		article.Source = src.ID
		return article
	}

	ctx, cancel := context.WithTimeout(context.Background(), 60*time.Second)
	defer cancel()

	fp := gofeed.NewParser()
	feed, err := fp.ParseURLWithContext(src.Source, ctx)
	if err != nil {
		return nil, fmt.Errorf("parsing feed %q: %w", src.Source, err)
	}

	var result []models.NewsArticle
	for _, item := range feed.Items {
		parent := models.NewsArticle{
			URL:         item.Link,
			Title:       item.Title,
			Description: item.Description,
		}
		if item.PublishedParsed != nil {
			parent.PublishedAt = item.PublishedParsed
		}
		if item.Image != nil {
			parent.Thumbnail = item.Image.URL
		}

		if len(item.Content) > 0 {
			// The feed already provides the content, so skip scraping.
			parent.Content = item.Content
			result = append(result, pgConvert(parent))
		} else {
			article, err := ScrapNews(item.Link, parent)
			if err != nil {
				log.Warn().Err(err).Str("url", item.Link).Msg("Failed to scrap a news article...")
				continue
			}
			result = append(result, pgConvert(*article))
		}

		log.Debug().Str("url", item.Link).Msg("Scraped a news article...")
	}

	return result, nil
}
// newsSourceReadScrap collects articles by handing src.Source to
// ScrapNewsIndex and then stamps every returned article with a hash and the
// ID of src. It always returns a nil error.
func newsSourceReadScrap(src models.NewsSource) ([]models.NewsArticle, error) {
	articles := ScrapNewsIndex(src.Source)
	for i := range articles {
		article := articles[i]
		article.GenHash()
		article.Source = src.ID
		articles[i] = article
	}
	return articles, nil
}

View File

@ -128,7 +128,7 @@ func ScrapLink(target string) (*models.LinkMeta, error) {
const ScrapNewsDefaultUA = "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/18.1.1 Safari/605.1.15"
func ScrapNewsIndex(target string) []models.NewsArticle {
func ScrapSubscriptionFeed(target string, maxPages ...int) []models.SubscriptionItem {
parsedTarget, err := url.Parse(target)
if err != nil {
return nil
@ -140,6 +140,11 @@ func ScrapNewsIndex(target string) []models.NewsArticle {
ua = ScrapNewsDefaultUA
}
var limit int
if len(maxPages) > 0 && maxPages[0] > 0 {
limit = maxPages[0]
}
c := colly.NewCollector(
colly.UserAgent(ua),
colly.MaxDepth(3),
@ -157,9 +162,13 @@ func ScrapNewsIndex(target string) []models.NewsArticle {
ExpectContinueTimeout: 1 * time.Second,
})
var result []models.NewsArticle
var result []models.SubscriptionItem
c.OnHTML("main a", func(e *colly.HTMLElement) {
if limit <= 0 {
return
}
url := e.Attr("href")
if strings.HasPrefix(url, "#") || strings.HasPrefix(url, "javascript:") || strings.HasPrefix(url, "mailto:") {
return
@ -168,7 +177,8 @@ func ScrapNewsIndex(target string) []models.NewsArticle {
url = fmt.Sprintf("%s%s", baseUrl, url)
}
article, err := ScrapNews(url)
limit--
article, err := ScrapSubscriptionItem(url)
if err != nil {
log.Warn().Err(err).Str("url", url).Msg("Failed to scrap a news article...")
return
@ -180,12 +190,12 @@ func ScrapNewsIndex(target string) []models.NewsArticle {
}
})
c.Visit(target)
_ = c.Visit(target)
return result
}
func ScrapNews(target string, parent ...models.NewsArticle) (*models.NewsArticle, error) {
func ScrapSubscriptionItem(target string, parent ...models.SubscriptionItem) (*models.SubscriptionItem, error) {
ua := viper.GetString("scraper.news_ua")
if len(ua) == 0 {
ua = ScrapNewsDefaultUA
@ -208,7 +218,7 @@ func ScrapNews(target string, parent ...models.NewsArticle) (*models.NewsArticle
ExpectContinueTimeout: 1 * time.Second,
})
article := &models.NewsArticle{
article := &models.SubscriptionItem{
URL: target,
}
@ -239,7 +249,11 @@ func ScrapNews(target string, parent ...models.NewsArticle) (*models.NewsArticle
})
c.OnHTML("article img", func(e *colly.HTMLElement) {
if len(article.Thumbnail) == 0 {
article.Thumbnail = e.Attr("src")
url := e.Attr("src")
// Usually, if the image have a relative path, it is some static assets instead of content.
if strings.HasPrefix(url, "http") {
article.Thumbnail = url
}
}
})

View File

@ -72,15 +72,10 @@ func main() {
log.Fatal().Err(err).Msg("An error occurred when initializing cache.")
}
// Load news sources
if err := services.LoadNewsSources(); err != nil {
log.Fatal().Err(err).Msg("An error occurred when loading news sources.")
}
// Configure timed tasks
quartz := cron.New(cron.WithLogger(cron.VerbosePrintfLogger(&log.Logger)))
quartz.AddFunc("@every 60m", services.DoAutoDatabaseCleanup)
quartz.AddFunc("@midnight", services.ScanNewsSourcesNoEager)
quartz.AddFunc("@every 60m", services.FetchFeedTimed)
quartz.Start()
// Server