From ab0a87106b3c7e7bc2a2c74d0da8b29a4846d324 Mon Sep 17 00:00:00 2001 From: LittleSheep Date: Sat, 25 Jan 2025 22:05:38 +0800 Subject: [PATCH] :sparkles: Read news basis :sparkles: Able to read WordPress sites. BTW, the 10-year-old package still works properly, amazing... It means the WordPress API did not change a lot and Golang's backward compatibility is amazing! --- go.mod | 16 +++-- go.sum | 40 +++++++++---- pkg/internal/database/migrator.go | 1 + pkg/internal/models/new.go | 32 ++++++++++ pkg/internal/models/source.go | 8 +++ pkg/internal/server/api/index.go | 1 + pkg/internal/server/api/well_known_api.go | 10 ++++ pkg/internal/services/reader.go | 71 +++++++++++++++++++++++ pkg/internal/services/scraper.go | 10 +++- pkg/main.go | 6 ++ settings.toml | 9 +++ 11 files changed, 185 insertions(+), 19 deletions(-) create mode 100644 pkg/internal/models/new.go create mode 100644 pkg/internal/models/source.go create mode 100644 pkg/internal/server/api/well_known_api.go create mode 100644 pkg/internal/services/reader.go diff --git a/go.mod b/go.mod index 4962617..f71fa64 100644 --- a/go.mod +++ b/go.mod @@ -15,6 +15,7 @@ require ( github.com/robfig/cron/v3 v3.0.1 github.com/rs/zerolog v1.33.0 github.com/samber/lo v1.47.0 + github.com/sogko/go-wordpress v0.0.0-20160322054548-0f4f3dc4231f github.com/spf13/viper v1.19.0 google.golang.org/grpc v1.67.1 gorm.io/driver/postgres v1.5.9 @@ -31,6 +32,7 @@ require ( github.com/antchfx/xpath v1.3.2 // indirect github.com/cespare/xxhash/v2 v2.3.0 // indirect github.com/dustin/go-humanize v1.0.1 // indirect + github.com/elazarl/goproxy v1.6.1 // indirect github.com/fsnotify/fsnotify v1.8.0 // indirect github.com/gabriel-vasile/mimetype v1.4.3 // indirect github.com/go-playground/locales v0.14.1 // indirect @@ -61,6 +63,8 @@ require ( github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect github.com/modern-go/reflect2 v1.0.2 // indirect + 
github.com/moul/http2curl v1.0.0 // indirect + github.com/parnurzeal/gorequest v0.3.0 // indirect github.com/pelletier/go-toml/v2 v2.2.3 // indirect github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c // indirect github.com/pkg/errors v0.9.1 // indirect @@ -68,10 +72,12 @@ require ( github.com/sagikazarmark/locafero v0.6.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d // indirect + github.com/smartystreets/goconvey v1.8.1 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.7.0 // indirect github.com/spf13/pflag v1.0.5 // indirect + github.com/stretchr/testify v1.10.0 // indirect github.com/subosito/gotenv v1.6.0 // indirect github.com/temoto/robotstxt v1.1.2 // indirect github.com/tinylib/msgp v1.2.4 // indirect @@ -79,12 +85,12 @@ require ( github.com/valyala/fasthttp v1.57.0 // indirect github.com/valyala/tcplisten v1.0.0 // indirect go.uber.org/multierr v1.11.0 // indirect - golang.org/x/crypto v0.28.0 // indirect + golang.org/x/crypto v0.32.0 // indirect golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c // indirect - golang.org/x/net v0.30.0 // indirect - golang.org/x/sync v0.8.0 // indirect - golang.org/x/sys v0.26.0 // indirect - golang.org/x/text v0.19.0 // indirect + golang.org/x/net v0.34.0 // indirect + golang.org/x/sync v0.10.0 // indirect + golang.org/x/sys v0.29.0 // indirect + golang.org/x/text v0.21.0 // indirect google.golang.org/appengine v1.6.8 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20241021214115-324edc3d5d38 // indirect google.golang.org/protobuf v1.35.1 // indirect diff --git a/go.sum b/go.sum index f9d2067..8eda6a2 100644 --- a/go.sum +++ b/go.sum @@ -33,6 +33,8 @@ github.com/eko/gocache/lib/v4 v4.1.6 h1:5WWIGISKhE7mfkyF+SJyWwqa4Dp2mkdX8QsZpnEN github.com/eko/gocache/lib/v4 v4.1.6/go.mod h1:HFxC8IiG2WeRotg09xEnPD72sCheJiTSr4Li5Ameg7g= 
github.com/eko/gocache/store/ristretto/v4 v4.2.2 h1:lXFzoZ5ck6Gy6ON7f5DHSkNt122qN7KoroCVgVwF7oo= github.com/eko/gocache/store/ristretto/v4 v4.2.2/go.mod h1:uIvBVJzqRepr5L0RsbkfQ2iYfbyos2fuji/s4yM+aUM= +github.com/elazarl/goproxy v1.6.1 h1:DsmEhraeCqxza89DlZtMLCwAV5doVPLgheg5OdP4i0M= +github.com/elazarl/goproxy v1.6.1/go.mod h1:X/5W/t+gzDyLfHW4DrMdpjqYjpXsURlBt9lpBDxZZZQ= github.com/fatih/color v1.18.0 h1:S8gINlzdQ840/4pfAwic/ZE0djQEH3wM94VfqLTZcOM= github.com/fatih/color v1.18.0/go.mod h1:4FelSpRwEGDpQ12mAdzqdOukCy4u8WUtOY6lkT/6HfU= github.com/frankban/quicktest v1.14.6 h1:7Xjx+VpznH+oBnejlPUj8oUpdxnVs4f8XU8WnHkI4W8= @@ -84,6 +86,8 @@ github.com/google/go-cmp v0.6.0/go.mod h1:17dUlkBOakJ0+DkrSSNjCkIjxS6bF9zb3elmeN github.com/google/gofuzz v1.0.0/go.mod h1:dBl0BpW6vV/+mYPU4Po3pmUjxk6FQPldtuIdl/M65Eg= github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= +github.com/gopherjs/gopherjs v1.17.2 h1:fQnZVsXk8uxXIStYb0N4bGk7jeyTalG/wsZjQ25dO0g= +github.com/gopherjs/gopherjs v1.17.2/go.mod h1:pRRIvn/QzFLrKfvEz3qUuEhtE/zLCWfreZ6J5gM2i+k= github.com/hashicorp/hcl v1.0.0 h1:0Anlzjpi4vEasTeNFn2mLJgTSwt0+6sfsiTG8qcWGx4= github.com/hashicorp/hcl v1.0.0/go.mod h1:E5yfLk+7swimpb2L/Alb/PJmXilQ/rhwaUYs4T20WEQ= github.com/jackc/pgpassfile v1.0.0 h1:/6Hmqy13Ss2zCq62VdNG8tM1wchn8zjSGOBJ6icpsIM= @@ -100,6 +104,8 @@ github.com/jinzhu/now v1.1.5 h1:/o9tlHleP7gOFmsnYNz3RGnqzefHA47wQpKrrdTIwXQ= github.com/jinzhu/now v1.1.5/go.mod h1:d3SSVoowX0Lcu0IBviAWJpolVfI5UJVZZ7cO71lE/z8= github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= +github.com/jtolds/gls v4.20.0+incompatible h1:xdiiI2gbIgH/gLH7ADydsJ1uDOEzR8yvV7C0MuV77Wo= +github.com/jtolds/gls v4.20.0+incompatible/go.mod h1:QJZ7F/aHp+rZTRtaJ1ow/lLfFfVYBRgL+9YlvaHOwJU= github.com/kennygrant/sanitize v1.2.4 
h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o= github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak= github.com/klauspost/compress v1.17.11 h1:In6xLpyWOi1+C7tXUUWv2ot1QvBjxevKAaI6IXrJmUc= @@ -131,6 +137,10 @@ github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd h1:TRLaZ9cD/w github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd/go.mod h1:6dJC0mAP4ikYIbvyc7fijjWJddQyLn8Ig3JB5CqoB9Q= github.com/modern-go/reflect2 v1.0.2 h1:xBagoLtFs94CBntxluKeaWgTMpvLxC4ur3nMaC9Gz0M= github.com/modern-go/reflect2 v1.0.2/go.mod h1:yWuevngMOJpCy52FWWMvUC8ws7m/LJsjYzDa0/r8luk= +github.com/moul/http2curl v1.0.0 h1:dRMWoAtb+ePxMlLkrCbAqh4TlPHXvoGUSQ323/9Zahs= +github.com/moul/http2curl v1.0.0/go.mod h1:8UbvGypXm98wA/IqH45anm5Y2Z6ep6O31QGOAZ3H0fQ= +github.com/parnurzeal/gorequest v0.3.0 h1:SoFyqCDC9COr1xuS6VA8fC8RU7XyrJZN2ona1kEX7FI= +github.com/parnurzeal/gorequest v0.3.0/go.mod h1:3Kh2QUMJoqw3icWAecsyzkpY7UzRfDhbRdTjtNwNiUE= github.com/pelletier/go-toml/v2 v2.2.3 h1:YmeHyLY8mFWbdkNWwpr+qIL2bEqT0o95WSdkNHvL12M= github.com/pelletier/go-toml/v2 v2.2.3/go.mod h1:MfCQTFTvCcUyyvvwm1+G6H/jORL20Xlb6rzQu9GuUkc= github.com/philhofer/fwd v1.1.3-0.20240916144458-20a13a1f6b7c h1:dAMKvw0MlJT1GshSTtih8C2gDs04w8dReiOGXrGLNoY= @@ -158,6 +168,12 @@ github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d h1:hrujxIzL1woJ7 github.com/saintfish/chardet v0.0.0-20230101081208-5e3ef4b5456d/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU= github.com/samber/lo v1.47.0 h1:z7RynLwP5nbyRscyvcD043DWYoOcYRv3mV8lBeqOCLc= github.com/samber/lo v1.47.0/go.mod h1:RmDH9Ct32Qy3gduHQuKJ3gW1fMHAnE/fAzQuf6He5cU= +github.com/smarty/assertions v1.15.0 h1:cR//PqUBUiQRakZWqBiFFQ9wb8emQGDb0HeGdqGByCY= +github.com/smarty/assertions v1.15.0/go.mod h1:yABtdzeQs6l1brC900WlRNwj6ZR55d7B+E8C6HtKdec= +github.com/smartystreets/goconvey v1.8.1 h1:qGjIddxOk4grTu9JPOU31tVfq3cNdBlNa5sSznIX1xY= +github.com/smartystreets/goconvey v1.8.1/go.mod 
h1:+/u4qLyY6x1jReYOp7GOM2FSt8aP9CzCZL03bI28W60= +github.com/sogko/go-wordpress v0.0.0-20160322054548-0f4f3dc4231f h1:MTAb/+stQKJAJXLLw6gyo5wtPtLXxe8qxHiMWycqKaM= +github.com/sogko/go-wordpress v0.0.0-20160322054548-0f4f3dc4231f/go.mod h1:zfVc6/o4xmmkLFwipQhABlabqEdUBsTNkNN03oRBIFw= github.com/sourcegraph/conc v0.3.0 h1:OQTbbt6P72L20UqAkXXuLOj79LfEanQ+YQFNpLA9ySo= github.com/sourcegraph/conc v0.3.0/go.mod h1:Sdozi7LEKbFPqYX2/J+iBAM6HpqSLTASQIKqDmF7Mt0= github.com/spf13/afero v1.11.0 h1:WJQKhtpdm3v2IzqG8VMqrr6Rf3UYpEF239Jy9wNepM8= @@ -172,8 +188,8 @@ github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+ github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI= github.com/stretchr/testify v1.4.0/go.mod h1:j7eGeouHqKxXV5pUuKE4zz7dFj8WfuZ+81PSLYec5m4= github.com/stretchr/testify v1.7.0/go.mod h1:6Fq8oRcR53rry900zMqJjRRixrwX3KX962/h/Wwjteg= -github.com/stretchr/testify v1.9.0 h1:HtqpIVDClZ4nwg75+f6Lvsy/wHu+3BoSGCbBAcpTsTg= -github.com/stretchr/testify v1.9.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= +github.com/stretchr/testify v1.10.0 h1:Xv5erBjTwe/5IxqUQTdXv5kgmIvbHo3QQyRwhJsOfJA= +github.com/stretchr/testify v1.10.0/go.mod h1:r2ic/lqez/lEtzL7wO/rwa5dbSLXVDPFyf8C91i36aY= github.com/subosito/gotenv v1.6.0 h1:9NlTDc1FTs4qu0DDq7AEtTPNw6SVm7uBMsUCUjABIf8= github.com/subosito/gotenv v1.6.0/go.mod h1:Dk4QP5c2W3ibzajGcXpNraDfq2IrhjMIvMSWPKKo0FU= github.com/temoto/robotstxt v1.1.2 h1:W2pOjSJ6SWvldyEuiFXNxz3xZ8aiWX5LbfDiOFd7Fxg= @@ -195,8 +211,8 @@ go.uber.org/multierr v1.11.0/go.mod h1:20+QtiLqy0Nd6FdQB9TLXag12DsQkrbs3htMFfDN8 golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w= golang.org/x/crypto v0.0.0-20191011191535-87dc89f01550/go.mod h1:yigFU9vqHzYiE8UmvKecakEJjdnWj3jj499lnFckfCI= golang.org/x/crypto v0.0.0-20210921155107-089bfa567519/go.mod h1:GvvjBRRGRdwPK5ydBHafDWAxML/pGHZbMvKqRZ5+Abc= -golang.org/x/crypto v0.28.0 
h1:GBDwsMXVQi34v5CCYUm2jkJvu4cbtru2U4TN2PSyQnw= -golang.org/x/crypto v0.28.0/go.mod h1:rmgy+3RHxRZMyY0jjAJShp2zgEdOqj2AO7U0pYmeQ7U= +golang.org/x/crypto v0.32.0 h1:euUpcYgM8WcP71gNpTqQCn6rC2t6ULUPiOzfWaXVVfc= +golang.org/x/crypto v0.32.0/go.mod h1:ZnnJkOaASj8g0AjIduWNlq2NRxL0PlBrbKVyZ6V/Ugc= golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c h1:7dEasQXItcW1xKJ2+gg5VOiBnqWrJc+rq0DPKyvvdbY= golang.org/x/exp v0.0.0-20241009180824-f66d83c29e7c/go.mod h1:NQtJDoLvd6faHhE7m4T/1IY708gDefGGjR/iUW8yQQ8= golang.org/x/mod v0.4.2/go.mod h1:s0Qsj1ACt9ePp/hMypM3fl4fZqREWJwdYDEqhRiZZUA= @@ -210,14 +226,14 @@ golang.org/x/net v0.0.0-20220722155237-a158d28d115b/go.mod h1:XRhObCWvk6IyKnWLug golang.org/x/net v0.6.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.7.0/go.mod h1:2Tu9+aMcznHK/AK1HMvgo6xiTLG5rD5rZLDS+rp2Bjs= golang.org/x/net v0.9.0/go.mod h1:d48xBJpPfHeWQsugry2m+kC02ZBRGRgulfHnEXEuWns= -golang.org/x/net v0.30.0 h1:AcW1SDZMkb8IpzCdQUaIq2sP4sZ4zw+55h6ynffypl4= -golang.org/x/net v0.30.0/go.mod h1:2wGyMJ5iFasEhkwi13ChkO/t1ECNC4X4eBKkVFyYFlU= +golang.org/x/net v0.34.0 h1:Mb7Mrk043xzHgnRM88suvJFwzVrRfHEHJEl5/71CKw0= +golang.org/x/net v0.34.0/go.mod h1:di0qlW3YNM5oh6GqDGQr92MyTozJPmybPK4Ev/Gm31k= golang.org/x/sync v0.0.0-20190423024810-112230192c58/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20210220032951-036812b2e83c/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.0.0-20220722155255-886fb9371eb4/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= golang.org/x/sync v0.1.0/go.mod h1:RxMgew5VJxzue5/jJTE5uejpjVlOe/izrB70Jof72aM= -golang.org/x/sync v0.8.0 h1:3NFvSEYkUoMifnESzZl15y791HH1qU2xm6eCJU5ZPXQ= -golang.org/x/sync v0.8.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= +golang.org/x/sync v0.10.0 h1:3NQrjDixjgGwUOCaF8w2+VYHv0Ve/vGYSbdkTa98gmQ= +golang.org/x/sync v0.10.0/go.mod h1:Czt+wKu1gCyEFDUtn0jG5QVvpJ6rzVqr5aXyt9drQfk= golang.org/x/sys 
v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY= golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= golang.org/x/sys v0.0.0-20201119102817-f84b799fce68/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs= @@ -232,8 +248,8 @@ golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.7.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= -golang.org/x/sys v0.26.0 h1:KHjCJyddX0LoSTb3J+vWpupP9p0oznkqVk/IfjymZbo= -golang.org/x/sys v0.26.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.29.0 h1:TPYlXGxvx1MGTn2GiZDhnjPA9wZzZeGKHHmKhHYvgaU= +golang.org/x/sys v0.29.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.5.0/go.mod h1:jMB1sMXY+tzblOD4FWmEbocvup2/aLOaQEp7JmGp78k= @@ -244,8 +260,8 @@ golang.org/x/text v0.3.7/go.mod h1:u+2+/6zg+i71rQMx5EYifcz6MCKuco9NR6JIITiCfzQ= golang.org/x/text v0.3.8/go.mod h1:E6s5w1FMmriuDzIBO73fBruAKo1PCIq6d2Q6DHfQ8WQ= golang.org/x/text v0.7.0/go.mod h1:mrYo+phRRbMaCq/xk9113O4dZlRixOauAjOtrjsXDZ8= golang.org/x/text v0.9.0/go.mod h1:e1OnstbJyHTd6l/uOt8jFFHp6TRDWZR/bV3emEE/zU8= -golang.org/x/text v0.19.0 h1:kTxAhCbGbxhK0IwgSKiMO5awPoDQ0RpfiVYBfK860YM= -golang.org/x/text v0.19.0/go.mod h1:BuEKDfySbSR4drPmRPG/7iBdf8hvFMuRexcpahXilzY= +golang.org/x/text v0.21.0 h1:zyQAAkrwaneQ066sspRyJaG9VNi/YJ1NfzcGB3hZ/qo= +golang.org/x/text v0.21.0/go.mod h1:4IBbMaMmOPCJ8SecivzSH54+73PCFmPWxNTLm+vZkEQ= golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod 
h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ= golang.org/x/tools v0.0.0-20191119224855-298f0cb1881e/go.mod h1:b+2E5dAYhXwXZwtnZ6UAqBI28+e2cm9otk0dWdXHAEo= golang.org/x/tools v0.1.1/go.mod h1:o0xws9oXOQQZyjljx8fwUC0k7L1pTE6eaCbjGeHmOkk= diff --git a/pkg/internal/database/migrator.go b/pkg/internal/database/migrator.go index 51e88ed..61fe9a4 100644 --- a/pkg/internal/database/migrator.go +++ b/pkg/internal/database/migrator.go @@ -7,6 +7,7 @@ import ( var AutoMaintainRange = []any{ &models.LinkMeta{}, + &models.NewsArticle{}, } func RunMigration(source *gorm.DB) error { diff --git a/pkg/internal/models/new.go b/pkg/internal/models/new.go new file mode 100644 index 0000000..2bec671 --- /dev/null +++ b/pkg/internal/models/new.go @@ -0,0 +1,32 @@ +package models + +import ( + "crypto/md5" + "encoding/hex" + + "git.solsynth.dev/hypernet/nexus/pkg/nex/cruda" + "github.com/google/uuid" +) + +type NewsArticle struct { + cruda.BaseModel + + Thumbnail string `json:"thumbnail"` + Title string `json:"title"` + Description string `json:"description"` + Content string `json:"content"` + URL string `json:"url"` + Hash string `json:"hash" gorm:"uniqueIndex"` + Source string `json:"source"` +} + +func (v *NewsArticle) GenHash() *NewsArticle { + if len(v.URL) == 0 { + v.Hash = uuid.NewString() + return v + } + + hash := md5.Sum([]byte(v.URL)) + v.Hash = hex.EncodeToString(hash[:]) + return v +} diff --git a/pkg/internal/models/source.go b/pkg/internal/models/source.go new file mode 100644 index 0000000..7edcc79 --- /dev/null +++ b/pkg/internal/models/source.go @@ -0,0 +1,8 @@ +package models + +type NewsSource struct { + ID string `json:"id"` + Label string `json:"label"` + Type string `json:"type"` + Source string `json:"source"` +} diff --git a/pkg/internal/server/api/index.go b/pkg/internal/server/api/index.go index 7f46f51..b6bbbb3 100644 --- a/pkg/internal/server/api/index.go +++ b/pkg/internal/server/api/index.go @@ -7,6 +7,7 @@ import ( func MapAPIs(app *fiber.App, baseURL 
string) { api := app.Group(baseURL).Name("API") { + api.Get("/well-known/sources", getNewsSources) api.Get("/link/*", getLinkMeta) } } diff --git a/pkg/internal/server/api/well_known_api.go b/pkg/internal/server/api/well_known_api.go new file mode 100644 index 0000000..693091d --- /dev/null +++ b/pkg/internal/server/api/well_known_api.go @@ -0,0 +1,10 @@ +package api + +import ( + "git.solsynth.dev/hypernet/reader/pkg/internal/services" + "github.com/gofiber/fiber/v2" +) + +func getNewsSources(c *fiber.Ctx) error { + return c.JSON(services.NewsSources) +} diff --git a/pkg/internal/services/reader.go b/pkg/internal/services/reader.go new file mode 100644 index 0000000..dd9aa9d --- /dev/null +++ b/pkg/internal/services/reader.go @@ -0,0 +1,71 @@ +package services + +import ( + "fmt" + + "git.solsynth.dev/hypernet/reader/pkg/internal/database" + "git.solsynth.dev/hypernet/reader/pkg/internal/models" + "github.com/rs/zerolog/log" + "github.com/sogko/go-wordpress" + "github.com/spf13/viper" +) + +var NewsSources []models.NewsSource + +func LoadNewsSources() error { + if err := viper.UnmarshalKey("sources", &NewsSources); err != nil { + return err + } + log.Info().Int("count", len(NewsSources)).Msg("Loaded news sources configuration.") + return nil +} + +func ScanNewsSources() { + var results []models.NewsArticle + for _, src := range NewsSources { + log.Debug().Str("source", src.ID).Msg("Scanning news source...") + result, err := NewsSourceRead(src) + if err != nil { + log.Warn().Err(err).Str("source", src.ID).Msg("Failed to scan a news source.") + } + results = append(results, result...) 
+ log.Info().Str("source", src.ID).Int("count", len(result)).Msg("Scanned a news sources.") + } + log.Info().Int("count", len(results)).Msg("Scanned all news sources.") + database.C.Save(&results) +} + +func NewsSourceRead(src models.NewsSource) ([]models.NewsArticle, error) { + switch src.Type { + case "wordpress": + return newsSourceReadWordpress(src) + default: + return nil, fmt.Errorf("unsupported news source type: %s", src.Type) + } +} + +func newsSourceReadWordpress(src models.NewsSource) ([]models.NewsArticle, error) { + client := wordpress.NewClient(&wordpress.Options{ + BaseAPIURL: src.Source, + }) + + posts, _, _, err := client.Posts().List(nil) + if err != nil { + return nil, err + } + + var result []models.NewsArticle + for _, post := range posts { + article := &models.NewsArticle{ + Title: post.Title.Rendered, + Description: post.Excerpt.Rendered, + Content: post.Content.Rendered, + URL: post.Link, + Source: src.ID, + } + article.GenHash() + result = append(result, *article) + } + + return result, nil +} diff --git a/pkg/internal/services/scraper.go b/pkg/internal/services/scraper.go index 1727a03..e772e06 100644 --- a/pkg/internal/services/scraper.go +++ b/pkg/internal/services/scraper.go @@ -12,10 +12,11 @@ import ( "github.com/gocolly/colly" "github.com/rs/zerolog/log" "github.com/samber/lo" + "github.com/spf13/viper" ) // We have to set the User-Agent to this so the sites will respond with opengraph data -const ScrapLinkUserAgent = "facebookexternalhit/1.1" +const ScrapLinkDefaultUA = "facebookexternalhit/1.1" func GetLinkMetaFromCache(target string) (models.LinkMeta, error) { hash := md5.Sum([]byte(target)) @@ -40,8 +41,13 @@ func ScrapLink(target string) (*models.LinkMeta, error) { return &cache, nil } + ua := viper.GetString("scraper.expand_ua") + if len(ua) == 0 { + ua = ScrapLinkDefaultUA + } + c := colly.NewCollector( - colly.UserAgent(ScrapLinkUserAgent), + colly.UserAgent(ua), colly.MaxDepth(3), ) diff --git a/pkg/main.go b/pkg/main.go 
index afb79ed..90b06a2 100644 --- a/pkg/main.go +++ b/pkg/main.go @@ -72,9 +72,15 @@ func main() { log.Fatal().Err(err).Msg("An error occurred when initializing cache.") } + // Load news sources + if err := services.LoadNewsSources(); err != nil { + log.Fatal().Err(err).Msg("An error occurred when loading news sources.") + } + // Configure timed tasks quartz := cron.New(cron.WithLogger(cron.VerbosePrintfLogger(&log.Logger))) quartz.AddFunc("@every 60m", services.DoAutoDatabaseCleanup) + quartz.AddFunc("@midnight", services.ScanNewsSources) quartz.Start() // Server diff --git a/settings.toml b/settings.toml index 8460c57..2434443 100644 --- a/settings.toml +++ b/settings.toml @@ -5,9 +5,18 @@ grpc_bind = "0.0.0.0:7008" nexus_addr = "localhost:7001" +[[sources]] +id = "shadiao" +label = "Shadiao Plus" +type = "wordpress" +source = "https://shadiao.plus/wp-json/wp/v2" + [debug] database = true print_routes = false [security] internal_public_key = "keys/internal_public_key.pem" + +[scraper] +expand_ua = "facebookexternalhit/1.1"