From 84c3b16735cd0680d03d384f3eed188d156648db Mon Sep 17 00:00:00 2001 From: LittleSheep Date: Mon, 29 Jul 2024 14:41:28 +0800 Subject: [PATCH] :sparkles: Background scan for unanalyzed files --- go.mod | 6 +- go.sum | 12 +++ pkg/internal/services/analyzer.go | 117 ++++++++++++++++++++++-------- pkg/main.go | 2 + 4 files changed, 104 insertions(+), 33 deletions(-) diff --git a/go.mod b/go.mod index 60b76e5..0c2e829 100644 --- a/go.mod +++ b/go.mod @@ -48,6 +48,7 @@ require ( github.com/jinzhu/inflection v1.0.0 // indirect github.com/jinzhu/now v1.1.5 // indirect github.com/jpillora/backoff v1.0.0 // indirect + github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 // indirect github.com/klauspost/compress v1.17.8 // indirect github.com/klauspost/cpuid/v2 v2.2.7 // indirect github.com/leodido/go-urn v1.2.4 // indirect @@ -57,6 +58,7 @@ require ( github.com/mattn/go-runewidth v0.0.15 // indirect github.com/mbobakov/grpc-consul-resolver v1.5.3 // indirect github.com/minio/md5-simd v1.1.2 // indirect + github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db // indirect github.com/mitchellh/go-homedir v1.1.0 // indirect github.com/mitchellh/mapstructure v1.5.0 // indirect github.com/modern-go/concurrent v0.0.0-20180306012644-bacd9c7ef1dd // indirect @@ -68,6 +70,7 @@ require ( github.com/rs/xid v1.5.0 // indirect github.com/sagikazarmark/locafero v0.4.0 // indirect github.com/sagikazarmark/slog-shim v0.1.0 // indirect + github.com/schollz/progressbar/v3 v3.14.4 // indirect github.com/sourcegraph/conc v0.3.0 // indirect github.com/spf13/afero v1.11.0 // indirect github.com/spf13/cast v1.6.0 // indirect @@ -82,7 +85,8 @@ require ( golang.org/x/exp v0.0.0-20240613232115-7f521ea00fb8 // indirect golang.org/x/net v0.26.0 // indirect golang.org/x/sync v0.7.0 // indirect - golang.org/x/sys v0.21.0 // indirect + golang.org/x/sys v0.22.0 // indirect + golang.org/x/term v0.22.0 // indirect golang.org/x/text v0.16.0 // indirect google.golang.org/genproto/googleapis/rpc v0.0.0-20240604185151-ef581f913117 // indirect google.golang.org/protobuf v1.34.2 // indirect diff --git a/go.sum b/go.sum index 7785ba8..24cb8bb 100644 --- a/go.sum +++ b/go.sum @@ -146,6 +146,8 @@ github.com/json-iterator/go v1.1.9/go.mod h1:KdQUCv79m/52Kvf8AW2vK1V8akMuk1QjK/u github.com/json-iterator/go v1.1.12 h1:PV8peI4a0ysnczrg+LtxykD8LfKY9ML6u2jnxaEnrnM= github.com/json-iterator/go v1.1.12/go.mod h1:e30LSqwooZae/UwlEbR2852Gd8hjQvJoHmT4TnhNGBo= github.com/julienschmidt/httprouter v1.2.0/go.mod h1:SYymIcj16QtmaHHD7aYtjjsJG7VTCxuUUipMqKk8s4w= +github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213 h1:qGQQKEcAR99REcMpsXCp3lJ03zYT1PkRd3kQGPn9GVg= +github.com/k0kubun/go-ansi v0.0.0-20180517002512-3bf9e2903213/go.mod h1:vNUNkEQ1e29fT/6vq2aBdFsgNPmy8qMdSay1npru+Sw= github.com/klauspost/compress v1.17.8 h1:YcnTYrq7MikUT7k0Yb5eceMmALQPYBW/Xltxn0NAMnU= github.com/klauspost/compress v1.17.8/go.mod h1:Di0epgTjJY877eYKx5yC51cX2A2Vl2ibi7bDH9ttBbw= github.com/klauspost/cpuid/v2 v2.0.1/go.mod h1:FInQzS24/EEf25PyTYn52gqo7WaD8xa0213Md/qVLRg= @@ -197,6 +199,8 @@ github.com/minio/md5-simd v1.1.2/go.mod h1:MzdKDxYpY2BT9XQFocsiZf/NKVtR7nkE4RoEp github.com/minio/minio-go/v7 v7.0.70 h1:1u9NtMgfK1U42kUxcsl5v0yj6TEOPR497OAQxpJnn2g= github.com/minio/minio-go/v7 v7.0.70/go.mod h1:4yBA8v80xGA30cfM3fz0DKYMXunWl/AV/6tWEs9ryzo= github.com/mitchellh/cli v1.1.0/go.mod h1:xcISNoH86gajksDmfB23e/pu+B+GeFRMYmoHXxx3xhI= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db h1:62I3jR2EmQ4l5rM/4FEfDWcRD+abF5XlKShorW5LRoQ= +github.com/mitchellh/colorstring v0.0.0-20190213212951-d06e56a500db/go.mod h1:l0dey0ia/Uv7NcFFVbCLtqEBQbrT4OCwCSKTEv6enCw= github.com/mitchellh/go-homedir v1.1.0 h1:lukF9ziXFxDFPkA1vsr5zpc1XuPDn/wFntq5mG+4E0Y= github.com/mitchellh/go-homedir v1.1.0/go.mod h1:SfyaCUpYCn1Vlf4IUYiD9fPX4A5wJrkLzIz1N1q0pr0= github.com/mitchellh/mapstructure v0.0.0-20160808181253-ca63d7c062ee/go.mod h1:FVVH3fgwuzCH5S8UJGiWEs2h04kUh9fWfEaFds41c1Y= @@ -255,6 +259,8 @@ github.com/sagikazarmark/slog-shim v0.1.0 h1:diDBnUNK9N/354PgrxMywXnAwEr1QZcOr6g github.com/sagikazarmark/slog-shim v0.1.0/go.mod h1:SrcSrq8aKtyuqEI1uvTDTK1arOWRIczQRv+GVI1AkeQ= github.com/samber/lo v1.39.0 h1:4gTz1wUhNYLhFSKl6O+8peW0v2F4BCY034GRpU9WnuA= github.com/samber/lo v1.39.0/go.mod h1:+m/ZKRl6ClXCE2Lgf3MsQlWfh4bn1bz6CXEOxnEXnEA= +github.com/schollz/progressbar/v3 v3.14.4 h1:W9ZrDSJk7eqmQhd3uxFNNcTr0QL+xuGNI9dEMrw0r74= +github.com/schollz/progressbar/v3 v3.14.4/go.mod h1:aT3UQ7yGm+2ZjeXPqsjTenwL3ddUiuZ0kfQ/2tHlyNI= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529 h1:nn5Wsu0esKSJiIVhscUtVbo7ada43DJhG55ua/hjS5I= github.com/sean-/seed v0.0.0-20170313163322-e2103e2c3529/go.mod h1:DxrIzT+xaE7yg65j358z/aeFdxmN0P9QXhEzd20vsDc= github.com/sirupsen/logrus v1.2.0/go.mod h1:LxeOpSwHxABJmUn/MG1IvRgCAasNZTLOkJPxbbu5VWo= @@ -355,11 +361,17 @@ golang.org/x/sys v0.3.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.5.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.6.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= golang.org/x/sys v0.12.0/go.mod h1:oPkhp1MJrh7nUepCBck5+mAzfO9JrbApNNgaTdGDITg= +golang.org/x/sys v0.20.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/sys v0.21.0 h1:rF+pYz3DAGSQAxAu1CbC7catZg4ebC4UIeIhKxBZvws= golang.org/x/sys v0.21.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= +golang.org/x/sys v0.22.0 h1:RI27ohtqKCnwULzJLqkv897zojh5/DwS/ENaMzUOaWI= +golang.org/x/sys v0.22.0/go.mod h1:/VUhepiaJMQUp4+oa/7Zr1D23ma6VTLIYjOOTFZPUcA= golang.org/x/term v0.0.0-20201126162022-7de9c90e9dd1/go.mod h1:bj7SfCRtBDWHUb9snDiAeCFNEtKQo2Wmx5Cou7ajbmo= golang.org/x/term v0.0.0-20210927222741-03fcf44c2211/go.mod h1:jbD1KX2456YbFQfuXm/mYQcufACuNUgVhRMnK/tPxf8= golang.org/x/term v0.3.0/go.mod h1:q750SLmJuPmVoN1blW3UFBPREJfb1KmY3vwxfr+nFDA= +golang.org/x/term v0.20.0/go.mod h1:8UkIAJTvZgivsXaD6/pH6U9ecQzZ45awqEOzuCvwpFY= +golang.org/x/term v0.22.0 h1:BbsgPEJULsl2fV/AT3v15Mjva5yXKQDyKf+TbDz7QJk= +golang.org/x/term v0.22.0/go.mod h1:F3qCibpT5AMpCRfhfT53vVJwhLtIVHhB9XDjfFvnMI4= golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ= golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk= golang.org/x/text v0.3.3/go.mod h1:5Zoc/QRtKVWzQhOtBMvqHzDpF6irO9z98xDceosuGiQ= diff --git a/pkg/internal/services/analyzer.go b/pkg/internal/services/analyzer.go index 2c45db4..beec4c4 100644 --- a/pkg/internal/services/analyzer.go +++ b/pkg/internal/services/analyzer.go @@ -14,7 +14,9 @@ import ( "git.solsynth.dev/hydrogen/paperclip/pkg/internal/database" "git.solsynth.dev/hydrogen/paperclip/pkg/internal/models" jsoniter "github.com/json-iterator/go" + "github.com/k0kubun/go-ansi" "github.com/rs/zerolog/log" + "github.com/schollz/progressbar/v3" "github.com/spf13/viper" _ "image/gif" @@ -40,49 +42,100 @@ func StartConsumeAnalyzeTask() { } } +func ScanUnanalyzedFileFromDatabase() { + workers := viper.GetInt("workers.files_analyze") + + if workers < 2 { + log.Warn().Int("val", workers).Int("min", 2).Msg("The file analyzer does not have enough computing power, and the scan of unanalyzed files will not start...") + } + + var attachments []models.Attachment + if err := database.C.Where("destination = ? OR is_analyzed = ?", models.AttachmentDstTemporary, false).Find(&attachments).Error; err != nil { + log.Error().Err(err).Msg("Scan unanalyzed files from database failed...") + return + } + + if len(attachments) == 0 { + return + } + + go func() { + var deletionIdSet []uint + bar := progressbar.NewOptions(len(attachments), + progressbar.OptionSetWriter(ansi.NewAnsiStdout()), + progressbar.OptionEnableColorCodes(true), + progressbar.OptionShowBytes(true), + progressbar.OptionSetWidth(15), + progressbar.OptionSetDescription("Analyzing the unanalyzed files..."), + progressbar.OptionSetTheme(progressbar.Theme{ + Saucer: "[green]=[reset]", + SaucerHead: "[green]>[reset]", + SaucerPadding: " ", + BarStart: "[", + BarEnd: "]", + })) + for _, task := range attachments { + if err := AnalyzeAttachment(task); err != nil { + log.Error().Err(err).Any("task", task).Msg("A background file analyze task failed...") + deletionIdSet = append(deletionIdSet, task.ID) + } + bar.Add(1) + } + log.Info().Int("count", len(attachments)).Int("fails", len(deletionIdSet)).Msg("All unanalyzed files has been analyzed!") + + if len(deletionIdSet) > 0 { + database.C.Delete(&models.Attachment{}, deletionIdSet) + } + }() +} + func AnalyzeAttachment(file models.Attachment) error { if file.Destination != models.AttachmentDstTemporary { return fmt.Errorf("attachment isn't in temporary storage, unable to analyze") } - destMap := viper.GetStringMap("destinations.temporary") + var start time.Time - var dest models.LocalDestination - rawDest, _ := jsoniter.Marshal(destMap) - _ = jsoniter.Unmarshal(rawDest, &dest) + if !file.IsAnalyzed || len(file.HashCode) == 0 { + destMap := viper.GetStringMap("destinations.temporary") - start := time.Now() + var dest models.LocalDestination + rawDest, _ := jsoniter.Marshal(destMap) + _ = jsoniter.Unmarshal(rawDest, &dest) - dst := filepath.Join(dest.Path, file.Uuid) - if _, err := os.Stat(dst); os.IsNotExist(err) { - return fmt.Errorf("attachment doesn't exists in temporary storage: %v", err) - } + start = time.Now() - if t := strings.SplitN(file.MimeType, "/", 2)[0]; t == "image" { - // Dealing with image - reader, err := os.Open(dst) - if err != nil { - return fmt.Errorf("unable to open file: %v", err) + dst := filepath.Join(dest.Path, file.Uuid) + if _, err := os.Stat(dst); os.IsNotExist(err) { + return fmt.Errorf("attachment doesn't exists in temporary storage: %v", err) } - defer reader.Close() - im, _, err := image.Decode(reader) - if err != nil { - return fmt.Errorf("unable to decode file as an image: %v", err) - } - width := im.Bounds().Dx() - height := im.Bounds().Dy() - ratio := width / height - file.Metadata = map[string]any{ - "width": width, - "height": height, - "ratio": ratio, - } - } - if hash, err := HashAttachment(file); err != nil { - return err - } else { - file.HashCode = hash + if t := strings.SplitN(file.MimeType, "/", 2)[0]; t == "image" { + // Dealing with image + reader, err := os.Open(dst) + if err != nil { + return fmt.Errorf("unable to open file: %v", err) + } + defer reader.Close() + im, _, err := image.Decode(reader) + if err != nil { + return fmt.Errorf("unable to decode file as an image: %v", err) + } + width := im.Bounds().Dx() + height := im.Bounds().Dy() + ratio := float64(width) / float64(height) + file.Metadata = map[string]any{ + "width": width, + "height": height, + "ratio": ratio, + } + } + + if hash, err := HashAttachment(file); err != nil { + return err + } else { + file.HashCode = hash + } } tx := database.C.Begin() diff --git a/pkg/main.go b/pkg/main.go index 6b9bdd2..8352c18 100644 --- a/pkg/main.go +++ b/pkg/main.go @@ -72,6 +72,8 @@ func main() { // Messages log.Info().Msgf("Paperclip v%s is started...", pkg.AppVersion) + services.ScanUnanalyzedFileFromDatabase() + quit := make(chan os.Signal, 1) signal.Notify(quit, syscall.SIGINT, syscall.SIGTERM) <-quit