From 089a9ecd9df96c7e679f062097c61e0700843d77 Mon Sep 17 00:00:00 2001 From: LittleSheep Date: Mon, 29 Jul 2024 00:01:51 +0800 Subject: [PATCH] :sparkles: Processing files in background --- pkg/internal/models/attachments.go | 4 + pkg/internal/server/api/attachments_api.go | 17 ++- pkg/internal/services/analyzer.go | 66 +++++++++++- pkg/internal/services/attachments.go | 119 ++++++++++++--------- 4 files changed, 145 insertions(+), 61 deletions(-) diff --git a/pkg/internal/models/attachments.go b/pkg/internal/models/attachments.go index 9009870..bd5d5b0 100644 --- a/pkg/internal/models/attachments.go +++ b/pkg/internal/models/attachments.go @@ -20,10 +20,14 @@ type Attachment struct { MimeType string `json:"mimetype"` HashCode string `json:"hash"` Destination AttachmentDst `json:"destination"` + RefCount int `json:"ref_count"` Metadata datatypes.JSONMap `json:"metadata"` IsMature bool `json:"is_mature"` + Ref *Attachment `json:"ref"` + RefID *uint `json:"ref_id"` + Account Account `json:"account"` AccountID uint `json:"account_id"` } diff --git a/pkg/internal/server/api/attachments_api.go b/pkg/internal/server/api/attachments_api.go index eb0dc61..af5f15a 100644 --- a/pkg/internal/server/api/attachments_api.go +++ b/pkg/internal/server/api/attachments_api.go @@ -78,10 +78,6 @@ func createAttachment(c *fiber.Ctx) error { } user = lo.ToPtr(c.Locals("user").(models.Account)) - hash := c.FormValue("hash") - if len(hash) != 64 { - return fiber.NewError(fiber.StatusBadRequest, "please provide a sha-256 hash code, length should be 64 characters") - } usage := c.FormValue("usage") if !lo.Contains(viper.GetStringSlice("accepts_usage"), usage) { return fiber.NewError(fiber.StatusBadRequest, fmt.Sprintf("disallowed usage: %s", usage)) @@ -100,9 +96,8 @@ func createAttachment(c *fiber.Ctx) error { _ = jsoniter.UnmarshalFromString(c.FormValue("metadata"), &usermeta) tx := database.C.Begin() - metadata, linked, err := services.NewAttachmentMetadata(tx, user, file, models.Attachment{ + metadata, err := services.NewAttachmentMetadata(tx, user, file, models.Attachment{ Usage: usage, - HashCode: hash, Alternative: c.FormValue("alt"), MimeType: c.FormValue("mimetype"), Metadata: usermeta, @@ -114,15 +109,15 @@ func createAttachment(c *fiber.Ctx) error { return fiber.NewError(fiber.StatusBadRequest, err.Error()) } - if !linked { - if err := services.UploadFileToTemporary(c, file, metadata); err != nil { - tx.Rollback() - return fiber.NewError(fiber.StatusBadRequest, err.Error()) - } + if err := services.UploadFileToTemporary(c, file, metadata); err != nil { + tx.Rollback() + return fiber.NewError(fiber.StatusBadRequest, err.Error()) } tx.Commit() + services.PublishAnalyzeTask(metadata) + return c.JSON(metadata) } diff --git a/pkg/internal/services/analyzer.go b/pkg/internal/services/analyzer.go index 81e1614..5a7cbfd 100644 --- a/pkg/internal/services/analyzer.go +++ b/pkg/internal/services/analyzer.go @@ -1,8 +1,11 @@ package services import ( + "crypto/sha256" + "encoding/hex" "fmt" "image" + "io" "os" "path/filepath" "strings" @@ -60,9 +63,68 @@ func AnalyzeAttachment(file models.Attachment) error { } } - if err := database.C.Save(&file).Error; err != nil { - return fmt.Errorf("unable to save file record: %v", err) + if hash, err := HashAttachment(file); err != nil { + return err + } else { + file.HashCode = hash } + tx := database.C.Begin() + + linked, err := TryLinkAttachment(tx, file, file.HashCode) + if linked && err != nil { + return fmt.Errorf("unable to link file record: %v", err) + } else if !linked { + if err := tx.Save(&file); err != nil { + tx.Rollback() + return fmt.Errorf("unable to save file record: %v", err) + } + } + + if !linked { + if err := ReUploadFileToPermanent(file); err != nil { + tx.Rollback() + return fmt.Errorf("unable to move file to permanet storage: %v", err) + } + } + + tx.Commit() + return nil } + +func HashAttachment(file models.Attachment) (hash string, err error) { + if file.Destination != models.AttachmentDstTemporary { + err = fmt.Errorf("attachment isn't in temporary storage, unable to hash") + return + } + + destMap := viper.GetStringMap("destinations.temporary") + + var dest models.LocalDestination + rawDest, _ := jsoniter.Marshal(destMap) + _ = jsoniter.Unmarshal(rawDest, &dest) + + dst := filepath.Join(dest.Path, file.Uuid) + if _, err = os.Stat(dst); !os.IsExist(err) { + err = fmt.Errorf("attachment doesn't exists in temporary storage") + return + } + + var in *os.File + in, err = os.Open("file.txt") + if err != nil { + err = fmt.Errorf("unable to open file: %v", err) + return + } + defer in.Close() + + hasher := sha256.New() + if _, err = io.Copy(hasher, in); err != nil { + err = fmt.Errorf("unable to hash: %v", err) + return + } + + hash = hex.EncodeToString(hasher.Sum(nil)) + return +} diff --git a/pkg/internal/services/attachments.go b/pkg/internal/services/attachments.go index 0a6c313..0af79bf 100644 --- a/pkg/internal/services/attachments.go +++ b/pkg/internal/services/attachments.go @@ -48,50 +48,37 @@ func GetAttachmentByHash(hash string) (models.Attachment, error) { return attachment, nil } -func NewAttachmentMetadata(tx *gorm.DB, user *models.Account, file *multipart.FileHeader, attachment models.Attachment) (models.Attachment, bool, error) { - linked := false - exists, pickupErr := GetAttachmentByHash(attachment.HashCode) - if pickupErr == nil { - linked = true - exists.Alternative = attachment.Alternative - exists.Usage = attachment.Usage - exists.Metadata = attachment.Metadata - attachment = exists - attachment.ID = 0 - attachment.AccountID = user.ID - } else { - // Upload the new file - attachment.Uuid = uuid.NewString() - attachment.Size = file.Size - attachment.Name = file.Filename - attachment.AccountID = user.ID +func NewAttachmentMetadata(tx *gorm.DB, user *models.Account, file *multipart.FileHeader, attachment models.Attachment) (models.Attachment, error) { + attachment.Uuid = uuid.NewString() + attachment.Size = file.Size + attachment.Name = file.Filename + attachment.AccountID = user.ID - // If the user didn't provide file mimetype manually, we have to detect it - if len(attachment.MimeType) == 0 { - if ext := filepath.Ext(attachment.Name); len(ext) > 0 { - // Detect mimetype by file extensions - attachment.MimeType = mime.TypeByExtension(ext) - } else { - // Detect mimetype by file header - // This method as a fallback method, because this isn't pretty accurate - header, err := file.Open() - if err != nil { - return attachment, false, fmt.Errorf("failed to read file header: %v", err) - } - defer header.Close() - - fileHeader := make([]byte, 512) - _, err = header.Read(fileHeader) - if err != nil { - return attachment, false, err - } - attachment.MimeType = http.DetectContentType(fileHeader) + // If the user didn't provide file mimetype manually, we have to detect it + if len(attachment.MimeType) == 0 { + if ext := filepath.Ext(attachment.Name); len(ext) > 0 { + // Detect mimetype by file extensions + attachment.MimeType = mime.TypeByExtension(ext) + } else { + // Detect mimetype by file header + // This method as a fallback method, because this isn't pretty accurate + header, err := file.Open() + if err != nil { + return attachment, fmt.Errorf("failed to read file header: %v", err) } + defer header.Close() + + fileHeader := make([]byte, 512) + _, err = header.Read(fileHeader) + if err != nil { + return attachment, err + } + attachment.MimeType = http.DetectContentType(fileHeader) } } if err := tx.Save(&attachment).Error; err != nil { - return attachment, linked, fmt.Errorf("failed to save attachment record: %v", err) + return attachment, fmt.Errorf("failed to save attachment record: %v", err) } else { if len(metadataCache) > metadataCacheLimit { clear(metadataCache) @@ -99,7 +86,32 @@ func NewAttachmentMetadata(tx *gorm.DB, user *models.Account, file *multipart.Fi metadataCache[attachment.ID] = attachment } - return attachment, linked, nil + return attachment, nil +} + +func TryLinkAttachment(tx *gorm.DB, og models.Attachment, hash string) (bool, error) { + prev, err := GetAttachmentByHash(hash) + if err != nil { + return false, err + } + + prev.RefCount++ + og.RefID = &prev.ID + og.Uuid = prev.Uuid + og.Destination = prev.Destination + + if err := tx.Save(&og).Error; err != nil { + tx.Rollback() + return true, err + } else if err = tx.Save(&prev).Error; err != nil { + tx.Rollback() + return true, err + } + + metadataCache[prev.ID] = prev + metadataCache[og.ID] = og + + return true, nil } func UpdateAttachment(item models.Attachment) (models.Attachment, error) { @@ -116,22 +128,33 @@ func UpdateAttachment(item models.Attachment) (models.Attachment, error) { } func DeleteAttachment(item models.Attachment) error { - var dupeCount int64 - if err := database.C. - Where(&models.Attachment{HashCode: item.HashCode}). - Model(&models.Attachment{}). - Count(&dupeCount).Error; err != nil { - dupeCount = -1 - } + dat := item + tx := database.C.Begin() + + if item.RefID != nil { + var refTarget models.Attachment + if err := database.C.Where(models.Attachment{ + BaseModel: models.BaseModel{ID: *item.RefID}, + }).First(&refTarget).Error; err == nil { + refTarget.RefCount-- + if err := tx.Save(&refTarget).Error; err != nil { + tx.Rollback() + return fmt.Errorf("unable to update ref count: %v", err) + } + } + } if err := database.C.Delete(&item).Error; err != nil { + tx.Rollback() return err } else { delete(metadataCache, item.ID) } - if dupeCount != -1 && dupeCount <= 1 { - return DeleteFile(item) + tx.Commit() + + if dat.RefCount == 0 { + return DeleteFile(dat) } return nil