Processing files in background

This commit is contained in:
LittleSheep 2024-07-29 00:01:51 +08:00
parent 020e59234e
commit 089a9ecd9d
4 changed files with 145 additions and 61 deletions

View File

@ -20,10 +20,14 @@ type Attachment struct {
MimeType string `json:"mimetype"`
HashCode string `json:"hash"`
Destination AttachmentDst `json:"destination"`
RefCount int `json:"ref_count"`
Metadata datatypes.JSONMap `json:"metadata"`
IsMature bool `json:"is_mature"`
Ref *Attachment `json:"ref"`
RefID *uint `json:"ref_id"`
Account Account `json:"account"`
AccountID uint `json:"account_id"`
}

View File

@ -78,10 +78,6 @@ func createAttachment(c *fiber.Ctx) error {
}
user = lo.ToPtr(c.Locals("user").(models.Account))
hash := c.FormValue("hash")
if len(hash) != 64 {
return fiber.NewError(fiber.StatusBadRequest, "please provide a sha-256 hash code, length should be 64 characters")
}
usage := c.FormValue("usage")
if !lo.Contains(viper.GetStringSlice("accepts_usage"), usage) {
return fiber.NewError(fiber.StatusBadRequest, fmt.Sprintf("disallowed usage: %s", usage))
@ -100,9 +96,8 @@ func createAttachment(c *fiber.Ctx) error {
_ = jsoniter.UnmarshalFromString(c.FormValue("metadata"), &usermeta)
tx := database.C.Begin()
metadata, linked, err := services.NewAttachmentMetadata(tx, user, file, models.Attachment{
metadata, err := services.NewAttachmentMetadata(tx, user, file, models.Attachment{
Usage: usage,
HashCode: hash,
Alternative: c.FormValue("alt"),
MimeType: c.FormValue("mimetype"),
Metadata: usermeta,
@ -114,15 +109,15 @@ func createAttachment(c *fiber.Ctx) error {
return fiber.NewError(fiber.StatusBadRequest, err.Error())
}
if !linked {
if err := services.UploadFileToTemporary(c, file, metadata); err != nil {
tx.Rollback()
return fiber.NewError(fiber.StatusBadRequest, err.Error())
}
if err := services.UploadFileToTemporary(c, file, metadata); err != nil {
tx.Rollback()
return fiber.NewError(fiber.StatusBadRequest, err.Error())
}
tx.Commit()
services.PublishAnalyzeTask(metadata)
return c.JSON(metadata)
}

View File

@ -1,8 +1,11 @@
package services
import (
"crypto/sha256"
"encoding/hex"
"fmt"
"image"
"io"
"os"
"path/filepath"
"strings"
@ -60,9 +63,68 @@ func AnalyzeAttachment(file models.Attachment) error {
}
}
if err := database.C.Save(&file).Error; err != nil {
return fmt.Errorf("unable to save file record: %v", err)
if hash, err := HashAttachment(file); err != nil {
return err
} else {
file.HashCode = hash
}
tx := database.C.Begin()
linked, err := TryLinkAttachment(tx, file, file.HashCode)
if linked && err != nil {
return fmt.Errorf("unable to link file record: %v", err)
} else if !linked {
if err := tx.Save(&file); err != nil {
tx.Rollback()
return fmt.Errorf("unable to save file record: %v", err)
}
}
if !linked {
if err := ReUploadFileToPermanent(file); err != nil {
tx.Rollback()
return fmt.Errorf("unable to move file to permanet storage: %v", err)
}
}
tx.Commit()
return nil
}
// HashAttachment computes the SHA-256 digest of an attachment's content and
// returns it as a lowercase hex string.
//
// The content is read from the temporary destination: the directory is taken
// from the "destinations.temporary" configuration entry and the file name is
// the attachment's Uuid.
//
// An error is returned when the attachment is not marked as living in
// temporary storage, when the file is missing or cannot be inspected, or when
// it cannot be opened or read.
func HashAttachment(file models.Attachment) (hash string, err error) {
	if file.Destination != models.AttachmentDstTemporary {
		return "", fmt.Errorf("attachment isn't in temporary storage, unable to hash")
	}

	// Decode the raw configuration map into the typed destination by
	// round-tripping it through JSON. Decode errors are deliberately ignored:
	// a failed decode leaves dest.Path empty, and the resulting path is then
	// validated by the stat/open calls below.
	destMap := viper.GetStringMap("destinations.temporary")
	var dest models.LocalDestination
	rawDest, _ := jsoniter.Marshal(destMap)
	_ = jsoniter.Unmarshal(rawDest, &dest)

	dst := filepath.Join(dest.Path, file.Uuid)

	// os.Stat returns a nil error when the file exists, so the check must be
	// on the error itself (os.IsExist(nil) is false and would reject every
	// existing file).
	if _, statErr := os.Stat(dst); statErr != nil {
		if os.IsNotExist(statErr) {
			return "", fmt.Errorf("attachment doesn't exists in temporary storage")
		}
		return "", fmt.Errorf("unable to stat file: %w", statErr)
	}

	// Open the attachment itself, not a fixed file name.
	in, openErr := os.Open(dst)
	if openErr != nil {
		return "", fmt.Errorf("unable to open file: %w", openErr)
	}
	defer in.Close()

	// Stream the content through the hasher so large files are not loaded
	// into memory at once.
	hasher := sha256.New()
	if _, copyErr := io.Copy(hasher, in); copyErr != nil {
		return "", fmt.Errorf("unable to hash: %w", copyErr)
	}

	return hex.EncodeToString(hasher.Sum(nil)), nil
}

View File

@ -48,50 +48,37 @@ func GetAttachmentByHash(hash string) (models.Attachment, error) {
return attachment, nil
}
func NewAttachmentMetadata(tx *gorm.DB, user *models.Account, file *multipart.FileHeader, attachment models.Attachment) (models.Attachment, bool, error) {
linked := false
exists, pickupErr := GetAttachmentByHash(attachment.HashCode)
if pickupErr == nil {
linked = true
exists.Alternative = attachment.Alternative
exists.Usage = attachment.Usage
exists.Metadata = attachment.Metadata
attachment = exists
attachment.ID = 0
attachment.AccountID = user.ID
} else {
// Upload the new file
attachment.Uuid = uuid.NewString()
attachment.Size = file.Size
attachment.Name = file.Filename
attachment.AccountID = user.ID
func NewAttachmentMetadata(tx *gorm.DB, user *models.Account, file *multipart.FileHeader, attachment models.Attachment) (models.Attachment, error) {
attachment.Uuid = uuid.NewString()
attachment.Size = file.Size
attachment.Name = file.Filename
attachment.AccountID = user.ID
// If the user didn't provide file mimetype manually, we have to detect it
if len(attachment.MimeType) == 0 {
if ext := filepath.Ext(attachment.Name); len(ext) > 0 {
// Detect mimetype by file extensions
attachment.MimeType = mime.TypeByExtension(ext)
} else {
// Detect mimetype by file header
// This method as a fallback method, because this isn't pretty accurate
header, err := file.Open()
if err != nil {
return attachment, false, fmt.Errorf("failed to read file header: %v", err)
}
defer header.Close()
fileHeader := make([]byte, 512)
_, err = header.Read(fileHeader)
if err != nil {
return attachment, false, err
}
attachment.MimeType = http.DetectContentType(fileHeader)
// If the user didn't provide file mimetype manually, we have to detect it
if len(attachment.MimeType) == 0 {
if ext := filepath.Ext(attachment.Name); len(ext) > 0 {
// Detect mimetype by file extensions
attachment.MimeType = mime.TypeByExtension(ext)
} else {
// Detect mimetype by file header
// This method as a fallback method, because this isn't pretty accurate
header, err := file.Open()
if err != nil {
return attachment, fmt.Errorf("failed to read file header: %v", err)
}
defer header.Close()
fileHeader := make([]byte, 512)
_, err = header.Read(fileHeader)
if err != nil {
return attachment, err
}
attachment.MimeType = http.DetectContentType(fileHeader)
}
}
if err := tx.Save(&attachment).Error; err != nil {
return attachment, linked, fmt.Errorf("failed to save attachment record: %v", err)
return attachment, fmt.Errorf("failed to save attachment record: %v", err)
} else {
if len(metadataCache) > metadataCacheLimit {
clear(metadataCache)
@ -99,7 +86,32 @@ func NewAttachmentMetadata(tx *gorm.DB, user *models.Account, file *multipart.Fi
metadataCache[attachment.ID] = attachment
}
return attachment, linked, nil
return attachment, nil
}
// TryLinkAttachment attempts to turn og into a reference to an already stored
// attachment that has the given hash.
//
// The existing record is looked up with GetAttachmentByHash. When the lookup
// fails, nothing is written and (false, err) is returned. Otherwise the
// existing record's RefCount is incremented, og is pointed at it (RefID, Uuid
// and Destination are copied over) and both records are saved through tx.
//
// If either save fails, tx is rolled back here and (true, err) is returned.
// On success both records are stored in metadataCache and (true, nil) is
// returned; committing tx is left to the caller.
//
// og is received by value, so the caller's copy is not modified.
func TryLinkAttachment(tx *gorm.DB, og models.Attachment, hash string) (bool, error) {
	existing, lookupErr := GetAttachmentByHash(hash)
	if lookupErr != nil {
		return false, lookupErr
	}

	// Register one more reference on the stored record and make og share
	// its storage location.
	existing.RefCount++
	og.RefID = &existing.ID
	og.Uuid = existing.Uuid
	og.Destination = existing.Destination

	// Persist the referencing record first, then the updated counter.
	if saveErr := tx.Save(&og).Error; saveErr != nil {
		tx.Rollback()
		return true, saveErr
	}
	if saveErr := tx.Save(&existing).Error; saveErr != nil {
		tx.Rollback()
		return true, saveErr
	}

	metadataCache[existing.ID] = existing
	metadataCache[og.ID] = og

	return true, nil
}
func UpdateAttachment(item models.Attachment) (models.Attachment, error) {
@ -116,22 +128,33 @@ func UpdateAttachment(item models.Attachment) (models.Attachment, error) {
}
func DeleteAttachment(item models.Attachment) error {
var dupeCount int64
if err := database.C.
Where(&models.Attachment{HashCode: item.HashCode}).
Model(&models.Attachment{}).
Count(&dupeCount).Error; err != nil {
dupeCount = -1
}
dat := item
tx := database.C.Begin()
if item.RefID != nil {
var refTarget models.Attachment
if err := database.C.Where(models.Attachment{
BaseModel: models.BaseModel{ID: *item.RefID},
}).First(&refTarget).Error; err == nil {
refTarget.RefCount--
if err := tx.Save(&refTarget).Error; err != nil {
tx.Rollback()
return fmt.Errorf("unable to update ref count: %v", err)
}
}
}
if err := database.C.Delete(&item).Error; err != nil {
tx.Rollback()
return err
} else {
delete(metadataCache, item.ID)
}
if dupeCount != -1 && dupeCount <= 1 {
return DeleteFile(item)
tx.Commit()
if dat.RefCount == 0 {
return DeleteFile(dat)
}
return nil