From 5b9b28d77a49ffdcfa6b8b5e71b8855afc616248 Mon Sep 17 00:00:00 2001 From: LittleSheep Date: Sun, 18 May 2025 16:52:00 +0800 Subject: [PATCH] :zap: No longer save file with same hash --- DysonNetwork.Sphere/Account/Account.cs | 4 +- .../Account/AccountController.cs | 4 +- DysonNetwork.Sphere/Chat/ChatRoom.cs | 4 +- .../Chat/ChatRoomController.cs | 4 +- DysonNetwork.Sphere/Realm/Realm.cs | 4 +- DysonNetwork.Sphere/Storage/CloudFile.cs | 20 ++- DysonNetwork.Sphere/Storage/FileController.cs | 8 +- DysonNetwork.Sphere/Storage/FileService.cs | 157 ++++++++++++++++-- .../MessageReadReceiptFlushHandler.cs | 10 +- 9 files changed, 185 insertions(+), 30 deletions(-) diff --git a/DysonNetwork.Sphere/Account/Account.cs b/DysonNetwork.Sphere/Account/Account.cs index c0b0271..d31b2bb 100644 --- a/DysonNetwork.Sphere/Account/Account.cs +++ b/DysonNetwork.Sphere/Account/Account.cs @@ -63,9 +63,9 @@ public class Profile : ModelBase (Experience - Leveling.ExperiencePerLevel[Level]) * 100.0 / (Leveling.ExperiencePerLevel[Level + 1] - Leveling.ExperiencePerLevel[Level]); - public string? PictureId { get; set; } + [MaxLength(32)] public string? PictureId { get; set; } public Storage.CloudFile? Picture { get; set; } - public string? BackgroundId { get; set; } + [MaxLength(32)] public string? BackgroundId { get; set; } public Storage.CloudFile? Background { get; set; } public Guid AccountId { get; set; } diff --git a/DysonNetwork.Sphere/Account/AccountController.cs b/DysonNetwork.Sphere/Account/AccountController.cs index 5fa07ec..bb49b72 100644 --- a/DysonNetwork.Sphere/Account/AccountController.cs +++ b/DysonNetwork.Sphere/Account/AccountController.cs @@ -166,8 +166,8 @@ public class AccountController( [MaxLength(256)] public string? LastName { get; set; } [MaxLength(4096)] public string? Bio { get; set; } - public string? PictureId { get; set; } - public string? BackgroundId { get; set; } + [MaxLength(32)] public string? PictureId { get; set; } + [MaxLength(32)] public string? BackgroundId { get; set; } } [Authorize] diff --git a/DysonNetwork.Sphere/Chat/ChatRoom.cs b/DysonNetwork.Sphere/Chat/ChatRoom.cs index b8386ae..e897700 100644 --- a/DysonNetwork.Sphere/Chat/ChatRoom.cs +++ b/DysonNetwork.Sphere/Chat/ChatRoom.cs @@ -20,9 +20,9 @@ public class ChatRoom : ModelBase public ChatRoomType Type { get; set; } public bool IsPublic { get; set; } - public string? PictureId { get; set; } + [MaxLength(32)] public string? PictureId { get; set; } public CloudFile? Picture { get; set; } - public string? BackgroundId { get; set; } + [MaxLength(32)] public string? BackgroundId { get; set; } public CloudFile? Background { get; set; } [JsonIgnore] public ICollection Members { get; set; } = new List(); diff --git a/DysonNetwork.Sphere/Chat/ChatRoomController.cs b/DysonNetwork.Sphere/Chat/ChatRoomController.cs index 193dfb8..9a2a506 100644 --- a/DysonNetwork.Sphere/Chat/ChatRoomController.cs +++ b/DysonNetwork.Sphere/Chat/ChatRoomController.cs @@ -124,8 +124,8 @@ public class ChatRoomController( { [Required] [MaxLength(1024)] public string? Name { get; set; } [MaxLength(4096)] public string? Description { get; set; } - public string? PictureId { get; set; } - public string? BackgroundId { get; set; } + [MaxLength(32)] public string? PictureId { get; set; } + [MaxLength(32)] public string? BackgroundId { get; set; } public Guid? RealmId { get; set; } } diff --git a/DysonNetwork.Sphere/Realm/Realm.cs b/DysonNetwork.Sphere/Realm/Realm.cs index d06a788..9fa1a03 100644 --- a/DysonNetwork.Sphere/Realm/Realm.cs +++ b/DysonNetwork.Sphere/Realm/Realm.cs @@ -19,9 +19,9 @@ public class Realm : ModelBase public bool IsCommunity { get; set; } public bool IsPublic { get; set; } - public string? PictureId { get; set; } + [MaxLength(32)] public string? PictureId { get; set; } public CloudFile? Picture { get; set; } - public string? BackgroundId { get; set; } + [MaxLength(32)] public string? BackgroundId { get; set; } public CloudFile? Background { get; set; } [JsonIgnore] public ICollection Members { get; set; } = new List(); diff --git a/DysonNetwork.Sphere/Storage/CloudFile.cs b/DysonNetwork.Sphere/Storage/CloudFile.cs index dec7787..a22e190 100644 --- a/DysonNetwork.Sphere/Storage/CloudFile.cs +++ b/DysonNetwork.Sphere/Storage/CloudFile.cs @@ -22,12 +22,13 @@ public class RemoteStorageConfig public class CloudFile : ModelBase { - [MaxLength(128)] public string Id { get; set; } = Guid.NewGuid().ToString(); + /// The id generated by TuS, basically just UUID remove the dash lines + [MaxLength(32)] public string Id { get; set; } = Guid.NewGuid().ToString(); [MaxLength(1024)] public string Name { get; set; } = string.Empty; [MaxLength(4096)] public string? Description { get; set; } [Column(TypeName = "jsonb")] public Dictionary? FileMeta { get; set; } = null!; [Column(TypeName = "jsonb")] public Dictionary? UserMeta { get; set; } = null!; - [Column(TypeName = "jsonb")] List SensitiveMarks { get; set; } = new(); + [Column(TypeName = "jsonb")] public List SensitiveMarks { get; set; } = new(); [MaxLength(256)] public string? MimeType { get; set; } [MaxLength(256)] public string? Hash { get; set; } public long Size { get; set; } @@ -35,9 +36,20 @@ public class CloudFile : ModelBase public Instant? ExpiredAt { get; set; } [MaxLength(128)] public string? UploadedTo { get; set; } public bool HasCompression { get; set; }= false; + + /// The object name which stored remotely, + /// multiple cloud file may have same storage id to indicate they are the same file + /// + /// If the storage id was null and the uploaded at is not null, means it is an embedding file, + /// The embedding file means the file is store on another site, + /// or it is a webpage (based on mimetype) + [MaxLength(32)] public string? StorageId { get; set; } + /// This field should be null when the storage id is filled + /// Indicates the off-site accessible url of the file + [MaxLength(4096)] public string? StorageUrl { get; set; } - // Metrics - // When this used count keep zero, it means it's not used by anybody, so it can be recycled + /// Metrics + /// When this used count keep zero, it means it's not used by anybody, so it can be recycled public int UsedCount { get; set; } = 0; [JsonIgnore] public Account.Account Account { get; set; } = null!; diff --git a/DysonNetwork.Sphere/Storage/FileController.cs b/DysonNetwork.Sphere/Storage/FileController.cs index 0232429..91f15ba 100644 --- a/DysonNetwork.Sphere/Storage/FileController.cs +++ b/DysonNetwork.Sphere/Storage/FileController.cs @@ -17,9 +17,11 @@ public class FileController( [HttpGet("{id}")] public async Task OpenFile(string id, [FromQuery] bool original = false) { - var file = await db.Files.FindAsync(id); + var file = await fs.GetFileAsync(id); if (file is null) return NotFound(); - + + if (file.StorageUrl is not null) return Redirect(file.StorageUrl); + if (file.UploadedTo is null) { var tusStorePath = configuration.GetValue("Tus:StorePath")!; @@ -29,7 +31,7 @@ public class FileController( } var dest = fs.GetRemoteStorageConfig(file.UploadedTo); - var fileName = file.Id; + var fileName = file.StorageId; if (!original && file.HasCompression) { diff --git a/DysonNetwork.Sphere/Storage/FileService.cs b/DysonNetwork.Sphere/Storage/FileService.cs index f90dd19..34545e2 100644 --- a/DysonNetwork.Sphere/Storage/FileService.cs +++ b/DysonNetwork.Sphere/Storage/FileService.cs @@ -2,6 +2,7 @@ using System.Globalization; using FFMpegCore; using System.Security.Cryptography; using Microsoft.EntityFrameworkCore; +using Microsoft.Extensions.Caching.Memory; using Minio; using Minio.DataModel.Args; using NodaTime; @@ -15,9 +16,36 @@ public class FileService( IConfiguration configuration, TusDiskStore store, ILogger logger, - IServiceScopeFactory scopeFactory + IServiceScopeFactory scopeFactory, + IMemoryCache cache ) { + private const string CacheKeyPrefix = "cloudfile_"; + private static readonly TimeSpan CacheDuration = TimeSpan.FromMinutes(15); + + /// + /// The api for getting file meta with cache, + /// the best use case is for accessing the file data. + /// + /// This function won't load uploader's information, only keep minimal file meta + /// + /// The id of the cloud file requested + /// The minimal file meta + public async Task GetFileAsync(string fileId) + { + var cacheKey = $"{CacheKeyPrefix}{fileId}"; + + if (cache.TryGetValue(cacheKey, out CloudFile? cachedFile)) + return cachedFile; + + var file = await db.Files.FirstOrDefaultAsync(f => f.Id == fileId); + + if (file != null) + cache.Set(cacheKey, file, CacheDuration); + + return file; + } + private static readonly string TempFilePrefix = "dyn-cloudfile"; // The analysis file method no longer will remove the GPS EXIF data @@ -31,7 +59,7 @@ public class FileService( ) { var result = new List<(string filePath, string suffix)>(); - + var ogFilePath = Path.Join(configuration.GetValue("Tus:StorePath"), fileId); var fileSize = stream.Length; var hash = await HashFileAsync(stream, fileSize: fileSize); @@ -47,10 +75,25 @@ public class FileService( AccountId = account.Id }; + var existingFile = await db.Files.FirstOrDefaultAsync(f => f.Hash == hash); + file.StorageId = existingFile is not null ? existingFile.StorageId : file.Id; + + if (existingFile is not null) + { + file.FileMeta = existingFile.FileMeta; + file.HasCompression = existingFile.HasCompression; + file.SensitiveMarks = existingFile.SensitiveMarks; + + db.Files.Add(file); + await db.SaveChangesAsync(); + return file; + } + switch (contentType.Split('/')[0]) { case "image": - var blurhash = BlurHashSharp.SkiaSharp.BlurHashEncoder.Encode(xComponent: 3, yComponent: 3, filename: ogFilePath); + var blurhash = + BlurHashSharp.SkiaSharp.BlurHashEncoder.Encode(xComponent: 3, yComponent: 3, filename: ogFilePath); // Rewind stream stream.Position = 0; @@ -130,7 +173,7 @@ public class FileService( if (contentType.Split('/')[0] == "image") { file.MimeType = "image/webp"; - + using var vipsImage = NetVips.Image.NewFromFile(ogFilePath); var imagePath = Path.Join(Path.GetTempPath(), $"{TempFilePrefix}#{file.Id}"); vipsImage.WriteToFile(imagePath + ".webp"); @@ -278,7 +321,14 @@ public class FileService( public async Task DeleteFileDataAsync(CloudFile file) { + if (file.StorageId is null) return; if (file.UploadedTo is null) return; + + var repeatedStorageId = await db.Files + .Where(f => f.StorageId == file.StorageId && f.Id != file.Id && f.UsedCount > 0) + .AnyAsync(); + if (repeatedStorageId) return; + var dest = GetRemoteStorageConfig(file.UploadedTo); var client = CreateMinioClient(dest); if (client is null) @@ -287,12 +337,27 @@ public class FileService( ); var bucket = dest.Bucket; + var objectId = file.StorageId ?? file.Id; // Use StorageId if available, otherwise fall back to Id + await client.RemoveObjectAsync( - new RemoveObjectArgs().WithBucket(bucket).WithObject(file.Id) + new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId) ); - db.Remove(file); - await db.SaveChangesAsync(); + if (file.HasCompression) + { + // Also remove the compressed version if it exists + try + { + await client.RemoveObjectAsync( + new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId + ".compressed") + ); + } + catch + { + // Ignore errors when deleting compressed version + logger.LogWarning("Failed to delete compressed version of file {fileId}", file.Id); + } + } } public RemoteStorageConfig GetRemoteStorageConfig(string destination) @@ -345,20 +410,88 @@ public class CloudFileUnusedRecyclingJob(AppDatabase db, FileService fs, ILogger var cutoff = SystemClock.Instance.GetCurrentInstant() - Duration.FromHours(1); var now = SystemClock.Instance.GetCurrentInstant(); - var files = db.Files + + // Get files to delete along with their storage IDs + var files = await db.Files .Where(f => (f.ExpiredAt == null && f.UsedCount == 0 && f.CreatedAt < cutoff) || (f.ExpiredAt != null && f.ExpiredAt >= now) ) + .ToListAsync(); + + if (files.Count == 0) + { + logger.LogInformation("No files to delete"); + return; + } + + logger.LogInformation($"Found {files.Count} files to process..."); + + // Group files by StorageId and find which ones are safe to delete + var storageIds = files.Where(f => f.StorageId != null) + .Select(f => f.StorageId!) + .Distinct() .ToList(); - logger.LogInformation($"Deleting {files.Count} unused cloud files..."); + var usedStorageIds = await db.Files + .Where(f => f.StorageId != null && + storageIds.Contains(f.StorageId) && + !files.Select(ff => ff.Id).Contains(f.Id)) + .Select(f => f.StorageId!) + .Distinct() + .ToListAsync(); - var tasks = files.Select(fs.DeleteFileDataAsync); - await Task.WhenAll(tasks); + // Group files for deletion + var filesToDelete = files.Where(f => f.StorageId == null || !usedStorageIds.Contains(f.StorageId)) + .GroupBy(f => f.UploadedTo) + .ToDictionary(grouping => grouping.Key!, grouping => grouping.ToList()); + // Delete files by remote storage + foreach (var group in filesToDelete) + { + if (string.IsNullOrEmpty(group.Key)) continue; + + try + { + var dest = fs.GetRemoteStorageConfig(group.Key); + var client = fs.CreateMinioClient(dest); + if (client == null) continue; + + // Create delete tasks for each file in the group + var deleteTasks = group.Value.Select(file => + { + var objectId = file.StorageId ?? file.Id; + var tasks = new List + { + client.RemoveObjectAsync(new RemoveObjectArgs() + .WithBucket(dest.Bucket) + .WithObject(objectId)) + }; + + if (file.HasCompression) + { + tasks.Add(client.RemoveObjectAsync(new RemoveObjectArgs() + .WithBucket(dest.Bucket) + .WithObject(objectId + ".compressed"))); + } + + return Task.WhenAll(tasks); + }); + + await Task.WhenAll(deleteTasks); + } + catch (Exception ex) + { + logger.LogError(ex, "Error deleting files from remote storage {remote}", group.Key); + } + } + + // Delete all file records from the database + var fileIds = files.Select(f => f.Id).ToList(); await db.Files - .Where(f => f.UsedCount == 0 && f.CreatedAt < cutoff) + .Where(f => fileIds.Contains(f.Id)) .ExecuteDeleteAsync(); + + logger.LogInformation($"Completed deleting {files.Count} files"); } } \ No newline at end of file diff --git a/DysonNetwork.Sphere/Storage/Handlers/MessageReadReceiptFlushHandler.cs b/DysonNetwork.Sphere/Storage/Handlers/MessageReadReceiptFlushHandler.cs index 5231928..654ae87 100644 --- a/DysonNetwork.Sphere/Storage/Handlers/MessageReadReceiptFlushHandler.cs +++ b/DysonNetwork.Sphere/Storage/Handlers/MessageReadReceiptFlushHandler.cs @@ -10,7 +10,15 @@ public class MessageReadReceiptFlushHandler(IServiceProvider serviceProvider) : { public async Task FlushAsync(IReadOnlyList items) { - var distinctItems = items.DistinctBy(x => new { x.MessageId, x.SenderId }).ToList(); + var distinctItems = items + .DistinctBy(x => new { x.MessageId, x.SenderId }) + .Select(x => + { + x.CreatedAt = SystemClock.Instance.GetCurrentInstant(); + x.UpdatedAt = x.CreatedAt; + return x; + }) + .ToList(); using var scope = serviceProvider.CreateScope(); var db = scope.ServiceProvider.GetRequiredService(); await db.BulkInsertAsync(distinctItems);