No longer save file with same hash

This commit is contained in:
LittleSheep 2025-05-18 16:52:00 +08:00
parent 18fde9f16c
commit 5b9b28d77a
9 changed files with 185 additions and 30 deletions

View File

@ -63,9 +63,9 @@ public class Profile : ModelBase
(Experience - Leveling.ExperiencePerLevel[Level]) * 100.0 / (Experience - Leveling.ExperiencePerLevel[Level]) * 100.0 /
(Leveling.ExperiencePerLevel[Level + 1] - Leveling.ExperiencePerLevel[Level]); (Leveling.ExperiencePerLevel[Level + 1] - Leveling.ExperiencePerLevel[Level]);
public string? PictureId { get; set; } [MaxLength(32)] public string? PictureId { get; set; }
public Storage.CloudFile? Picture { get; set; } public Storage.CloudFile? Picture { get; set; }
public string? BackgroundId { get; set; } [MaxLength(32)] public string? BackgroundId { get; set; }
public Storage.CloudFile? Background { get; set; } public Storage.CloudFile? Background { get; set; }
public Guid AccountId { get; set; } public Guid AccountId { get; set; }

View File

@ -166,8 +166,8 @@ public class AccountController(
[MaxLength(256)] public string? LastName { get; set; } [MaxLength(256)] public string? LastName { get; set; }
[MaxLength(4096)] public string? Bio { get; set; } [MaxLength(4096)] public string? Bio { get; set; }
public string? PictureId { get; set; } [MaxLength(32)] public string? PictureId { get; set; }
public string? BackgroundId { get; set; } [MaxLength(32)] public string? BackgroundId { get; set; }
} }
[Authorize] [Authorize]

View File

@ -20,9 +20,9 @@ public class ChatRoom : ModelBase
public ChatRoomType Type { get; set; } public ChatRoomType Type { get; set; }
public bool IsPublic { get; set; } public bool IsPublic { get; set; }
public string? PictureId { get; set; } [MaxLength(32)] public string? PictureId { get; set; }
public CloudFile? Picture { get; set; } public CloudFile? Picture { get; set; }
public string? BackgroundId { get; set; } [MaxLength(32)] public string? BackgroundId { get; set; }
public CloudFile? Background { get; set; } public CloudFile? Background { get; set; }
[JsonIgnore] public ICollection<ChatMember> Members { get; set; } = new List<ChatMember>(); [JsonIgnore] public ICollection<ChatMember> Members { get; set; } = new List<ChatMember>();

View File

@ -124,8 +124,8 @@ public class ChatRoomController(
{ {
[Required] [MaxLength(1024)] public string? Name { get; set; } [Required] [MaxLength(1024)] public string? Name { get; set; }
[MaxLength(4096)] public string? Description { get; set; } [MaxLength(4096)] public string? Description { get; set; }
public string? PictureId { get; set; } [MaxLength(32)] public string? PictureId { get; set; }
public string? BackgroundId { get; set; } [MaxLength(32)] public string? BackgroundId { get; set; }
public Guid? RealmId { get; set; } public Guid? RealmId { get; set; }
} }

View File

@ -19,9 +19,9 @@ public class Realm : ModelBase
public bool IsCommunity { get; set; } public bool IsCommunity { get; set; }
public bool IsPublic { get; set; } public bool IsPublic { get; set; }
public string? PictureId { get; set; } [MaxLength(32)] public string? PictureId { get; set; }
public CloudFile? Picture { get; set; } public CloudFile? Picture { get; set; }
public string? BackgroundId { get; set; } [MaxLength(32)] public string? BackgroundId { get; set; }
public CloudFile? Background { get; set; } public CloudFile? Background { get; set; }
[JsonIgnore] public ICollection<RealmMember> Members { get; set; } = new List<RealmMember>(); [JsonIgnore] public ICollection<RealmMember> Members { get; set; } = new List<RealmMember>();

View File

@ -22,12 +22,13 @@ public class RemoteStorageConfig
public class CloudFile : ModelBase public class CloudFile : ModelBase
{ {
[MaxLength(128)] public string Id { get; set; } = Guid.NewGuid().ToString(); /// The id generated by TuS, basically just UUID remove the dash lines
[MaxLength(32)] public string Id { get; set; } = Guid.NewGuid().ToString();
[MaxLength(1024)] public string Name { get; set; } = string.Empty; [MaxLength(1024)] public string Name { get; set; } = string.Empty;
[MaxLength(4096)] public string? Description { get; set; } [MaxLength(4096)] public string? Description { get; set; }
[Column(TypeName = "jsonb")] public Dictionary<string, object>? FileMeta { get; set; } = null!; [Column(TypeName = "jsonb")] public Dictionary<string, object>? FileMeta { get; set; } = null!;
[Column(TypeName = "jsonb")] public Dictionary<string, object>? UserMeta { get; set; } = null!; [Column(TypeName = "jsonb")] public Dictionary<string, object>? UserMeta { get; set; } = null!;
[Column(TypeName = "jsonb")] List<CloudFileSensitiveMark> SensitiveMarks { get; set; } = new(); [Column(TypeName = "jsonb")] public List<CloudFileSensitiveMark> SensitiveMarks { get; set; } = new();
[MaxLength(256)] public string? MimeType { get; set; } [MaxLength(256)] public string? MimeType { get; set; }
[MaxLength(256)] public string? Hash { get; set; } [MaxLength(256)] public string? Hash { get; set; }
public long Size { get; set; } public long Size { get; set; }
@ -35,9 +36,20 @@ public class CloudFile : ModelBase
public Instant? ExpiredAt { get; set; } public Instant? ExpiredAt { get; set; }
[MaxLength(128)] public string? UploadedTo { get; set; } [MaxLength(128)] public string? UploadedTo { get; set; }
public bool HasCompression { get; set; }= false; public bool HasCompression { get; set; }= false;
/// The name of the object as stored remotely.
/// Multiple cloud files may share the same storage id, which indicates that they are the same file.
///
/// If the storage id is null and the uploaded-at value is not null, the file is an embedding file.
/// An embedding file is a file that is stored on another site,
/// or that is a webpage (based on its MIME type).
[MaxLength(32)] public string? StorageId { get; set; }
/// This field should be null when the storage id is set.
/// It holds the off-site accessible URL of the file.
[MaxLength(4096)] public string? StorageUrl { get; set; }
// Metrics /// Metrics
// When this used count keep zero, it means it's not used by anybody, so it can be recycled /// When this used count keep zero, it means it's not used by anybody, so it can be recycled
public int UsedCount { get; set; } = 0; public int UsedCount { get; set; } = 0;
[JsonIgnore] public Account.Account Account { get; set; } = null!; [JsonIgnore] public Account.Account Account { get; set; } = null!;

View File

@ -17,9 +17,11 @@ public class FileController(
[HttpGet("{id}")] [HttpGet("{id}")]
public async Task<ActionResult> OpenFile(string id, [FromQuery] bool original = false) public async Task<ActionResult> OpenFile(string id, [FromQuery] bool original = false)
{ {
var file = await db.Files.FindAsync(id); var file = await fs.GetFileAsync(id);
if (file is null) return NotFound(); if (file is null) return NotFound();
if (file.StorageUrl is not null) return Redirect(file.StorageUrl);
if (file.UploadedTo is null) if (file.UploadedTo is null)
{ {
var tusStorePath = configuration.GetValue<string>("Tus:StorePath")!; var tusStorePath = configuration.GetValue<string>("Tus:StorePath")!;
@ -29,7 +31,7 @@ public class FileController(
} }
var dest = fs.GetRemoteStorageConfig(file.UploadedTo); var dest = fs.GetRemoteStorageConfig(file.UploadedTo);
var fileName = file.Id; var fileName = file.StorageId;
if (!original && file.HasCompression) if (!original && file.HasCompression)
{ {

View File

@ -2,6 +2,7 @@ using System.Globalization;
using FFMpegCore; using FFMpegCore;
using System.Security.Cryptography; using System.Security.Cryptography;
using Microsoft.EntityFrameworkCore; using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Caching.Memory;
using Minio; using Minio;
using Minio.DataModel.Args; using Minio.DataModel.Args;
using NodaTime; using NodaTime;
@ -15,9 +16,36 @@ public class FileService(
IConfiguration configuration, IConfiguration configuration,
TusDiskStore store, TusDiskStore store,
ILogger<FileService> logger, ILogger<FileService> logger,
IServiceScopeFactory scopeFactory IServiceScopeFactory scopeFactory,
IMemoryCache cache
) )
{ {
private const string CacheKeyPrefix = "cloudfile_";
private static readonly TimeSpan CacheDuration = TimeSpan.FromMinutes(15);
/// <summary>
/// Looks up a cloud file record by id, consulting the in-memory cache before the database.
/// The best use case is for accessing the file data.
///
/// <b>This function won't load uploader's information, only keep minimal file meta.</b>
/// The returned instance may be shared with other callers through the cache and is
/// never tracked by the database context, so treat it as read-only.
/// </summary>
/// <param name="fileId">The id of the cloud file requested</param>
/// <returns>The minimal file meta, or <c>null</c> when no file with that id exists</returns>
public async Task<CloudFile?> GetFileAsync(string fileId)
{
    var cacheKey = $"{CacheKeyPrefix}{fileId}";
    if (cache.TryGetValue(cacheKey, out CloudFile? cachedFile))
        return cachedFile;

    // Query without change tracking: the result is stored in a cache that outlives this
    // context, so a cache miss must return the same kind of detached instance a cache hit does.
    var file = await db.Files
        .AsNoTracking()
        .FirstOrDefaultAsync(f => f.Id == fileId);

    // Only existing files are cached; a missing id is looked up again on the next call.
    if (file is not null)
        cache.Set(cacheKey, file, CacheDuration);

    return file;
}
private static readonly string TempFilePrefix = "dyn-cloudfile"; private static readonly string TempFilePrefix = "dyn-cloudfile";
// The analysis file method no longer will remove the GPS EXIF data // The analysis file method no longer will remove the GPS EXIF data
@ -31,7 +59,7 @@ public class FileService(
) )
{ {
var result = new List<(string filePath, string suffix)>(); var result = new List<(string filePath, string suffix)>();
var ogFilePath = Path.Join(configuration.GetValue<string>("Tus:StorePath"), fileId); var ogFilePath = Path.Join(configuration.GetValue<string>("Tus:StorePath"), fileId);
var fileSize = stream.Length; var fileSize = stream.Length;
var hash = await HashFileAsync(stream, fileSize: fileSize); var hash = await HashFileAsync(stream, fileSize: fileSize);
@ -47,10 +75,25 @@ public class FileService(
AccountId = account.Id AccountId = account.Id
}; };
var existingFile = await db.Files.FirstOrDefaultAsync(f => f.Hash == hash);
file.StorageId = existingFile is not null ? existingFile.StorageId : file.Id;
if (existingFile is not null)
{
file.FileMeta = existingFile.FileMeta;
file.HasCompression = existingFile.HasCompression;
file.SensitiveMarks = existingFile.SensitiveMarks;
db.Files.Add(file);
await db.SaveChangesAsync();
return file;
}
switch (contentType.Split('/')[0]) switch (contentType.Split('/')[0])
{ {
case "image": case "image":
var blurhash = BlurHashSharp.SkiaSharp.BlurHashEncoder.Encode(xComponent: 3, yComponent: 3, filename: ogFilePath); var blurhash =
BlurHashSharp.SkiaSharp.BlurHashEncoder.Encode(xComponent: 3, yComponent: 3, filename: ogFilePath);
// Rewind stream // Rewind stream
stream.Position = 0; stream.Position = 0;
@ -130,7 +173,7 @@ public class FileService(
if (contentType.Split('/')[0] == "image") if (contentType.Split('/')[0] == "image")
{ {
file.MimeType = "image/webp"; file.MimeType = "image/webp";
using var vipsImage = NetVips.Image.NewFromFile(ogFilePath); using var vipsImage = NetVips.Image.NewFromFile(ogFilePath);
var imagePath = Path.Join(Path.GetTempPath(), $"{TempFilePrefix}#{file.Id}"); var imagePath = Path.Join(Path.GetTempPath(), $"{TempFilePrefix}#{file.Id}");
vipsImage.WriteToFile(imagePath + ".webp"); vipsImage.WriteToFile(imagePath + ".webp");
@ -278,7 +321,14 @@ public class FileService(
public async Task DeleteFileDataAsync(CloudFile file) public async Task DeleteFileDataAsync(CloudFile file)
{ {
if (file.StorageId is null) return;
if (file.UploadedTo is null) return; if (file.UploadedTo is null) return;
var repeatedStorageId = await db.Files
.Where(f => f.StorageId == file.StorageId && f.Id != file.Id && f.UsedCount > 0)
.AnyAsync();
if (repeatedStorageId) return;
var dest = GetRemoteStorageConfig(file.UploadedTo); var dest = GetRemoteStorageConfig(file.UploadedTo);
var client = CreateMinioClient(dest); var client = CreateMinioClient(dest);
if (client is null) if (client is null)
@ -287,12 +337,27 @@ public class FileService(
); );
var bucket = dest.Bucket; var bucket = dest.Bucket;
var objectId = file.StorageId ?? file.Id; // Use StorageId if available, otherwise fall back to Id
await client.RemoveObjectAsync( await client.RemoveObjectAsync(
new RemoveObjectArgs().WithBucket(bucket).WithObject(file.Id) new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId)
); );
db.Remove(file); if (file.HasCompression)
await db.SaveChangesAsync(); {
// Also remove the compressed version if it exists
try
{
await client.RemoveObjectAsync(
new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId + ".compressed")
);
}
catch
{
// Ignore errors when deleting compressed version
logger.LogWarning("Failed to delete compressed version of file {fileId}", file.Id);
}
}
} }
public RemoteStorageConfig GetRemoteStorageConfig(string destination) public RemoteStorageConfig GetRemoteStorageConfig(string destination)
@ -345,20 +410,88 @@ public class CloudFileUnusedRecyclingJob(AppDatabase db, FileService fs, ILogger
var cutoff = SystemClock.Instance.GetCurrentInstant() - Duration.FromHours(1); var cutoff = SystemClock.Instance.GetCurrentInstant() - Duration.FromHours(1);
var now = SystemClock.Instance.GetCurrentInstant(); var now = SystemClock.Instance.GetCurrentInstant();
var files = db.Files
// Get files to delete along with their storage IDs
var files = await db.Files
.Where(f => .Where(f =>
(f.ExpiredAt == null && f.UsedCount == 0 && f.CreatedAt < cutoff) || (f.ExpiredAt == null && f.UsedCount == 0 && f.CreatedAt < cutoff) ||
(f.ExpiredAt != null && f.ExpiredAt >= now) (f.ExpiredAt != null && f.ExpiredAt >= now)
) )
.ToListAsync();
if (files.Count == 0)
{
logger.LogInformation("No files to delete");
return;
}
logger.LogInformation($"Found {files.Count} files to process...");
// Group files by StorageId and find which ones are safe to delete
var storageIds = files.Where(f => f.StorageId != null)
.Select(f => f.StorageId!)
.Distinct()
.ToList(); .ToList();
logger.LogInformation($"Deleting {files.Count} unused cloud files..."); var usedStorageIds = await db.Files
.Where(f => f.StorageId != null &&
storageIds.Contains(f.StorageId) &&
!files.Select(ff => ff.Id).Contains(f.Id))
.Select(f => f.StorageId!)
.Distinct()
.ToListAsync();
var tasks = files.Select(fs.DeleteFileDataAsync); // Group files for deletion
await Task.WhenAll(tasks); var filesToDelete = files.Where(f => f.StorageId == null || !usedStorageIds.Contains(f.StorageId))
.GroupBy(f => f.UploadedTo)
.ToDictionary(grouping => grouping.Key!, grouping => grouping.ToList());
// Delete files by remote storage
foreach (var group in filesToDelete)
{
if (string.IsNullOrEmpty(group.Key)) continue;
try
{
var dest = fs.GetRemoteStorageConfig(group.Key);
var client = fs.CreateMinioClient(dest);
if (client == null) continue;
// Create delete tasks for each file in the group
var deleteTasks = group.Value.Select(file =>
{
var objectId = file.StorageId ?? file.Id;
var tasks = new List<Task>
{
client.RemoveObjectAsync(new RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId))
};
if (file.HasCompression)
{
tasks.Add(client.RemoveObjectAsync(new RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId + ".compressed")));
}
return Task.WhenAll(tasks);
});
await Task.WhenAll(deleteTasks);
}
catch (Exception ex)
{
logger.LogError(ex, "Error deleting files from remote storage {remote}", group.Key);
}
}
// Delete all file records from the database
var fileIds = files.Select(f => f.Id).ToList();
await db.Files await db.Files
.Where(f => f.UsedCount == 0 && f.CreatedAt < cutoff) .Where(f => fileIds.Contains(f.Id))
.ExecuteDeleteAsync(); .ExecuteDeleteAsync();
logger.LogInformation($"Completed deleting {files.Count} files");
} }
} }

View File

@ -10,7 +10,15 @@ public class MessageReadReceiptFlushHandler(IServiceProvider serviceProvider) :
{ {
public async Task FlushAsync(IReadOnlyList<MessageReadReceipt> items) public async Task FlushAsync(IReadOnlyList<MessageReadReceipt> items)
{ {
var distinctItems = items.DistinctBy(x => new { x.MessageId, x.SenderId }).ToList(); var distinctItems = items
.DistinctBy(x => new { x.MessageId, x.SenderId })
.Select(x =>
{
x.CreatedAt = SystemClock.Instance.GetCurrentInstant();
x.UpdatedAt = x.CreatedAt;
return x;
})
.ToList();
using var scope = serviceProvider.CreateScope(); using var scope = serviceProvider.CreateScope();
var db = scope.ServiceProvider.GetRequiredService<AppDatabase>(); await db.BulkInsertAsync(distinctItems); var db = scope.ServiceProvider.GetRequiredService<AppDatabase>(); await db.BulkInsertAsync(distinctItems);