No longer save files with the same hash

LittleSheep 2025-05-18 16:52:00 +08:00
parent 18fde9f16c
commit 5b9b28d77a
9 changed files with 185 additions and 30 deletions
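
In short: when a new upload's hash matches an existing file, the service no longer writes a second copy to storage. The new CloudFile record reuses the existing file's StorageId, inherits its already-computed metadata, and skips analysis and upload entirely. Condensed from the FileService hunk below (not a verbatim excerpt):

var existingFile = await db.Files.FirstOrDefaultAsync(f => f.Hash == hash);
file.StorageId = existingFile is not null ? existingFile.StorageId : file.Id;
if (existingFile is not null)
{
    // Same bytes already stored: share the object, copy derived metadata, done.
    file.FileMeta = existingFile.FileMeta;
    file.HasCompression = existingFile.HasCompression;
    file.SensitiveMarks = existingFile.SensitiveMarks;
    db.Files.Add(file);
    await db.SaveChangesAsync();
    return file;
}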

View File

@@ -63,9 +63,9 @@ public class Profile : ModelBase
(Experience - Leveling.ExperiencePerLevel[Level]) * 100.0 /
(Leveling.ExperiencePerLevel[Level + 1] - Leveling.ExperiencePerLevel[Level]);
public string? PictureId { get; set; }
[MaxLength(32)] public string? PictureId { get; set; }
public Storage.CloudFile? Picture { get; set; }
public string? BackgroundId { get; set; }
[MaxLength(32)] public string? BackgroundId { get; set; }
public Storage.CloudFile? Background { get; set; }
public Guid AccountId { get; set; }

View File

@@ -166,8 +166,8 @@ public class AccountController(
[MaxLength(256)] public string? LastName { get; set; }
[MaxLength(4096)] public string? Bio { get; set; }
public string? PictureId { get; set; }
public string? BackgroundId { get; set; }
[MaxLength(32)] public string? PictureId { get; set; }
[MaxLength(32)] public string? BackgroundId { get; set; }
}
[Authorize]

View File

@@ -20,9 +20,9 @@ public class ChatRoom : ModelBase
public ChatRoomType Type { get; set; }
public bool IsPublic { get; set; }
public string? PictureId { get; set; }
[MaxLength(32)] public string? PictureId { get; set; }
public CloudFile? Picture { get; set; }
public string? BackgroundId { get; set; }
[MaxLength(32)] public string? BackgroundId { get; set; }
public CloudFile? Background { get; set; }
[JsonIgnore] public ICollection<ChatMember> Members { get; set; } = new List<ChatMember>();

View File

@@ -124,8 +124,8 @@ public class ChatRoomController(
{
[Required] [MaxLength(1024)] public string? Name { get; set; }
[MaxLength(4096)] public string? Description { get; set; }
public string? PictureId { get; set; }
public string? BackgroundId { get; set; }
[MaxLength(32)] public string? PictureId { get; set; }
[MaxLength(32)] public string? BackgroundId { get; set; }
public Guid? RealmId { get; set; }
}

View File

@@ -19,9 +19,9 @@ public class Realm : ModelBase
public bool IsCommunity { get; set; }
public bool IsPublic { get; set; }
public string? PictureId { get; set; }
[MaxLength(32)] public string? PictureId { get; set; }
public CloudFile? Picture { get; set; }
public string? BackgroundId { get; set; }
[MaxLength(32)] public string? BackgroundId { get; set; }
public CloudFile? Background { get; set; }
[JsonIgnore] public ICollection<RealmMember> Members { get; set; } = new List<RealmMember>();

View File

@@ -22,12 +22,13 @@ public class RemoteStorageConfig
public class CloudFile : ModelBase
{
[MaxLength(128)] public string Id { get; set; } = Guid.NewGuid().ToString();
/// The id generated by tus; essentially a UUID with the dashes removed
[MaxLength(32)] public string Id { get; set; } = Guid.NewGuid().ToString("N"); // "N" = 32 hex digits, fits MaxLength(32)
[MaxLength(1024)] public string Name { get; set; } = string.Empty;
[MaxLength(4096)] public string? Description { get; set; }
[Column(TypeName = "jsonb")] public Dictionary<string, object>? FileMeta { get; set; } = null!;
[Column(TypeName = "jsonb")] public Dictionary<string, object>? UserMeta { get; set; } = null!;
[Column(TypeName = "jsonb")] List<CloudFileSensitiveMark> SensitiveMarks { get; set; } = new();
[Column(TypeName = "jsonb")] public List<CloudFileSensitiveMark> SensitiveMarks { get; set; } = new();
[MaxLength(256)] public string? MimeType { get; set; }
[MaxLength(256)] public string? Hash { get; set; }
public long Size { get; set; }
@@ -35,9 +36,20 @@ public class CloudFile : ModelBase
public Instant? ExpiredAt { get; set; }
[MaxLength(128)] public string? UploadedTo { get; set; }
public bool HasCompression { get; set; } = false;
/// The object name under which the file is stored remotely;
/// multiple cloud files may share the same storage id, indicating they are the same underlying file.
///
/// If the storage id is null while the uploaded-at is not, the record is an embedding file:
/// a file stored on another site, or a webpage (based on the mime type).
[MaxLength(32)] public string? StorageId { get; set; }
/// Indicates the off-site accessible url of the file;
/// this field should be null when the storage id is filled.
[MaxLength(4096)] public string? StorageUrl { get; set; }
// Metrics
// When this used count keep zero, it means it's not used by anybody, so it can be recycled
/// Metrics
/// While this used count stays at zero, the file is not used by anybody and can be recycled.
public int UsedCount { get; set; } = 0;
[JsonIgnore] public Account.Account Account { get; set; } = null!;
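
Taken together, the new fields encode a small decision tree for where a file's bytes live. A hedged restatement (the helper name and return strings are illustrative, not from this commit):

static string DescribeLocation(CloudFile file)
{
    // Off-site (embedding) file or webpage: served by redirect.
    if (file.StorageUrl is not null) return $"off-site: {file.StorageUrl}";
    // Uploaded to remote storage: the object is named by StorageId,
    // which several CloudFile records may share.
    if (file.UploadedTo is not null) return $"remote object {file.StorageId} at {file.UploadedTo}";
    // Otherwise the bytes still sit in the local tus store.
    return "local tus store";
}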

View File

@@ -17,9 +17,11 @@ public class FileController(
[HttpGet("{id}")]
public async Task<ActionResult> OpenFile(string id, [FromQuery] bool original = false)
{
var file = await db.Files.FindAsync(id);
var file = await fs.GetFileAsync(id);
if (file is null) return NotFound();
if (file.StorageUrl is not null) return Redirect(file.StorageUrl);
if (file.UploadedTo is null)
{
var tusStorePath = configuration.GetValue<string>("Tus:StorePath")!;
@@ -29,7 +31,7 @@ public class FileController(
}
var dest = fs.GetRemoteStorageConfig(file.UploadedTo);
var fileName = file.Id;
var fileName = file.StorageId;
if (!original && file.HasCompression)
{

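The hunk above is cut off mid-branch, so the naming convention the controller now follows is worth spelling out. A sketch, not a verbatim excerpt (the .compressed suffix comes from the FileService changes below):

var objectName = file.StorageId ?? file.Id;
if (!original && file.HasCompression)
    objectName += ".compressed"; // the compressed variant shares the storage id
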
View File

@@ -2,6 +2,7 @@ using System.Globalization;
using FFMpegCore;
using System.Security.Cryptography;
using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Caching.Memory;
using Minio;
using Minio.DataModel.Args;
using NodaTime;
@@ -15,9 +16,36 @@ public class FileService(
IConfiguration configuration,
TusDiskStore store,
ILogger<FileService> logger,
IServiceScopeFactory scopeFactory
IServiceScopeFactory scopeFactory,
IMemoryCache cache
)
{
private const string CacheKeyPrefix = "cloudfile_";
private static readonly TimeSpan CacheDuration = TimeSpan.FromMinutes(15);
/// <summary>
/// Gets the file metadata, with caching; the primary use case is reading file data.
///
/// <b>This method does not load the uploader's information; it keeps only minimal file metadata.</b>
/// </summary>
/// <param name="fileId">The id of the cloud file requested</param>
/// <returns>The minimal file meta</returns>
public async Task<CloudFile?> GetFileAsync(string fileId)
{
var cacheKey = $"{CacheKeyPrefix}{fileId}";
if (cache.TryGetValue(cacheKey, out CloudFile? cachedFile))
return cachedFile;
var file = await db.Files.FirstOrDefaultAsync(f => f.Id == fileId);
if (file != null)
cache.Set(cacheKey, file, CacheDuration);
return file;
}
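// Editorial note, not part of this commit: cached entries live for up to
// CacheDuration (15 minutes), so metadata updates can serve stale reads until
// the entry expires. A hypothetical eviction helper using the same key scheme:
// public void InvalidateCache(string fileId) => cache.Remove($"{CacheKeyPrefix}{fileId}");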
private static readonly string TempFilePrefix = "dyn-cloudfile";
// The file analysis method will no longer remove the GPS EXIF data
@@ -31,7 +59,7 @@ public class FileService(
)
{
var result = new List<(string filePath, string suffix)>();
var ogFilePath = Path.Join(configuration.GetValue<string>("Tus:StorePath"), fileId);
var fileSize = stream.Length;
var hash = await HashFileAsync(stream, fileSize: fileSize);
@@ -47,10 +75,25 @@ public class FileService(
AccountId = account.Id
};
var existingFile = await db.Files.FirstOrDefaultAsync(f => f.Hash == hash);
file.StorageId = existingFile is not null ? existingFile.StorageId : file.Id;
if (existingFile is not null)
{
file.FileMeta = existingFile.FileMeta;
file.HasCompression = existingFile.HasCompression;
file.SensitiveMarks = existingFile.SensitiveMarks;
db.Files.Add(file);
await db.SaveChangesAsync();
return file;
}
switch (contentType.Split('/')[0])
{
case "image":
var blurhash = BlurHashSharp.SkiaSharp.BlurHashEncoder.Encode(xComponent: 3, yComponent: 3, filename: ogFilePath);
var blurhash =
BlurHashSharp.SkiaSharp.BlurHashEncoder.Encode(xComponent: 3, yComponent: 3, filename: ogFilePath);
// Rewind stream
stream.Position = 0;
@@ -130,7 +173,7 @@ public class FileService(
if (contentType.Split('/')[0] == "image")
{
file.MimeType = "image/webp";
using var vipsImage = NetVips.Image.NewFromFile(ogFilePath);
var imagePath = Path.Join(Path.GetTempPath(), $"{TempFilePrefix}#{file.Id}");
vipsImage.WriteToFile(imagePath + ".webp");
@@ -278,7 +321,14 @@ public class FileService(
public async Task DeleteFileDataAsync(CloudFile file)
{
if (file.StorageId is null) return;
if (file.UploadedTo is null) return;
var repeatedStorageId = await db.Files
.Where(f => f.StorageId == file.StorageId && f.Id != file.Id && f.UsedCount > 0)
.AnyAsync();
if (repeatedStorageId) return;
var dest = GetRemoteStorageConfig(file.UploadedTo);
var client = CreateMinioClient(dest);
if (client is null)
@@ -287,12 +337,27 @@ public class FileService(
);
var bucket = dest.Bucket;
var objectId = file.StorageId ?? file.Id; // Use StorageId if available, otherwise fall back to Id
await client.RemoveObjectAsync(
new RemoveObjectArgs().WithBucket(bucket).WithObject(file.Id)
new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId)
);
db.Remove(file);
await db.SaveChangesAsync();
if (file.HasCompression)
{
// Also remove the compressed version if it exists
try
{
await client.RemoveObjectAsync(
new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId + ".compressed")
);
}
catch
{
// Ignore errors when deleting compressed version
logger.LogWarning("Failed to delete compressed version of file {fileId}", file.Id);
}
}
}
public RemoteStorageConfig GetRemoteStorageConfig(string destination)
@@ -345,20 +410,88 @@ public class CloudFileUnusedRecyclingJob(AppDatabase db, FileService fs, ILogger
var cutoff = SystemClock.Instance.GetCurrentInstant() - Duration.FromHours(1);
var now = SystemClock.Instance.GetCurrentInstant();
var files = db.Files
// Get files to delete along with their storage IDs
var files = await db.Files
.Where(f =>
(f.ExpiredAt == null && f.UsedCount == 0 && f.CreatedAt < cutoff) ||
(f.ExpiredAt != null && f.ExpiredAt <= now) // already expired
)
.ToListAsync();
if (files.Count == 0)
{
logger.LogInformation("No files to delete");
return;
}
logger.LogInformation($"Found {files.Count} files to process...");
// Group files by StorageId and find which ones are safe to delete
var storageIds = files.Where(f => f.StorageId != null)
.Select(f => f.StorageId!)
.Distinct()
.ToList();
logger.LogInformation($"Deleting {files.Count} unused cloud files...");
var usedStorageIds = await db.Files
.Where(f => f.StorageId != null &&
storageIds.Contains(f.StorageId) &&
!files.Select(ff => ff.Id).Contains(f.Id))
.Select(f => f.StorageId!)
.Distinct()
.ToListAsync();
var tasks = files.Select(fs.DeleteFileDataAsync);
await Task.WhenAll(tasks);
// Group files for deletion
var filesToDelete = files.Where(f => f.StorageId == null || !usedStorageIds.Contains(f.StorageId))
.Where(f => f.UploadedTo != null) // a null key would make ToDictionary throw; local-only files have no remote object anyway
.GroupBy(f => f.UploadedTo)
.ToDictionary(grouping => grouping.Key!, grouping => grouping.ToList());
// Delete files by remote storage
foreach (var group in filesToDelete)
{
if (string.IsNullOrEmpty(group.Key)) continue;
try
{
var dest = fs.GetRemoteStorageConfig(group.Key);
var client = fs.CreateMinioClient(dest);
if (client == null) continue;
// Create delete tasks for each file in the group
var deleteTasks = group.Value.Select(file =>
{
var objectId = file.StorageId ?? file.Id;
var tasks = new List<Task>
{
client.RemoveObjectAsync(new RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId))
};
if (file.HasCompression)
{
tasks.Add(client.RemoveObjectAsync(new RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId + ".compressed")));
}
return Task.WhenAll(tasks);
});
await Task.WhenAll(deleteTasks);
}
catch (Exception ex)
{
logger.LogError(ex, "Error deleting files from remote storage {remote}", group.Key);
}
}
// Delete all file records from the database
var fileIds = files.Select(f => f.Id).ToList();
await db.Files
.Where(f => f.UsedCount == 0 && f.CreatedAt < cutoff)
.Where(f => fileIds.Contains(f.Id))
.ExecuteDeleteAsync();
logger.LogInformation($"Completed deleting {files.Count} files");
}
}
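
The crux of the delete path is the safety check: a bucket object is removed only when no other live record points at the same StorageId. Restated as a standalone predicate (condensed from DeleteFileDataAsync above; the helper name is illustrative):

static async Task<bool> StorageIdStillInUseAsync(AppDatabase db, CloudFile file) =>
    await db.Files
        .Where(f => f.StorageId == file.StorageId && f.Id != file.Id && f.UsedCount > 0)
        .AnyAsync();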

View File

@@ -10,7 +10,15 @@ public class MessageReadReceiptFlushHandler(IServiceProvider serviceProvider) :
{
public async Task FlushAsync(IReadOnlyList<MessageReadReceipt> items)
{
var distinctItems = items.DistinctBy(x => new { x.MessageId, x.SenderId }).ToList();
var distinctItems = items
.DistinctBy(x => new { x.MessageId, x.SenderId })
.Select(x =>
{
x.CreatedAt = SystemClock.Instance.GetCurrentInstant();
x.UpdatedAt = x.CreatedAt;
return x;
})
.ToList();
using var scope = serviceProvider.CreateScope();
var db = scope.ServiceProvider.GetRequiredService<AppDatabase>();
await db.BulkInsertAsync(distinctItems);
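
A plausible reason for stamping CreatedAt and UpdatedAt by hand (an assumption; the commit does not say): bulk-insert extensions typically bypass the DbContext's SaveChanges pipeline, so any automatic timestamping done there never runs. The shape of the hook being bypassed might look like:

// Hypothetical sketch of the SaveChanges-time stamping a bulk insert skips;
// the field names follow the ModelBase columns used elsewhere in this diff.
public override Task<int> SaveChangesAsync(CancellationToken ct = default)
{
    var now = SystemClock.Instance.GetCurrentInstant();
    foreach (var entry in ChangeTracker.Entries<ModelBase>())
    {
        if (entry.State == EntityState.Added) entry.Entity.CreatedAt = now;
        if (entry.State is EntityState.Added or EntityState.Modified)
            entry.Entity.UpdatedAt = now;
    }
    return base.SaveChangesAsync(ct);
}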