💥 ♻️ Refactor cloud files' references, and loading system

This commit is contained in:
2025-06-01 19:18:23 +08:00
parent 02ae634690
commit 00229fd406
32 changed files with 5204 additions and 582 deletions

View File

@ -215,6 +215,7 @@ public class CacheServiceRedis : ICacheService
public async Task<bool> SetAsync<T>(string key, T value, TimeSpan? expiry = null)
{
key = $"{GlobalKeyPrefix}{key}";
if (string.IsNullOrEmpty(key))
throw new ArgumentException("Key cannot be null or empty", nameof(key));
@ -224,6 +225,7 @@ public class CacheServiceRedis : ICacheService
public async Task<T?> GetAsync<T>(string key)
{
key = $"{GlobalKeyPrefix}{key}";
if (string.IsNullOrEmpty(key))
throw new ArgumentException("Key cannot be null or empty", nameof(key));
@ -238,6 +240,7 @@ public class CacheServiceRedis : ICacheService
public async Task<(bool found, T? value)> GetAsyncWithStatus<T>(string key)
{
key = $"{GlobalKeyPrefix}{key}";
if (string.IsNullOrEmpty(key))
throw new ArgumentException("Key cannot be null or empty", nameof(key));
@ -252,6 +255,7 @@ public class CacheServiceRedis : ICacheService
public async Task<bool> RemoveAsync(string key)
{
key = $"{GlobalKeyPrefix}{key}";
if (string.IsNullOrEmpty(key))
throw new ArgumentException("Key cannot be null or empty", nameof(key));
@ -281,6 +285,7 @@ public class CacheServiceRedis : ICacheService
throw new ArgumentException(@"Group cannot be null or empty.", nameof(group));
var groupKey = $"{GroupKeyPrefix}{group}";
key = $"{GlobalKeyPrefix}{key}";
await _database.SetAddAsync(groupKey, key);
}
@ -319,6 +324,7 @@ public class CacheServiceRedis : ICacheService
public async Task<bool> SetWithGroupsAsync<T>(string key, T value, IEnumerable<string>? groups = null,
TimeSpan? expiry = null)
{
key = $"{GlobalKeyPrefix}{key}";
// First, set the value in the cache
var setResult = await SetAsync(key, value, expiry);

View File

@ -20,10 +20,28 @@ public class RemoteStorageConfig
public string? AccessProxy { get; set; }
}
public class CloudFile : ModelBase
/// <summary>
/// The class that used in jsonb columns which referenced the cloud file.
/// The aim of this class is to store some properties that won't change to a file to reduce the database load.
/// </summary>
public class CloudFileReferenceObject : ICloudFile
{
public string Id { get; set; } = null!;
public string Name { get; set; } = string.Empty;
public Dictionary<string, object>? FileMeta { get; set; } = null!;
public Dictionary<string, object>? UserMeta { get; set; } = null!;
public string? MimeType { get; set; }
public string? Hash { get; set; }
public long Size { get; set; }
public bool HasCompression { get; set; } = false;
}
public class CloudFile : ModelBase, ICloudFile, IIdentifiedResource
{
/// The id generated by TuS, basically just UUID remove the dash lines
[MaxLength(32)] public string Id { get; set; } = Guid.NewGuid().ToString();
[MaxLength(32)]
public string Id { get; set; } = Guid.NewGuid().ToString();
[MaxLength(1024)] public string Name { get; set; } = string.Empty;
[MaxLength(4096)] public string? Description { get; set; }
[Column(TypeName = "jsonb")] public Dictionary<string, object>? FileMeta { get; set; } = null!;
@ -33,29 +51,42 @@ public class CloudFile : ModelBase
[MaxLength(256)] public string? Hash { get; set; }
public long Size { get; set; }
public Instant? UploadedAt { get; set; }
public Instant? ExpiredAt { get; set; }
[MaxLength(128)] public string? UploadedTo { get; set; }
public bool HasCompression { get; set; }= false;
public bool HasCompression { get; set; } = false;
/// The object name which stored remotely,
/// multiple cloud file may have same storage id to indicate they are the same file
///
/// If the storage id was null and the uploaded at is not null, means it is an embedding file,
/// The embedding file means the file is store on another site,
/// or it is a webpage (based on mimetype)
[MaxLength(32)] public string? StorageId { get; set; }
[MaxLength(32)]
public string? StorageId { get; set; }
/// This field should be null when the storage id is filled
/// Indicates the off-site accessible url of the file
[MaxLength(4096)] public string? StorageUrl { get; set; }
/// Metrics
/// When this used count keep zero, it means it's not used by anybody, so it can be recycled
public int UsedCount { get; set; } = 0;
/// An optional package identifier that indicates the cloud file's usage
[MaxLength(1024)] public string? Usage { get; set; }
[MaxLength(4096)]
public string? StorageUrl { get; set; }
[JsonIgnore] public Account.Account Account { get; set; } = null!;
public Guid AccountId { get; set; }
public CloudFileReferenceObject ToReferenceObject()
{
return new CloudFileReferenceObject
{
Id = Id,
Name = Name,
FileMeta = FileMeta,
UserMeta = UserMeta,
MimeType = MimeType,
Hash = Hash,
Size = Size,
HasCompression = HasCompression
};
}
public string ResourceIdentifier => $"file/{Id}";
}
public enum CloudFileSensitiveMark
@ -73,4 +104,18 @@ public enum CloudFileSensitiveMark
SelfHarm,
ChildAbuse,
Other
}
public class CloudFileReference : ModelBase
{
public Guid Id { get; set; } = Guid.NewGuid();
[MaxLength(32)] public string FileId { get; set; } = null!;
public CloudFile File { get; set; } = null!;
[MaxLength(1024)] public string Usage { get; set; } = null!;
[MaxLength(1024)] public string ResourceId { get; set; } = null!;
/// <summary>
/// Optional expiration date for the file reference
/// </summary>
public Instant? ExpiredAt { get; set; }
}

View File

@ -0,0 +1,127 @@
using Microsoft.EntityFrameworkCore;
using NodaTime;
using Quartz;
namespace DysonNetwork.Sphere.Storage;
public class CloudFileUnusedRecyclingJob(
AppDatabase db,
FileService fs,
FileReferenceService fileRefService,
ILogger<CloudFileUnusedRecyclingJob> logger
)
: IJob
{
public async Task Execute(IJobExecutionContext context)
{
logger.LogInformation("Deleting unused cloud files...");
var cutoff = SystemClock.Instance.GetCurrentInstant() - Duration.FromHours(1);
var now = SystemClock.Instance.GetCurrentInstant();
// Get files that are either expired or created more than an hour ago
var fileIds = await db.Files
.Select(f => f.Id)
.ToListAsync();
// Filter to only include files that have no references or all references have expired
var deletionPlan = new List<string>();
foreach (var batch in fileIds.Chunk(100)) // Process in batches to avoid excessive query size
{
var references = await fileRefService.GetReferencesAsync(batch);
deletionPlan.AddRange(from refer in references
where refer.Value.Count == 0 || refer.Value.All(r => r.ExpiredAt != null && now >= r.ExpiredAt)
select refer.Key);
}
if (deletionPlan.Count == 0)
{
logger.LogInformation("No files to delete");
return;
}
// Get the actual file objects for the files to be deleted
var files = await db.Files
.Where(f => deletionPlan.Contains(f.Id))
.ToListAsync();
logger.LogInformation($"Found {files.Count} files to delete...");
// Group files by StorageId and find which ones are safe to delete
var storageIds = files.Where(f => f.StorageId != null)
.Select(f => f.StorageId!)
.Distinct()
.ToList();
// Check if any other files with the same storage IDs are referenced
var usedStorageIds = new List<string>();
var filesWithSameStorageId = await db.Files
.Where(f => f.StorageId != null &&
storageIds.Contains(f.StorageId) &&
!files.Select(ff => ff.Id).Contains(f.Id))
.ToListAsync();
foreach (var file in filesWithSameStorageId)
{
// Get all references for the file
var references = await fileRefService.GetReferencesAsync(file.Id);
// Check if file has active references (non-expired)
if (references.Any(r => r.ExpiredAt == null || r.ExpiredAt > now) && file.StorageId != null)
{
usedStorageIds.Add(file.StorageId);
}
}
// Group files for deletion
var filesToDelete = files.Where(f => f.StorageId == null || !usedStorageIds.Contains(f.StorageId))
.GroupBy(f => f.UploadedTo)
.ToDictionary(grouping => grouping.Key!, grouping => grouping.ToList());
// Delete files by remote storage
foreach (var group in filesToDelete.Where(group => !string.IsNullOrEmpty(group.Key)))
{
try
{
var dest = fs.GetRemoteStorageConfig(group.Key);
var client = fs.CreateMinioClient(dest);
if (client == null) continue;
// Create delete tasks for each file in the group
var deleteTasks = group.Value.Select(file =>
{
var objectId = file.StorageId ?? file.Id;
var tasks = new List<Task>
{
client.RemoveObjectAsync(new Minio.DataModel.Args.RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId))
};
if (file.HasCompression)
{
tasks.Add(client.RemoveObjectAsync(new Minio.DataModel.Args.RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId + ".compressed")));
}
return Task.WhenAll(tasks);
});
await Task.WhenAll(deleteTasks);
}
catch (Exception ex)
{
logger.LogError(ex, "Error deleting files from remote storage {remote}", group.Key);
}
}
// Delete all file records from the database
var fileIdsToDelete = files.Select(f => f.Id).ToList();
await db.Files
.Where(f => fileIdsToDelete.Contains(f.Id))
.ExecuteDeleteAsync();
logger.LogInformation($"Completed deleting {files.Count} files");
}
}

View File

@ -0,0 +1,66 @@
using Microsoft.EntityFrameworkCore;
using NodaTime;
using Quartz;
namespace DysonNetwork.Sphere.Storage;
/// <summary>
/// Job responsible for cleaning up expired file references
/// </summary>
public class FileExpirationJob(AppDatabase db, FileService fileService, ILogger<FileExpirationJob> logger) : IJob
{
public async Task Execute(IJobExecutionContext context)
{
var now = SystemClock.Instance.GetCurrentInstant();
logger.LogInformation("Running file reference expiration job at {now}", now);
// Find all expired references
var expiredReferences = await db.FileReferences
.Where(r => r.ExpiredAt < now && r.ExpiredAt != null)
.ToListAsync();
if (!expiredReferences.Any())
{
logger.LogInformation("No expired file references found");
return;
}
logger.LogInformation("Found {count} expired file references", expiredReferences.Count);
// Get unique file IDs
var fileIds = expiredReferences.Select(r => r.FileId).Distinct().ToList();
var filesAndReferenceCount = new Dictionary<string, int>();
// Delete expired references
db.FileReferences.RemoveRange(expiredReferences);
await db.SaveChangesAsync();
// Check remaining references for each file
foreach (var fileId in fileIds)
{
var remainingReferences = await db.FileReferences
.Where(r => r.FileId == fileId)
.CountAsync();
filesAndReferenceCount[fileId] = remainingReferences;
// If no references remain, delete the file
if (remainingReferences == 0)
{
var file = await db.Files.FirstOrDefaultAsync(f => f.Id == fileId);
if (file != null)
{
logger.LogInformation("Deleting file {fileId} as all references have expired", fileId);
await fileService.DeleteFileAsync(file);
}
}
else
{
// Just purge the cache
await fileService._PurgeCacheAsync(fileId);
}
}
logger.LogInformation("Completed file reference expiration job");
}
}

View File

@ -0,0 +1,407 @@
using Microsoft.EntityFrameworkCore;
using NodaTime;
namespace DysonNetwork.Sphere.Storage;
public class FileReferenceService(AppDatabase db, FileService fileService, ICacheService cache)
{
private const string CacheKeyPrefix = "fileref:";
private static readonly TimeSpan CacheDuration = TimeSpan.FromMinutes(15);
/// <summary>
/// Creates a new reference to a file for a specific resource
/// </summary>
/// <param name="fileId">The ID of the file to reference</param>
/// <param name="usage">The usage context (e.g., "avatar", "post-attachment")</param>
/// <param name="resourceId">The ID of the resource using the file</param>
/// <param name="expiredAt">Optional expiration time for the file</param>
/// <param name="duration">Optional duration after which the file expires (alternative to expiredAt)</param>
/// <returns>The created file reference</returns>
public async Task<CloudFileReference> CreateReferenceAsync(
string fileId,
string usage,
string resourceId,
Instant? expiredAt = null,
Duration? duration = null)
{
// Calculate expiration time if needed
Instant? finalExpiration = expiredAt;
if (duration.HasValue)
{
finalExpiration = SystemClock.Instance.GetCurrentInstant() + duration.Value;
}
var reference = new CloudFileReference
{
FileId = fileId,
Usage = usage,
ResourceId = resourceId,
ExpiredAt = finalExpiration
};
db.FileReferences.Add(reference);
await db.SaveChangesAsync();
// Purge cache for the file since its usage count has effectively changed
await fileService._PurgeCacheAsync(fileId);
return reference;
}
/// <summary>
/// Gets all references to a file
/// </summary>
/// <param name="fileId">The ID of the file</param>
/// <returns>A list of all references to the file</returns>
public async Task<List<CloudFileReference>> GetReferencesAsync(string fileId)
{
var cacheKey = $"{CacheKeyPrefix}list:{fileId}";
var cachedReferences = await cache.GetAsync<List<CloudFileReference>>(cacheKey);
if (cachedReferences is not null)
return cachedReferences;
var references = await db.FileReferences
.Where(r => r.FileId == fileId)
.ToListAsync();
await cache.SetAsync(cacheKey, references, CacheDuration);
return references;
}
public async Task<Dictionary<string, List<CloudFileReference>>> GetReferencesAsync(IEnumerable<string> fileId)
{
var references = await db.FileReferences
.Where(r => fileId.Contains(r.FileId))
.GroupBy(r => r.FileId)
.ToDictionaryAsync(r => r.Key, r => r.ToList());
return references;
}
/// <summary>
/// Gets the number of references to a file
/// </summary>
/// <param name="fileId">The ID of the file</param>
/// <returns>The number of references to the file</returns>
public async Task<int> GetReferenceCountAsync(string fileId)
{
var cacheKey = $"{CacheKeyPrefix}count:{fileId}";
var cachedCount = await cache.GetAsync<int?>(cacheKey);
if (cachedCount.HasValue)
return cachedCount.Value;
var count = await db.FileReferences
.Where(r => r.FileId == fileId)
.CountAsync();
await cache.SetAsync(cacheKey, count, CacheDuration);
return count;
}
/// <summary>
/// Gets all references for a specific resource
/// </summary>
/// <param name="resourceId">The ID of the resource</param>
/// <returns>A list of file references associated with the resource</returns>
public async Task<List<CloudFileReference>> GetResourceReferencesAsync(string resourceId)
{
var cacheKey = $"{CacheKeyPrefix}resource:{resourceId}";
var cachedReferences = await cache.GetAsync<List<CloudFileReference>>(cacheKey);
if (cachedReferences is not null)
return cachedReferences;
var references = await db.FileReferences
.Where(r => r.ResourceId == resourceId)
.ToListAsync();
await cache.SetAsync(cacheKey, references, CacheDuration);
return references;
}
/// <summary>
/// Gets all file references for a specific usage context
/// </summary>
/// <param name="usage">The usage context</param>
/// <returns>A list of file references with the specified usage</returns>
public async Task<List<CloudFileReference>> GetUsageReferencesAsync(string usage)
{
return await db.FileReferences
.Where(r => r.Usage == usage)
.ToListAsync();
}
/// <summary>
/// Deletes references for a specific resource
/// </summary>
/// <param name="resourceId">The ID of the resource</param>
/// <returns>The number of deleted references</returns>
public async Task<int> DeleteResourceReferencesAsync(string resourceId)
{
var references = await db.FileReferences
.Where(r => r.ResourceId == resourceId)
.ToListAsync();
var fileIds = references.Select(r => r.FileId).Distinct().ToList();
db.FileReferences.RemoveRange(references);
var deletedCount = await db.SaveChangesAsync();
// Purge caches
var tasks = fileIds.Select(fileService._PurgeCacheAsync).ToList();
tasks.Add(PurgeCacheForResourceAsync(resourceId));
await Task.WhenAll(tasks);
return deletedCount;
}
/// <summary>
/// Deletes a specific file reference
/// </summary>
/// <param name="referenceId">The ID of the reference to delete</param>
/// <returns>True if the reference was deleted, false otherwise</returns>
public async Task<bool> DeleteReferenceAsync(Guid referenceId)
{
var reference = await db.FileReferences
.FirstOrDefaultAsync(r => r.Id == referenceId);
if (reference == null)
return false;
db.FileReferences.Remove(reference);
await db.SaveChangesAsync();
// Purge caches
await fileService._PurgeCacheAsync(reference.FileId);
await PurgeCacheForResourceAsync(reference.ResourceId);
await PurgeCacheForFileAsync(reference.FileId);
return true;
}
/// <summary>
/// Updates the files referenced by a resource
/// </summary>
/// <param name="resourceId">The ID of the resource</param>
/// <param name="newFileIds">The new list of file IDs</param>
/// <param name="usage">The usage context</param>
/// <param name="expiredAt">Optional expiration time for newly added files</param>
/// <param name="duration">Optional duration after which newly added files expire</param>
/// <returns>A list of the updated file references</returns>
public async Task<List<CloudFileReference>> UpdateResourceFilesAsync(
string resourceId,
IEnumerable<string>? newFileIds,
string usage,
Instant? expiredAt = null,
Duration? duration = null)
{
if (newFileIds == null)
return new List<CloudFileReference>();
var existingReferences = await db.FileReferences
.Where(r => r.ResourceId == resourceId && r.Usage == usage)
.ToListAsync();
var existingFileIds = existingReferences.Select(r => r.FileId).ToHashSet();
var newFileIdsList = newFileIds.ToList();
var newFileIdsSet = newFileIdsList.ToHashSet();
// Files to remove
var toRemove = existingReferences
.Where(r => !newFileIdsSet.Contains(r.FileId))
.ToList();
// Files to add
var toAdd = newFileIdsList
.Where(id => !existingFileIds.Contains(id))
.Select(id => new CloudFileReference
{
FileId = id,
Usage = usage,
ResourceId = resourceId
})
.ToList();
// Apply changes
if (toRemove.Any())
db.FileReferences.RemoveRange(toRemove);
if (toAdd.Any())
db.FileReferences.AddRange(toAdd);
await db.SaveChangesAsync();
// Update expiration for newly added references if specified
if ((expiredAt.HasValue || duration.HasValue) && toAdd.Any())
{
var finalExpiration = expiredAt;
if (duration.HasValue)
{
finalExpiration = SystemClock.Instance.GetCurrentInstant() + duration.Value;
}
// Update newly added references with the expiration time
var referenceIds = await db.FileReferences
.Where(r => toAdd.Select(a => a.FileId).Contains(r.FileId) &&
r.ResourceId == resourceId &&
r.Usage == usage)
.Select(r => r.Id)
.ToListAsync();
await db.FileReferences
.Where(r => referenceIds.Contains(r.Id))
.ExecuteUpdateAsync(setter => setter.SetProperty(
r => r.ExpiredAt,
_ => finalExpiration
));
}
// Purge caches
var allFileIds = existingFileIds.Union(newFileIdsSet).ToList();
var tasks = allFileIds.Select(fileService._PurgeCacheAsync).ToList();
tasks.Add(PurgeCacheForResourceAsync(resourceId));
await Task.WhenAll(tasks);
// Return updated references
return await db.FileReferences
.Where(r => r.ResourceId == resourceId && r.Usage == usage)
.ToListAsync();
}
/// <summary>
/// Gets all files referenced by a resource
/// </summary>
/// <param name="resourceId">The ID of the resource</param>
/// <param name="usage">Optional filter by usage context</param>
/// <returns>A list of files referenced by the resource</returns>
public async Task<List<CloudFile>> GetResourceFilesAsync(string resourceId, string? usage = null)
{
var query = db.FileReferences.Where(r => r.ResourceId == resourceId);
if (usage != null)
query = query.Where(r => r.Usage == usage);
var references = await query.ToListAsync();
var fileIds = references.Select(r => r.FileId).ToList();
return await db.Files
.Where(f => fileIds.Contains(f.Id))
.ToListAsync();
}
/// <summary>
/// Purges all caches related to a resource
/// </summary>
private async Task PurgeCacheForResourceAsync(string resourceId)
{
var cacheKey = $"{CacheKeyPrefix}resource:{resourceId}";
await cache.RemoveAsync(cacheKey);
}
/// <summary>
/// Purges all caches related to a file
/// </summary>
private async Task PurgeCacheForFileAsync(string fileId)
{
var cacheKeys = new[]
{
$"{CacheKeyPrefix}list:{fileId}",
$"{CacheKeyPrefix}count:{fileId}"
};
var tasks = cacheKeys.Select(cache.RemoveAsync);
await Task.WhenAll(tasks);
}
/// <summary>
/// Updates the expiration time for a file reference
/// </summary>
/// <param name="referenceId">The ID of the reference</param>
/// <param name="expiredAt">The new expiration time, or null to remove expiration</param>
/// <returns>True if the reference was found and updated, false otherwise</returns>
public async Task<bool> SetReferenceExpirationAsync(Guid referenceId, Instant? expiredAt)
{
var reference = await db.FileReferences
.FirstOrDefaultAsync(r => r.Id == referenceId);
if (reference == null)
return false;
reference.ExpiredAt = expiredAt;
await db.SaveChangesAsync();
await PurgeCacheForFileAsync(reference.FileId);
await PurgeCacheForResourceAsync(reference.ResourceId);
return true;
}
/// <summary>
/// Updates the expiration time for all references to a file
/// </summary>
/// <param name="fileId">The ID of the file</param>
/// <param name="expiredAt">The new expiration time, or null to remove expiration</param>
/// <returns>The number of references updated</returns>
public async Task<int> SetFileReferencesExpirationAsync(string fileId, Instant? expiredAt)
{
var rowsAffected = await db.FileReferences
.Where(r => r.FileId == fileId)
.ExecuteUpdateAsync(setter => setter.SetProperty(
r => r.ExpiredAt,
_ => expiredAt
));
if (rowsAffected > 0)
{
await fileService._PurgeCacheAsync(fileId);
await PurgeCacheForFileAsync(fileId);
}
return rowsAffected;
}
/// <summary>
/// Get all file references for a specific resource and usage type
/// </summary>
/// <param name="resourceId">The resource ID</param>
/// <param name="usageType">The usage type</param>
/// <returns>List of file references</returns>
public async Task<List<CloudFileReference>> GetResourceReferencesAsync(string resourceId, string usageType)
{
return await db.FileReferences
.Where(r => r.ResourceId == resourceId && r.Usage == usageType)
.ToListAsync();
}
/// <summary>
/// Check if a file has any references
/// </summary>
/// <param name="fileId">The file ID to check</param>
/// <returns>True if the file has references, false otherwise</returns>
public async Task<bool> HasFileReferencesAsync(string fileId)
{
return await db.FileReferences.AnyAsync(r => r.FileId == fileId);
}
/// <summary>
/// Updates the expiration time for a file reference using a duration from now
/// </summary>
/// <param name="referenceId">The ID of the reference</param>
/// <param name="duration">The duration after which the reference expires, or null to remove expiration</param>
/// <returns>True if the reference was found and updated, false otherwise</returns>
public async Task<bool> SetReferenceExpirationDurationAsync(Guid referenceId, Duration? duration)
{
Instant? expiredAt = null;
if (duration.HasValue)
{
expiredAt = SystemClock.Instance.GetCurrentInstant() + duration.Value;
}
return await SetReferenceExpirationAsync(referenceId, expiredAt);
}
}

View File

@ -39,7 +39,10 @@ public class FileService(
if (cachedFile is not null)
return cachedFile;
var file = await db.Files.FirstOrDefaultAsync(f => f.Id == fileId);
var file = await db.Files
.Include(f => f.Account)
.Where(f => f.Id == fileId)
.FirstOrDefaultAsync();
if (file != null)
await cache.SetAsync(cacheKey, file, CacheDuration);
@ -208,8 +211,9 @@ public class FileService(
if (result.Count > 0)
{
List<Task<CloudFile>> tasks = [];
tasks.AddRange(result.Select(result =>
nfs.UploadFileToRemoteAsync(file, result.filePath, null, result.suffix, true)));
tasks.AddRange(result.Select(item =>
nfs.UploadFileToRemoteAsync(file, item.filePath, null, item.suffix, true))
);
await Task.WhenAll(tasks);
file = await tasks.First();
@ -326,10 +330,23 @@ public class FileService(
if (file.StorageId is null) return;
if (file.UploadedTo is null) return;
var repeatedStorageId = await db.Files
.Where(f => f.StorageId == file.StorageId && f.Id != file.Id && f.UsedCount > 0)
.AnyAsync();
if (repeatedStorageId) return;
// Check if any other file with the same storage ID is referenced
var otherFilesWithSameStorageId = await db.Files
.Where(f => f.StorageId == file.StorageId && f.Id != file.Id)
.Select(f => f.Id)
.ToListAsync();
// Check if any of these files are referenced
var anyReferenced = false;
if (otherFilesWithSameStorageId.Any())
{
anyReferenced = await db.FileReferences
.Where(r => otherFilesWithSameStorageId.Contains(r.FileId))
.AnyAsync();
}
// If any other file with the same storage ID is referenced, don't delete the actual file data
if (anyReferenced) return;
var dest = GetRemoteStorageConfig(file.UploadedTo);
var client = CreateMinioClient(dest);
@ -380,242 +397,88 @@ public class FileService(
return client.Build();
}
public async Task MarkUsageAsync(CloudFile file, int delta)
{
await db.Files.Where(o => o.Id == file.Id)
.ExecuteUpdateAsync(setter => setter.SetProperty(
b => b.UsedCount,
b => b.UsedCount + delta
)
);
await _PurgeCacheAsync(file.Id);
}
public async Task MarkUsageRangeAsync(ICollection<CloudFile> files, int delta)
{
var ids = files.Select(f => f.Id).ToArray();
await db.Files.Where(o => ids.Contains(o.Id))
.ExecuteUpdateAsync(setter => setter.SetProperty(
b => b.UsedCount,
b => b.UsedCount + delta
)
);
await _PurgeCacheRangeAsync(files.Select(x => x.Id).ToList());
}
public async Task SetExpiresRangeAsync(ICollection<CloudFile> files, Duration? duration)
{
var ids = files.Select(f => f.Id).ToArray();
await db.Files.Where(o => ids.Contains(o.Id))
.ExecuteUpdateAsync(setter => setter.SetProperty(
b => b.ExpiredAt,
duration.HasValue
? b => SystemClock.Instance.GetCurrentInstant() + duration.Value
: _ => null
)
);
await _PurgeCacheRangeAsync(files.Select(x => x.Id).ToList());
}
public async Task SetUsageAsync(CloudFile file, string? usage)
{
await db.Files.Where(o => o.Id == file.Id)
.ExecuteUpdateAsync(setter => setter.SetProperty(
b => b.Usage,
_ => usage
)
);
await _PurgeCacheAsync(file.Id);
}
public async Task SetUsageRangeAsync(ICollection<CloudFile> files, string? usage)
{
var ids = files.Select(f => f.Id).ToArray();
await db.Files.Where(o => ids.Contains(o.Id))
.ExecuteUpdateAsync(setter => setter.SetProperty(
b => b.Usage,
_ => usage
)
);
await _PurgeCacheRangeAsync(files.Select(x => x.Id).ToList());
}
public async Task<(ICollection<CloudFile> current, ICollection<CloudFile> added, ICollection<CloudFile> removed)>
DiffAndSetUsageAsync(
ICollection<string>? newFileIds,
string? usage,
ICollection<CloudFile>? previousFiles = null
)
{
if (newFileIds == null) return ([], [], previousFiles ?? []);
var records = await db.Files.Where(f => newFileIds.Contains(f.Id)).ToListAsync();
var previous = previousFiles?.ToDictionary(f => f.Id) ?? new Dictionary<string, CloudFile>();
var current = records.ToDictionary(f => f.Id);
var added = current.Keys.Except(previous.Keys).Select(id => current[id]).ToList();
var removed = previous.Keys.Except(current.Keys).Select(id => previous[id]).ToList();
if (added.Count > 0) await SetUsageRangeAsync(added, usage);
if (removed.Count > 0) await SetUsageRangeAsync(removed, null);
return (newFileIds.Select(id => current[id]).ToList(), added, removed);
}
public async Task<(ICollection<CloudFile> current, ICollection<CloudFile> added, ICollection<CloudFile> removed)>
DiffAndMarkFilesAsync(
ICollection<string>? newFileIds,
ICollection<CloudFile>? previousFiles = null
)
{
if (newFileIds == null) return ([], [], previousFiles ?? []);
var records = await db.Files.Where(f => newFileIds.Contains(f.Id)).ToListAsync();
var previous = previousFiles?.ToDictionary(f => f.Id) ?? new Dictionary<string, CloudFile>();
var current = records.ToDictionary(f => f.Id);
var added = current.Keys.Except(previous.Keys).Select(id => current[id]).ToList();
var removed = previous.Keys.Except(current.Keys).Select(id => previous[id]).ToList();
if (added.Count > 0) await MarkUsageRangeAsync(added, 1);
if (removed.Count > 0) await MarkUsageRangeAsync(removed, -1);
return (newFileIds.Select(id => current[id]).ToList(), added, removed);
}
public async Task<(ICollection<CloudFile> current, ICollection<CloudFile> added, ICollection<CloudFile> removed)>
DiffAndSetExpiresAsync(
ICollection<string>? newFileIds,
Duration? duration,
ICollection<CloudFile>? previousFiles = null
)
{
if (newFileIds == null) return ([], [], previousFiles ?? []);
var records = await db.Files.Where(f => newFileIds.Contains(f.Id)).ToListAsync();
var previous = previousFiles?.ToDictionary(f => f.Id) ?? new Dictionary<string, CloudFile>();
var current = records.ToDictionary(f => f.Id);
var added = current.Keys.Except(previous.Keys).Select(id => current[id]).ToList();
var removed = previous.Keys.Except(current.Keys).Select(id => previous[id]).ToList();
if (added.Count > 0) await SetExpiresRangeAsync(added, duration);
if (removed.Count > 0) await SetExpiresRangeAsync(removed, null);
return (newFileIds.Select(id => current[id]).ToList(), added, removed);
}
// Add this helper method to purge the cache for a specific file
private async Task _PurgeCacheAsync(string fileId)
// Helper method to purge the cache for a specific file
// Made internal to allow FileReferenceService to use it
internal async Task _PurgeCacheAsync(string fileId)
{
var cacheKey = $"{CacheKeyPrefix}{fileId}";
await cache.RemoveAsync(cacheKey);
}
// Add this helper method to purge cache for multiple files
private async Task _PurgeCacheRangeAsync(ICollection<string> fileIds)
// Helper method to purge cache for multiple files
internal async Task _PurgeCacheRangeAsync(IEnumerable<string> fileIds)
{
var tasks = fileIds.Select(_PurgeCacheAsync);
await Task.WhenAll(tasks);
}
}
public class CloudFileUnusedRecyclingJob(AppDatabase db, FileService fs, ILogger<CloudFileUnusedRecyclingJob> logger)
: IJob
{
public async Task Execute(IJobExecutionContext context)
public async Task<List<CloudFile?>> LoadFromReference(List<CloudFileReferenceObject> references)
{
logger.LogInformation("Deleting unused cloud files...");
var cachedFiles = new Dictionary<string, CloudFile>();
var uncachedIds = new List<string>();
var cutoff = SystemClock.Instance.GetCurrentInstant() - Duration.FromHours(1);
var now = SystemClock.Instance.GetCurrentInstant();
// Get files to delete along with their storage IDs
var files = await db.Files
.Where(f =>
(f.ExpiredAt == null && f.UsedCount == 0 && f.CreatedAt < cutoff) ||
(f.ExpiredAt != null && now >= f.ExpiredAt)
)
.ToListAsync();
if (files.Count == 0)
// Check cache first
foreach (var reference in references)
{
logger.LogInformation("No files to delete");
return;
var cacheKey = $"{CacheKeyPrefix}{reference.Id}";
var cachedFile = await cache.GetAsync<CloudFile>(cacheKey);
if (cachedFile != null)
{
cachedFiles[reference.Id] = cachedFile;
}
else
{
uncachedIds.Add(reference.Id);
}
}
logger.LogInformation($"Found {files.Count} files to process...");
// Load uncached files from database
if (uncachedIds.Count > 0)
{
var dbFiles = await db.Files
.Include(f => f.Account)
.Where(f => uncachedIds.Contains(f.Id))
.ToListAsync();
// Group files by StorageId and find which ones are safe to delete
var storageIds = files.Where(f => f.StorageId != null)
.Select(f => f.StorageId!)
.Distinct()
// Add to cache
foreach (var file in dbFiles)
{
var cacheKey = $"{CacheKeyPrefix}{file.Id}";
await cache.SetAsync(cacheKey, file, CacheDuration);
cachedFiles[file.Id] = file;
}
}
// Preserve original order
return references
.Select(r => cachedFiles.GetValueOrDefault(r.Id))
.Where(f => f != null)
.ToList();
var usedStorageIds = await db.Files
.Where(f => f.StorageId != null &&
storageIds.Contains(f.StorageId) &&
!files.Select(ff => ff.Id).Contains(f.Id))
.Select(f => f.StorageId!)
.Distinct()
.ToListAsync();
// Group files for deletion
var filesToDelete = files.Where(f => f.StorageId == null || !usedStorageIds.Contains(f.StorageId))
.GroupBy(f => f.UploadedTo)
.ToDictionary(grouping => grouping.Key!, grouping => grouping.ToList());
// Delete files by remote storage
foreach (var group in filesToDelete.Where(group => !string.IsNullOrEmpty(group.Key)))
{
try
{
var dest = fs.GetRemoteStorageConfig(group.Key);
var client = fs.CreateMinioClient(dest);
if (client == null) continue;
// Create delete tasks for each file in the group
var deleteTasks = group.Value.Select(file =>
{
var objectId = file.StorageId ?? file.Id;
var tasks = new List<Task>
{
client.RemoveObjectAsync(new RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId))
};
if (file.HasCompression)
{
tasks.Add(client.RemoveObjectAsync(new RemoveObjectArgs()
.WithBucket(dest.Bucket)
.WithObject(objectId + ".compressed")));
}
return Task.WhenAll(tasks);
});
await Task.WhenAll(deleteTasks);
}
catch (Exception ex)
{
logger.LogError(ex, "Error deleting files from remote storage {remote}", group.Key);
}
}
// Delete all file records from the database
var fileIds = files.Select(f => f.Id).ToList();
await db.Files
.Where(f => fileIds.Contains(f.Id))
.ExecuteDeleteAsync();
logger.LogInformation($"Completed deleting {files.Count} files");
}
}
/// <summary>
/// Gets the number of references to a file based on CloudFileReference records
/// </summary>
/// <param name="fileId">The ID of the file</param>
/// <returns>The number of references to the file</returns>
public async Task<int> GetReferenceCountAsync(string fileId)
{
return await db.FileReferences
.Where(r => r.FileId == fileId)
.CountAsync();
}
/// <summary>
/// Checks if a file is referenced by any resource
/// </summary>
/// <param name="fileId">The ID of the file to check</param>
/// <returns>True if the file is referenced, false otherwise</returns>
public async Task<bool> IsReferencedAsync(string fileId)
{
return await db.FileReferences
.Where(r => r.FileId == fileId)
.AnyAsync();
}
}

View File

@ -0,0 +1,49 @@
namespace DysonNetwork.Sphere.Storage;
/// <summary>
/// Common interface for cloud file entities that can be used in file operations.
/// This interface exposes the essential properties needed for file operations
/// and is implemented by both CloudFile and CloudFileReferenceObject.
/// </summary>
public interface ICloudFile
{
/// <summary>
/// Gets the unique identifier of the cloud file.
/// </summary>
string Id { get; }
/// <summary>
/// Gets the name of the cloud file.
/// </summary>
string Name { get; }
/// <summary>
/// Gets the file metadata dictionary.
/// </summary>
Dictionary<string, object>? FileMeta { get; }
/// <summary>
/// Gets the user metadata dictionary.
/// </summary>
Dictionary<string, object>? UserMeta { get; }
/// <summary>
/// Gets the MIME type of the file.
/// </summary>
string? MimeType { get; }
/// <summary>
/// Gets the hash of the file content.
/// </summary>
string? Hash { get; }
/// <summary>
/// Gets the size of the file in bytes.
/// </summary>
long Size { get; }
/// <summary>
/// Gets whether the file has a compressed version available.
/// </summary>
bool HasCompression { get; }
}