using Microsoft.EntityFrameworkCore;
using NodaTime;
using Quartz;

namespace DysonNetwork.Sphere.Storage;

/// <summary>
/// Scheduled job that removes cloud files which have no references, or whose
/// references have all expired, from remote storage and from the database.
/// </summary>
public class CloudFileUnusedRecyclingJob(
    AppDatabase db,
    FileService fs,
    FileReferenceService fileRefService,
    ILogger logger
) : IJob
{
    /// <summary>
    /// Finds unreferenced files, deletes their remote objects (unless the same
    /// storage object is still used by another file with an active reference),
    /// and then deletes the file records.
    /// </summary>
    /// <param name="context">The Quartz execution context (not read by this job).</param>
    public async Task Execute(IJobExecutionContext context)
    {
        logger.LogInformation("Deleting unused cloud files...");

        var now = SystemClock.Instance.GetCurrentInstant();

        // NOTE(review): the original comment here described restricting candidates to
        // files that are expired or were created more than an hour ago, but no such
        // filter was ever applied (a one-hour cutoff was computed and left unused), so
        // every file id is scanned. Without an age filter, a file that has not been
        // referenced yet is already eligible for deletion - confirm the intended rule.
        var fileIds = await db.Files
            .Select(f => f.Id)
            .ToListAsync();

        // Keep only files that have no references or whose references have all expired.
        var deletionPlan = new List<string>();
        foreach (var batch in fileIds.Chunk(100)) // Process in batches to avoid excessive query size
        {
            var references = await fileRefService.GetReferencesAsync(batch);
            deletionPlan.AddRange(references
                .Where(entry => entry.Value.Count == 0 ||
                                entry.Value.All(r => r.ExpiredAt != null && now >= r.ExpiredAt))
                .Select(entry => entry.Key));
        }

        if (deletionPlan.Count == 0)
        {
            logger.LogInformation("No files to delete");
            return;
        }

        // Load the actual file objects for the files to be deleted.
        var files = await db.Files
            .Where(f => deletionPlan.Contains(f.Id))
            .ToListAsync();

        logger.LogInformation("Found {Count} files to delete...", files.Count);

        // Plain id list so the query below does not embed the entity list itself.
        var candidateIds = files.Select(f => f.Id).ToList();

        var storageIds = files
            .Where(f => f.StorageId != null)
            .Select(f => f.StorageId!)
            .Distinct()
            .ToList();

        // Other files (not scheduled for deletion) that share a storage id with a candidate.
        var filesWithSameStorageId = await db.Files
            .Where(f => f.StorageId != null &&
                        storageIds.Contains(f.StorageId) &&
                        !candidateIds.Contains(f.Id))
            .ToListAsync();

        // Storage ids that must be kept because another file still actively references them.
        var usedStorageIds = new HashSet<string>();
        foreach (var file in filesWithSameStorageId)
        {
            // Already known to be in use: no need to query references again.
            if (file.StorageId == null || usedStorageIds.Contains(file.StorageId)) continue;

            var references = await fileRefService.GetReferencesAsync(file.Id);
            if (references.Any(r => r.ExpiredAt == null || r.ExpiredAt > now))
            {
                usedStorageIds.Add(file.StorageId);
            }
        }

        // Files whose remote object can be removed, grouped by the remote they live on.
        // Files without a remote are filtered out BEFORE grouping: the previous
        // ToDictionary call threw ArgumentNullException on a null UploadedTo key.
        var remoteGroups = files
            .Where(f => f.StorageId == null || !usedStorageIds.Contains(f.StorageId))
            .Where(f => !string.IsNullOrEmpty(f.UploadedTo))
            .GroupBy(f => f.UploadedTo!)
            .ToList();

        // Ids of files whose remote deletion failed; their records are kept so the
        // next run can retry instead of orphaning the stored objects.
        var failedFileIds = new HashSet<string>();

        foreach (var group in remoteGroups)
        {
            try
            {
                var dest = fs.GetRemoteStorageConfig(group.Key);
                var client = fs.CreateMinioClient(dest);
                // NOTE(review): when no client can be created the remote objects are
                // skipped but the records are still deleted (unchanged behavior) -
                // confirm this is intended for such remotes.
                if (client == null) continue;

                var deleteTasks = new List<Task>();
                foreach (var file in group)
                {
                    var objectId = file.StorageId ?? file.Id;
                    deleteTasks.Add(client.RemoveObjectAsync(new Minio.DataModel.Args.RemoveObjectArgs()
                        .WithBucket(dest.Bucket)
                        .WithObject(objectId)));

                    if (file.HasCompression)
                    {
                        deleteTasks.Add(client.RemoveObjectAsync(new Minio.DataModel.Args.RemoveObjectArgs()
                            .WithBucket(dest.Bucket)
                            .WithObject(objectId + ".compressed")));
                    }
                }

                await Task.WhenAll(deleteTasks);
            }
            catch (Exception ex)
            {
                logger.LogError(ex, "Error deleting files from remote storage {remote}", group.Key);
                failedFileIds.UnionWith(group.Select(f => f.Id));
            }
        }

        // Delete the records of every candidate except those whose remote deletion failed.
        var fileIdsToDelete = candidateIds
            .Where(id => !failedFileIds.Contains(id))
            .ToList();

        if (fileIdsToDelete.Count > 0)
        {
            await db.Files
                .Where(f => fileIdsToDelete.Contains(f.Id))
                .ExecuteDeleteAsync();
        }

        logger.LogInformation("Completed deleting {Count} files", fileIdsToDelete.Count);
    }
}