diff --git a/DysonNetwork.Sphere/Account/Account.cs b/DysonNetwork.Sphere/Account/Account.cs
index 3289f4d..452454f 100644
--- a/DysonNetwork.Sphere/Account/Account.cs
+++ b/DysonNetwork.Sphere/Account/Account.cs
@@ -72,6 +72,7 @@ public class Profile : ModelBase
[Column(TypeName = "jsonb")] public VerificationMark? Verification { get; set; }
[Column(TypeName = "jsonb")] public BadgeReferenceObject? ActiveBadge { get; set; }
+ [Column(TypeName = "jsonb")] public SubscriptionReferenceObject? StellarMembership { get; set; }
public int Experience { get; set; } = 0;
[NotMapped] public int Level => Leveling.ExperiencePerLevel.Count(xp => Experience >= xp) - 1;
diff --git a/DysonNetwork.Sphere/Storage/CloudFile.cs b/DysonNetwork.Sphere/Storage/CloudFile.cs
index 1a3ae46..14eba7a 100644
--- a/DysonNetwork.Sphere/Storage/CloudFile.cs
+++ b/DysonNetwork.Sphere/Storage/CloudFile.cs
@@ -53,6 +53,12 @@ public class CloudFile : ModelBase, ICloudFile, IIdentifiedResource
public Instant? UploadedAt { get; set; }
[MaxLength(128)] public string? UploadedTo { get; set; }
public bool HasCompression { get; set; } = false;
+
+ /// <summary>
+ /// Set to true when the recycling job has determined the file is unused and plans to delete it.
+ /// Because the recycling job is not yet fully reliable, marking does not actually delete the file until a human verifies it.
+ /// </summary>
+ public bool IsMarkedRecycle { get; set; } = false;
/// The object name which stored remotely,
/// multiple cloud file may have same storage id to indicate they are the same file
diff --git a/DysonNetwork.Sphere/Storage/CloudFileUnusedRecyclingJob.cs b/DysonNetwork.Sphere/Storage/CloudFileUnusedRecyclingJob.cs
index 75f3708..3a46c5c 100644
--- a/DysonNetwork.Sphere/Storage/CloudFileUnusedRecyclingJob.cs
+++ b/DysonNetwork.Sphere/Storage/CloudFileUnusedRecyclingJob.cs
@@ -14,115 +14,81 @@ public class CloudFileUnusedRecyclingJob(
{
public async Task Execute(IJobExecutionContext context)
{
- return;
- logger.LogInformation("Deleting unused cloud files...");
+ logger.LogInformation("Marking unused cloud files...");
- var cutoff = SystemClock.Instance.GetCurrentInstant() - Duration.FromHours(1);
var now = SystemClock.Instance.GetCurrentInstant();
+ const int batchSize = 1000; // Number of files examined per database round trip; larger batches reduce query overhead
+ var processedCount = 0;
+ var markedCount = 0;
+ var totalFiles = await db.Files.Where(f => !f.IsMarkedRecycle).CountAsync();
- // Get files that are either expired or created more than an hour ago
- var fileIds = await db.Files
- .Select(f => f.Id)
- .ToListAsync();
+ logger.LogInformation("Found {TotalFiles} files to check for unused status", totalFiles);
- // Filter to only include files that have no references or all references have expired
- var deletionPlan = new List<string>();
- foreach (var batch in fileIds.Chunk(100)) // Process in batches to avoid excessive query size
+ // Define a cutoff so this run only considers files older than the threshold
+ // This spreads the processing across multiple job runs for very large databases
+ var ageThreshold = now - Duration.FromDays(30); // Only files created more than 30 days ago are processed in this run
+
+ // Instead of loading all files at once, use pagination
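+ // lastProcessedId acts as a keyset cursor: each batch resumes after the highest file Id returned by the previous one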
+ var hasMoreFiles = true;
+ string? lastProcessedId = null;
+
+ while (hasMoreFiles)
{
- var references = await fileRefService.GetReferencesAsync(batch);
- deletionPlan.AddRange(from refer in references
- where refer.Value.Count == 0 || refer.Value.All(r => r.ExpiredAt != null && now >= r.ExpiredAt)
- select refer.Key);
- }
+ // Query for the next batch of files using keyset pagination
+ var filesQuery = db.Files
+ .Where(f => !f.IsMarkedRecycle)
+ .Where(f => f.CreatedAt <= ageThreshold); // Skip files newer than the age threshold
- if (deletionPlan.Count == 0)
- {
- logger.LogInformation("No files to delete");
- return;
- }
-
- // Get the actual file objects for the files to be deleted
- var files = await db.Files
- .Where(f => deletionPlan.Contains(f.Id))
- .ToListAsync();
-
- logger.LogInformation($"Found {files.Count} files to delete...");
-
- // Group files by StorageId and find which ones are safe to delete
- var storageIds = files.Where(f => f.StorageId != null)
- .Select(f => f.StorageId!)
- .Distinct()
- .ToList();
-
- // Check if any other files with the same storage IDs are referenced
- var usedStorageIds = new List<string>();
- var filesWithSameStorageId = await db.Files
- .Where(f => f.StorageId != null &&
- storageIds.Contains(f.StorageId) &&
- !files.Select(ff => ff.Id).Contains(f.Id))
- .ToListAsync();
-
- foreach (var file in filesWithSameStorageId)
- {
- // Get all references for the file
- var references = await fileRefService.GetReferencesAsync(file.Id);
-
- // Check if file has active references (non-expired)
- if (references.Any(r => r.ExpiredAt == null || r.ExpiredAt > now) && file.StorageId != null)
+ if (lastProcessedId != null)
{
- usedStorageIds.Add(file.StorageId);
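+ // string.Compare is used because LINQ expressions cannot apply > to strings; EF Core translates it into a SQL comparison on Id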
+ filesQuery = filesQuery.Where(f => string.Compare(f.Id, lastProcessedId) > 0);
+ }
+
+ var fileBatch = await filesQuery
+ .OrderBy(f => f.Id) // Ensure consistent ordering for pagination
+ .Take(batchSize)
+ .Select(f => f.Id)
+ .ToListAsync();
+
+ if (fileBatch.Count == 0)
+ {
+ hasMoreFiles = false;
+ continue;
+ }
+
+ processedCount += fileBatch.Count;
+ lastProcessedId = fileBatch.Last();
+
+ // Get all relevant file references for this batch
+ var fileReferences = await fileRefService.GetReferencesAsync(fileBatch);
+
+ // Filter to find files that have no references or all expired references
+ var filesToMark = fileBatch.Where(fileId =>
+ !fileReferences.TryGetValue(fileId, out var references) ||
+ references.Count == 0 ||
+ references.All(r => r.ExpiredAt.HasValue && r.ExpiredAt.Value <= now)
+ ).ToList();
+
+ if (filesToMark.Count > 0)
+ {
+ // Use a bulk update for better performance - mark all qualifying files at once
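+ // ExecuteUpdateAsync (EF Core 7+) issues a single set-based UPDATE and returns the number of rows affected,
+ // without loading the entities into the change tracker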
+ var updateCount = await db.Files
+ .Where(f => filesToMark.Contains(f.Id))
+ .ExecuteUpdateAsync(setter => setter
+ .SetProperty(f => f.IsMarkedRecycle, true));
+
+ markedCount += updateCount;
+ }
+
+ // Log progress every ten full batches; the summary log below reports the final totals
+ if (processedCount % 10000 == 0)
+ {
+ logger.LogInformation(
+ "Progress: processed {ProcessedCount}/{TotalFiles} files, marked {MarkedCount} for recycling",
+ processedCount, totalFiles, markedCount);
}
}
- // Group files for deletion
- var filesToDelete = files.Where(f => f.StorageId == null || !usedStorageIds.Contains(f.StorageId))
- .GroupBy(f => f.UploadedTo)
- .ToDictionary(grouping => grouping.Key!, grouping => grouping.ToList());
-
- // Delete files by remote storage
- foreach (var group in filesToDelete.Where(group => !string.IsNullOrEmpty(group.Key)))
- {
- try
- {
- var dest = fs.GetRemoteStorageConfig(group.Key);
- var client = fs.CreateMinioClient(dest);
- if (client == null) continue;
-
- // Create delete tasks for each file in the group
- // var deleteTasks = group.Value.Select(file =>
- // {
- // var objectId = file.StorageId ?? file.Id;
- // var tasks = new List<Task>
- // {
- // client.RemoveObjectAsync(new Minio.DataModel.Args.RemoveObjectArgs()
- // .WithBucket(dest.Bucket)
- // .WithObject(objectId))
- // };
- //
- // if (file.HasCompression)
- // {
- // tasks.Add(client.RemoveObjectAsync(new Minio.DataModel.Args.RemoveObjectArgs()
- // .WithBucket(dest.Bucket)
- // .WithObject(objectId + ".compressed")));
- // }
- //
- // return Task.WhenAll(tasks);
- // });
- //
- // await Task.WhenAll(deleteTasks);
- }
- catch (Exception ex)
- {
- logger.LogError(ex, "Error deleting files from remote storage {remote}", group.Key);
- }
- }
-
- // Delete all file records from the database
- var fileIdsToDelete = files.Select(f => f.Id).ToList();
- await db.Files
- .Where(f => fileIdsToDelete.Contains(f.Id))
- .ExecuteDeleteAsync();
-
- logger.LogInformation($"Completed deleting {files.Count} files");
+ logger.LogInformation("Completed marking {MarkedCount} files for recycling", markedCount);
}
}
\ No newline at end of file