94 lines
3.5 KiB
C#
94 lines
3.5 KiB
C#
using Microsoft.EntityFrameworkCore;
|
|
using NodaTime;
|
|
using Quartz;
|
|
|
|
namespace DysonNetwork.Sphere.Storage;
|
|
|
|
public class CloudFileUnusedRecyclingJob(
|
|
AppDatabase db,
|
|
FileService fs,
|
|
FileReferenceService fileRefService,
|
|
ILogger<CloudFileUnusedRecyclingJob> logger
|
|
)
|
|
: IJob
|
|
{
|
|
public async Task Execute(IJobExecutionContext context)
|
|
{
|
|
logger.LogInformation("Marking unused cloud files...");
|
|
|
|
var now = SystemClock.Instance.GetCurrentInstant();
|
|
const int batchSize = 1000; // Process larger batches for efficiency
|
|
var processedCount = 0;
|
|
var markedCount = 0;
|
|
var totalFiles = await db.Files.Where(f => !f.IsMarkedRecycle).CountAsync();
|
|
|
|
logger.LogInformation("Found {TotalFiles} files to check for unused status", totalFiles);
|
|
|
|
// Define a timestamp to limit the age of files we're processing in this run
|
|
// This spreads the processing across multiple job runs for very large databases
|
|
var ageThreshold = now - Duration.FromDays(30); // Process files up to 90 days old in this run
|
|
|
|
// Instead of loading all files at once, use pagination
|
|
var hasMoreFiles = true;
|
|
string? lastProcessedId = null;
|
|
|
|
while (hasMoreFiles)
|
|
{
|
|
// Query for the next batch of files using keyset pagination
|
|
var filesQuery = db.Files
|
|
.Where(f => !f.IsMarkedRecycle)
|
|
.Where(f => f.CreatedAt <= ageThreshold); // Only process older files first
|
|
|
|
if (lastProcessedId != null)
|
|
{
|
|
filesQuery = filesQuery.Where(f => string.Compare(f.Id, lastProcessedId) > 0);
|
|
}
|
|
|
|
var fileBatch = await filesQuery
|
|
.OrderBy(f => f.Id) // Ensure consistent ordering for pagination
|
|
.Take(batchSize)
|
|
.Select(f => f.Id)
|
|
.ToListAsync();
|
|
|
|
if (fileBatch.Count == 0)
|
|
{
|
|
hasMoreFiles = false;
|
|
continue;
|
|
}
|
|
|
|
processedCount += fileBatch.Count;
|
|
lastProcessedId = fileBatch.Last();
|
|
|
|
// Get all relevant file references for this batch
|
|
var fileReferences = await fileRefService.GetReferencesAsync(fileBatch);
|
|
|
|
// Filter to find files that have no references or all expired references
|
|
var filesToMark = fileBatch.Where(fileId =>
|
|
!fileReferences.TryGetValue(fileId, out var references) ||
|
|
references.Count == 0 ||
|
|
references.All(r => r.ExpiredAt.HasValue && r.ExpiredAt.Value <= now)
|
|
).ToList();
|
|
|
|
if (filesToMark.Count > 0)
|
|
{
|
|
// Use a bulk update for better performance - mark all qualifying files at once
|
|
var updateCount = await db.Files
|
|
.Where(f => filesToMark.Contains(f.Id))
|
|
.ExecuteUpdateAsync(setter => setter
|
|
.SetProperty(f => f.IsMarkedRecycle, true));
|
|
|
|
markedCount += updateCount;
|
|
}
|
|
|
|
// Log progress periodically
|
|
if (processedCount % 10000 == 0 || !hasMoreFiles)
|
|
{
|
|
logger.LogInformation(
|
|
"Progress: processed {ProcessedCount}/{TotalFiles} files, marked {MarkedCount} for recycling",
|
|
processedCount, totalFiles, markedCount);
|
|
}
|
|
}
|
|
|
|
logger.LogInformation("Completed marking {MarkedCount} files for recycling", markedCount);
|
|
}
|
|
} |