123 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
			
		
		
	
	
			123 lines
		
	
	
		
			4.7 KiB
		
	
	
	
		
			C#
		
	
	
	
	
	
using Microsoft.EntityFrameworkCore;
 | 
						|
using NodaTime;
 | 
						|
using Quartz;
 | 
						|
 | 
						|
namespace DysonNetwork.Drive.Storage;
 | 
						|
 | 
						|
/// <summary>
/// Quartz job that deletes stale tus upload files from the configured store directory
/// and sets <c>IsMarkedRecycle</c> on cloud files that are either unreferenced
/// (in pools whose policy enables recycling) or past their own expiry.
/// </summary>
public class CloudFileUnusedRecyclingJob(
    AppDatabase db,
    FileReferenceService fileRefService,
    ILogger<CloudFileUnusedRecyclingJob> logger,
    IConfiguration configuration
)
    : IJob
{
    /// <summary>Number of file ids fetched and evaluated per database round trip.</summary>
    private const int BatchSize = 1000;

    /// <summary>A progress line is logged whenever the processed count hits a multiple of this.</summary>
    private const int ProgressLogInterval = 10000;

    /// <summary>Files in the tus store older than this are deleted.</summary>
    private static readonly Duration TusUploadMaxAge = Duration.FromHours(1);

    /// <summary>Only files created at least this long ago are checked for missing references.</summary>
    private static readonly Duration MinimumFileAge = Duration.FromDays(30);

    /// <summary>
    /// Runs the cleanup: stale tus files first, then unreferenced files, then expired files.
    /// </summary>
    /// <param name="context">Quartz execution context; its cancellation token is honored.</param>
    public async Task Execute(IJobExecutionContext context)
    {
        var cancellationToken = context.CancellationToken;

        logger.LogInformation("Cleaning tus cloud files...");
        CleanStaleTusUploads(configuration["Tus:StorePath"], cancellationToken);

        logger.LogInformation("Marking unused cloud files...");
        var now = SystemClock.Instance.GetCurrentInstant();
        var markedCount = await MarkUnreferencedFilesAsync(now, cancellationToken);
        markedCount += await MarkExpiredFilesAsync(now, cancellationToken);

        logger.LogInformation("Completed marking {MarkedCount} files for recycling", markedCount);
    }

    /// <summary>
    /// Deletes files directly inside <paramref name="storePath"/> whose creation time is
    /// older than <see cref="TusUploadMaxAge"/>. Does nothing if the directory does not exist.
    /// </summary>
    private void CleanStaleTusUploads(string? storePath, CancellationToken cancellationToken)
    {
        if (!Directory.Exists(storePath))
        {
            return;
        }

        var cutoffUtc = (SystemClock.Instance.GetCurrentInstant() - TusUploadMaxAge).ToDateTimeUtc();
        foreach (var file in Directory.GetFiles(storePath))
        {
            cancellationToken.ThrowIfCancellationRequested();

            try
            {
                if (File.GetCreationTimeUtc(file) < cutoffUtc)
                {
                    File.Delete(file);
                }
            }
            catch (Exception ex) when (ex is IOException or UnauthorizedAccessException)
            {
                // Best effort: one locked or protected file must not abort the rest of the job.
                logger.LogWarning(ex, "Failed to delete stale tus file {File}", file);
            }
        }
    }

    /// <summary>
    /// Pages through not-yet-marked files in recyclable pools that are at least
    /// <see cref="MinimumFileAge"/> old and marks those with no references, or whose
    /// references have all expired as of <paramref name="now"/>.
    /// </summary>
    /// <returns>The number of rows updated.</returns>
    private async Task<int> MarkUnreferencedFilesAsync(Instant now, CancellationToken cancellationToken)
    {
        var recyclablePools = await db.Pools
            .Where(p => p.PolicyConfig.EnableRecycle)
            .Select(p => p.Id)
            .ToListAsync(cancellationToken);

        var ageThreshold = now - MinimumFileAge;

        // One shared filter for both the count and the paging query, so the
        // "processed/total" progress figures refer to the same set of files.
        var candidates = db.Files
            .Where(f => f.PoolId.HasValue && recyclablePools.Contains(f.PoolId.Value))
            .Where(f => !f.IsMarkedRecycle)
            .Where(f => f.CreatedAt <= ageThreshold);

        var totalFiles = await candidates.CountAsync(cancellationToken);
        logger.LogInformation("Found {TotalFiles} files to check for unused status", totalFiles);

        var processedCount = 0;
        var markedCount = 0;
        string? lastProcessedId = null;

        while (true)
        {
            // Keyset pagination: continue strictly after the last id of the previous batch.
            var page = candidates;
            if (lastProcessedId is not null)
            {
                var cursor = lastProcessedId;
                page = page.Where(f => string.Compare(f.Id, cursor) > 0);
            }

            var fileBatch = await page
                .OrderBy(f => f.Id) // Stable ordering is required for the keyset cursor
                .Take(BatchSize)
                .Select(f => f.Id)
                .ToListAsync(cancellationToken);

            if (fileBatch.Count == 0)
            {
                break;
            }

            processedCount += fileBatch.Count;
            lastProcessedId = fileBatch[^1];

            var fileReferences = await fileRefService.GetReferencesAsync(fileBatch);

            // A file qualifies when it has no references, or every reference has expired.
            var filesToMark = fileBatch.Where(fileId =>
                !fileReferences.TryGetValue(fileId, out var references) ||
                references.Count == 0 ||
                references.All(r => r.ExpiredAt.HasValue && r.ExpiredAt.Value <= now)
            ).ToList();

            if (filesToMark.Count > 0)
            {
                // Bulk update: one statement per batch instead of loading and saving entities.
                markedCount += await db.Files
                    .Where(f => filesToMark.Contains(f.Id))
                    .ExecuteUpdateAsync(
                        setter => setter.SetProperty(f => f.IsMarkedRecycle, true),
                        cancellationToken);
            }

            if (processedCount % ProgressLogInterval == 0)
            {
                LogProgress(processedCount, totalFiles, markedCount);
            }

            // A short batch means the candidate set is exhausted; skip the extra empty query.
            if (fileBatch.Count < BatchSize)
            {
                break;
            }
        }

        // Emit the final tally unless the last interval log already covered it
        // (or nothing was processed at all).
        if (processedCount % ProgressLogInterval != 0)
        {
            LogProgress(processedCount, totalFiles, markedCount);
        }

        return markedCount;
    }

    /// <summary>
    /// Marks files whose own <c>ExpiredAt</c> is at or before <paramref name="now"/>.
    /// Already-marked files are skipped so they are neither rewritten nor counted again.
    /// </summary>
    /// <returns>The number of rows updated.</returns>
    private async Task<int> MarkExpiredFilesAsync(Instant now, CancellationToken cancellationToken)
    {
        return await db.Files
            .Where(f => f.ExpiredAt.HasValue && f.ExpiredAt.Value <= now)
            .Where(f => !f.IsMarkedRecycle)
            .ExecuteUpdateAsync(
                s => s.SetProperty(f => f.IsMarkedRecycle, true),
                cancellationToken);
    }

    /// <summary>Writes one progress line for the unreferenced-file pass.</summary>
    private void LogProgress(int processedCount, int totalFiles, int markedCount)
    {
        logger.LogInformation(
            "Progress: processed {ProcessedCount}/{TotalFiles} files, marked {MarkedCount} for recycling",
            processedCount,
            totalFiles,
            markedCount
        );
    }
}