diff --git a/DysonNetwork.Drive/Storage/FileReanalysisService.cs b/DysonNetwork.Drive/Storage/FileReanalysisService.cs index 4416c169..da5ab171 100644 --- a/DysonNetwork.Drive/Storage/FileReanalysisService.cs +++ b/DysonNetwork.Drive/Storage/FileReanalysisService.cs @@ -18,7 +18,7 @@ public class FileReanalysisService( IOptions options) { private readonly FileReanalysisOptions _options = options.Value; - private readonly HashSet _failedFileIds = new(); + private readonly HashSet _failedFileIds = []; private async Task> GetFilesNeedingReanalysisAsync(int limit = 100) { @@ -28,7 +28,8 @@ public class FileReanalysisService( .Where(f => f.ObjectId != null) .Include(f => f.Object) .ThenInclude(f => f.FileReplicas) - .Where(f => f.Object != null && (f.Object.Meta == null || f.Object.Meta.Count == 0)) + .Where(f => f.Object != null && (f.Object.Meta == null || f.Object.Meta.Count == 0 || f.Object.Size == 0 || + f.Object.Hash == null)) .Where(f => f.Object!.FileReplicas.Count > 0) .Where(f => f.CreatedAt <= deadline) .Skip(_failedFileIds.Count) @@ -62,7 +63,7 @@ public class FileReanalysisService( .ToListAsync(); } - public async Task ReanalyzeFileAsync(SnCloudFile file) + private async Task ReanalyzeFileAsync(SnCloudFile file) { logger.LogInformation("Starting reanalysis for file {FileId}: {FileName}", file.Id, file.Name); @@ -96,10 +97,9 @@ public class FileReanalysisService( { logger.LogWarning("Failed to extract metadata for supported MIME type {MimeType} on file {FileId}", file.MimeType, file.Id); - return false; } - bool updated = false; + var updated = false; if (file.Object.Size == 0 || file.Object.Size != actualSize) { file.Object.Size = actualSize; @@ -120,8 +120,9 @@ public class FileReanalysisService( if (updated) { + db.FileObjects.Update(file.Object); await db.SaveChangesAsync(); - int metaCount = meta?.Count ?? 0; + var metaCount = meta?.Count ?? 0; logger.LogInformation("Successfully reanalyzed file {FileId}, updated metadata with {MetaCount} fields", file.Id, metaCount); } @@ -130,11 +131,6 @@ public class FileReanalysisService( logger.LogInformation("File {FileId} already up to date", file.Id); } - if (_options.ValidateCompression || _options.ValidateThumbnails) - { - await ValidateCompressionAndThumbnailAsync(file); - } - return true; } catch (ObjectNotFoundException) @@ -152,9 +148,7 @@ public class FileReanalysisService( finally { if (File.Exists(tempPath)) - { File.Delete(tempPath); - } } } @@ -186,7 +180,8 @@ public class FileReanalysisService( var client = CreateMinioClient(dest); if (client == null) { - logger.LogWarning("Failed to create Minio client for pool {PoolId}, skipping validation", primaryReplica.PoolId); + logger.LogWarning("Failed to create Minio client for pool {PoolId}, skipping validation", + primaryReplica.PoolId); return; } @@ -194,10 +189,13 @@ public class FileReanalysisService( if (_options.ValidateCompression && file.Object.HasCompression) { - var compressedExists = await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".compressed"); + var compressedExists = + await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".compressed"); if (!compressedExists) { - logger.LogInformation("File {FileId} has compression flag but compressed version not found, setting HasCompression to false", file.Id); + logger.LogInformation( + "File {FileId} has compression flag but compressed version not found, setting HasCompression to false", + file.Id); file.Object.HasCompression = false; updated = true; } @@ -205,10 +203,13 @@ public class FileReanalysisService( if (_options.ValidateThumbnails && file.Object.HasThumbnail) { - var thumbnailExists = await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".thumbnail"); + var thumbnailExists = + await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".thumbnail"); if (!thumbnailExists) { - logger.LogInformation("File {FileId} has thumbnail flag but thumbnail not found, setting HasThumbnail to false", file.Id); + logger.LogInformation( + "File {FileId} has thumbnail flag but thumbnail not found, setting HasThumbnail to false", + file.Id); file.Object.HasThumbnail = false; updated = true; } @@ -226,7 +227,7 @@ public class FileReanalysisService( } } - private async Task ObjectExistsAsync(IMinioClient client, string bucket, string objectName) + private static async Task ObjectExistsAsync(IMinioClient client, string bucket, string objectName) { try { @@ -244,37 +245,58 @@ public class FileReanalysisService( public async Task ProcessNextFileAsync() { - if (!_options.Enabled) - { - logger.LogDebug("File reanalysis is disabled, skipping"); - return; - } + var reanalysisFiles = await GetFilesNeedingReanalysisAsync(10); + reanalysisFiles = reanalysisFiles.Where(f => !_failedFileIds.Contains(f.Id.ToString())).ToList(); - var files = await GetFilesNeedingReanalysisAsync(10); - files = files.Where(f => !_failedFileIds.Contains(f.Id.ToString())).ToList(); - if (files.Count == 0) + if (reanalysisFiles.Count > 0) { - if (_options.ValidateCompression || _options.ValidateThumbnails) + if (!_options.Enabled) { - files = await GetFilesNeedingCompressionValidationAsync(5); - if (files.Count == 0) + logger.LogDebug("File reanalysis is disabled, skipping reanalysis but continuing with validation"); + } + else + { + var file = reanalysisFiles[0]; + bool success = await ReanalyzeFileAsync(file); + if (!success) { - files = await GetFilesNeedingThumbnailValidationAsync(5); + logger.LogWarning("Failed to reanalyze file {FileId}, skipping for now", file.Id); + _failedFileIds.Add(file.Id); } + + return; } } - if (files.Count == 0) + + if (_options.ValidateCompression) { - logger.LogInformation("No files found needing reanalysis"); - return; + var compressionFiles = await GetFilesNeedingCompressionValidationAsync(5); + if (compressionFiles.Count > 0) + { + var file = compressionFiles[0]; + await ValidateCompressionAndThumbnailAsync(file); + return; + } } - var file = files[0]; - bool success = await ReanalyzeFileAsync(file); - if (!success) + if (_options.ValidateThumbnails) { - logger.LogWarning("Failed to reanalyze file {FileId}, skipping for now", file.Id); - _failedFileIds.Add(file.Id); + var thumbnailFiles = await GetFilesNeedingThumbnailValidationAsync(5); + if (thumbnailFiles.Count > 0) + { + var file = thumbnailFiles[0]; + await ValidateCompressionAndThumbnailAsync(file); + return; + } + } + + if (reanalysisFiles.Count > 0 && !_options.Enabled) + { + logger.LogInformation("Reanalysis is disabled, no other work to do"); + } + else + { + logger.LogInformation("No files found needing reanalysis or validation"); } } @@ -290,6 +312,7 @@ public class FileReanalysisService( { throw new InvalidOperationException($"No remote storage configured for pool {replica.PoolId}"); } + var dest = pool.StorageConfig; var client = CreateMinioClient(dest); @@ -512,4 +535,4 @@ public class FileReanalysisService( return client.Build(); } -} +} \ No newline at end of file