🐛 Trying to fix reanalysis service

This commit is contained in:
2026-01-14 00:50:55 +08:00
parent a4c2892a66
commit 625d4e1a13

View File

@@ -18,7 +18,7 @@ public class FileReanalysisService(
IOptions<FileReanalysisOptions> options)
{
private readonly FileReanalysisOptions _options = options.Value;
private readonly HashSet<string> _failedFileIds = new();
private readonly HashSet<string> _failedFileIds = [];
private async Task<List<SnCloudFile>> GetFilesNeedingReanalysisAsync(int limit = 100)
{
@@ -28,7 +28,8 @@ public class FileReanalysisService(
.Where(f => f.ObjectId != null)
.Include(f => f.Object)
.ThenInclude(f => f.FileReplicas)
.Where(f => f.Object != null && (f.Object.Meta == null || f.Object.Meta.Count == 0))
.Where(f => f.Object != null && (f.Object.Meta == null || f.Object.Meta.Count == 0 || f.Object.Size == 0 ||
f.Object.Hash == null))
.Where(f => f.Object!.FileReplicas.Count > 0)
.Where(f => f.CreatedAt <= deadline)
.Skip(_failedFileIds.Count)
@@ -62,7 +63,7 @@ public class FileReanalysisService(
.ToListAsync();
}
public async Task<bool> ReanalyzeFileAsync(SnCloudFile file)
private async Task<bool> ReanalyzeFileAsync(SnCloudFile file)
{
logger.LogInformation("Starting reanalysis for file {FileId}: {FileName}", file.Id, file.Name);
@@ -96,10 +97,9 @@ public class FileReanalysisService(
{
logger.LogWarning("Failed to extract metadata for supported MIME type {MimeType} on file {FileId}",
file.MimeType, file.Id);
return false;
}
bool updated = false;
var updated = false;
if (file.Object.Size == 0 || file.Object.Size != actualSize)
{
file.Object.Size = actualSize;
@@ -120,8 +120,9 @@ public class FileReanalysisService(
if (updated)
{
db.FileObjects.Update(file.Object);
await db.SaveChangesAsync();
int metaCount = meta?.Count ?? 0;
var metaCount = meta?.Count ?? 0;
logger.LogInformation("Successfully reanalyzed file {FileId}, updated metadata with {MetaCount} fields",
file.Id, metaCount);
}
@@ -130,11 +131,6 @@ public class FileReanalysisService(
logger.LogInformation("File {FileId} already up to date", file.Id);
}
if (_options.ValidateCompression || _options.ValidateThumbnails)
{
await ValidateCompressionAndThumbnailAsync(file);
}
return true;
}
catch (ObjectNotFoundException)
@@ -152,9 +148,7 @@ public class FileReanalysisService(
finally
{
if (File.Exists(tempPath))
{
File.Delete(tempPath);
}
}
}
@@ -186,7 +180,8 @@ public class FileReanalysisService(
var client = CreateMinioClient(dest);
if (client == null)
{
logger.LogWarning("Failed to create Minio client for pool {PoolId}, skipping validation", primaryReplica.PoolId);
logger.LogWarning("Failed to create Minio client for pool {PoolId}, skipping validation",
primaryReplica.PoolId);
return;
}
@@ -194,10 +189,13 @@ public class FileReanalysisService(
if (_options.ValidateCompression && file.Object.HasCompression)
{
var compressedExists = await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".compressed");
var compressedExists =
await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".compressed");
if (!compressedExists)
{
logger.LogInformation("File {FileId} has compression flag but compressed version not found, setting HasCompression to false", file.Id);
logger.LogInformation(
"File {FileId} has compression flag but compressed version not found, setting HasCompression to false",
file.Id);
file.Object.HasCompression = false;
updated = true;
}
@@ -205,10 +203,13 @@ public class FileReanalysisService(
if (_options.ValidateThumbnails && file.Object.HasThumbnail)
{
var thumbnailExists = await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".thumbnail");
var thumbnailExists =
await ObjectExistsAsync(client, dest.Bucket, primaryReplica.StorageId + ".thumbnail");
if (!thumbnailExists)
{
logger.LogInformation("File {FileId} has thumbnail flag but thumbnail not found, setting HasThumbnail to false", file.Id);
logger.LogInformation(
"File {FileId} has thumbnail flag but thumbnail not found, setting HasThumbnail to false",
file.Id);
file.Object.HasThumbnail = false;
updated = true;
}
@@ -226,7 +227,7 @@ public class FileReanalysisService(
}
}
private async Task<bool> ObjectExistsAsync(IMinioClient client, string bucket, string objectName)
private static async Task<bool> ObjectExistsAsync(IMinioClient client, string bucket, string objectName)
{
try
{
@@ -244,37 +245,58 @@ public class FileReanalysisService(
public async Task ProcessNextFileAsync()
{
if (!_options.Enabled)
{
logger.LogDebug("File reanalysis is disabled, skipping");
return;
}
var reanalysisFiles = await GetFilesNeedingReanalysisAsync(10);
reanalysisFiles = reanalysisFiles.Where(f => !_failedFileIds.Contains(f.Id.ToString())).ToList();
var files = await GetFilesNeedingReanalysisAsync(10);
files = files.Where(f => !_failedFileIds.Contains(f.Id.ToString())).ToList();
if (files.Count == 0)
if (reanalysisFiles.Count > 0)
{
if (_options.ValidateCompression || _options.ValidateThumbnails)
if (!_options.Enabled)
{
files = await GetFilesNeedingCompressionValidationAsync(5);
if (files.Count == 0)
logger.LogDebug("File reanalysis is disabled, skipping reanalysis but continuing with validation");
}
else
{
var file = reanalysisFiles[0];
bool success = await ReanalyzeFileAsync(file);
if (!success)
{
files = await GetFilesNeedingThumbnailValidationAsync(5);
logger.LogWarning("Failed to reanalyze file {FileId}, skipping for now", file.Id);
_failedFileIds.Add(file.Id);
}
return;
}
}
if (files.Count == 0)
if (_options.ValidateCompression)
{
logger.LogInformation("No files found needing reanalysis");
return;
var compressionFiles = await GetFilesNeedingCompressionValidationAsync(5);
if (compressionFiles.Count > 0)
{
var file = compressionFiles[0];
await ValidateCompressionAndThumbnailAsync(file);
return;
}
}
var file = files[0];
bool success = await ReanalyzeFileAsync(file);
if (!success)
if (_options.ValidateThumbnails)
{
logger.LogWarning("Failed to reanalyze file {FileId}, skipping for now", file.Id);
_failedFileIds.Add(file.Id);
var thumbnailFiles = await GetFilesNeedingThumbnailValidationAsync(5);
if (thumbnailFiles.Count > 0)
{
var file = thumbnailFiles[0];
await ValidateCompressionAndThumbnailAsync(file);
return;
}
}
if (reanalysisFiles.Count > 0 && !_options.Enabled)
{
logger.LogInformation("Reanalysis is disabled, no other work to do");
}
else
{
logger.LogInformation("No files found needing reanalysis or validation");
}
}
@@ -290,6 +312,7 @@ public class FileReanalysisService(
{
throw new InvalidOperationException($"No remote storage configured for pool {replica.PoolId}");
}
var dest = pool.StorageConfig;
var client = CreateMinioClient(dest);
@@ -512,4 +535,4 @@ public class FileReanalysisService(
return client.Build();
}
}
}