using System.Globalization;
using FFMpegCore;
using System.Security.Cryptography;
using DysonNetwork.Shared.Cache;
using DysonNetwork.Shared.Proto;
using Google.Protobuf.WellKnownTypes;
using Microsoft.EntityFrameworkCore;
using Minio;
using Minio.DataModel.Args;
using NetVips;
using NodaTime;
using tusdotnet.Stores;
using System.Linq.Expressions;
using Microsoft.EntityFrameworkCore.Query;

namespace DysonNetwork.Drive.Storage;

/// <summary>
/// Handles the full lifecycle of cloud files: metadata caching, hashing and
/// de-duplication of new uploads, metadata extraction (image/video/audio),
/// background optimization (WebP conversion, thumbnailing) and upload to a
/// Minio-compatible remote, plus deletion and cache purging.
/// </summary>
public class FileService(
    AppDatabase db,
    IConfiguration configuration,
    TusDiskStore store,
    ILogger<FileService> logger,
    IServiceScopeFactory scopeFactory,
    ICacheService cache
)
{
    private const string CacheKeyPrefix = "file:";
    private static readonly TimeSpan CacheDuration = TimeSpan.FromMinutes(15);

    /// <summary>
    /// The api for getting file meta with cache,
    /// the best use case is for accessing the file data.
    ///
    /// This function won't load uploader's information, only keep minimal file meta
    /// </summary>
    /// <param name="fileId">The id of the cloud file requested</param>
    /// <returns>The minimal file meta, or null when the file does not exist</returns>
    public async Task<CloudFile?> GetFileAsync(string fileId)
    {
        var cacheKey = $"{CacheKeyPrefix}{fileId}";

        var cachedFile = await cache.GetAsync<CloudFile?>(cacheKey);
        if (cachedFile is not null)
            return cachedFile;

        var file = await db.Files
            .Where(f => f.Id == fileId)
            .FirstOrDefaultAsync();

        if (file != null)
            await cache.SetAsync(cacheKey, file, CacheDuration);

        return file;
    }

    /// <summary>
    /// Batch variant of <see cref="GetFileAsync"/>: resolves many file ids,
    /// serving from cache where possible and back-filling the cache from the
    /// database for misses. Missing ids are silently dropped; the result
    /// preserves the order of <paramref name="fileIds"/>.
    /// </summary>
    public async Task<List<CloudFile>> GetFilesAsync(List<string> fileIds)
    {
        var cachedFiles = new Dictionary<string, CloudFile>();
        var uncachedIds = new List<string>();

        // Check cache first
        foreach (var fileId in fileIds)
        {
            var cacheKey = $"{CacheKeyPrefix}{fileId}";
            var cachedFile = await cache.GetAsync<CloudFile?>(cacheKey);
            if (cachedFile != null)
                cachedFiles[fileId] = cachedFile;
            else
                uncachedIds.Add(fileId);
        }

        // Load uncached files from database
        if (uncachedIds.Count > 0)
        {
            var dbFiles = await db.Files
                .Where(f => uncachedIds.Contains(f.Id))
                .ToListAsync();

            // Add to cache
            foreach (var file in dbFiles)
            {
                var cacheKey = $"{CacheKeyPrefix}{file.Id}";
                await cache.SetAsync(cacheKey, file, CacheDuration);
                cachedFiles[file.Id] = file;
            }
        }

        // Preserve original order
        return fileIds
            .Select(f => cachedFiles.GetValueOrDefault(f))
            .Where(f => f != null)
            .Cast<CloudFile>()
            .ToList();
    }

    private const string TempFilePrefix = "dyn-cloudfile";

    // Formats that may be animated; these skip the WebP re-encode path so the
    // animation is not flattened to a single frame.
    private static readonly string[] AnimatedImageTypes =
        ["image/gif", "image/apng", "image/webp", "image/avif"];

    /// <summary>
    /// Registers a freshly uploaded (tus) file: hashes it, de-duplicates by
    /// content hash, extracts basic metadata synchronously, then offloads
    /// optimization and remote upload to a background task.
    /// </summary>
    /// <param name="account">The uploading account; its id becomes <c>AccountId</c>.</param>
    /// <param name="fileId">The tus upload id, reused as the file id.</param>
    /// <param name="stream">The uploaded content; ownership is transferred (disposed here or by the background task).</param>
    /// <param name="fileName">Original file name, used for mime-type inference.</param>
    /// <param name="contentType">Optional explicit mime type; inferred from the name when null.</param>
    /// <returns>The persisted <see cref="CloudFile"/> record.</returns>
    public async Task<CloudFile> ProcessNewFileAsync(
        Account account,
        string fileId,
        Stream stream,
        string fileName,
        string? contentType
    )
    {
        var ogFilePath = Path.GetFullPath(Path.Join(configuration.GetValue<string>("Tus:StorePath"), fileId));
        var fileSize = stream.Length;
        var hash = await HashFileAsync(stream, fileSize: fileSize);
        contentType ??= !fileName.Contains('.') ? "application/octet-stream" : MimeTypes.GetMimeType(fileName);

        var file = new CloudFile
        {
            Id = fileId,
            Name = fileName,
            MimeType = contentType,
            Size = fileSize,
            Hash = hash,
            AccountId = Guid.Parse(account.Id)
        };

        // De-duplicate by content hash: share the existing object's storage id
        // and copy its already-computed metadata instead of re-processing.
        var existingFile = await db.Files.AsNoTracking().FirstOrDefaultAsync(f => f.Hash == hash);
        file.StorageId = existingFile?.StorageId ?? file.Id;

        if (existingFile is not null)
        {
            file.FileMeta = existingFile.FileMeta;
            file.HasCompression = existingFile.HasCompression;
            file.SensitiveMarks = existingFile.SensitiveMarks;
            file.MimeType = existingFile.MimeType;
            file.UploadedAt = existingFile.UploadedAt;
            file.UploadedTo = existingFile.UploadedTo;

            db.Files.Add(file);
            await db.SaveChangesAsync();

            // Since the file content is a duplicate, we can delete the new upload and we are done.
            await stream.DisposeAsync();
            await store.DeleteFileAsync(file.Id, CancellationToken.None);
            return file;
        }

        // Extract metadata on the current thread for a faster initial response
        await ExtractMetadataAsync(file, ogFilePath, stream);

        db.Files.Add(file);
        await db.SaveChangesAsync();

        // Offload optimization (image conversion, thumbnailing) and uploading to a background task
        _ = Task.Run(() =>
            ProcessAndUploadInBackgroundAsync(file.Id, file.StorageId, contentType, ogFilePath, stream));

        return file;
    }

    /// <summary>
    /// Extracts metadata from the file based on its content type.
    /// This runs synchronously to ensure the initial database record has basic metadata.
    /// Failures are logged and never propagated; the file simply gets empty/partial meta.
    /// </summary>
    private async Task ExtractMetadataAsync(CloudFile file, string filePath, Stream stream)
    {
        switch (file.MimeType?.Split('/')[0])
        {
            case "image":
                try
                {
                    var blurhash = BlurHashSharp.SkiaSharp.BlurHashEncoder.Encode(3, 3, filePath);

                    // Rewind before handing the stream to VIPS, the hash pass moved the position.
                    stream.Position = 0;

                    using var vipsImage = Image.NewFromStream(stream);

                    var width = vipsImage.Width;
                    var height = vipsImage.Height;
                    var orientation = vipsImage.Get("orientation") as int? ?? 1;

                    var meta = new Dictionary<string, object?>
                    {
                        ["blur"] = blurhash,
                        ["format"] = vipsImage.Get("vips-loader") ?? "unknown",
                        ["width"] = width,
                        ["height"] = height,
                        ["orientation"] = orientation,
                    };
                    var exif = new Dictionary<string, object?>();

                    foreach (var field in vipsImage.GetFields())
                    {
                        if (IsIgnoredField(field)) continue;
                        var value = vipsImage.Get(field);
                        if (field.StartsWith("exif-"))
                            exif[field.Replace("exif-", "")] = value;
                        else
                            meta[field] = value;
                    }

                    // EXIF orientations 6 and 8 are 90° rotations: swap the reported dimensions.
                    if (orientation is 6 or 8)
                        (width, height) = (height, width);

                    meta["exif"] = exif;
                    meta["ratio"] = height != 0 ? (double)width / height : 0;
                    file.FileMeta = meta;
                }
                catch (Exception ex)
                {
                    file.FileMeta = new Dictionary<string, object?>();
                    logger.LogError(ex, "Failed to analyze image file {FileId}", file.Id);
                }

                break;
            case "video":
            case "audio":
                try
                {
                    var mediaInfo = await FFProbe.AnalyseAsync(filePath);
                    file.FileMeta = new Dictionary<string, object?>
                    {
                        ["duration"] = mediaInfo.Duration.TotalSeconds,
                        ["format_name"] = mediaInfo.Format.FormatName,
                        ["format_long_name"] = mediaInfo.Format.FormatLongName,
                        ["start_time"] = mediaInfo.Format.StartTime.ToString(),
                        ["bit_rate"] = mediaInfo.Format.BitRate.ToString(CultureInfo.InvariantCulture),
                        ["tags"] = mediaInfo.Format.Tags ?? new Dictionary<string, string>(),
                        ["chapters"] = mediaInfo.Chapters,
                        // Add detailed stream information
                        ["video_streams"] = mediaInfo.VideoStreams.Select(s => new
                        {
                            s.AvgFrameRate,
                            s.BitRate,
                            s.CodecName,
                            s.Duration,
                            s.Height,
                            s.Width,
                            s.Language,
                            s.PixelFormat,
                            s.Rotation
                        }).ToList(),
                        ["audio_streams"] = mediaInfo.AudioStreams.Select(s => new
                            {
                                s.BitRate,
                                s.Channels,
                                s.ChannelLayout,
                                s.CodecName,
                                s.Duration,
                                s.Language,
                                s.SampleRateHz
                            })
                            .ToList(),
                    };
                    if (mediaInfo.PrimaryVideoStream is not null)
                        file.FileMeta["ratio"] =
                            (double)mediaInfo.PrimaryVideoStream.Width / mediaInfo.PrimaryVideoStream.Height;
                }
                catch (Exception ex)
                {
                    logger.LogError(ex, "Failed to analyze media file {FileId}", file.Id);
                }

                break;
        }
    }

    /// <summary>
    /// Handles file optimization (image compression, video thumbnailing) and uploads to remote storage in the background.
    /// Runs in its own DI scope because the request-scoped services are gone by the time this executes.
    /// </summary>
    private async Task ProcessAndUploadInBackgroundAsync(
        string fileId,
        string storageId,
        string contentType,
        string originalFilePath,
        Stream stream
    )
    {
        await using var bgStream = stream; // Ensure stream is disposed at the end of this task
        using var scope = scopeFactory.CreateScope();
        var nfs = scope.ServiceProvider.GetRequiredService<FileService>();
        var scopedDb = scope.ServiceProvider.GetRequiredService<AppDatabase>();

        // (FilePath, object-key suffix, content type, delete local temp after upload)
        var uploads = new List<(string FilePath, string Suffix, string ContentType, bool SelfDestruct)>();

        var newMimeType = contentType;
        var hasCompression = false;
        var hasThumbnail = false;

        try
        {
            logger.LogInformation("Processing file {FileId} in background...", fileId);

            switch (contentType.Split('/')[0])
            {
                case "image" when !AnimatedImageTypes.Contains(contentType):
                    newMimeType = "image/webp";
                    using (var vipsImage = Image.NewFromFile(originalFilePath, access: Enums.Access.Sequential))
                    {
                        var imageToWrite = vipsImage;

                        // Normalize exotic colourspaces so the WebP encoder accepts the image.
                        if (vipsImage.Interpretation is Enums.Interpretation.Scrgb or Enums.Interpretation.Xyz)
                        {
                            imageToWrite = vipsImage.Colourspace(Enums.Interpretation.Srgb);
                        }

                        var webpPath = Path.Join(Path.GetTempPath(), $"{TempFilePrefix}#{fileId}.webp");
                        imageToWrite.Autorot().WriteToFile(webpPath,
                            new VOption { { "lossless", true }, { "strip", true } });
                        uploads.Add((webpPath, string.Empty, newMimeType, true));

                        // Images at or above ~1 megapixel also get a downscaled lossy variant.
                        if (imageToWrite.Width * imageToWrite.Height >= 1024 * 1024)
                        {
                            var scale = 1024.0 / Math.Max(imageToWrite.Width, imageToWrite.Height);
                            var compressedPath =
                                Path.Join(Path.GetTempPath(), $"{TempFilePrefix}#{fileId}-compressed.webp");
                            using var compressedImage = imageToWrite.Resize(scale);
                            compressedImage.Autorot().WriteToFile(compressedPath,
                                new VOption { { "Q", 80 }, { "strip", true } });
                            uploads.Add((compressedPath, ".compressed", newMimeType, true));
                            hasCompression = true;
                        }

                        if (!ReferenceEquals(imageToWrite, vipsImage))
                        {
                            imageToWrite.Dispose(); // Clean up manually created colourspace-converted image
                        }
                    }

                    break;

                case "video":
                    uploads.Add((originalFilePath, string.Empty, contentType, false));

                    var thumbnailPath = Path.Join(Path.GetTempPath(), $"{TempFilePrefix}#{fileId}.thumbnail.webp");
                    try
                    {
                        // Snapshot at 5s for longer videos, 1s for clips shorter than 5s.
                        var mediaInfo = await FFProbe.AnalyseAsync(originalFilePath);
                        var snapshotTime = mediaInfo.Duration > TimeSpan.FromSeconds(5)
                            ? TimeSpan.FromSeconds(5)
                            : TimeSpan.FromSeconds(1);
                        await FFMpeg.SnapshotAsync(originalFilePath, thumbnailPath, captureTime: snapshotTime);
                        uploads.Add((thumbnailPath, ".thumbnail.webp", "image/webp", true));
                        hasThumbnail = true;
                    }
                    catch (Exception ex)
                    {
                        logger.LogError(ex, "Failed to generate thumbnail for video {FileId}", fileId);
                    }

                    break;

                default:
                    uploads.Add((originalFilePath, string.Empty, contentType, false));
                    break;
            }

            logger.LogInformation("Optimized file {FileId}, now uploading...", fileId);

            if (uploads.Count > 0)
            {
                var uploadedTo = configuration.GetValue<string>("Storage:PreferredRemote")!;
                var uploadTasks = uploads.Select(item =>
                    nfs.UploadFileToRemoteAsync(storageId, uploadedTo, item.FilePath, item.Suffix, item.ContentType,
                        item.SelfDestruct)
                ).ToList();

                await Task.WhenAll(uploadTasks);

                logger.LogInformation("Uploaded file {FileId} done!", fileId);

                // Single set-based update; no need to load a tracked entity first.
                await scopedDb.Files.Where(f => f.Id == fileId).ExecuteUpdateAsync(setter => setter
                    .SetProperty(f => f.UploadedAt, Instant.FromDateTimeUtc(DateTime.UtcNow))
                    .SetProperty(f => f.UploadedTo, uploadedTo)
                    .SetProperty(f => f.MimeType, newMimeType)
                    .SetProperty(f => f.HasCompression, hasCompression)
                    .SetProperty(f => f.HasThumbnail, hasThumbnail)
                );
            }
        }
        catch (Exception err)
        {
            logger.LogError(err, "Failed to process and upload {FileId}", fileId);
        }
        finally
        {
            // The tus-store copy is no longer needed either way; purge stale cached meta.
            await store.DeleteFileAsync(fileId, CancellationToken.None);
            await nfs._PurgeCacheAsync(fileId);
        }
    }

    /// <summary>
    /// Hashes the stream with MD5 (hex, lowercase). Files larger than
    /// 5 * 1024 * <paramref name="chunkSize"/> bytes use a fast approximate
    /// fingerprint instead of a full pass. Resets the stream position around the read.
    /// </summary>
    private static async Task<string> HashFileAsync(Stream stream, int chunkSize = 1024 * 1024, long? fileSize = null)
    {
        fileSize ??= stream.Length;
        // NOTE(review): was `chunkSize * 1024 * 5`, which overflows int (5·2^30
        // wraps to 2^30) and made the threshold 1 GiB instead of the intended 5 GiB.
        if (fileSize > (long)chunkSize * 1024 * 5)
            return await HashFastApproximateAsync(stream, chunkSize);

        stream.Position = 0;
        using var md5 = MD5.Create();
        var hashBytes = await md5.ComputeHashAsync(stream);
        stream.Position = 0; // Reset stream position after reading
        return Convert.ToHexString(hashBytes).ToLowerInvariant();
    }

    /// <summary>
    /// Approximate fingerprint for very large files: MD5 over the first and
    /// last <paramref name="chunkSize"/> bytes only. Not a full content hash —
    /// acceptable for de-duplication of multi-gigabyte uploads.
    /// </summary>
    private static async Task<string> HashFastApproximateAsync(Stream stream, int chunkSize = 1024 * 1024)
    {
        // NOTE(review): the original multiplied chunkSize by 1024 here, turning the
        // 1 MiB default into 1 GiB and overflowing `new byte[chunkSize * 2]` to a
        // negative length (OverflowException). chunkSize is already in bytes.
        using var md5 = MD5.Create();

        var buffer = new byte[chunkSize * 2];
        var fileLength = stream.Length;

        stream.Position = 0;
        var bytesRead = await stream.ReadAsync(buffer.AsMemory(0, chunkSize));

        if (fileLength > chunkSize)
        {
            stream.Seek(-chunkSize, SeekOrigin.End);
            bytesRead += await stream.ReadAsync(buffer.AsMemory(chunkSize, chunkSize));
        }

        var hash = md5.ComputeHash(buffer, 0, bytesRead);
        stream.Position = 0; // Reset stream position
        return Convert.ToHexString(hash).ToLowerInvariant();
    }

    /// <summary>
    /// Uploads a local file to the given remote, optionally deleting the local
    /// copy afterwards (<paramref name="selfDestruct"/>).
    /// </summary>
    public async Task UploadFileToRemoteAsync(string storageId, string targetRemote, string filePath,
        string? suffix = null, string? contentType = null, bool selfDestruct = false)
    {
        // Dispose the stream BEFORE deleting: File.Delete on an open file fails on Windows.
        await using (var fileStream = File.OpenRead(filePath))
        {
            await UploadFileToRemoteAsync(storageId, targetRemote, fileStream, suffix, contentType);
        }

        if (selfDestruct) File.Delete(filePath);
    }

    /// <summary>
    /// Uploads a stream to the remote object store under
    /// <c>storageId + suffix</c> (or just <c>storageId</c> when no suffix).
    /// </summary>
    /// <exception cref="InvalidOperationException">When no client can be built for the remote.</exception>
    public async Task UploadFileToRemoteAsync(string storageId, string targetRemote, Stream stream,
        string? suffix = null, string? contentType = null)
    {
        var dest = GetRemoteStorageConfig(targetRemote);
        var client = CreateMinioClient(dest);
        if (client is null)
            throw new InvalidOperationException(
                $"Failed to configure client for remote destination '{targetRemote}'"
            );

        var bucket = dest.Bucket;
        contentType ??= "application/octet-stream";

        await client.PutObjectAsync(new PutObjectArgs()
            .WithBucket(bucket)
            .WithObject(string.IsNullOrWhiteSpace(suffix) ? storageId : storageId + suffix)
            .WithStreamData(stream)
            .WithObjectSize(stream.Length)
            .WithContentType(contentType)
        );
    }

    /// <summary>
    /// Applies a partial update to a file record guided by a protobuf
    /// <see cref="FieldMask"/>; unknown paths are logged and skipped.
    /// </summary>
    /// <exception cref="InvalidOperationException">When the file does not exist.</exception>
    public async Task<CloudFile> UpdateFileAsync(CloudFile file, FieldMask updateMask)
    {
        var existingFile = await db.Files.FirstOrDefaultAsync(f => f.Id == file.Id);
        if (existingFile == null)
        {
            throw new InvalidOperationException($"File with ID {file.Id} not found.");
        }

        var updatable = new UpdatableCloudFile(existingFile);

        foreach (var path in updateMask.Paths)
        {
            switch (path)
            {
                case "name":
                    updatable.Name = file.Name;
                    break;
                case "description":
                    updatable.Description = file.Description;
                    break;
                case "file_meta":
                    updatable.FileMeta = file.FileMeta;
                    break;
                case "user_meta":
                    updatable.UserMeta = file.UserMeta;
                    break;
                case "is_marked_recycle":
                    updatable.IsMarkedRecycle = file.IsMarkedRecycle;
                    break;
                default:
                    logger.LogWarning("Attempted to update unmodifiable field: {Field}", path);
                    break;
            }
        }

        await db.Files.Where(f => f.Id == file.Id).ExecuteUpdateAsync(updatable.ToSetPropertyCalls());

        await _PurgeCacheAsync(file.Id);
        // Re-fetch the file to return the updated state
        return await db.Files.AsNoTracking().FirstAsync(f => f.Id == file.Id);
    }

    /// <summary>
    /// Deletes a file's remote data (when safe), its database row, and its cache entry.
    /// </summary>
    public async Task DeleteFileAsync(CloudFile file)
    {
        await DeleteFileDataAsync(file);

        db.Remove(file);
        await db.SaveChangesAsync();
        await _PurgeCacheAsync(file.Id);
    }

    /// <summary>
    /// Deletes the remote object(s) for a file, unless another file sharing the
    /// same storage id is still referenced (de-duplicated content must survive).
    /// </summary>
    public async Task DeleteFileDataAsync(CloudFile file)
    {
        if (file.StorageId is null) return;
        if (file.UploadedTo is null) return;

        // Check if any other file with the same storage ID is referenced
        var otherFilesWithSameStorageId = await db.Files
            .Where(f => f.StorageId == file.StorageId && f.Id != file.Id)
            .Select(f => f.Id)
            .ToListAsync();

        // Check if any of these files are referenced
        var anyReferenced = false;
        if (otherFilesWithSameStorageId.Any())
        {
            anyReferenced = await db.FileReferences
                .Where(r => otherFilesWithSameStorageId.Contains(r.FileId))
                .AnyAsync();
        }

        // If any other file with the same storage ID is referenced, don't delete the actual file data
        if (anyReferenced) return;

        var dest = GetRemoteStorageConfig(file.UploadedTo);
        var client = CreateMinioClient(dest);
        if (client is null)
            throw new InvalidOperationException(
                $"Failed to configure client for remote destination '{file.UploadedTo}'"
            );

        var bucket = dest.Bucket;
        var objectId = file.StorageId ?? file.Id; // Use StorageId if available, otherwise fall back to Id

        await client.RemoveObjectAsync(
            new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId)
        );

        if (file.HasCompression)
        {
            // Also remove the compressed version if it exists
            try
            {
                await client.RemoveObjectAsync(
                    new RemoveObjectArgs().WithBucket(bucket).WithObject(objectId + ".compressed")
                );
            }
            catch
            {
                // Ignore errors when deleting compressed version
                logger.LogWarning("Failed to delete compressed version of file {fileId}", file.Id);
            }
        }
    }

    /// <summary>
    /// Looks up a remote destination from the <c>Storage:Remote</c> configuration section.
    /// </summary>
    /// <exception cref="InvalidOperationException">When the destination id is unknown.</exception>
    public RemoteStorageConfig GetRemoteStorageConfig(string destination)
    {
        var destinations = configuration.GetSection("Storage:Remote").Get<List<RemoteStorageConfig>>()!;
        var dest = destinations.FirstOrDefault(d => d.Id == destination);
        if (dest is null) throw new InvalidOperationException($"Remote destination '{destination}' not found");
        return dest;
    }

    /// <summary>
    /// Builds a Minio client for the given remote configuration.
    /// </summary>
    public IMinioClient? CreateMinioClient(RemoteStorageConfig dest)
    {
        var client = new MinioClient()
            .WithEndpoint(dest.Endpoint)
            .WithRegion(dest.Region)
            .WithCredentials(dest.SecretId, dest.SecretKey);
        if (dest.EnableSsl) client = client.WithSSL();

        return client.Build();
    }

    // Helper method to purge the cache for a specific file
    // Made internal to allow FileReferenceService to use it
    internal async Task _PurgeCacheAsync(string fileId)
    {
        var cacheKey = $"{CacheKeyPrefix}{fileId}";
        await cache.RemoveAsync(cacheKey);
    }

    // Helper method to purge cache for multiple files
    internal async Task _PurgeCacheRangeAsync(IEnumerable<string> fileIds)
    {
        var tasks = fileIds.Select(_PurgeCacheAsync);
        await Task.WhenAll(tasks);
    }

    /// <summary>
    /// Resolves the files behind a list of references (cache first, then
    /// database), preserving the reference order and dropping dangling ids.
    /// </summary>
    public async Task<List<CloudFile>> LoadFromReference(List<CloudFileReferenceObject> references)
    {
        var cachedFiles = new Dictionary<string, CloudFile>();
        var uncachedIds = new List<string>();

        // Check cache first
        foreach (var reference in references)
        {
            var cacheKey = $"{CacheKeyPrefix}{reference.Id}";
            var cachedFile = await cache.GetAsync<CloudFile?>(cacheKey);

            if (cachedFile != null)
            {
                cachedFiles[reference.Id] = cachedFile;
            }
            else
            {
                uncachedIds.Add(reference.Id);
            }
        }

        // Load uncached files from database
        if (uncachedIds.Count > 0)
        {
            var dbFiles = await db.Files
                .Where(f => uncachedIds.Contains(f.Id))
                .ToListAsync();

            // Add to cache
            foreach (var file in dbFiles)
            {
                var cacheKey = $"{CacheKeyPrefix}{file.Id}";
                await cache.SetAsync(cacheKey, file, CacheDuration);
                cachedFiles[file.Id] = file;
            }
        }

        // Preserve original order
        return references
            .Select(r => cachedFiles.GetValueOrDefault(r.Id))
            .Where(f => f != null)
            .Cast<CloudFile>()
            .ToList();
    }

    /// <summary>
    /// Gets the number of references to a file based on CloudFileReference records
    /// </summary>
    /// <param name="fileId">The ID of the file</param>
    /// <returns>The number of references to the file</returns>
    public async Task<int> GetReferenceCountAsync(string fileId)
    {
        return await db.FileReferences
            .Where(r => r.FileId == fileId)
            .CountAsync();
    }

    /// <summary>
    /// Checks if a file is referenced by any resource
    /// </summary>
    /// <param name="fileId">The ID of the file to check</param>
    /// <returns>True if the file is referenced, false otherwise</returns>
    public async Task<bool> IsReferencedAsync(string fileId)
    {
        return await db.FileReferences
            .Where(r => r.FileId == fileId)
            .AnyAsync();
    }

    /// <summary>
    /// Checks if an EXIF field should be ignored (e.g., GPS data).
    /// Privacy measure: strips location data before metadata is persisted.
    /// </summary>
    private static bool IsIgnoredField(string fieldName)
    {
        // Common GPS EXIF field names
        var gpsFields = new[]
        {
            "gps-latitude", "gps-longitude", "gps-altitude", "gps-latitude-ref", "gps-longitude-ref",
            "gps-altitude-ref", "gps-timestamp", "gps-datestamp", "gps-speed", "gps-speed-ref", "gps-track",
            "gps-track-ref", "gps-img-direction", "gps-img-direction-ref", "gps-dest-latitude",
            "gps-dest-longitude", "gps-dest-latitude-ref", "gps-dest-longitude-ref", "gps-processing-method",
            "gps-area-information"
        };

        if (fieldName.StartsWith("exif-GPS")) return true;
        if (fieldName.StartsWith("ifd3-GPS")) return true;
        if (fieldName.EndsWith("-data")) return true;

        return gpsFields.Any(gpsField => fieldName.StartsWith(gpsField, StringComparison.OrdinalIgnoreCase));
    }
}

/// <summary>
/// A helper class to build an ExecuteUpdateAsync call for CloudFile.
/// </summary>
file class UpdatableCloudFile(CloudFile file)
{
    public string Name { get; set; } = file.Name;
    public string? Description { get; set; } = file.Description;
    public Dictionary<string, object?>? FileMeta { get; set; } = file.FileMeta;
    public Dictionary<string, object?>? UserMeta { get; set; } = file.UserMeta;
    public bool IsMarkedRecycle { get; set; } = file.IsMarkedRecycle;

    public Expression<Func<SetPropertyCalls<CloudFile>, SetPropertyCalls<CloudFile>>> ToSetPropertyCalls()
    {
        var userMeta = UserMeta ?? new Dictionary<string, object?>();
        return setter => setter
            .SetProperty(f => f.Name, Name)
            .SetProperty(f => f.Description, Description)
            .SetProperty(f => f.FileMeta, FileMeta)
            .SetProperty(f => f.UserMeta, userMeta!)
            .SetProperty(f => f.IsMarkedRecycle, IsMarkedRecycle);
    }
}