Sanitize text to remove hidden unicode and control characters

This commit is contained in:
2025-06-07 22:06:57 +08:00
parent a8503735d1
commit 3a978441b6
3 changed files with 41 additions and 4 deletions

View File

@ -0,0 +1,24 @@
using System.Globalization;
using System.Text;
namespace DysonNetwork.Sphere.Storage;
/// <summary>
/// Strips invisible and non-printing characters from text.
/// </summary>
public abstract class TextSanitizer
{
    /// <summary>
    /// Returns <paramref name="text"/> with control, format and non-spacing-mark
    /// characters removed, while keeping tab, line feed and carriage return so
    /// that multi-line and tab-separated text keeps its layout.
    /// </summary>
    /// <param name="text">The text to clean; may be <c>null</c> or empty.</param>
    /// <returns>
    /// The input unchanged when it is <c>null</c> or empty; otherwise a new string
    /// containing only the characters that were not stripped.
    /// </returns>
    /// <remarks>
    /// Classification is done per UTF-16 code unit. Surrogate code units are in the
    /// <see cref="UnicodeCategory.Surrogate"/> category, so characters encoded as
    /// surrogate pairs are passed through untouched.
    /// </remarks>
    public static string? Sanitize(string? text)
    {
        if (string.IsNullOrEmpty(text)) return text;

        // The result can never be longer than the input, so presize the buffer.
        var filtered = new StringBuilder(text.Length);
        foreach (var ch in text)
        {
            if (!IsStripped(ch)) filtered.Append(ch);
        }

        return filtered.ToString();
    }

    /// <summary>
    /// Decides whether a single UTF-16 code unit must be removed from the output.
    /// </summary>
    private static bool IsStripped(char ch) => CharUnicodeInfo.GetUnicodeCategory(ch) switch
    {
        // Tab, LF and CR are control characters too, but they are visible layout
        // whitespace rather than hidden content, so they are kept.
        UnicodeCategory.Control => ch is not ('\t' or '\n' or '\r'),
        UnicodeCategory.Format or UnicodeCategory.NonSpacingMark => true,
        _ => false,
    };
}