⏪ Rollback to use old text sanitizer
This commit is contained in:
@@ -6,35 +6,23 @@ namespace DysonNetwork.Shared.Content;
|
|||||||
|
|
||||||
/// <summary>
/// Strips invisible and potentially abusive characters from user-supplied text.
/// </summary>
public abstract partial class TextSanitizer
{
    /// <summary>
    /// Removes control characters (except line feed, carriage return and tab),
    /// format characters and non-spacing marks from <paramref name="text"/>.
    /// </summary>
    /// <param name="text">The text to sanitize; may be <c>null</c> or empty.</param>
    /// <returns>
    /// <paramref name="text"/> itself when it is <c>null</c> or empty; otherwise a new
    /// string containing only the characters that passed the filter, in their original order.
    /// </returns>
    public static string? Sanitize(string? text)
    {
        if (string.IsNullOrEmpty(text))
        {
            return text;
        }

        var filtered = new StringBuilder(text.Length);
        var index = 0;
        while (index < text.Length)
        {
            // Classify whole code points: a surrogate pair examined one UTF-16 unit at a
            // time always reports Surrogate, which would let supplementary-plane format
            // characters and non-spacing marks slip through the filter.
            var width = char.IsSurrogatePair(text, index) ? 2 : 1;
            if (ShouldKeep(text, index))
            {
                filtered.Append(text, index, width);
            }

            index += width;
        }

        return filtered.ToString();
    }

    /// <summary>
    /// Decides whether the code point starting at <paramref name="index"/> survives sanitizing.
    /// </summary>
    private static bool ShouldKeep(string text, int index) =>
        CharUnicodeInfo.GetUnicodeCategory(text, index) switch
        {
            // Every control character is in the BMP, so inspecting the single char is enough.
            UnicodeCategory.Control => text[index] is '\n' or '\r' or '\t',
            UnicodeCategory.Format or UnicodeCategory.NonSpacingMark => false,
            _ => true,
        };
}
|
Reference in New Issue
Block a user