Truncate HTML content to plain text as well

This commit is contained in:
2026-01-02 14:14:27 +08:00
parent 306934304e
commit c588b6f234
2 changed files with 63 additions and 14 deletions

View File

@@ -11,6 +11,7 @@
</PropertyGroup> </PropertyGroup>
<ItemGroup> <ItemGroup>
<PackageReference Include="AngleSharp" Version="1.4.0" />
<PackageReference Include="Grpc.AspNetCore.Server" Version="2.76.0" /> <PackageReference Include="Grpc.AspNetCore.Server" Version="2.76.0" />
<PackageReference Include="jieba.NET" Version="0.42.2" /> <PackageReference Include="jieba.NET" Version="0.42.2" />
<PackageReference Include="Livekit.Server.Sdk.Dotnet" Version="1.1.0" /> <PackageReference Include="Livekit.Server.Sdk.Dotnet" Version="1.1.0" />

View File

@@ -3,7 +3,6 @@ using DysonNetwork.Shared;
using DysonNetwork.Shared.Cache; using DysonNetwork.Shared.Cache;
using DysonNetwork.Shared.Proto; using DysonNetwork.Shared.Proto;
using DysonNetwork.Shared.Registry; using DysonNetwork.Shared.Registry;
using DysonNetwork.Sphere.Localization; using DysonNetwork.Sphere.Localization;
using DysonNetwork.Sphere.Publisher; using DysonNetwork.Sphere.Publisher;
using DysonNetwork.Sphere.ActivityPub; using DysonNetwork.Sphere.ActivityPub;
@@ -11,8 +10,10 @@ using Microsoft.EntityFrameworkCore;
using Microsoft.Extensions.Localization; using Microsoft.Extensions.Localization;
using NodaTime; using NodaTime;
using Markdig; using Markdig;
using AngleSharp.Html.Parser;
using DysonNetwork.Shared.Models; using DysonNetwork.Shared.Models;
using DysonNetwork.Shared.Models.Embed; using DysonNetwork.Shared.Models.Embed;
using PostContentType = DysonNetwork.Shared.Models.PostContentType;
namespace DysonNetwork.Sphere.Post; namespace DysonNetwork.Sphere.Post;
@@ -37,32 +38,79 @@ public partial class PostService(
{ {
const int maxLength = 256; const int maxLength = 256;
const int embedMaxLength = 80; const int embedMaxLength = 80;
var parser = new HtmlParser();
foreach (var item in input) foreach (var item in input)
{ {
if (item.Content?.Length > maxLength) if (item.Content?.Length > maxLength)
{ {
var plainText = Markdown.ToPlainText(item.Content); string plainText;
item.Content = plainText.Length > maxLength ? plainText[..maxLength] : plainText; if (item.ContentType == PostContentType.Markdown)
item.IsTruncated = true; {
plainText = Markdown.ToPlainText(item.Content);
}
else if (item.ContentType == PostContentType.Html)
{
var document = parser.ParseDocument(item.Content);
plainText = document.Body?.TextContent.Trim() ?? "";
}
else
{
continue;
}
if (plainText.Length > maxLength)
{
item.Content = plainText.Substring(0, maxLength);
item.IsTruncated = true;
}
} }
// Truncate replied post content with shorter embed length // Truncate replied post content with shorter embed length
if (item.RepliedPost?.Content != null) if (item.RepliedPost?.Content != null && item.Content?.Length > embedMaxLength)
{ {
var plainText = Markdown.ToPlainText(item.RepliedPost.Content); string plainText;
if (item.ContentType == PostContentType.Markdown)
{
plainText = Markdown.ToPlainText(item.RepliedPost.Content);
}
else if (item.ContentType == PostContentType.Html)
{
var document = parser.ParseDocument(item.RepliedPost.Content);
plainText = document.Body?.TextContent.Trim() ?? "";
}
else
{
continue;
}
if (plainText.Length > embedMaxLength) if (plainText.Length > embedMaxLength)
{ {
item.RepliedPost.Content = plainText[..embedMaxLength]; item.RepliedPost.Content = plainText.Substring(0, embedMaxLength);
item.RepliedPost.IsTruncated = true; item.RepliedPost.IsTruncated = true;
} }
} }
// Truncate forwarded post content with shorter embed length // Truncate forwarded post content with shorter embed length
if (item.ForwardedPost?.Content == null || if (item.ForwardedPost?.Content != null && item.Content?.Length > embedMaxLength)
Markdown.ToPlainText(item.ForwardedPost.Content).Length <= embedMaxLength) continue; {
var forwardedPlainText = Markdown.ToPlainText(item.ForwardedPost.Content); string plainText;
item.ForwardedPost.Content = forwardedPlainText[..embedMaxLength]; if (item.ContentType == PostContentType.Markdown)
item.ForwardedPost.IsTruncated = true; {
plainText = Markdown.ToPlainText(item.ForwardedPost.Content);
}
else if (item.ContentType == PostContentType.Html)
{
var document = parser.ParseDocument(item.ForwardedPost.Content);
plainText = document.Body?.TextContent.Trim() ?? "";
}
else
{
continue;
}
if (plainText.Length > embedMaxLength)
{
item.ForwardedPost.Content = plainText.Substring(0, embedMaxLength);
item.ForwardedPost.IsTruncated = true;
}
}
} }
return input; return input;