👔 Optimize sphere rewind segmenter

This commit is contained in:
2025-12-27 13:15:25 +08:00
parent 9d1bc46bf1
commit 27afe5da9f

View File

@@ -18,7 +18,10 @@ public class SphereRewindServiceGrpc(
Publisher.PublisherService ps Publisher.PublisherService ps
) : RewindService.RewindServiceBase ) : RewindService.RewindServiceBase
{ {
public override async Task<RewindEvent> GetRewindEvent(RequestRewindEvent request, ServerCallContext context) public override async Task<RewindEvent> GetRewindEvent(
RequestRewindEvent request,
ServerCallContext context
)
{ {
var accountId = Guid.Parse(request.AccountId); var accountId = Guid.Parse(request.AccountId);
var year = request.Year; var year = request.Year;
@@ -27,45 +30,44 @@ public class SphereRewindServiceGrpc(
var endDate = new LocalDate(year, 12, 26).AtMidnight().InUtc().ToInstant(); var endDate = new LocalDate(year, 12, 26).AtMidnight().InUtc().ToInstant();
// Audience data // Audience data
var mostLovedPublisherClue = var mostLovedPublisherClue = await db
await db.PostReactions .PostReactions.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate)
.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate) .Where(p => p.AccountId == accountId && p.Attitude == PostReactionAttitude.Positive)
.Where(p => p.AccountId == accountId && p.Attitude == PostReactionAttitude.Positive) .GroupBy(p => p.Post.PublisherId)
.GroupBy(p => p.Post.PublisherId) .OrderByDescending(g => g.Count())
.OrderByDescending(g => g.Count()) .Select(g => new { PublisherId = g.Key, ReactionCount = g.Count() })
.Select(g => new { PublisherId = g.Key, ReactionCount = g.Count() }) .FirstOrDefaultAsync();
.FirstOrDefaultAsync();
var mostLovedPublisher = mostLovedPublisherClue is not null var mostLovedPublisher = mostLovedPublisherClue is not null
? await ps.GetPublisherLoaded(mostLovedPublisherClue.PublisherId) ? await ps.GetPublisherLoaded(mostLovedPublisherClue.PublisherId)
: null; : null;
// Creator data // Creator data
var publishers = await db.PublisherMembers var publishers = await db
.Where(pm => pm.AccountId == accountId) .PublisherMembers.Where(pm => pm.AccountId == accountId)
.Select(pm => pm.PublisherId) .Select(pm => pm.PublisherId)
.ToListAsync(); .ToListAsync();
var mostLovedAudienceClue = var mostLovedAudienceClue = await db
await db.PostReactions .PostReactions.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate)
.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate) .Where(pr =>
.Where(pr => pr.Attitude == PostReactionAttitude.Positive
pr.Attitude == PostReactionAttitude.Positive && && publishers.Contains(pr.Post.PublisherId)
publishers.Contains(pr.Post.PublisherId)) )
.GroupBy(pr => pr.AccountId) .GroupBy(pr => pr.AccountId)
.OrderByDescending(g => g.Count()) .OrderByDescending(g => g.Count())
.Select(g => new { AccountId = g.Key, ReactionCount = g.Count() }) .Select(g => new { AccountId = g.Key, ReactionCount = g.Count() })
.FirstOrDefaultAsync(); .FirstOrDefaultAsync();
var mostLovedAudience = mostLovedAudienceClue is not null var mostLovedAudience = mostLovedAudienceClue is not null
? await remoteAccounts.GetAccount(mostLovedAudienceClue.AccountId) ? await remoteAccounts.GetAccount(mostLovedAudienceClue.AccountId)
: null; : null;
var posts = db.Posts var posts = db
.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate) .Posts.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate)
.Where(p => publishers.Contains(p.PublisherId)) .Where(p => publishers.Contains(p.PublisherId))
.AsQueryable(); .AsQueryable();
var postTotalCount = await posts.CountAsync(); var postTotalCount = await posts.CountAsync();
var postTotalUpvotes = await db.PostReactions var postTotalUpvotes = await db
.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate) .PostReactions.Where(a => a.CreatedAt >= startDate && a.CreatedAt < endDate)
.Where(p => publishers.Contains(p.Post.PublisherId)) .Where(p => publishers.Contains(p.Post.PublisherId))
.Where(r => r.Attitude == PostReactionAttitude.Positive) .Where(r => r.Attitude == PostReactionAttitude.Positive)
.CountAsync(); .CountAsync();
@@ -86,7 +88,7 @@ public class SphereRewindServiceGrpc(
.Take(1000) .Take(1000)
.ToListAsync(); .ToListAsync();
var segmenter = new JiebaSegmenter(); var segmenter = new JiebaSegmenter();
var words = segmenter.CutInParallel(postContents, cutAll: true, hmm: false); var words = segmenter.CutInParallel(postContents, cutAll: false, hmm: false);
var allWords = words.SelectMany(w => w); var allWords = words.SelectMany(w => w);
var topWords = allWords var topWords = allWords
.GroupBy(w => w) .GroupBy(w => w)
@@ -96,8 +98,8 @@ public class SphereRewindServiceGrpc(
.ToList(); .ToList();
// Chat data // Chat data
var messagesQuery = db.ChatMessages var messagesQuery = db
.Include(m => m.Sender) .ChatMessages.Include(m => m.Sender)
.Include(m => m.ChatRoom) .Include(m => m.ChatRoom)
.Where(m => m.CreatedAt >= startDate && m.CreatedAt < endDate) .Where(m => m.CreatedAt >= startDate && m.CreatedAt < endDate)
.Where(m => m.Sender.AccountId == accountId) .Where(m => m.Sender.AccountId == accountId)
@@ -120,8 +122,8 @@ public class SphereRewindServiceGrpc(
: null; : null;
// Call data // Call data
var callQuery = db.ChatRealtimeCall var callQuery = db
.Include(c => c.Sender) .ChatRealtimeCall.Include(c => c.Sender)
.Include(c => c.Room) .Include(c => c.Room)
.Where(c => c.CreatedAt >= startDate && c.CreatedAt < endDate) .Where(c => c.CreatedAt >= startDate && c.CreatedAt < endDate)
.Where(c => c.Sender.AccountId == accountId) .Where(c => c.Sender.AccountId == accountId)
@@ -130,29 +132,52 @@ public class SphereRewindServiceGrpc(
var now = SystemClock.Instance.GetCurrentInstant(); var now = SystemClock.Instance.GetCurrentInstant();
var groupCallRecords = await callQuery var groupCallRecords = await callQuery
.Where(c => c.Room.Type == ChatRoomType.Group) .Where(c => c.Room.Type == ChatRoomType.Group)
.Select(c => new { c.RoomId, c.CreatedAt, c.EndedAt }) .Select(c => new
{
c.RoomId,
c.CreatedAt,
c.EndedAt,
})
.ToListAsync(); .ToListAsync();
var callDurations = groupCallRecords var callDurations = groupCallRecords
.Select(c => new { c.RoomId, Duration = (c.EndedAt ?? now).Minus(c.CreatedAt).Seconds }).ToList(); .Select(c => new { c.RoomId, Duration = (c.EndedAt ?? now).Minus(c.CreatedAt).Seconds })
.ToList();
var mostCalledRoomInfo = callDurations var mostCalledRoomInfo = callDurations
.GroupBy(c => c.RoomId) .GroupBy(c => c.RoomId)
.Select(g => new { RoomId = g.Key, TotalDuration = g.Sum(c => c.Duration) }) .Select(g => new { RoomId = g.Key, TotalDuration = g.Sum(c => c.Duration) })
.OrderByDescending(g => g.TotalDuration) .OrderByDescending(g => g.TotalDuration)
.FirstOrDefault(); .FirstOrDefault();
var mostCalledRoom = mostCalledRoomInfo != null && mostCalledRoomInfo.RoomId != Guid.Empty var mostCalledRoom =
? await db.ChatRooms.FindAsync(mostCalledRoomInfo.RoomId) mostCalledRoomInfo != null && mostCalledRoomInfo.RoomId != Guid.Empty
: null; ? await db.ChatRooms.FindAsync(mostCalledRoomInfo.RoomId)
: null;
List<SnAccount>? mostCalledChatTopMembers = null; List<SnAccount>? mostCalledChatTopMembers = null;
if (mostCalledRoom != null) if (mostCalledRoom != null)
mostCalledChatTopMembers = await crs.GetTopActiveMembers(mostCalledRoom.Id, startDate, endDate); mostCalledChatTopMembers = await crs.GetTopActiveMembers(
mostCalledRoom.Id,
startDate,
endDate
);
var directCallRecords = await callQuery var directCallRecords = await callQuery
.Where(c => c.Room.Type == ChatRoomType.DirectMessage) .Where(c => c.Room.Type == ChatRoomType.DirectMessage)
.Select(c => new { c.RoomId, c.CreatedAt, c.EndedAt, c.Room }) .Select(c => new
{
c.RoomId,
c.CreatedAt,
c.EndedAt,
c.Room,
})
.ToListAsync(); .ToListAsync();
var directCallDurations = directCallRecords var directCallDurations = directCallRecords
.Select(c => new { c.RoomId, c.Room, Duration = (c.EndedAt ?? now).Minus(c.CreatedAt).Seconds }).ToList(); .Select(c => new
{
c.RoomId,
c.Room,
Duration = (c.EndedAt ?? now).Minus(c.CreatedAt).Seconds,
})
.ToList();
var mostCalledDirectRooms = directCallDurations var mostCalledDirectRooms = directCallDurations
.GroupBy(c => c.RoomId) .GroupBy(c => c.RoomId)
.Select(g => new { ChatRoom = g.First().Room, TotalDuration = g.Sum(c => c.Duration) }) .Select(g => new { ChatRoom = g.First().Room, TotalDuration = g.Sum(c => c.Duration) })
@@ -170,19 +195,29 @@ public class SphereRewindServiceGrpc(
} }
var accounts = await remoteAccounts.GetAccountBatch(accountIds); var accounts = await remoteAccounts.GetAccountBatch(accountIds);
var mostCalledAccounts = accounts.Zip(mostCalledDirectRooms, var mostCalledAccounts = accounts
(account, room) => new Dictionary<string, object?> .Zip(
{ ["account"] = account, ["duration"] = room.TotalDuration } mostCalledDirectRooms,
(account, room) =>
new Dictionary<string, object?>
{
["account"] = account,
["duration"] = room.TotalDuration,
}
) )
.ToList(); .ToList();
var data = new Dictionary<string, object?> var data = new Dictionary<string, object?>
{ {
["total_post_count"] = postTotalCount, ["total_post_count"] = postTotalCount,
["total_upvote_count"] = postTotalUpvotes, ["total_upvote_count"] = postTotalUpvotes,
["top_words"] = topWords.Select(wc => new Dictionary<string, object?> ["top_words"] = topWords
{ ["word"] = wc.Word, ["count"] = wc.Count }).ToList(), .Select(wc => new Dictionary<string, object?>
{
["word"] = wc.Word,
["count"] = wc.Count,
})
.ToList(),
["most_popular_post"] = mostPopularPost, ["most_popular_post"] = mostPopularPost,
["most_productive_day"] = mostProductiveDay is not null ["most_productive_day"] = mostProductiveDay is not null
? new Dictionary<string, object?> ? new Dictionary<string, object?>
@@ -216,13 +251,13 @@ public class SphereRewindServiceGrpc(
? new Dictionary<string, object?> ? new Dictionary<string, object?>
{ {
["chat"] = mostMessagedDirectChat, ["chat"] = mostMessagedDirectChat,
["message_counts"] = mostMessagedDirectChatInfo.MessageCount ["message_counts"] = mostMessagedDirectChatInfo.MessageCount,
} }
: null, : null,
["most_called_chat"] = new Dictionary<string, object?> ["most_called_chat"] = new Dictionary<string, object?>
{ {
["chat"] = mostCalledRoom, ["chat"] = mostCalledRoom,
["duration"] = mostCalledRoomInfo?.TotalDuration ["duration"] = mostCalledRoomInfo?.TotalDuration,
}, },
["most_called_chat_top_members"] = mostCalledChatTopMembers, ["most_called_chat_top_members"] = mostCalledChatTopMembers,
["most_called_accounts"] = mostCalledAccounts, ["most_called_accounts"] = mostCalledAccounts,
@@ -232,7 +267,8 @@ public class SphereRewindServiceGrpc(
{ {
ServiceId = "sphere", ServiceId = "sphere",
AccountId = request.AccountId, AccountId = request.AccountId,
Data = GrpcTypeHelper.ConvertObjectToByteString(data) Data = GrpcTypeHelper.ConvertObjectToByteString(data),
}; };
} }
} }