🐛 修复bilibili推送的一些格式错误 (#263)

* 🎈 perf(platform/bilibili): 增加了相似度计算前文本的预处理

将动态和简介文本中较长的一段按照较短的一段进行截取(分了从前截和从后截的两种情况)

* 🐞 fix(bilibili): 修复视频简介多余空格的bug

* 🦄 refactor(bilibili): 更改文本相似度比较函数
This commit is contained in:
UKM 2023-07-18 15:06:46 +08:00 committed by GitHub
parent 5922a7827f
commit ff3c0ffe65
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 5 additions and 19 deletions

2
.gitignore vendored
View File

@ -316,6 +316,8 @@ docs/.vuepress/.temp/
# and uncomment the following lines # and uncomment the following lines
# .pnp.* # .pnp.*
### macOS ###
.DS_Store
# End of https://www.toptal.com/developers/gitignore/api/python,linux,vim # End of https://www.toptal.com/developers/gitignore/api/python,linux,vim
data*/* data*/*

View File

@ -12,7 +12,7 @@ from typing_extensions import Self
from ..post import Post from ..post import Post
from ..types import ApiError, Category, RawPost, Tag, Target from ..types import ApiError, Category, RawPost, Tag, Target
from ..utils import SchedulerConfig, jaccard_text_similarity from ..utils import SchedulerConfig, text_similarity
from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage, StatusChange from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage, StatusChange
@ -151,7 +151,7 @@ class Bilibili(NewMessage):
title = card["title"] title = card["title"]
desc = card.get("desc", "") desc = card.get("desc", "")
if jaccard_text_similarity(desc, dynamic) > 0.8: if text_similarity(desc, dynamic) > 0.8:
# 如果视频简介和动态内容相似,就只保留长的那个 # 如果视频简介和动态内容相似,就只保留长的那个
if len(dynamic) > len(desc): if len(dynamic) > len(desc):
text = f"{dynamic}\n=================\n{title}" text = f"{dynamic}\n=================\n{title}"
@ -159,12 +159,7 @@ class Bilibili(NewMessage):
text = f"{title}\n\n{desc}" text = f"{title}\n\n{desc}"
else: else:
# 否则就把两个拼起来 # 否则就把两个拼起来
text = f""" text = f"{dynamic}\n=================\n{title}\n\n{desc}"
{dynamic}
\n=================\n
{title}\n\n
{desc}
"""
pic = [card["pic"]] pic = [card["pic"]]
elif post_type == 4: elif post_type == 4:

View File

@ -101,17 +101,6 @@ if plugin_config.bison_filter_log:
) )
def jaccard_text_similarity(str1: str, str2: str) -> float:
    """Return the character-level Jaccard similarity of two strings.

    Each string is reduced to its set of distinct characters, and the result
    is ``len(A & B) / len(A | B)``. Character order and repetition are
    ignored. See https://en.wikipedia.org/wiki/Jaccard_index.

    Args:
        str1: First string to compare.
        str2: Second string to compare.

    Returns:
        A float between 0.0 (no characters in common) and 1.0 (identical
        character sets). Two empty strings are treated as identical and
        yield 1.0.
    """
    set1 = set(str1)
    set2 = set(str2)
    union = set1 | set2
    if not union:
        # Both inputs are empty: the union has no elements, so the ratio
        # would divide by zero. Treat two empty texts as fully similar.
        return 1.0
    return len(set1 & set2) / len(union)
def text_similarity(str1, str2) -> float: def text_similarity(str1, str2) -> float:
matcher = difflib.SequenceMatcher(None, str1, str2) matcher = difflib.SequenceMatcher(None, str1, str2)
t = sum(temp.size for temp in matcher.get_matching_blocks()) t = sum(temp.size for temp in matcher.get_matching_blocks())