🐛 修复bilibili推送的一些格式错误 (#263)

* 🎈 perf(platform/bilibili): 增加了相似度计算前文本的预处理，将动态和简介文本中较长的一段按照较短的一段进行截取（分了从前截和从后截的两种情况）
* 🐞 fix(bilibili): 修复视频简介多余空格的 bug
* 🦄 refactor(bilibili): 更改文本相似度比较函数
2026-02-04 21:54:08 +08:00 · 2023-07-18 15:06:46 +08:00 · 2023-07-18 15:06:46 +08:00 · ff3c0ffe65
commit ff3c0ffe65
parent 5922a7827f
3 changed files with 5 additions and 19 deletions
--- a/.gitignore
+++ b/.gitignore
@@ -316,6 +316,8 @@ docs/.vuepress/.temp/
 # and uncomment the following lines
 # .pnp.*

+### macOS ###
+.DS_Store

 # End of https://www.toptal.com/developers/gitignore/api/python,linux,vim
 data*/*
--- a/nonebot_bison/platform/bilibili.py
+++ b/nonebot_bison/platform/bilibili.py
@@ -12,7 +12,7 @@ from typing_extensions import Self

 from ..post import Post
 from ..types import ApiError, Category, RawPost, Tag, Target
-from ..utils import SchedulerConfig, jaccard_text_similarity
+from ..utils import SchedulerConfig, text_similarity
 from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage, StatusChange


@@ -151,7 +151,7 @@ class Bilibili(NewMessage):
            title = card["title"]
            desc = card.get("desc", "")

-            if jaccard_text_similarity(desc, dynamic) > 0.8:
+            if text_similarity(desc, dynamic) > 0.8:
                # 如果视频简介和动态内容相似，就只保留长的那个
                if len(dynamic) > len(desc):
                    text = f"{dynamic}\n=================\n{title}"
@@ -159,12 +159,7 @@ class Bilibili(NewMessage):
                    text = f"{title}\n\n{desc}"
            else:
                # 否则就把两个拼起来
-                text = f"""
-                {dynamic}
-                \n=================\n
-                {title}\n\n
-                {desc}
-                """
+                text = f"{dynamic}\n=================\n{title}\n\n{desc}"

            pic = [card["pic"]]
        elif post_type == 4:
--- a/nonebot_bison/utils/__init__.py
+++ b/nonebot_bison/utils/__init__.py
@@ -101,17 +101,6 @@ if plugin_config.bison_filter_log:
    )


-def jaccard_text_similarity(str1: str, str2: str) -> float:
-    """
-    计算两个字符串(基于字符)的
-    [Jaccard相似系数](https://zh.wikipedia.org/wiki/雅卡尔指数)
-    是否达到阈值
-    """
-    set1 = set(str1)
-    set2 = set(str2)
-    return len(set1 & set2) / len(set1 | set2)
-
-
 def text_similarity(str1, str2) -> float:
    matcher = difflib.SequenceMatcher(None, str1, str2)
    t = sum(temp.size for temp in matcher.get_matching_blocks())