mirror of
https://github.com/suyiiyii/nonebot-bison.git
synced 2025-07-13 11:03:00 +08:00
🐛 修复bilibili推送的一些格式错误 (#263)
* 🎈 perf(platform/bilibili): 增加了相似度计算前文本的预处理,将动态和简介文本中较长的一段按照较短的一段进行截取(分为从前截和从后截两种情况)
* 🐞 fix(bilibili): 修复视频简介多余空格的 bug
* 🦄 refactor(bilibili): 更改文本相似度比较函数
This commit is contained in:
parent
5922a7827f
commit
ff3c0ffe65
2
.gitignore
vendored
2
.gitignore
vendored
@ -316,6 +316,8 @@ docs/.vuepress/.temp/
|
|||||||
# and uncomment the following lines
|
# and uncomment the following lines
|
||||||
# .pnp.*
|
# .pnp.*
|
||||||
|
|
||||||
|
### macOS ###
|
||||||
|
.DS_Store
|
||||||
|
|
||||||
# End of https://www.toptal.com/developers/gitignore/api/python,linux,vim
|
# End of https://www.toptal.com/developers/gitignore/api/python,linux,vim
|
||||||
data*/*
|
data*/*
|
||||||
|
@ -12,7 +12,7 @@ from typing_extensions import Self
|
|||||||
|
|
||||||
from ..post import Post
|
from ..post import Post
|
||||||
from ..types import ApiError, Category, RawPost, Tag, Target
|
from ..types import ApiError, Category, RawPost, Tag, Target
|
||||||
from ..utils import SchedulerConfig, jaccard_text_similarity
|
from ..utils import SchedulerConfig, text_similarity
|
||||||
from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage, StatusChange
|
from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage, StatusChange
|
||||||
|
|
||||||
|
|
||||||
@ -151,7 +151,7 @@ class Bilibili(NewMessage):
|
|||||||
title = card["title"]
|
title = card["title"]
|
||||||
desc = card.get("desc", "")
|
desc = card.get("desc", "")
|
||||||
|
|
||||||
if jaccard_text_similarity(desc, dynamic) > 0.8:
|
if text_similarity(desc, dynamic) > 0.8:
|
||||||
# 如果视频简介和动态内容相似,就只保留长的那个
|
# 如果视频简介和动态内容相似,就只保留长的那个
|
||||||
if len(dynamic) > len(desc):
|
if len(dynamic) > len(desc):
|
||||||
text = f"{dynamic}\n=================\n{title}"
|
text = f"{dynamic}\n=================\n{title}"
|
||||||
@ -159,12 +159,7 @@ class Bilibili(NewMessage):
|
|||||||
text = f"{title}\n\n{desc}"
|
text = f"{title}\n\n{desc}"
|
||||||
else:
|
else:
|
||||||
# 否则就把两个拼起来
|
# 否则就把两个拼起来
|
||||||
text = f"""
|
text = f"{dynamic}\n=================\n{title}\n\n{desc}"
|
||||||
{dynamic}
|
|
||||||
\n=================\n
|
|
||||||
{title}\n\n
|
|
||||||
{desc}
|
|
||||||
"""
|
|
||||||
|
|
||||||
pic = [card["pic"]]
|
pic = [card["pic"]]
|
||||||
elif post_type == 4:
|
elif post_type == 4:
|
||||||
|
@ -101,17 +101,6 @@ if plugin_config.bison_filter_log:
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
def jaccard_text_similarity(str1: str, str2: str) -> float:
|
|
||||||
"""
|
|
||||||
计算两个字符串(基于字符)的
|
|
||||||
[Jaccard相似系数](https://zh.wikipedia.org/wiki/雅卡尔指数)
|
|
||||||
是否达到阈值
|
|
||||||
"""
|
|
||||||
set1 = set(str1)
|
|
||||||
set2 = set(str2)
|
|
||||||
return len(set1 & set2) / len(set1 | set2)
|
|
||||||
|
|
||||||
|
|
||||||
def text_similarity(str1, str2) -> float:
|
def text_similarity(str1, str2) -> float:
|
||||||
matcher = difflib.SequenceMatcher(None, str1, str2)
|
matcher = difflib.SequenceMatcher(None, str1, str2)
|
||||||
t = sum(temp.size for temp in matcher.get_matching_blocks())
|
t = sum(temp.size for temp in matcher.get_matching_blocks())
|
||||||
|
Loading…
x
Reference in New Issue
Block a user