mirror of
https://github.com/suyiiyii/nonebot-bison.git
synced 2025-06-05 19:36:43 +08:00
* 🧪 test(tests): 添加了RSS的单元测试 * 🎈 perf(rss and test): 优化了RSS部分源标题正文重复的问题 部分RSS源(RSSHub的Twitter)存在正文当标题用的情况,导致推送的时候呈现为两段重复的文字,现通过Jaccard相似系数来判断是否需要去重 * Update nonebot_bison/platform/rss.py Co-authored-by: AzideCupric <57004769+AzideCupric@users.noreply.github.com> * Update nonebot_bison/platform/rss.py Co-authored-by: AzideCupric <57004769+AzideCupric@users.noreply.github.com> * 🐞 fix(platform/rss): 修复了漏掉相似文本在后端位置的问题 * 🐞 fix(rss): 修正一些feed无法正确识别时间的bug 一些feed时间只有updated标签或者没有,原先的代码只能解析用published标签的时间 felinae98#275 * 🎈 perf(rss): 更改字符串相似度比较方法 从Jaccard相似系数比较相似度改为通过最长公共子序列来比较 * 🦄 refactor(rss): 重构实现字符串相似度比较的方法 使用标准库difflib代替原先手搓的LCS * Update nonebot_bison/utils/__init__.py Co-authored-by: felinae98 <731499577@qq.com> * Update nonebot_bison/platform/rss.py * Update nonebot_bison/platform/rss.py --------- Co-authored-by: AzideCupric <57004769+AzideCupric@users.noreply.github.com> Co-authored-by: felinae98 <731499577@qq.com>
170 lines
5.6 KiB
Python
170 lines
5.6 KiB
Python
import typing
|
|
import xml.etree.ElementTree as ET
|
|
from datetime import datetime
|
|
|
|
import pytest
|
|
import pytz
|
|
import respx
|
|
from httpx import AsyncClient, Response
|
|
from nonebug.app import App
|
|
|
|
from .utils import get_file
|
|
|
|
if typing.TYPE_CHECKING:
|
|
from nonebot_bison.platform.rss import Rss
|
|
|
|
|
|
@pytest.fixture
def dummy_user(app: App):
    """Provide the ``User(123, "group")`` object shared by the tests below.

    ``app`` is requested but not used directly in the body; presumably it
    makes sure the application is set up before ``nonebot_bison`` is
    imported -- confirm against the project's conftest.
    """
    from nonebot_bison.types import User

    return User(123, "group")
|
|
|
|
|
|
@pytest.fixture
def user_info_factory(app: App, dummy_user):
    """Return a factory that builds ``UserSubInfo`` objects for ``dummy_user``.

    The returned callable takes ``(category_getter, tag_getter)`` and forwards
    them, together with the ``dummy_user`` fixture value, to ``UserSubInfo``.
    """
    from nonebot_bison.types import UserSubInfo

    def _make_sub_info(category_getter, tag_getter):
        # The user is fixed by the fixture; callers supply only the getters.
        return UserSubInfo(dummy_user, category_getter, tag_getter)

    return _make_sub_info
|
|
|
|
|
|
@pytest.fixture
def rss(app: App):
    """Instantiate the platform registered under ``"rss"`` in ``platform_manager``.

    The platform is constructed with a fresh ``ProcessContext`` and a fresh
    ``httpx.AsyncClient``.
    """
    from nonebot_bison.platform import platform_manager
    from nonebot_bison.utils import ProcessContext

    # Look the class up first, then build its arguments, so the evaluation
    # order matches a direct ``platform_manager["rss"](...)`` call.
    rss_platform_cls = platform_manager["rss"]
    return rss_platform_cls(ProcessContext(), AsyncClient())
|
|
|
|
|
|
@pytest.fixture
def update_time_feed_1():
    """Return the ArknightsStaff feed with its first item's ``pubDate`` set to now.

    Loads ``rss-twitter-ArknightsStaff.xml`` via ``get_file``, overwrites the
    text of the ``<pubDate>`` of the first ``channel/item`` element with the
    current GMT time, and returns the re-serialised XML as a ``str``.

    Raises:
        AssertionError: if the fixture file has no ``channel/item`` element or
            that item has no ``pubDate`` child.
    """
    file = get_file("rss-twitter-ArknightsStaff.xml")
    root = ET.fromstring(file)

    # ``Element.find`` returns None when nothing matches; fail with a clear
    # message instead of an AttributeError on NoneType further down.
    # ``is not None`` is required: a childless Element is falsy.
    item = root.find("channel/item")
    assert item is not None, "rss-twitter-ArknightsStaff.xml has no channel/item"
    pubdate_elem = item.find("pubDate")
    assert pubdate_elem is not None, "first item has no pubDate element"

    current_time = datetime.now(pytz.timezone("GMT")).strftime(
        "%a, %d %b %Y %H:%M:%S %Z"
    )
    pubdate_elem.text = current_time
    return ET.tostring(root, encoding="unicode")
|
|
|
|
|
|
@pytest.fixture
def update_time_feed_2():
    """Return the ruanyifeng feed with its first ``published`` element set to now.

    Loads ``rss-ruanyifeng.xml`` via ``get_file``, overwrites the text of the
    first ``published`` element found in any namespace with the current GMT
    time, and returns the re-serialised XML as a ``str``.

    Raises:
        AssertionError: if the fixture file contains no ``published`` element.
    """
    file = get_file("rss-ruanyifeng.xml")
    root = ET.fromstring(file)
    current_time = datetime.now(pytz.timezone("GMT")).strftime(
        "%a, %d %b %Y %H:%M:%S %Z"
    )

    # ``{*}`` matches the tag in any namespace. ``find`` returns None when
    # nothing matches, so guard explicitly rather than failing with an
    # AttributeError on NoneType.
    published_element = root.find(".//{*}published")
    assert published_element is not None, "rss-ruanyifeng.xml has no published element"

    published_element.text = current_time
    return ET.tostring(root, encoding="unicode")
|
|
|
|
|
|
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_1(rss, user_info_factory, update_time_feed_1):
    """Case where the title is duplicated (original note: title repeats)."""
    target = "https://rsshub.app/twitter/user/ArknightsStaff"
    rss_router = respx.get(target)

    # First poll against the baseline feed: nothing is expected back.
    baseline_feed = get_file("rss-twitter-ArknightsStaff-0.xml")
    rss_router.mock(return_value=Response(200, text=baseline_feed))
    initial = await rss.fetch_new_post(target, [user_info_factory([], [])])
    assert len(initial) == 0

    # Second poll against the feed whose pubDate was bumped to "now".
    rss_router.mock(return_value=Response(200, text=update_time_feed_1))
    updated = await rss.fetch_new_post(target, [user_info_factory([], [])])
    new_posts = updated[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "https://twitter.com/ArknightsStaff/status/1659091539023282178"
    expected_text = "【#統合戦略】 引き続き新テーマ「ミヅキと紺碧の樹」の新要素及びシステムの変更点を一部ご紹介します! 今回は「灯火」、「ダイス」、「記号認識」、「鍵」についてです。詳細は添付の画像をご確認ください。#アークナイツ https://t.co/ARmptV0Zvu"
    assert post.text == expected_text
|
|
|
|
|
|
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_2(rss, user_info_factory, update_time_feed_2):
    """Case where the title and the body text do not duplicate each other."""
    target = "https://www.ruanyifeng.com/blog/atom.xml"
    rss_router = respx.get(target)

    # First poll against the baseline feed: nothing is expected back.
    baseline_feed = get_file("rss-ruanyifeng-0.xml")
    rss_router.mock(return_value=Response(200, text=baseline_feed))
    initial = await rss.fetch_new_post(target, [user_info_factory([], [])])
    assert len(initial) == 0

    # Second poll against the feed whose published time was bumped to "now".
    rss_router.mock(return_value=Response(200, text=update_time_feed_2))
    updated = await rss.fetch_new_post(target, [user_info_factory([], [])])
    new_posts = updated[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "http://www.ruanyifeng.com/blog/2023/05/weekly-issue-255.html"
    expected_text = "科技爱好者周刊(第 255 期):对待 AI 的正确态度\n\n这里记录每周值得分享的科技内容,周五发布。..."
    assert post.text == expected_text
|
|
|
|
|
|
@pytest.fixture
def update_time_feed_3():
    """Return the GitHub releases feed with its second ``updated`` element set to now.

    Loads ``rss-github-atom.xml`` via ``get_file``, overwrites the text of the
    second ``updated`` element (any namespace, document order) with the
    current GMT time, and returns the re-serialised XML as a ``str``.
    """
    tree_root = ET.fromstring(get_file("rss-github-atom.xml"))
    now_text = datetime.now(pytz.timezone("GMT")).strftime("%a, %d %b %Y %H:%M:%S %Z")

    # Index 1 picks the second match; presumably index 0 is the feed-level
    # <updated> and index 1 belongs to the first entry -- confirm against
    # the fixture file.
    updated_elem = tree_root.findall(".//{*}updated")[1]
    updated_elem.text = now_text

    return ET.tostring(tree_root, encoding="unicode")
|
|
|
|
|
|
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_3(rss, user_info_factory, update_time_feed_3):
    """Case where entries carry only <updated> and no <published>."""
    target = "https://github.com/R3nzTheCodeGOD/R3nzSkin/releases.atom"
    rss_router = respx.get(target)

    # First poll against the baseline feed: nothing is expected back.
    baseline_feed = get_file("rss-github-atom-0.xml")
    rss_router.mock(return_value=Response(200, text=baseline_feed))
    initial = await rss.fetch_new_post(target, [user_info_factory([], [])])
    assert len(initial) == 0

    # Second poll against the feed whose <updated> time was bumped to "now".
    rss_router.mock(return_value=Response(200, text=update_time_feed_3))
    updated = await rss.fetch_new_post(target, [user_info_factory([], [])])
    new_posts = updated[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "https://github.com/R3nzTheCodeGOD/R3nzSkin/releases/tag/v3.0.9"
    assert post.text == "R3nzSkin\n\nNo content."
|
|
|
|
|
|
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_4(rss, user_info_factory):
    """Case where the feed carries no date information."""
    target = "https://rsshub.app/wallhaven/hot?limit=5"
    rss_router = respx.get(target)

    # First poll against the "old" snapshot: nothing is expected back.
    old_feed = get_file("rss-top5-old.xml")
    rss_router.mock(return_value=Response(200, text=old_feed))
    initial = await rss.fetch_new_post(target, [user_info_factory([], [])])
    assert len(initial) == 0

    # Second poll against the "new" snapshot: exactly one post is expected.
    new_feed = get_file("rss-top5-new.xml")
    rss_router.mock(return_value=Response(200, text=new_feed))
    updated = await rss.fetch_new_post(target, [user_info_factory([], [])])
    new_posts = updated[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "https://wallhaven.cc/w/85rjej"
    assert post.text == "85rjej.jpg"
|