221 lines
8.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

import typing
from datetime import datetime
import xml.etree.ElementTree as ET
import pytz
import respx
import pytest
from httpx import Response
from nonebug.app import App
from .utils import get_file
if typing.TYPE_CHECKING:
pass
@pytest.fixture
def dummy_user(app: App):
    """Provide the ``User(123, "group")`` instance shared by the tests here.

    ``app`` is not used in the body; presumably it is requested so the test
    application exists before the project import runs -- TODO confirm.
    """
    from nonebot_bison.types import User

    return User(123, "group")
@pytest.fixture
def user_info_factory(app: App, dummy_user):
    """Provide a callable that builds ``UserSubInfo`` objects for ``dummy_user``.

    The returned callable takes ``(category_getter, tag_getter)`` and forwards
    them, after ``dummy_user``, to the ``UserSubInfo`` constructor.
    """
    from nonebot_bison.types import UserSubInfo

    def _build_sub_info(category_getter, tag_getter):
        """Create a ``UserSubInfo`` bound to the fixture's dummy user."""
        sub_info = UserSubInfo(dummy_user, category_getter, tag_getter)
        return sub_info

    return _build_sub_info
@pytest.fixture
def rss(app: App):
    """Provide an instance of the platform registered under the key ``"rss"``.

    The platform is constructed with a ``ProcessContext`` wrapping a fresh
    ``DefaultClientManager``.
    """
    from nonebot_bison.platform import platform_manager
    from nonebot_bison.utils import DefaultClientManager, ProcessContext

    rss_platform = platform_manager["rss"]
    process_context = ProcessContext(DefaultClientManager())
    return rss_platform(process_context)
@pytest.fixture
def update_time_feed_1():
    """Return ``rss-twitter-ArknightsStaff.xml`` with a fresh ``pubDate``.

    The ``pubDate`` of the first ``channel/item`` is overwritten with the
    current GMT time, and the whole document is serialised back to a string.
    """
    feed_root = ET.fromstring(get_file("rss-twitter-ArknightsStaff.xml"))

    first_item = feed_root.find("channel/item")
    assert first_item is not None
    pub_date = first_item.find("pubDate")
    assert pub_date is not None

    # Stamp the entry with "now" in the "Day, DD Mon YYYY HH:MM:SS TZ" layout.
    gmt_now = datetime.now(pytz.timezone("GMT"))
    pub_date.text = gmt_now.strftime("%a, %d %b %Y %H:%M:%S %Z")

    return ET.tostring(feed_root, encoding="unicode")
@pytest.fixture
def update_time_feed_2():
    """Return ``rss-ruanyifeng.xml`` with a fresh ``published`` timestamp.

    The first ``published`` element found anywhere in the tree (any namespace)
    is overwritten with the current GMT time, and the document is serialised
    back to a string.
    """
    feed_root = ET.fromstring(get_file("rss-ruanyifeng.xml"))

    published = feed_root.find(".//{*}published")
    assert published is not None

    gmt_now = datetime.now(pytz.timezone("GMT"))
    published.text = gmt_now.strftime("%a, %d %b %Y %H:%M:%S %Z")

    return ET.tostring(feed_root, encoding="unicode")
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_1(
    rss,
    user_info_factory,
    update_time_feed_1,
):
    """Fetch a new post from the mocked ArknightsStaff Twitter feed.

    The first fetch (served ``rss-twitter-ArknightsStaff-0.xml``) must return
    an empty result. After the mocked response is switched to
    ``update_time_feed_1``, exactly one post is expected, with no title and
    the HTML / plain-text content asserted below.
    """
    from nonebot_bison.types import SubUnit, Target

    # Case: duplicated title (the resulting post is expected to have no title).
    feed_url = "https://rsshub.app/twitter/user/ArknightsStaff"
    route = respx.get(feed_url)
    route.mock(return_value=Response(200, text=get_file("rss-twitter-ArknightsStaff-0.xml")))
    target = Target(feed_url)

    initial = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    assert len(initial) == 0

    # Serve the feed whose first item carries the current time.
    route.mock(return_value=Response(200, text=update_time_feed_1))
    fetched = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    new_posts = fetched[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "https://twitter.com/ArknightsStaff/status/1659091539023282178"
    assert post.title is None

    expected_html = (
        "【#統合戦略】 <br />引き続き新テーマ「ミヅキと紺碧の樹」の新要素及びシステムの変更点を一部ご紹介します! "
        "<br /><br />"
        "今回は「灯火」、「ダイス」、「記号認識」、「鍵」についてです。<br />詳細は添付の画像をご確認ください。"
        "<br /><br />"
        "#アークナイツ https://t.co/ARmptV0Zvu<br />"
        '<img src="https://pbs.twimg.com/media/FwZG9YAacAIXDw2?format=jpg&amp;name=orig" />'
    )
    assert post.content == expected_html

    expected_plain = (
        "【#統合戦略】 \n"
        "引き続き新テーマ「ミヅキと紺碧の樹」の新要素及びシステムの変更点を一部ご紹介します! \n\n"
        "今回は「灯火」、「ダイス」、「記号認識」、「鍵」についてです。\n"
        "詳細は添付の画像をご確認ください。\n\n"
        "#アークナイツ https://t.co/ARmptV0Zvu\n"
        "[图片]"
    )
    plain_text = await post.get_plain_content()
    assert plain_text == expected_plain
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_2(
    rss,
    user_info_factory,
    update_time_feed_2,
):
    """Fetch a new post from the mocked ruanyifeng Atom feed.

    The first fetch (served ``rss-ruanyifeng-0.xml``) must return an empty
    result. After the mocked response is switched to ``update_time_feed_2``,
    exactly one post is expected, with the title and content asserted below.
    """
    from nonebot_bison.types import SubUnit, Target

    # Case: the title and the body are not duplicates of each other.
    feed_url = "https://www.ruanyifeng.com/blog/atom.xml"
    route = respx.get(feed_url)
    route.mock(return_value=Response(200, text=get_file("rss-ruanyifeng-0.xml")))
    target = Target(feed_url)

    initial = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    assert len(initial) == 0

    # Serve the feed whose first <published> carries the current time.
    route.mock(return_value=Response(200, text=update_time_feed_2))
    fetched = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    new_posts = fetched[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "http://www.ruanyifeng.com/blog/2023/05/weekly-issue-255.html"
    assert post.title == "科技爱好者周刊(第 255 期):对待 AI 的正确态度"
    assert post.content == "这里记录每周值得分享的科技内容,周五发布。..."
@pytest.fixture
def update_time_feed_3():
    """Return ``rss-github-atom.xml`` with a fresh ``updated`` timestamp.

    The second ``updated`` element found in the tree (any namespace) is
    overwritten with the current GMT time, and the document is serialised
    back to a string.

    Raises:
        AssertionError: if the fixture file contains fewer than two
            ``updated`` elements.
    """
    file = get_file("rss-github-atom.xml")
    root = ET.fromstring(file)
    current_time = datetime.now(pytz.timezone("GMT")).strftime("%a, %d %b %Y %H:%M:%S %Z")

    updated_elements = root.findall(".//{*}updated")
    # Index 1 is the element to patch; index 0 is presumably the feed-level
    # <updated> rather than an entry's -- TODO confirm against the fixture file.
    # Fail with a clear message instead of a bare IndexError, matching the
    # explicit assertions used by the sibling feed fixtures.
    assert len(updated_elements) > 1, "rss-github-atom.xml must contain at least two <updated> elements"
    updated_element = updated_elements[1]
    updated_element.text = current_time

    return ET.tostring(root, encoding="unicode")
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_3(
    rss,
    user_info_factory,
    update_time_feed_3,
):
    """Fetch a new post from the mocked GitHub releases Atom feed.

    The first fetch (served ``rss-github-atom-0.xml``) must return an empty
    result. After the mocked response is switched to ``update_time_feed_3``,
    exactly one post is expected, with the url, title and content asserted
    below.
    """
    from nonebot_bison.types import SubUnit, Target

    # Case: entries carry only <updated>, with no <published>.
    feed_url = "https://github.com/R3nzTheCodeGOD/R3nzSkin/releases.atom"
    route = respx.get(feed_url)
    route.mock(return_value=Response(200, text=get_file("rss-github-atom-0.xml")))
    target = Target(feed_url)

    initial = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    assert len(initial) == 0

    # Serve the feed with one <updated> element rewritten to the current time.
    route.mock(return_value=Response(200, text=update_time_feed_3))
    fetched = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    new_posts = fetched[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "https://github.com/R3nzTheCodeGOD/R3nzSkin/releases/tag/v3.0.9"
    assert post.title == "R3nzSkin"
    assert post.content == "No content."
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_4(
    rss,
    user_info_factory,
):
    """Fetch a new post from the mocked wallhaven feed.

    The first fetch (served ``rss-top5-old.xml``) must return an empty result.
    After the mocked response is switched to ``rss-top5-new.xml``, exactly one
    post is expected, whose content is a single ``<img>`` tag and whose plain
    content is the image placeholder.
    """
    from nonebot_bison.types import SubUnit, Target

    # Case: the feed carries no date information.
    feed_url = "https://rsshub.app/wallhaven/hot?limit=5"
    route = respx.get(feed_url)
    route.mock(return_value=Response(200, text=get_file("rss-top5-old.xml")))
    target = Target(feed_url)

    initial = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    assert len(initial) == 0

    # Swap in the newer snapshot of the same feed.
    route.mock(return_value=Response(200, text=get_file("rss-top5-new.xml")))
    fetched = await rss.fetch_new_post(SubUnit(target, [user_info_factory([], [])]))
    new_posts = fetched[0][1]
    assert len(new_posts) == 1

    post = new_posts[0]
    assert post.url == "https://wallhaven.cc/w/85rjej"
    expected_html = '<img alt="loading" class="lazyload" src="https://th.wallhaven.cc/small/85/85rjej.jpg" />'
    assert post.content == expected_html

    plain_text = await post.get_plain_content()
    assert plain_text == "[图片]"
def test_similar_text_process():
    """Exercise ``text_similarity`` on empty, prefix-like and reordered inputs.

    An empty string on either side must raise ``ValueError``; a truncated
    text compared with its full version must score above 0.8; two short
    strings with swapped characters must score at most 0.8.
    """
    from nonebot_bison.utils import text_similarity

    # An empty operand in either position is rejected.
    for left, right in (("", "xxxx"), ("xxxx", "")):
        with pytest.raises(ValueError, match="The length of string can not be 0"):
            text_similarity(left, right)

    truncated = (
        "天使九局下被追平,米基-莫尼亚克(Mickey Moniak)超前安打拒绝剧本天使7-6老虎阿莱克-博姆Alec"
        " Bohm再见安打费城人4-3金莺..."
    )
    complete = (
        "天使九局下被追平,米基-莫尼亚克(Mickey Moniak)超前安打拒绝剧本天使7-6老虎阿莱克-博姆Alec"
        " Bohm再见安打费城人4-3金莺布兰登-洛维Brandon Lowe阳春炮光芒4-1马林鱼皮特-阿隆索(Pete Alonso)、"
        "丹尼尔-沃格尔巴克(Daniel Vogelbach)背靠背本垒打大都会9-3扬基吉田正尚Masataka Yoshida3安2打点"
        "红袜7-1勇士凯尔-塔克Kyle Tucker阳春炮太空人4-3连胜游骑兵。"
    )
    score = text_similarity(truncated, complete)
    assert score > 0.8

    score = text_similarity("我爱你", "你爱我")
    assert score <= 0.8