diff --git a/nonebot_bison/platform/rss.py b/nonebot_bison/platform/rss.py index 895b1e2..e3b6a56 100644 --- a/nonebot_bison/platform/rss.py +++ b/nonebot_bison/platform/rss.py @@ -9,7 +9,7 @@ from bs4 import BeautifulSoup as bs from ..post import Post from .platform import NewMessage from ..types import Target, RawPost -from ..utils import Site, text_fletten, text_similarity +from ..utils import Site, text_similarity class RssSite(Site): @@ -32,7 +32,7 @@ class RssPost(Post): for p in soup.find_all("p"): p.insert_after("\n") - return text_fletten(soup.get_text()) + return soup.get_text() class Rss(NewMessage): @@ -82,7 +82,7 @@ class Rss(NewMessage): async def parse(self, raw_post: RawPost) -> Post: title = raw_post.get("title", "") soup = bs(raw_post.description, "html.parser") - desc = soup.text.strip() + desc = raw_post.description title, desc = self._text_process(title, desc) pics = [x.attrs["src"] for x in soup("img")] if raw_post.get("media_content"): diff --git a/tests/platforms/test_rss.py b/tests/platforms/test_rss.py index 2d5b792..dc3a715 100644 --- a/tests/platforms/test_rss.py +++ b/tests/platforms/test_rss.py @@ -88,9 +88,21 @@ async def test_fetch_new_1( assert post1.title is None assert ( post1.content - == "【#統合戦略】 引き続き新テーマ「ミヅキと紺碧の樹」の新要素及びシステムの変更点を一部ご紹介します!" - " 今回は「灯火」、「ダイス」、「記号認識」、「鍵」についてです。詳細は添付の画像をご確認ください。" - "#アークナイツ https://t.co/ARmptV0Zvu" + == "【#統合戦略】
引き続き新テーマ「ミヅキと紺碧の樹」の新要素及びシステムの変更点を一部ご紹介します! " + "

" + "今回は「灯火」、「ダイス」、「記号認識」、「鍵」についてです。
詳細は添付の画像をご確認ください。" + "

" + "#アークナイツ https://t.co/ARmptV0Zvu
" + '' + ) + plain_content = await post1.get_plain_content() + assert ( + plain_content == "【#統合戦略】 \n" + "引き続き新テーマ「ミヅキと紺碧の樹」の新要素及びシステムの変更点を一部ご紹介します! \n\n" + "今回は「灯火」、「ダイス」、「記号認識」、「鍵」についてです。\n" + "詳細は添付の画像をご確認ください。\n\n" + "#アークナイツ https://t.co/ARmptV0Zvu\n" + "[图片]" ) @@ -174,7 +186,9 @@ async def test_fetch_new_4( assert len(res2[0][1]) == 1 post1 = res2[0][1][0] assert post1.url == "https://wallhaven.cc/w/85rjej" - assert post1.content == "85rjej.jpg" + assert post1.content == 'loading' + plain_content = await post1.get_plain_content() + assert plain_content == "[图片]" def test_similar_text_process():