diff --git a/src/plugins/nonebot_bison/platform/mcbbsnews.py b/src/plugins/nonebot_bison/platform/mcbbsnews.py index b46b939..f5d5844 100644 --- a/src/plugins/nonebot_bison/platform/mcbbsnews.py +++ b/src/plugins/nonebot_bison/platform/mcbbsnews.py @@ -13,12 +13,15 @@ from .platform import CategoryNotSupport, NewMessage def _format_text(rawtext: str, mode: int) -> str: """处理BeautifulSoup生成的string中奇怪的回车+连续空格 mode 0:处理标题 - mode 1:处理推文""" + mode 1:处理版本资讯类推文 + mode 2:处理快讯类推文""" match mode: case 0: ftext = re.sub(r"\n\s*", " ", rawtext) case 1: ftext = re.sub(r"[\n\s*]", "", rawtext) + case 2: + ftext = re.sub(r"\r\n", "", rawtext) return ftext @@ -29,7 +32,7 @@ def _stamp_date(rawdate: str) -> int: class McbbsNews(NewMessage): - categories = {1: "Java版本资讯", 2: "基岩版本资讯"} + categories = {1: "Java版本资讯", 2: "基岩版本资讯", 3: "快讯", 4: "基岩快讯", 5: "周边消息"} enable_tag = False platform_name = "mcbbsnews" name = "MCBBS幻翼块讯" @@ -191,6 +194,44 @@ class McbbsNews(NewMessage): continue return post_text, pic_url + def _express_parser(self, raw_text: str, news_type: Literal["快讯", "基岩快讯", "周边消息"]): + """提取快讯/基岩快讯/周边消息的推送消息""" + raw_soup = BeautifulSoup(raw_text.replace("
", ""), "html.parser") + # 获取原始推文内容 + soup = raw_soup.find("td", id=re.compile(r"postmessage_[0-9]*")) + if tag := soup.find("ignore_js_op"): + tag.extract() + # 获取所有图片 + pic_urls = [] + for img_tag in soup.find_all("img"): + pic_url = img_tag.get("file") or img_tag.get("src") + pic_urls.append(pic_url) + # 验证是否有blockquote标签 + has_bolockquote = soup.find("blockquote") + # 删除无用的span,div段内容 + for del_tag in soup.find_all("i"): + del_tag.extract() + soup.find(class_="attach_nopermission attach_tips").extract() + # 展开所有的a,strong标签 + for unwrap_tag in soup.find_all(["a", "strong"]): + unwrap_tag.unwrap() + # 展开blockquote标签里的blockquote标签 + for b_tag in soup.find_all("blockquote"): + for unwrap_tag in b_tag.find_all("blockquote"): + unwrap_tag.unwrap() + # 获取推文 + text = "" + if has_bolockquote: + for post in soup.find_all("blockquote"): + # post.font.unwrap() + for string in post.stripped_strings: + text += "{}\n".format(string) + else: + for string in soup.stripped_strings: + text += "{}\n".format(string) + ftext = _format_text(text, 2) + return ftext, pic_urls + async def parse(self, raw_post: RawPost) -> Post: post_url = "https://www.mcbbs.net/{}".format(raw_post["url"]) headers = { @@ -207,7 +248,10 @@ class McbbsNews(NewMessage): raw_text = re.sub(r"【本文排版借助了:[\s\S]*】", "", html.text) text, pic_urls = self._news_parser(raw_text, raw_post["category"]) case "基岩版本资讯": - text, pic_urls = self._news_parser(html.text, raw_post["category"]) + raw_text = re.sub(r"【本文排版借助了:[\s\S]*】", "", html.text) + text, pic_urls = self._news_parser(raw_text, raw_post["category"]) + case "快讯" | "基岩快讯" | "周边消息": + text, pic_urls = self._express_parser(html.text, raw_post["category"]) case _: raise CategoryNotSupport( "McbbsNews订阅暂不支持 `{}".format(raw_post["category"]) diff --git a/tests/platforms/static/mcbbsnews_bedrock_express_post.txt b/tests/platforms/static/mcbbsnews_bedrock_express_post.txt new file mode 100644 index 0000000..15981da --- /dev/null +++ 
b/tests/platforms/static/mcbbsnews_bedrock_express_post.txt @@ -0,0 +1,12 @@ +Mojang Status:正在寻找1.18.30更新问题的解决方案 + +Mojangstatus +@Mojangstatus +We are aware that the 1.18.30 update caused issues for some Bedrock players. We are actively looking into solutions and hope to have solutions out soon. Thank you for your patience! jhp +由 ArmorRush 翻译自 英文 +我们注意到1.18.30版本的更新导致了一些基岩版玩家出现了(游戏中的)问题。我们正在积极寻找解决方案,并希望能尽快解决问题。感谢您的耐心等待! jhp +Twitter +· SPXX +2022年 +4月27日 +上午 7:41 · HipChat Villager diff --git a/tests/platforms/static/mcbbsnews_bedrock_post.txt b/tests/platforms/static/mcbbsnews_bedrock_post.txt index 3871689..6a5d672 100644 --- a/tests/platforms/static/mcbbsnews_bedrock_post.txt +++ b/tests/platforms/static/mcbbsnews_bedrock_post.txt @@ -59,5 +59,4 @@ Gametest框架(实验性游戏内容) 【苦力怕553译自 feedback.minecraft.net2022年5月12日发布的MinecraftBeta&Preview-1.19.0.32/33 】 -【本文排版借助了: -v2.4.7】 + diff --git a/tests/platforms/static/mcbbsnews_java_express_post.txt b/tests/platforms/static/mcbbsnews_java_express_post.txt new file mode 100644 index 0000000..43d1bdf --- /dev/null +++ b/tests/platforms/static/mcbbsnews_java_express_post.txt @@ -0,0 +1,14 @@ +Mojang Status:服务器出现一些小问题 + +Mojang Status +@MojangStatus +Our services have returned to normal operations. Thank you for your patience. - Martin +由 DreamVoid 翻译自英语 +我们的服务已恢复正常,感谢你的耐心等待。——Martin +下午7:43 · 2022年5月16日 · HipChat Villager · SPX +Mojang Status +@MojangStatus +Some of our services are having issues with increased response times. We are looking into the issue. 
- Martin +由 DreamVoid 翻译自英语 +我们的一些服务存在响应时间过长的问题。我们正在调查这个问题。——Martin +下午7:01 · 2022年5月16日 · Twitter Web App · SPX diff --git a/tests/platforms/static/mcbbsnews_merch_post.txt b/tests/platforms/static/mcbbsnews_merch_post.txt new file mode 100644 index 0000000..b87b564 --- /dev/null +++ b/tests/platforms/static/mcbbsnews_merch_post.txt @@ -0,0 +1,12 @@ +Minecraft: 加入Microsoft Rewards赢取限量Xbox Series S + +Minecraft +@Minecraft +Here's one warden you'll want to awaken...Join Microsoft Rewards and get a chance to win this exclusive Deep Dark Minecraft Xbox Series S! +https://www.microsoft.com/en-us/rewards/minecraft-xbox-series-s-sweeps?rtc=1&ocid=Wild_Update_soc_omc_min_tw_Link_no_ +由 ETW_Derp 翻译自 英语 +这里有一只等待你唤醒的监守者……加入Microsoft Rewards,你将有机会赢得这台 +** +独一无二的“深暗之域”Minecraft主题Xbox Series S! +https://www.microsoft.com/en-us/rewards/minecraft-xbox-series-s-sweeps?rtc=1&ocid=Wild_Update_soc_omc_min_tw_Link_no_ +上午2:42 · 2022年5月21日 diff --git a/tests/platforms/static/mcbbsnews_raw_post_list.json b/tests/platforms/static/mcbbsnews_raw_post_list.json index 19eb457..7c2f166 100644 --- a/tests/platforms/static/mcbbsnews_raw_post_list.json +++ b/tests/platforms/static/mcbbsnews_raw_post_list.json @@ -206,5 +206,13 @@ "author": "卡狗", "id": "normalthread_1320931", "date": 1648742400 + }, + { + "url": "thread-1342236-1-1.html", + "title": "Minecraft: 加入Microsoft Rewards赢取限量Xbox Series S", + "category": "周边消息", + "author": "ETW_Derp", + "id": "normalthread_1342236", + "date": 1648742400 } ] diff --git a/tests/platforms/test_mcbbsnews.py b/tests/platforms/test_mcbbsnews.py index 732097e..0ee3dec 100644 --- a/tests/platforms/test_mcbbsnews.py +++ b/tests/platforms/test_mcbbsnews.py @@ -45,6 +45,16 @@ async def test_bedrocknews_parser(mcbbsnews, raw_post_list, bedrocknews_post): assert post.text == bedrocknews_post +@pytest.mark.asyncio +async def test_express_merch_parser(mcbbsnews, raw_post_list): + java_express_post = await mcbbsnews.parse(raw_post_list[0]) + bedrock_express_post = 
@pytest.mark.asyncio
async def test_express_merch_parser(mcbbsnews, raw_post_list):
    """Check that parsed express/merch posts match their stored expected text."""
    # Index into raw_post_list -> fixture file holding the expected text.
    # NOTE(review): presumably entries 0, 13 and 26 are the Java express,
    # Bedrock express and merch threads respectively - verify against
    # mcbbsnews_raw_post_list.json.
    cases = (
        (0, "mcbbsnews_java_express_post.txt"),
        (13, "mcbbsnews_bedrock_express_post.txt"),
        (26, "mcbbsnews_merch_post.txt"),
    )
    # Parse every post first, then compare, in the same order as listed.
    parsed = [await mcbbsnews.parse(raw_post_list[index]) for index, _ in cases]
    for post, (_, fixture_name) in zip(parsed, cases):
        assert post.text == get_file(fixture_name)