From 5ede3b27d1c2d6d4b9fd5df3f9e3df88c33ad154 Mon Sep 17 00:00:00 2001 From: Azide Date: Wed, 18 May 2022 01:22:13 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0McbbsNews=E4=B8=AD=E7=9A=84Ja?= =?UTF-8?q?va=E7=89=88=E6=9C=AC=E8=B5=84=E8=AE=AF=E8=AE=A2=E9=98=85?= =?UTF-8?q?=E7=B1=BB=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../nonebot_bison/platform/mcbbsnews.py | 78 +++++++++++++++++++ 1 file changed, 78 insertions(+) create mode 100644 src/plugins/nonebot_bison/platform/mcbbsnews.py diff --git a/src/plugins/nonebot_bison/platform/mcbbsnews.py b/src/plugins/nonebot_bison/platform/mcbbsnews.py new file mode 100644 index 0000000..ad9b684 --- /dev/null +++ b/src/plugins/nonebot_bison/platform/mcbbsnews.py @@ -0,0 +1,78 @@ +import re +import time + +import httpx +from bs4 import BeautifulSoup + +from ..types import Category, RawPost, Target +from .platform import CategoryNotSupport, NewMessage + + +def _format_text(rawtext: str) -> str: + """处理BeautifulSoup生成的string中奇怪的回车+连续空格""" + ftext = re.sub(r"\n\s*", " ", rawtext) + return ftext + + +def _stamp_date(rawdate: str) -> int: + """将时间转化为时间戳yyyy-mm-dd->timestamp""" + time_stamp = int(time.mktime(time.strptime(rawdate, "%Y-%m-%d"))) + return time_stamp + + +class McbbsJavaNews(NewMessage): + categories = {1: "Java版本资讯"} + enable_tag = False + platform_name = "mcbbsnews" + name = "MCBBS幻翼块讯" + enabled = True + is_common = False + schedule_type = "interval" + schedule_kw = {"hours": 1} + has_target = False + + async def get_target_name(self, _: Target) -> str: + return f"{self.name} {self.categories[1]}" + + async def get_sub_list(self, _: Target) -> list[RawPost]: + url = "https://www.mcbbs.net/forum-news-1.html" + headers = { + "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) " + "Chrome/51.0.2704.63 Safari/537.36" + } + + async with httpx.AsyncClient() as client: + html = await client.get(url, headers=headers) + soup = BeautifulSoup(html.text, "html.parser") + raw_post_list = soup.find_all( + "tbody", id=re.compile(r"normalthread_[0-9]*") + ) + post_list = [] + for raw_post in raw_post_list: + post = {} + post["url"] = raw_post.find("a", class_="s xst")["href"] + post["title"] = _format_text(raw_post.find("a", class_="s xst").string) + post["category"] = raw_post.select("th em a")[0].string + post["author"] = raw_post.select("td:nth-of-type(2) cite a")[0].string + post["id"] = raw_post["id"] + rawdate = ( + raw_post.select("td:nth-of-type(2) em span span")[0]["title"] + if raw_post.select("td:nth-of-type(2) em span span") + else raw_post.select("td:nth-of-type(2) em span")[0].string + ) + post["date"] = _stamp_date(rawdate) + post_list.append(post) + + return post_list + + def get_id(self, post: RawPost) -> str: + return post["id"] + + def get_date(self, post: RawPost) -> int: + return post["date"] + + def get_category(self, post: RawPost) -> Category: + if post["category"] == "Java版本资讯": + return Category(1) + else: + return CategoryNotSupport("McbbsNews订阅暂不支持 `{}".format(post["category"]))