From 5ede3b27d1c2d6d4b9fd5df3f9e3df88c33ad154 Mon Sep 17 00:00:00 2001
From: Azide <rukuy@qq.com>
Date: Wed, 18 May 2022 01:22:13 +0800
Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0McbbsNews=E4=B8=AD=E7=9A=84Ja?=
 =?UTF-8?q?va=E7=89=88=E6=9C=AC=E8=B5=84=E8=AE=AF=E8=AE=A2=E9=98=85?=
 =?UTF-8?q?=E7=B1=BB=E5=9E=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../nonebot_bison/platform/mcbbsnews.py       | 78 +++++++++++++++++++
 1 file changed, 78 insertions(+)
 create mode 100644 src/plugins/nonebot_bison/platform/mcbbsnews.py

diff --git a/src/plugins/nonebot_bison/platform/mcbbsnews.py b/src/plugins/nonebot_bison/platform/mcbbsnews.py
new file mode 100644
index 0000000..ad9b684
--- /dev/null
+++ b/src/plugins/nonebot_bison/platform/mcbbsnews.py
@@ -0,0 +1,78 @@
+import re
+import time
+
+import httpx
+from bs4 import BeautifulSoup
+
+from ..types import Category, RawPost, Target
+from .platform import CategoryNotSupport, NewMessage
+
+
+def _format_text(rawtext: str) -> str:
+    """Collapse each newline and the whitespace run after it into a single space."""
+    newline_run = re.compile(r"\n\s*")
+    return newline_run.sub(" ", rawtext)
+
+
+def _stamp_date(rawdate: str) -> int:
+    """Return the local-time Unix timestamp (whole seconds) of a yyyy-mm-dd date."""
+    parsed = time.strptime(rawdate, "%Y-%m-%d")
+    return int(time.mktime(parsed))
+
+
+class McbbsJavaNews(NewMessage):
+    """NewMessage platform that reads threads from the MCBBS news forum index page."""
+
+    categories = {1: "Java版本资讯"}
+    enable_tag = False
+    platform_name = "mcbbsnews"
+    name = "MCBBS幻翼块讯"
+    enabled = True
+    is_common = False
+    schedule_type = "interval"
+    schedule_kw = {"hours": 1}
+    has_target = False
+
+    async def get_target_name(self, _: Target) -> str:
+        """Return the platform name followed by the category 1 label."""
+        return f"{self.name} {self.categories[1]}"
+
+    async def get_sub_list(self, _: Target) -> list[RawPost]:
+        """Fetch the index page; return a dict for each ``normalthread_*`` row."""
+        url = "https://www.mcbbs.net/forum-news-1.html"
+        headers = {
+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
+            "Chrome/51.0.2704.63 Safari/537.36"
+        }
+        async with httpx.AsyncClient() as client:
+            html = await client.get(url, headers=headers)
+        soup = BeautifulSoup(html.text, "html.parser")
+        post_list = []
+        for raw_post in soup.find_all("tbody", id=re.compile(r"normalthread_[0-9]*")):
+            title_link = raw_post.find("a", class_="s xst")  # one lookup, used twice
+            post = {"url": title_link["href"]}
+            post["title"] = _format_text(title_link.string)
+            post["category"] = raw_post.select("th em a")[0].string
+            post["author"] = raw_post.select("td:nth-of-type(2) cite a")[0].string
+            post["id"] = raw_post["id"]
+            if date_span := raw_post.select("td:nth-of-type(2) em span span"):
+                rawdate = date_span[0]["title"]  # nested span: date is in its title
+            else:
+                rawdate = raw_post.select("td:nth-of-type(2) em span")[0].string
+            post["date"] = _stamp_date(rawdate)
+            post_list.append(post)
+        return post_list
+
+    def get_id(self, post: RawPost) -> str:
+        """Return the post's ``id`` entry."""
+        return post["id"]
+
+    def get_date(self, post: RawPost) -> int:
+        """Return the post's ``date`` entry."""
+        return post["date"]
+
+    def get_category(self, post: RawPost) -> Category:
+        """Map "Java版本资讯" to ``Category(1)``; raise CategoryNotSupport otherwise."""
+        if post["category"] == "Java版本资讯":
+            return Category(1)
+        raise CategoryNotSupport("McbbsNews订阅暂不支持 `{}".format(post["category"]))