From 7fa31b606010f99c1a8187d7006c9f39f955048c Mon Sep 17 00:00:00 2001
From: AzideCupric <57004769+AzideCupric@users.noreply.github.com>
Date: Sun, 5 Feb 2023 17:00:11 +0800
Subject: [PATCH] =?UTF-8?q?:recycle:=20=E4=BD=BF=E7=94=A8=E6=9B=B4?=
=?UTF-8?q?=E5=8A=A0=E7=AE=80=E7=BA=A6=E7=9A=84=E6=96=B9=E6=B3=95=E7=94=9F?=
=?UTF-8?q?=E6=88=90mcbbsnews=E7=9A=84=E6=8E=A8=E9=80=81,=E4=BF=AE?=
=?UTF-8?q?=E6=94=B9=E6=B5=8B=E8=AF=95=E7=94=A8=E4=BE=8B=20(#170)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
* change(mcbbsnews):使用更加简约的方法生成mcbbsnews的推送,修改测试用例
test(mcbbsnews):添加测试函数小工具
change(mcbbsnews):优化代码
test(mcbbsnews):调整测试
test(mcbbsnews):完善细节部分
fix(mcbbsnews):修改traceback的import位置
test fix(mcbbsnews):删除错误传参
* fix(mcbbsnews): 更新过时的category名称
feat(platform): 添加新的异常(CategoryNotRecognize), 用以区别已知但不支持的category(CategoryNotSupport)和未知的新增category(CategoryNotRecognize)
chore: 为各处的CategoryNotRecognize和CategoryNotSupport添加异常描述
test(mcbbsnews): 更新测试用文件的过时category名称
---
.../nonebot_bison/platform/arknights.py | 4 +-
.../nonebot_bison/platform/bilibili.py | 6 +-
.../nonebot_bison/platform/mcbbsnews.py | 376 +-
.../nonebot_bison/platform/platform.py | 17 +-
.../mcbbsnews/mcbbsnews_raw_post_list.json | 52 +-
.../mcbbsnews_raw_post_list_update.json | 52 +-
.../mock/mcbbsnews_bedrock_express.html | 2 +-
.../mcbbsnews/mock/mcbbsnews_bedrocknews.html | 2 +-
.../mcbbsnews/mock/mcbbsnews_javanews.html | 2 +-
.../mcbbsnews/mock/mcbbsnews_merch.html | 2 +-
.../mock/mcbbsnews_post_list_html-0.html | 72 +-
.../mock/mcbbsnews_post_list_html-1.html | 74 +-
.../post/mcbbsnews_bedrock_express_post.txt | 12 -
.../mcbbsnews/post/mcbbsnews_bedrock_post.txt | 68 -
.../post/mcbbsnews_java_express_post.txt | 14 -
.../mcbbsnews/post/mcbbsnews_java_post-0.txt | 68 -
.../mcbbsnews/post/mcbbsnews_java_post-1.txt | 78 -
.../mcbbsnews/post/mcbbsnews_merch_post.txt | 16 -
.../static/mcbbsnews_bedrock_express_post.txt | 12 -
.../static/mcbbsnews_bedrock_post.txt | 62 -
.../static/mcbbsnews_java_express_post.txt | 14 -
.../static/mcbbsnews_java_post-0.txt | 67 -
.../static/mcbbsnews_java_post-1.txt | 78 -
.../platforms/static/mcbbsnews_merch_post.txt | 12 -
.../static/mcbbsnews_new_post_html.html | 6504 -----------------
.../static/mcbbsnews_post_list_html-0.html | 2552 -------
.../static/mcbbsnews_post_list_html-1.html | 2601 -------
.../static/mcbbsnews_raw_post_list.json | 218 -
.../mcbbsnews_raw_post_list_update.json | 218 -
tests/platforms/test_mcbbsnews.py | 96 +-
tests/platforms/test_platform.py | 2 +-
31 files changed, 343 insertions(+), 13010 deletions(-)
delete mode 100644 tests/platforms/static/mcbbsnews/post/mcbbsnews_bedrock_express_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews/post/mcbbsnews_bedrock_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews/post/mcbbsnews_java_express_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews/post/mcbbsnews_java_post-0.txt
delete mode 100644 tests/platforms/static/mcbbsnews/post/mcbbsnews_java_post-1.txt
delete mode 100644 tests/platforms/static/mcbbsnews/post/mcbbsnews_merch_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews_bedrock_express_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews_bedrock_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews_java_express_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews_java_post-0.txt
delete mode 100644 tests/platforms/static/mcbbsnews_java_post-1.txt
delete mode 100644 tests/platforms/static/mcbbsnews_merch_post.txt
delete mode 100644 tests/platforms/static/mcbbsnews_new_post_html.html
delete mode 100644 tests/platforms/static/mcbbsnews_post_list_html-0.html
delete mode 100644 tests/platforms/static/mcbbsnews_post_list_html-1.html
delete mode 100644 tests/platforms/static/mcbbsnews_raw_post_list.json
delete mode 100644 tests/platforms/static/mcbbsnews_raw_post_list_update.json
diff --git a/src/plugins/nonebot_bison/platform/arknights.py b/src/plugins/nonebot_bison/platform/arknights.py
index 3b7d9a3..2513de5 100644
--- a/src/plugins/nonebot_bison/platform/arknights.py
+++ b/src/plugins/nonebot_bison/platform/arknights.py
@@ -8,7 +8,7 @@ from nonebot.plugin import require
from ..post import Post
from ..types import Category, RawPost, Target
from ..utils.scheduler_config import SchedulerConfig
-from .platform import CategoryNotSupport, NewMessage, StatusChange
+from .platform import CategoryNotRecognize, NewMessage, StatusChange
class ArknightsSchedConf(SchedulerConfig):
@@ -79,7 +79,7 @@ class Arknights(NewMessage):
elif pic := soup.find("img", class_="banner-image"):
pics.append(pic["src"]) # type: ignore
else:
- raise CategoryNotSupport()
+ raise CategoryNotRecognize("未找到可渲染部分")
return Post(
"arknights",
text=text,
diff --git a/src/plugins/nonebot_bison/platform/bilibili.py b/src/plugins/nonebot_bison/platform/bilibili.py
index 7bdb1d7..b0a10c0 100644
--- a/src/plugins/nonebot_bison/platform/bilibili.py
+++ b/src/plugins/nonebot_bison/platform/bilibili.py
@@ -12,7 +12,7 @@ from typing_extensions import Self
from ..post import Post
from ..types import ApiError, Category, RawPost, Tag, Target
from ..utils import SchedulerConfig
-from .platform import CategoryNotSupport, NewMessage, StatusChange
+from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage, StatusChange
class BilibiliSchedConf(SchedulerConfig):
@@ -121,7 +121,7 @@ class Bilibili(NewMessage):
elif post_type == 1:
# 转发
return Category(5)
- raise CategoryNotSupport()
+ raise CategoryNotRecognize(post_type)
def get_category(self, post: RawPost) -> Category:
post_type = post["desc"]["type"]
@@ -153,7 +153,7 @@ class Bilibili(NewMessage):
text = card["item"]["content"]
pic = []
else:
- raise CategoryNotSupport()
+ raise CategoryNotSupport(post_type)
return text, pic
async def parse(self, raw_post: RawPost) -> Post:
diff --git a/src/plugins/nonebot_bison/platform/mcbbsnews.py b/src/plugins/nonebot_bison/platform/mcbbsnews.py
index cb762a0..030c5ab 100644
--- a/src/plugins/nonebot_bison/platform/mcbbsnews.py
+++ b/src/plugins/nonebot_bison/platform/mcbbsnews.py
@@ -1,290 +1,190 @@
import re
import time
-from typing import Literal, Optional
+import traceback
+from typing import Literal
-import httpx
-from bs4 import BeautifulSoup, NavigableString, Tag
+from bs4 import BeautifulSoup, Tag
from httpx import AsyncClient
+from nonebot.plugin import require
+from ..plugin_config import plugin_config
from ..post import Post
from ..types import Category, RawPost, Target
-from ..utils import scheduler
-from .platform import CategoryNotSupport, NewMessage
+from ..utils import SchedulerConfig, http_client
+from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage
-def _format_text(rawtext: str, mode: int) -> str:
- """处理BeautifulSoup生成的string中奇怪的回车+连续空格
- mode 0:处理标题
- mode 1:处理版本资讯类推文
- mode 2:处理快讯类推文"""
- match mode:
- case 0:
- ftext = re.sub(r"\n\s*", " ", rawtext)
- case 1:
- ftext = re.sub(r"[\n\s*]", "", rawtext)
- case 2:
- ftext = re.sub(r"\r\n", "", rawtext)
- return ftext
-
-
-def _stamp_date(rawdate: str) -> int:
- """将时间转化为时间戳yyyy-mm-dd->timestamp"""
- time_stamp = int(time.mktime(time.strptime(rawdate, "%Y-%m-%d")))
- return time_stamp
+class McbbsnewsSchedConf(SchedulerConfig):
+ name = "mcbbsnews"
+ schedule_type = "interval"
+ schedule_setting = {"minutes": 30}
class McbbsNews(NewMessage):
- categories = {1: "Java版本资讯", 2: "基岩版本资讯", 3: "快讯", 4: "基岩快讯", 5: "周边消息"}
- enable_tag = False
- platform_name = "mcbbsnews"
- name = "MCBBS幻翼块讯"
- enabled = True
- is_common = False
- scheduler = scheduler("interval", {"hours": 1})
- has_target = False
+ categories: dict[int, str] = {
+ 1: "Java版资讯",
+ 2: "基岩版资讯",
+ 3: "块讯",
+ 4: "基岩块讯",
+ 5: "周边",
+ 6: "主机",
+ 7: "时评",
+ }
+ enable_tag: bool = False
+ platform_name: str = "mcbbsnews"
+ name: str = "MCBBS幻翼块讯"
+ enabled: bool = True
+ is_common: bool = False
+ scheduler = McbbsnewsSchedConf
+ has_target: bool = False
+
+ _known_cats: dict[int, str] = {
+ 1: "Java版资讯",
+ 2: "基岩版资讯",
+ 3: "块讯",
+ 4: "基岩块讯",
+ 5: "周边",
+ 6: "主机",
+ 7: "时评",
+ }
@classmethod
- async def get_target_name(
- cls, client: AsyncClient, target: Target
- ) -> Optional[str]:
+ async def get_target_name(cls, client: AsyncClient, target: Target) -> str:
return cls.name
async def get_sub_list(self, _: Target) -> list[RawPost]:
- url = "https://www.mcbbs.net/forum-news-1.html"
- headers = {
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
- "Chrome/51.0.2704.63 Safari/537.36"
- }
+ url: str = "https://www.mcbbs.net/forum-news-1.html"
- async with httpx.AsyncClient() as client:
- html = await client.get(url, headers=headers)
- soup = BeautifulSoup(html.text, "html.parser")
- raw_post_list = soup.find_all(
- "tbody", id=re.compile(r"normalthread_[0-9]*")
- )
- post_list = self._gen_post_list(raw_post_list)
+ html = await self.client.get(url)
+ soup = BeautifulSoup(html.text, "html.parser")
+ raw_post_list = soup.find_all("tbody", id=re.compile(r"normalthread_[0-9]*"))
+ post_list = self._gen_post_list(raw_post_list)
return post_list
- @staticmethod
- def _format_text(rawtext: str, mode: int) -> str:
- """处理BeautifulSoup生成的string中奇怪的回车+连续空格
- mode 0:处理标题
- mode 1:处理版本资讯类推文
- mode 2:处理快讯类推文"""
- if mode == 0:
- ftext = re.sub(r"\n\s*", " ", rawtext)
- elif mode == 1:
- ftext = re.sub(r"[\n\s*]", "", rawtext)
- elif mode == 2:
- ftext = re.sub(r"\r\n", "", rawtext)
- else:
- raise NotImplementedError
- return ftext
-
- @staticmethod
- def _stamp_date(rawdate: str) -> int:
- """将时间转化为时间戳yyyy-mm-dd->timestamp"""
- time_stamp = int(time.mktime(time.strptime(rawdate, "%Y-%m-%d")))
- return time_stamp
-
- def _gen_post_list(self, raw_post_list) -> list[RawPost]:
+ def _gen_post_list(self, raw_post_list: list[Tag]) -> list[RawPost]:
"""解析生成推文列表"""
post_list = []
+
for raw_post in raw_post_list:
post = {}
- post["url"] = raw_post.find("a", class_="s xst")["href"]
- post["title"] = self._format_text(
- raw_post.find("a", class_="s xst").string, 0
- )
+
+ url_tag = raw_post.find("a", class_="s xst")
+ if isinstance(url_tag, Tag):
+ post["url"] = url_tag.get("href")
+ title_tag = raw_post.find("a", class_="s xst")
+ if isinstance(title_tag, Tag):
+ title_string = title_tag.string
+ if isinstance(title_string, str):
+ post["title"] = self._format_text(title_string, "title")
post["category"] = raw_post.select("th em a")[0].string
post["author"] = raw_post.select("td:nth-of-type(2) cite a")[0].string
post["id"] = raw_post["id"]
- rawdate = (
+ raw_date = (
raw_post.select("td:nth-of-type(2) em span span")[0]["title"]
if raw_post.select("td:nth-of-type(2) em span span")
else raw_post.select("td:nth-of-type(2) em span")[0].string
)
- post["date"] = self._stamp_date(rawdate)
+ if isinstance(raw_date, str):
+ post["date"] = self._stamp_date(raw_date)
+
post_list.append(post)
+
return post_list
+    @staticmethod
+    def _format_text(raw_text: str, mode: str) -> str:
+        """
+        Clean up stray newlines and whitespace in strings from BeautifulSoup.
+
+        Modes:
+            title: replace each newline plus following whitespace with a space
+        """
+        match mode:
+            case "title":
+                ftext = re.sub(r"\n\s*", " ", raw_text)
+            case _:
+                raise NotImplementedError(f"不支持的处理模式: {mode}")
+
+        return ftext
+
+    @staticmethod
+    def _stamp_date(raw_date: str) -> int:
+        """
+        Parse a ``yyyy-mm-dd`` date string and return it as an
+        integer timestamp computed with ``time.mktime``.
+        """
+        parsed = time.strptime(raw_date, "%Y-%m-%d")
+
+        return int(time.mktime(parsed))
+
def get_id(self, post: RawPost) -> str:
return post["id"]
- def get_date(self, post: RawPost) -> int:
+ def get_date(self, _: RawPost) -> int | None:
# 获取datetime精度只到日期,故暂时舍弃
# return post["date"]
return None
def get_category(self, post: RawPost) -> Category:
- if post["category"] == "Java版本资讯":
- return Category(1)
- elif post["category"] == "基岩版本资讯":
- return Category(2)
+ categoty_name = post["category"]
+ category_keys = list(self.categories.keys())
+ category_values = list(self.categories.values())
+ known_category_values = list(self._known_cats.values())
+
+ if categoty_name in category_values:
+ category_id = category_keys[category_values.index(categoty_name)]
+ elif categoty_name in known_category_values:
+ raise CategoryNotSupport("McbbsNews订阅暂不支持 {}".format(categoty_name))
else:
- raise CategoryNotSupport("McbbsNews订阅暂不支持 `{}".format(post["category"]))
+ raise CategoryNotRecognize("Mcbbsnews订阅尚未识别 {}".format(categoty_name))
+ return category_id
- @staticmethod
- def _check_str_chinese(check_str: str) -> bool:
- """检测字符串是否含有中文(有一个就算)"""
- for ch in check_str:
- if "\u4e00" <= ch <= "\u9fff":
- return True
- return False
+ async def parse(self, post: RawPost) -> Post:
+ """获取并分配正式推文交由相应的函数渲染"""
+ post_url = "https://www.mcbbs.net/{}".format(post["url"])
+ async with http_client() as client:
+ html = await client.get(post_url)
+ html.raise_for_status()
- def _news_parser(self, raw_text: str, news_type: Literal["Java版本资讯", "基岩版本资讯"]):
- """提取Java/Bedrock版本资讯的推送消息"""
- raw_soup = BeautifulSoup(raw_text.replace("
", ""), "html.parser")
- # 获取头图
- if news_type == "Java版本资讯":
- # 获取头图
- pic_tag = raw_soup.find(
- "img", file=re.compile(r"https://www.minecraft.net/\S*header.jpg")
- )
- pic_url: list[str] = (
- [pic_tag.get("src", pic_tag.get("file"))] if pic_tag else []
- )
- # 获取blockquote标签下的内容
- soup = raw_soup.find(
- "td", id=re.compile(r"postmessage_[0-9]*")
- ).blockquote.blockquote
- elif news_type == "基岩版本资讯":
- # 获取头图
- pic_tag_0 = raw_soup.find(
- "img", file=re.compile(r"https://www.minecraft.net/\S*header.jpg")
- )
- pic_tag_1 = raw_soup.find(
- "img",
- file=re.compile(r"https://feedback.minecraft.net/\S*beta\S*.jpg"),
- )
- pic_url: list[str] = [
- pic_tag_0.get("src", pic_tag_0.get("file")) if pic_tag_0 else None,
- pic_tag_1.get("src", pic_tag_1.get("file")) if pic_tag_1 else None,
- ]
- # 获取blockquote标签下的内容
- soup = (
- raw_soup.find("td", id=re.compile(r"postmessage_[0-9]*"))
- .select("blockquote:nth-of-type(2)")[0]
- .blockquote
- )
+ soup = BeautifulSoup(html.text, "html.parser")
+ post_body = soup.find("td", id=re.compile(r"postmessage_[0-9]*"))
+ if isinstance(post_body, Tag):
+ post_id = post_body.attrs.get("id")
else:
- raise CategoryNotSupport(f"该函数不支持处理{news_type}")
-
- # 通用步骤
- # 删除无用的div和span段内容
- for del_tag in soup.find_all(["div", "span"]):
- del_tag.extract()
- # 进一步删除无用尾部
- # orig_info=soup.select("blockquote > strong")
- # orig_info[0].extract()
- # 展开所有的a,u和strong标签,展开ul,font标签里的font标签
- for unwrap_tag in soup.find_all(["a", "strong", "u", "ul", "font"]):
- if unwrap_tag.name in ["a", "strong", "u"]: # 展开所有的a,u和strong标签
- unwrap_tag.unwrap()
- elif unwrap_tag.name in ["ul", "font"]: # 展开ul,font里的font标签
- for font_tag in unwrap_tag.find_all("font"):
- font_tag.unwrap()
-
- # 获取所有的中文句子
- post_text = ""
- last_is_empty_line = True
- for element in soup.contents:
- if isinstance(element, Tag):
- if element.name == "font":
- text = ""
- for sub in element.contents:
- if isinstance(sub, NavigableString):
- text += sub
- if self._check_str_chinese(text):
- post_text += "{}\n".format(self._format_text(text, 1))
- last_is_empty_line = False
- elif element.name == "ul":
- for li_tag in element.find_all("li"):
- text = ""
- for sub in li_tag.contents:
- if isinstance(sub, NavigableString):
- text += sub
- if self._check_str_chinese(text):
- post_text += "{}\n".format(self._format_text(text, 1))
- last_is_empty_line = False
- else:
- continue
- elif isinstance(element, NavigableString):
- if str(element) == "\n":
- if not last_is_empty_line:
- post_text += "\n"
- last_is_empty_line = True
- else:
- post_text += "{}\n".format(self._format_text(element, 1))
- last_is_empty_line = False
- else:
- continue
- return post_text, pic_url
-
- def _express_parser(self, raw_text: str, news_type: Literal["快讯", "基岩快讯", "周边消息"]):
- """提取快讯/基岩快讯/周边消息的推送消息"""
- raw_soup = BeautifulSoup(raw_text.replace("
", ""), "html.parser")
- # 获取原始推文内容
- soup = raw_soup.find("td", id=re.compile(r"postmessage_[0-9]*"))
- if tag := soup.find("ignore_js_op"):
- tag.extract()
- # 获取所有图片
- pic_urls = []
- for img_tag in soup.find_all("img"):
- pic_url = img_tag.get("file") or img_tag.get("src")
- pic_urls.append(pic_url)
- # 验证是否有blockquote标签
- has_bolockquote = soup.find("blockquote")
- # 删除无用的span,div段内容
- for del_tag in soup.find_all("i"):
- del_tag.extract()
- if extag := soup.find(class_="attach_nopermission attach_tips"):
- extag.extract()
- # 展开所有的a,strong标签
- for unwrap_tag in soup.find_all(["a", "strong"]):
- unwrap_tag.unwrap()
- # 展开blockquote标签里的blockquote标签
- for b_tag in soup.find_all("blockquote"):
- for unwrap_tag in b_tag.find_all("blockquote"):
- unwrap_tag.unwrap()
- # 获取推文
- text = ""
- if has_bolockquote:
- for post in soup.find_all("blockquote"):
- # post.font.unwrap()
- for string in post.stripped_strings:
- text += "{}\n".format(string)
- else:
- for string in soup.stripped_strings:
- text += "{}\n".format(string)
- ftext = self._format_text(text, 2)
- return ftext, pic_urls
-
- async def parse(self, raw_post: RawPost) -> Post:
- """获取并分配正式推文交由相应的函数解析"""
- post_url = "https://www.mcbbs.net/{}".format(raw_post["url"])
- headers = {
- "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) "
- "Chrome/51.0.2704.63 Safari/537.36"
- }
-
- async with httpx.AsyncClient() as client:
- html = await client.get(post_url, headers=headers)
-
- if raw_post["category"] in ["Java版本资讯", "基岩版本资讯"]:
- # 事先删除不需要的尾部
- raw_text = re.sub(r"【本文排版借助了:[\s\S]*】", "", html.text)
- text, pic_urls = self._news_parser(raw_text, raw_post["category"])
- elif raw_post["category"] in ["快讯", "基岩快讯", "周边消息"]:
- text, pic_urls = self._express_parser(html.text, raw_post["category"])
- else:
- raise CategoryNotSupport("McbbsNews订阅暂不支持 `{}".format(raw_post["category"]))
+ post_id = None
+ pics = await self._news_render(post_url, f"#{post_id}")
return Post(
self.name,
- text="{}\n\n{}".format(raw_post["title"], text),
+ text="{}\n│\n└由 {} 发表".format(post["title"], post["author"]),
url=post_url,
- pics=pic_urls,
- target_name=raw_post["category"],
+ pics=list(pics),
+ target_name=post["category"],
)
+
+    async def _news_render(self, url: str, selector: str) -> list[bytes]:
+        """
+        Render the part of page `url` matched by CSS `selector` into a picture.
+
+        Note: posts are usually long, so rendering can take a while. On failure
+        two text pictures (error notice + traceback) are returned instead.
+        """
+        require("nonebot_plugin_htmlrender")
+        from nonebot_plugin_htmlrender import capture_element, text_to_pic
+
+        try:
+            assert url
+            pic_data = await capture_element(
+                url,
+                selector,
+                viewport={"width": 1000, "height": 6400},
+                device_scale_factor=3,
+            )
+            assert pic_data
+        except Exception:  # not bare: let CancelledError/KeyboardInterrupt through
+            err_pic0 = await text_to_pic("错误发生!")
+            err_pic1 = await text_to_pic(traceback.format_exc())
+            return [err_pic0, err_pic1]
+        else:
+            return [pic_data]
diff --git a/src/plugins/nonebot_bison/platform/platform.py b/src/plugins/nonebot_bison/platform/platform.py
index 7077685..90fd86a 100644
--- a/src/plugins/nonebot_bison/platform/platform.py
+++ b/src/plugins/nonebot_bison/platform/platform.py
@@ -18,7 +18,11 @@ from ..utils import ProcessContext, SchedulerConfig
class CategoryNotSupport(Exception):
- "raise in get_category, when post category is not supported"
+ "raise in get_category, when you know the category of the post but don't want to support it or don't support its parsing yet"
+
+
+class CategoryNotRecognize(Exception):
+ "raise in get_category, when you don't know the category of post"
class RegistryMeta(type):
@@ -181,8 +185,9 @@ class Platform(metaclass=PlatformABCMeta, base=True):
if cats and cat not in cats:
continue
if self.enable_tag and tags:
- if self.is_banned_post(
- self.get_tags(raw_post), *self.tag_separator(tags)
+ raw_post_tags = self.get_tags(raw_post)
+ if isinstance(raw_post_tags, Collection) and self.is_banned_post(
+ raw_post_tags, *self.tag_separator(tags)
):
continue
res.append(raw_post)
@@ -255,7 +260,11 @@ class MessageProcess(Platform, abstract=True):
continue
try:
self.get_category(raw_post)
- except CategoryNotSupport:
+ except CategoryNotSupport as e:
+ logger.info("未支持解析的推文类别:" + repr(e) + ",忽略")
+ continue
+ except CategoryNotRecognize as e:
+ logger.warning("未知推文类别:" + repr(e))
msgs = self.ctx.gen_req_records()
for m in msgs:
logger.warning(m)
diff --git a/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list.json b/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list.json
index 7c2f166..a54edcf 100644
--- a/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list.json
+++ b/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list.json
@@ -2,7 +2,7 @@
{
"url": "thread-1340080-1-1.html",
"title": "Mojang Status:服务器出现一些小问题",
- "category": "快讯",
+ "category": "块讯",
"author": "DreamVoid",
"id": "normalthread_1340080",
"date": 1652630400
@@ -10,7 +10,7 @@
{
"url": "thread-1339940-1-1.html",
"title": "kinbdogz 就近期荒野更新的风波发表看法",
- "category": "快讯",
+ "category": "块讯",
"author": "卡狗",
"id": "normalthread_1339940",
"date": 1652630400
@@ -18,7 +18,7 @@
{
"url": "thread-1339097-1-1.html",
"title": "Minecraft 基岩版 1.18.33 发布(仅 Switch)",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "电量量",
"id": "normalthread_1339097",
"date": 1652457600
@@ -26,7 +26,7 @@
{
"url": "thread-1338607-1-1.html",
"title": "Minecraft Java版 22w19a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "寂华",
"id": "normalthread_1338607",
"date": 1652371200
@@ -34,7 +34,7 @@
{
"url": "thread-1338592-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.32/33 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "苦力怕553",
"id": "normalthread_1338592",
"date": 1652371200
@@ -50,7 +50,7 @@
{
"url": "thread-1338496-1-1.html",
"title": "slicedlime:周三无快照,推迟至周四",
- "category": "快讯",
+ "category": "块讯",
"author": "橄榄Chan",
"id": "normalthread_1338496",
"date": 1652198400
@@ -58,7 +58,7 @@
{
"url": "thread-1336371-1-1.html",
"title": "Minecraft 基岩版 1.18.32 发布(仅 Android、NS)【新增 NS 平台】",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "电量量",
"id": "normalthread_1336371",
"date": 1651766400
@@ -66,7 +66,7 @@
{
"url": "thread-1335897-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.30/31 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "AzureZeng",
"id": "normalthread_1335897",
"date": 1651680000
@@ -74,7 +74,7 @@
{
"url": "thread-1335891-1-1.html",
"title": "Minecraft Java版 22w18a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "Aurora_Feather",
"id": "normalthread_1335891",
"date": 1651680000
@@ -82,7 +82,7 @@
{
"url": "thread-1333196-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.28/29 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "希铁石z",
"id": "normalthread_1333196",
"date": 1651161600
@@ -90,7 +90,7 @@
{
"url": "thread-1332834-1-1.html",
"title": "Minecraft 基岩版 1.18.31 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "希铁石z",
"id": "normalthread_1332834",
"date": 1651075200
@@ -98,7 +98,7 @@
{
"url": "thread-1332811-1-1.html",
"title": "Minecraft Java版 22w17a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1332811",
"date": 1651075200
@@ -106,7 +106,7 @@
{
"url": "thread-1332424-1-1.html",
"title": "Mojang Status:正在寻找1.18.30更新问题的解决方案",
- "category": "基岩快讯",
+ "category": "基岩块讯",
"author": "ArmorRush",
"id": "normalthread_1332424",
"date": 1650988800
@@ -114,7 +114,7 @@
{
"url": "thread-1329712-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.26/27 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1329712",
"date": 1650470400
@@ -122,7 +122,7 @@
{
"url": "thread-1329651-1-1.html",
"title": "Minecraft Java版 22w16b 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1329651",
"date": 1650470400
@@ -130,7 +130,7 @@
{
"url": "thread-1329644-1-1.html",
"title": "Minecraft Java版 22w16a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "希铁石z",
"id": "normalthread_1329644",
"date": 1650470400
@@ -138,7 +138,7 @@
{
"url": "thread-1329335-1-1.html",
"title": "Minecraft 基岩版 1.18.30 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1329335",
"date": 1650384000
@@ -146,7 +146,7 @@
{
"url": "thread-1328892-1-1.html",
"title": "“海王” 杰森·莫玛 有望主演《我的世界》大电影",
- "category": "快讯",
+ "category": "块讯",
"author": "广药",
"id": "normalthread_1328892",
"date": 1650297600
@@ -154,7 +154,7 @@
{
"url": "thread-1327089-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.24/25 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1327089",
"date": 1649952000
@@ -162,7 +162,7 @@
{
"url": "thread-1326640-1-1.html",
"title": "Minecraft Java版 22w15a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "ArmorRush",
"id": "normalthread_1326640",
"date": 1649865600
@@ -170,7 +170,7 @@
{
"url": "thread-1323762-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.20 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1323762",
"date": 1649260800
@@ -178,7 +178,7 @@
{
"url": "thread-1323662-1-1.html",
"title": "Minecraft Java版 22w14a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1323662",
"date": 1649260800
@@ -186,7 +186,7 @@
{
"url": "thread-1321419-1-1.html",
"title": "[愚人节] Minecraft Java版 22w13oneBlockAtATime 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "希铁石z",
"id": "normalthread_1321419",
"date": 1648742400
@@ -194,7 +194,7 @@
{
"url": "thread-1320986-1-1.html",
"title": "Minecraft:近期没有为主机平台添加光线追踪的计划",
- "category": "基岩快讯",
+ "category": "基岩块讯",
"author": "ArmorRush",
"id": "normalthread_1320986",
"date": 1648742400
@@ -202,7 +202,7 @@
{
"url": "thread-1320931-1-1.html",
"title": "Minecraft Java版 22w13a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1320931",
"date": 1648742400
@@ -210,7 +210,7 @@
{
"url": "thread-1342236-1-1.html",
"title": "Minecraft: 加入Microsoft Rewards赢取限量Xbox Series S",
- "category": "周边消息",
+ "category": "周边",
"author": "ETW_Derp",
"id": "normalthread_1342236",
"date": 1648742400
diff --git a/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list_update.json b/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list_update.json
index 6229393..0443f1d 100644
--- a/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list_update.json
+++ b/tests/platforms/static/mcbbsnews/mcbbsnews_raw_post_list_update.json
@@ -2,7 +2,7 @@
{
"url": "thread-1340927-1-1.html",
"title": "Minecraft Java版 1.19-pre1 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "希铁石z",
"id": "normalthread_1340927",
"date": 1652889600
@@ -10,7 +10,7 @@
{
"url": "thread-1340080-1-1.html",
"title": "Mojang Status:服务器出现一些小问题",
- "category": "快讯",
+ "category": "块讯",
"author": "DreamVoid",
"id": "normalthread_1340080",
"date": 1652630400
@@ -18,7 +18,7 @@
{
"url": "thread-1339940-1-1.html",
"title": "kinbdogz 就近期荒野更新的风波发表看法",
- "category": "快讯",
+ "category": "块讯",
"author": "卡狗",
"id": "normalthread_1339940",
"date": 1652630400
@@ -26,7 +26,7 @@
{
"url": "thread-1339097-1-1.html",
"title": "Minecraft 基岩版 1.18.33 发布(仅 Switch)",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "电量量",
"id": "normalthread_1339097",
"date": 1652457600
@@ -34,7 +34,7 @@
{
"url": "thread-1338607-1-1.html",
"title": "Minecraft Java版 22w19a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "寂华",
"id": "normalthread_1338607",
"date": 1652371200
@@ -42,7 +42,7 @@
{
"url": "thread-1338592-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.32/33 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "苦力怕553",
"id": "normalthread_1338592",
"date": 1652371200
@@ -58,7 +58,7 @@
{
"url": "thread-1338496-1-1.html",
"title": "slicedlime:周三无快照,推迟至周四",
- "category": "快讯",
+ "category": "块讯",
"author": "橄榄Chan",
"id": "normalthread_1338496",
"date": 1652198400
@@ -66,7 +66,7 @@
{
"url": "thread-1336371-1-1.html",
"title": "Minecraft 基岩版 1.18.32 发布(仅 Android、NS)【新增 NS 平台】",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "电量量",
"id": "normalthread_1336371",
"date": 1651766400
@@ -74,7 +74,7 @@
{
"url": "thread-1335897-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.30/31 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "AzureZeng",
"id": "normalthread_1335897",
"date": 1651680000
@@ -82,7 +82,7 @@
{
"url": "thread-1335891-1-1.html",
"title": "Minecraft Java版 22w18a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "Aurora_Feather",
"id": "normalthread_1335891",
"date": 1651680000
@@ -90,7 +90,7 @@
{
"url": "thread-1333196-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.28/29 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "希铁石z",
"id": "normalthread_1333196",
"date": 1651161600
@@ -98,7 +98,7 @@
{
"url": "thread-1332834-1-1.html",
"title": "Minecraft 基岩版 1.18.31 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "希铁石z",
"id": "normalthread_1332834",
"date": 1651075200
@@ -106,7 +106,7 @@
{
"url": "thread-1332811-1-1.html",
"title": "Minecraft Java版 22w17a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1332811",
"date": 1651075200
@@ -114,7 +114,7 @@
{
"url": "thread-1332424-1-1.html",
"title": "Mojang Status:正在寻找1.18.30更新问题的解决方案",
- "category": "基岩快讯",
+ "category": "基岩块讯",
"author": "ArmorRush",
"id": "normalthread_1332424",
"date": 1650988800
@@ -122,7 +122,7 @@
{
"url": "thread-1329712-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.26/27 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1329712",
"date": 1650470400
@@ -130,7 +130,7 @@
{
"url": "thread-1329651-1-1.html",
"title": "Minecraft Java版 22w16b 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1329651",
"date": 1650470400
@@ -138,7 +138,7 @@
{
"url": "thread-1329644-1-1.html",
"title": "Minecraft Java版 22w16a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "希铁石z",
"id": "normalthread_1329644",
"date": 1650470400
@@ -146,7 +146,7 @@
{
"url": "thread-1329335-1-1.html",
"title": "Minecraft 基岩版 1.18.30 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1329335",
"date": 1650384000
@@ -154,7 +154,7 @@
{
"url": "thread-1328892-1-1.html",
"title": "“海王” 杰森·莫玛 有望主演《我的世界》大电影",
- "category": "快讯",
+ "category": "块讯",
"author": "广药",
"id": "normalthread_1328892",
"date": 1650297600
@@ -162,7 +162,7 @@
{
"url": "thread-1327089-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.24/25 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1327089",
"date": 1649952000
@@ -170,7 +170,7 @@
{
"url": "thread-1326640-1-1.html",
"title": "Minecraft Java版 22w15a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "ArmorRush",
"id": "normalthread_1326640",
"date": 1649865600
@@ -178,7 +178,7 @@
{
"url": "thread-1323762-1-1.html",
"title": "Minecraft 基岩版 Beta & Preview 1.19.0.20 发布",
- "category": "基岩版本资讯",
+ "category": "基岩版资讯",
"author": "ArmorRush",
"id": "normalthread_1323762",
"date": 1649260800
@@ -186,7 +186,7 @@
{
"url": "thread-1323662-1-1.html",
"title": "Minecraft Java版 22w14a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1323662",
"date": 1649260800
@@ -194,7 +194,7 @@
{
"url": "thread-1321419-1-1.html",
"title": "[愚人节] Minecraft Java版 22w13oneBlockAtATime 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "希铁石z",
"id": "normalthread_1321419",
"date": 1648742400
@@ -202,7 +202,7 @@
{
"url": "thread-1320986-1-1.html",
"title": "Minecraft:近期没有为主机平台添加光线追踪的计划",
- "category": "基岩快讯",
+ "category": "基岩块讯",
"author": "ArmorRush",
"id": "normalthread_1320986",
"date": 1648742400
@@ -210,7 +210,7 @@
{
"url": "thread-1320931-1-1.html",
"title": "Minecraft Java版 22w13a 发布",
- "category": "Java版本资讯",
+ "category": "Java版资讯",
"author": "卡狗",
"id": "normalthread_1320931",
"date": 1648742400
diff --git a/tests/platforms/static/mcbbsnews/mock/mcbbsnews_bedrock_express.html b/tests/platforms/static/mcbbsnews/mock/mcbbsnews_bedrock_express.html
index 669c750..b672a1a 100644
--- a/tests/platforms/static/mcbbsnews/mock/mcbbsnews_bedrock_express.html
+++ b/tests/platforms/static/mcbbsnews/mock/mcbbsnews_bedrock_express.html
@@ -815,7 +815,7 @@ top.window.location.href = 'member.php?mod=logging&action=logout&formhas