mirror of
https://github.com/suyiiyii/nonebot-bison.git
synced 2026-05-09 18:27:56 +08:00
🚚 修改 nonebot_bison 项目结构 (#211)
* 🎨 修改 nonebot_bison 目录位置 * auto fix by pre-commit hooks * 🚚 fix frontend build target * 🚚 use soft link * Revert "🚚 use soft link" This reverts commit de21f79d5ae1bd5515b04f42a4138cb25ddf3e62. * 🚚 modify dockerfile --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> Co-authored-by: felinae98 <731499577@qq.com>
This commit is contained in:
@@ -0,0 +1,25 @@
|
||||
from collections import defaultdict
|
||||
from importlib import import_module
|
||||
from pathlib import Path
|
||||
from pkgutil import iter_modules
|
||||
from typing import DefaultDict, Type
|
||||
|
||||
from .platform import Platform, make_no_target_group
|
||||
|
||||
# Import every sibling module so that each Platform subclass gets defined
# and registers itself in Platform.registry as a side effect.
_package_dir = str(Path(__file__).resolve().parent)
for _, _module_name, _ in iter_modules([_package_dir]):
    import_module(f"{__name__}.{_module_name}")


# Group the enabled platforms by their platform_name.
_platform_list: DefaultDict[str, list[Type[Platform]]] = defaultdict(list)
for _platform in Platform.registry:
    if _platform.enabled:
        _platform_list[_platform.platform_name].append(_platform)

# A name backed by a single implementation maps to it directly; several
# implementations under one name are merged into a no-target group platform.
platform_manager: dict[str, Type[Platform]] = {
    _name: _plats[0] if len(_plats) == 1 else make_no_target_group(_plats)
    for _name, _plats in _platform_list.items()
}
|
||||
@@ -0,0 +1,247 @@
|
||||
import json
|
||||
from typing import Any, Optional
|
||||
|
||||
from bs4 import BeautifulSoup as bs
|
||||
from httpx import AsyncClient
|
||||
from nonebot.plugin import require
|
||||
|
||||
from ..post import Post
|
||||
from ..types import Category, RawPost, Target
|
||||
from ..utils.scheduler_config import SchedulerConfig
|
||||
from .platform import CategoryNotRecognize, NewMessage, StatusChange
|
||||
|
||||
|
||||
class ArknightsSchedConf(SchedulerConfig):
    """Scheduler configuration shared by all Arknights-related platforms."""

    name = "arknights"
    schedule_type = "interval"
    # Poll every 30 seconds.
    schedule_setting = {"seconds": 30}
|
||||
|
||||
|
||||
class Arknights(NewMessage):
    """In-game announcement feed for 明日方舟 (Arknights)."""

    categories = {1: "游戏公告"}
    platform_name = "arknights"
    name = "明日方舟游戏信息"
    enable_tag = False
    enabled = True
    is_common = False
    scheduler = ArknightsSchedConf
    has_target = False

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        # Target-less platform: the display name is a constant.
        return "明日方舟游戏信息"

    async def get_sub_list(self, _) -> list[RawPost]:
        """Fetch the current announcement list from the official meta endpoint."""
        raw_data = await self.client.get(
            "https://ak-conf.hypergryph.com/config/prod/announce_meta/IOS/announcement.meta.json"
        )
        return json.loads(raw_data.text)["announceList"]

    def get_id(self, post: RawPost) -> Any:
        return post["announceId"]

    def get_date(self, _: RawPost) -> None:
        # No usable timestamp in the raw data.
        return None

    def get_category(self, _) -> Category:
        return Category(1)

    async def parse(self, raw_post: RawPost) -> Post:
        """Render one announcement into a Post (page screenshot or banner image)."""
        announce_url = raw_post["webUrl"]
        text = ""
        raw_html = await self.client.get(announce_url)
        soup = bs(raw_html.text, "html.parser")
        pics = []
        if soup.find("div", class_="standerd-container"):
            # Rich text announcement: screenshot the rendered page.
            require("nonebot_plugin_htmlrender")
            from nonebot_plugin_htmlrender import capture_element

            pic_data = await capture_element(
                announce_url,
                "div.main",
                viewport={"width": 320, "height": 6400},
                device_scale_factor=3,
            )
            if pic_data:
                pics.append(pic_data)
            else:
                text = "图片渲染失败"
        elif pic := soup.find("img", class_="banner-image"):
            # Banner-only announcement: reuse the banner image URL directly.
            pics.append(pic["src"])  # type: ignore
        else:
            raise CategoryNotRecognize("未找到可渲染部分")
        return Post(
            "arknights",
            text=text,
            url="",
            target_name="明日方舟游戏内公告",
            pics=pics,
            compress=True,
            override_use_pic=False,
        )
|
||||
|
||||
|
||||
class AkVersion(StatusChange):
    """Watches the Arknights client/resource version and pre-announcement state."""

    categories = {2: "更新信息"}
    platform_name = "arknights"
    name = "明日方舟游戏信息"
    enable_tag = False
    enabled = True
    is_common = False
    scheduler = ArknightsSchedConf
    has_target = False

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        # Target-less platform: the display name is a constant.
        return "明日方舟游戏信息"

    async def get_status(self, _):
        """Merge the version endpoint and the pre-announcement endpoint into one dict."""
        res_ver = await self.client.get(
            "https://ak-conf.hypergryph.com/config/prod/official/IOS/version"
        )
        res_preanounce = await self.client.get(
            "https://ak-conf.hypergryph.com/config/prod/announce_meta/IOS/preannouncement.meta.json"
        )
        res = res_ver.json()
        res.update(res_preanounce.json())
        return res

    def compare_status(self, _, old_status, new_status):
        """Emit ready-made Posts for pre-announcement toggles and version bumps."""
        res = []
        # 2 -> 0: treated as the login-screen maintenance notice going up.
        if (
            old_status.get("preAnnounceType") == 2
            and new_status.get("preAnnounceType") == 0
        ):
            res.append(
                Post("arknights", text="登录界面维护公告上线(大概是开始维护了)", target_name="明日方舟更新信息")
            )
        # 0 -> 2: treated as the maintenance notice being taken down.
        elif (
            old_status.get("preAnnounceType") == 0
            and new_status.get("preAnnounceType") == 2
        ):
            res.append(
                Post("arknights", text="登录界面维护公告下线(大概是开服了,冲!)", target_name="明日方舟更新信息")
            )
        if old_status.get("clientVersion") != new_status.get("clientVersion"):
            res.append(Post("arknights", text="游戏本体更新(大更新)", target_name="明日方舟更新信息"))
        if old_status.get("resVersion") != new_status.get("resVersion"):
            res.append(Post("arknights", text="游戏资源更新(小更新)", target_name="明日方舟更新信息"))
        return res

    def get_category(self, _):
        return Category(2)

    async def parse(self, raw_post):
        # compare_status already produced finished Post objects; pass through.
        return raw_post
|
||||
|
||||
|
||||
class MonsterSiren(NewMessage):
    """News feed of 塞壬唱片 (Monster Siren Records)."""

    categories = {3: "塞壬唱片新闻"}
    platform_name = "arknights"
    name = "明日方舟游戏信息"
    enable_tag = False
    enabled = True
    is_common = False
    scheduler = ArknightsSchedConf
    has_target = False

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        # Target-less platform: the display name is a constant.
        return "明日方舟游戏信息"

    async def get_sub_list(self, _) -> list[RawPost]:
        raw_data = await self.client.get(
            "https://monster-siren.hypergryph.com/api/news"
        )
        return raw_data.json()["data"]["list"]

    def get_id(self, post: RawPost) -> Any:
        return post["cid"]

    def get_date(self, _) -> None:
        return None

    def get_category(self, _) -> Category:
        return Category(3)

    async def parse(self, raw_post: RawPost) -> Post:
        """Fetch the article body, strip HTML, and collect embedded images."""
        url = f'https://monster-siren.hypergryph.com/info/{raw_post["cid"]}'
        res = await self.client.get(
            f'https://monster-siren.hypergryph.com/api/news/{raw_post["cid"]}'
        )
        raw_data = res.json()
        content = raw_data["data"]["content"]
        # Preserve paragraph breaks when tags are stripped below.
        content = content.replace("</p>", "</p>\n")
        soup = bs(content, "html.parser")
        imgs = list(map(lambda x: x["src"], soup("img")))
        text = f'{raw_post["title"]}\n{soup.text.strip()}'
        return Post(
            "monster-siren",
            text=text,
            pics=imgs,
            url=url,
            target_name="塞壬唱片新闻",
            compress=True,
            override_use_pic=False,
        )
|
||||
|
||||
|
||||
class TerraHistoricusComic(NewMessage):
    """Comic update feed of 泰拉记事社 (Terra Historicus)."""

    categories = {4: "泰拉记事社漫画"}
    platform_name = "arknights"
    name = "明日方舟游戏信息"
    enable_tag = False
    enabled = True
    is_common = False
    scheduler = ArknightsSchedConf
    has_target = False

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        # Target-less platform: the display name is a constant.
        return "明日方舟游戏信息"

    async def get_sub_list(self, _) -> list[RawPost]:
        raw_data = await self.client.get(
            "https://terra-historicus.hypergryph.com/api/recentUpdate"
        )
        return raw_data.json()["data"]

    def get_id(self, post: RawPost) -> Any:
        # An episode is identified by comic id + episode id combined.
        return f'{post["comicCid"]}/{post["episodeCid"]}'

    def get_date(self, _) -> None:
        return None

    def get_category(self, _) -> Category:
        return Category(4)

    async def parse(self, raw_post: RawPost) -> Post:
        url = f'https://terra-historicus.hypergryph.com/comic/{raw_post["comicCid"]}/episode/{raw_post["episodeCid"]}'
        return Post(
            "terra-historicus",
            text=f'{raw_post["title"]} - {raw_post["episodeShortTitle"]}',
            pics=[raw_post["coverUrl"]],
            url=url,
            target_name="泰拉记事社漫画",
            compress=True,
            override_use_pic=False,
        )
|
||||
@@ -0,0 +1,393 @@
|
||||
import json
|
||||
import re
|
||||
from copy import deepcopy
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime, timedelta
|
||||
from typing import Any, Optional
|
||||
|
||||
from httpx import AsyncClient
|
||||
from nonebot.log import logger
|
||||
from typing_extensions import Self
|
||||
|
||||
from ..post import Post
|
||||
from ..types import ApiError, Category, RawPost, Tag, Target
|
||||
from ..utils import SchedulerConfig
|
||||
from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage, StatusChange
|
||||
|
||||
|
||||
class BilibiliSchedConf(SchedulerConfig):
    """Scheduler shared by the Bilibili platforms; keeps an anonymous cookie fresh."""

    name = "bilibili.com"
    schedule_type = "interval"
    schedule_setting = {"seconds": 10}

    # When the anonymous session cookie was last (re)initialised.
    _client_refresh_time: datetime
    # Refresh the session cookie after this long.
    cookie_expire_time = timedelta(hours=5)

    def __init__(self):
        self._client_refresh_time = datetime(
            year=2000, month=1, day=1
        )  # an already-expired time, so the first use triggers a refresh
        super().__init__()

    async def _init_session(self):
        # Hitting the homepage sets anonymous cookies on the shared client.
        res = await self.default_http_client.get("https://www.bilibili.com/")
        if res.status_code != 200:
            logger.warning("unable to refresh temp cookie")
        else:
            self._client_refresh_time = datetime.now()

    async def _refresh_client(self):
        # Re-initialise the session once the cookie is considered expired.
        if datetime.now() - self._client_refresh_time > self.cookie_expire_time:
            await self._init_session()

    async def get_client(self, target: Target) -> AsyncClient:
        await self._refresh_client()
        return await super().get_client(target)

    async def get_query_name_client(self) -> AsyncClient:
        await self._refresh_client()
        return await super().get_query_name_client()
|
||||
|
||||
|
||||
class Bilibili(NewMessage):
    """Bilibili user dynamics (动态) watcher."""

    categories = {
        1: "一般动态",
        2: "专栏文章",
        3: "视频",
        4: "纯文字",
        5: "转发"
        # (category 5 was formerly "短视频" / short video)
    }
    platform_name = "bilibili"
    enable_tag = True
    enabled = True
    is_common = True
    scheduler = BilibiliSchedConf
    name = "B站"
    has_target = True
    parse_target_promot = "请输入用户主页的链接"

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Resolve a uid to the user's display name; None when the API errors."""
        res = await client.get(
            "https://api.bilibili.com/x/space/acc/info", params={"mid": target}
        )
        res_data = json.loads(res.text)
        if res_data["code"]:
            return None
        return res_data["data"]["name"]

    @classmethod
    async def parse_target(cls, target_text: str) -> Target:
        """Accept a bare uid or a space.bilibili.com profile URL."""
        if re.match(r"\d+", target_text):
            return Target(target_text)
        elif m := re.match(r"(?:https?://)?space\.bilibili\.com/(\d+)", target_text):
            return Target(m.group(1))
        else:
            raise cls.ParseTargetException()

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Fetch the user's dynamics feed; raises ApiError on a non-zero code."""
        params = {"host_uid": target, "offset": 0, "need_top": 0}
        res = await self.client.get(
            "https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history",
            params=params,
            timeout=4.0,
        )
        res_dict = json.loads(res.text)
        if res_dict["code"] == 0:
            return res_dict["data"].get("cards")
        else:
            raise ApiError(res.request.url)

    def get_id(self, post: RawPost) -> Any:
        return post["desc"]["dynamic_id"]

    def get_date(self, post: RawPost) -> int:
        return post["desc"]["timestamp"]

    def _do_get_category(self, post_type: int) -> Category:
        # Map Bilibili's dynamic type codes onto this platform's category ids.
        if post_type == 2:
            return Category(1)
        elif post_type == 64:
            return Category(2)
        elif post_type == 8:
            return Category(3)
        elif post_type == 4:
            return Category(4)
        elif post_type == 1:
            # repost of another dynamic
            return Category(5)
        raise CategoryNotRecognize(post_type)

    def get_category(self, post: RawPost) -> Category:
        post_type = post["desc"]["type"]
        return self._do_get_category(post_type)

    def get_tags(self, raw_post: RawPost) -> list[Tag]:
        return [
            *map(
                lambda tp: tp["topic_name"],
                raw_post["display"]["topic_info"]["topic_details"],
            )
        ]

    def _get_info(self, post_type: Category, card) -> tuple[str, list]:
        """Extract (text, picture list) from a dynamic card by category."""
        if post_type == 1:
            # plain dynamic with pictures
            text = card["item"]["description"]
            pic = [img["img_src"] for img in card["item"]["pictures"]]
        elif post_type == 2:
            # article
            text = "{} {}".format(card["title"], card["summary"])
            pic = card["image_urls"]
        elif post_type == 3:
            # video
            text = card["dynamic"]
            pic = [card["pic"]]
        elif post_type == 4:
            # text-only dynamic
            text = card["item"]["content"]
            pic = []
        else:
            raise CategoryNotSupport(post_type)
        return text, pic

    async def parse(self, raw_post: RawPost) -> Post:
        """Build a Post from a raw dynamic, handling reposts specially."""
        card_content = json.loads(raw_post["card"])
        post_type = self.get_category(raw_post)
        target_name = raw_post["desc"]["user_profile"]["info"]["uname"]
        if post_type >= 1 and post_type < 5:
            url = ""
            if post_type == 1:
                # plain dynamic
                url = "https://t.bilibili.com/{}".format(
                    raw_post["desc"]["dynamic_id_str"]
                )
            elif post_type == 2:
                # article
                url = "https://www.bilibili.com/read/cv{}".format(
                    raw_post["desc"]["rid"]
                )
            elif post_type == 3:
                # video
                url = "https://www.bilibili.com/video/{}".format(
                    raw_post["desc"]["bvid"]
                )
            elif post_type == 4:
                # text-only dynamic
                url = "https://t.bilibili.com/{}".format(
                    raw_post["desc"]["dynamic_id_str"]
                )
            text, pic = self._get_info(post_type, card_content)
        elif post_type == 5:
            # repost: combine the repost comment with the original dynamic's text
            url = "https://t.bilibili.com/{}".format(raw_post["desc"]["dynamic_id_str"])
            text = card_content["item"]["content"]
            orig_type = card_content["item"]["orig_type"]
            orig = json.loads(card_content["origin"])
            orig_text, _ = self._get_info(self._do_get_category(orig_type), orig)
            text += "\n--------------\n"
            text += orig_text
            pic = []
        else:
            raise CategoryNotSupport(post_type)
        return Post("bilibili", text=text, url=url, pics=pic, target_name=target_name)
|
||||
|
||||
|
||||
class Bilibililive(StatusChange):
    """Bilibili live-stream watcher: stream-start and title-change notices."""

    # Author : Sichongzou
    # Date : 2022-5-18 8:54
    # Description : bilibili stream-start reminder
    # E-mail : 1557157806@qq.com
    categories = {1: "开播提醒", 2: "标题更新提醒"}
    platform_name = "bilibili-live"
    enable_tag = False
    enabled = True
    is_common = True
    scheduler = BilibiliSchedConf
    name = "Bilibili直播"
    has_target = True

    @dataclass
    class Info:
        # Snapshot of a live room's state as returned by the status API.
        uname: str
        live_status: int
        room_id: str
        title: str
        cover_from_user: str
        keyframe: str
        category: Category = field(default=Category(0))

        def __init__(self, raw_info: dict):
            # Deliberately replaces the generated dataclass __init__:
            # fields are populated straight from the raw API payload.
            self.__dict__.update(raw_info)

        def is_live_turn_on(self, old_info: Self) -> bool:
            # live_status codes: 0 = offline, 1 = live, 2 = rotating replays.
            # True only on a transition into the live state.
            if self.live_status == 1 and old_info.live_status != self.live_status:
                return True

            return False

        def is_title_update(self, old_info: Self) -> bool:
            # True when the room is live and its title differs from before.
            if self.live_status == 1 and old_info.title != self.title:
                return True

            return False

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Resolve a uid to the user's display name; None when the API errors."""
        res = await client.get(
            "https://api.bilibili.com/x/space/acc/info", params={"mid": target}
        )
        res_data = json.loads(res.text)
        if res_data["code"]:
            return None
        return res_data["data"]["name"]

    async def get_status(self, target: Target) -> Info:
        """Fetch the current room state for the uid; raises FetchError on failure."""
        params = {"uids[]": target}
        # from https://github.com/SocialSisterYi/bilibili-API-collect/blob/master/live/info.md#%E6%89%B9%E9%87%8F%E6%9F%A5%E8%AF%A2%E7%9B%B4%E6%92%AD%E9%97%B4%E7%8A%B6%E6%80%81
        res = await self.client.get(
            "https://api.live.bilibili.com/room/v1/Room/get_status_info_by_uids",
            params=params,
            timeout=4.0,
        )
        res_dict = json.loads(res.text)
        if res_dict["code"] == 0:
            data = res_dict["data"][target]

            info = self.Info(data)

            return info
        else:
            raise self.FetchError()

    def compare_status(
        self, target: Target, old_status: Info, new_status: Info
    ) -> list[RawPost]:
        """Return at most one status snapshot, tagged with its event category."""
        if new_status.is_live_turn_on(old_status):
            # stream just went live (the new/old argument order matters)
            current_status = deepcopy(new_status)
            current_status.category = Category(1)
            return [current_status]
        elif new_status.is_title_update(old_status):
            # title changed while live (the new/old argument order matters)
            current_status = deepcopy(new_status)
            current_status.category = Category(2)
            return [current_status]
        else:
            return []

    def get_category(self, status: Info) -> Category:
        # compare_status must have assigned a real category by now.
        assert status.category != Category(0)
        return status.category

    async def parse(self, raw_info: Info) -> Post:
        url = "https://live.bilibili.com/{}".format(raw_info.room_id)
        pic = [raw_info.keyframe]
        target_name = raw_info.uname
        title = raw_info.title
        return Post(
            self.name,
            text=title,
            url=url,
            pics=list(pic),
            target_name=target_name,
            compress=True,
        )
|
||||
|
||||
|
||||
class BilibiliBangumi(StatusChange):
    """Bilibili series (剧集/bangumi) new-episode watcher."""

    categories = {}
    platform_name = "bilibili-bangumi"
    enable_tag = False
    enabled = True
    is_common = True
    scheduler = BilibiliSchedConf
    name = "Bilibili剧集"
    has_target = True
    parse_target_promot = "请输入剧集主页"

    _url = "https://api.bilibili.com/pgc/review/user"

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Resolve a media_id to the series title; None when the API errors."""
        res = await client.get(cls._url, params={"media_id": target})
        res_data = res.json()
        if res_data["code"]:
            return None
        return res_data["result"]["media"]["title"]

    @classmethod
    async def parse_target(cls, target_string: str) -> Target:
        """Accept a bare media id, an "md<id>" token, or a media page URL."""
        if re.match(r"\d+", target_string):
            return Target(target_string)
        elif m := re.match(r"md(\d+)", target_string):
            return Target(m.group(1))
        elif m := re.match(
            r"(?:https?://)?www\.bilibili\.com/bangumi/media/md(\d+)/", target_string
        ):
            return Target(m.group(1))
        raise cls.ParseTargetException()

    async def get_status(self, target: Target):
        """Return the latest-episode marker dict; raises FetchError on failure."""
        res = await self.client.get(
            self._url,
            params={"media_id": target},
            timeout=4.0,
        )
        res_dict = res.json()
        if res_dict["code"] == 0:
            return {
                "index": res_dict["result"]["media"]["new_ep"]["index"],
                "index_show": res_dict["result"]["media"]["new_ep"]["index"],
                "season_id": res_dict["result"]["media"]["season_id"],
            }
        else:
            raise self.FetchError

    def compare_status(self, target: Target, old_status, new_status) -> list[RawPost]:
        # A changed episode index means a new episode was published.
        if new_status["index"] != old_status["index"]:
            return [new_status]
        else:
            return []

    async def parse(self, raw_post: RawPost) -> Post:
        """Fetch season details and build a Post for the newest regular episode."""
        detail_res = await self.client.get(
            f'https://api.bilibili.com/pgc/view/web/season?season_id={raw_post["season_id"]}'
        )
        detail_dict = detail_res.json()
        latest_episode = None
        # Scan from the newest episode backwards for a regular or member
        # episode, skipping previews and other specially-badged entries.
        for episode in detail_dict["result"]["episodes"][::-1]:
            if episode["badge"] in ("", "会员"):
                latest_episode = episode
                break
        if not latest_episode:
            # BUGFIX: the fallback used to assign the whole episode list,
            # which would crash on the subscripting below; fall back to the
            # newest episode instead.
            latest_episode = detail_dict["result"]["episodes"][-1]

        url = latest_episode["link"]
        pic: list[str] = [latest_episode["cover"]]
        target_name = detail_dict["result"]["season_title"]
        text = latest_episode["share_copy"]
        return Post(
            self.name,
            text=text,
            url=url,
            pics=list(pic),
            target_name=target_name,
            compress=True,
        )
|
||||
@@ -0,0 +1,48 @@
|
||||
from typing import Any, Optional
|
||||
|
||||
from httpx import AsyncClient
|
||||
|
||||
from ..post import Post
|
||||
from ..types import RawPost, Target
|
||||
from ..utils import scheduler
|
||||
from .platform import NewMessage
|
||||
|
||||
|
||||
class FF14(NewMessage):
    """Official announcement feed for 最终幻想XIV (FFXIV, CN service)."""

    categories = {}
    platform_name = "ff14"
    name = "最终幻想XIV官方公告"
    enable_tag = False
    enabled = True
    is_common = False
    scheduler_class = "ff14"
    scheduler = scheduler("interval", {"seconds": 60})
    has_target = False

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        # Target-less platform: the display name is a constant.
        return "最终幻想XIV官方公告"

    async def get_sub_list(self, _) -> list[RawPost]:
        raw_data = await self.client.get(
            "https://ff.web.sdo.com/inc/newdata.ashx?url=List?gameCode=ff&category=5309,5310,5311,5312,5313&pageIndex=0&pageSize=5"
        )
        return raw_data.json()["Data"]

    def get_id(self, post: RawPost) -> Any:
        """Use the publish time as the post ID.

        The official site sometimes edits the content of earlier articles
        in place, so the publish time is the more stable identity.
        """
        return post["PublishDate"]

    def get_date(self, _: RawPost) -> None:
        return None

    async def parse(self, raw_post: RawPost) -> Post:
        text = f'{raw_post["Title"]}\n{raw_post["Summary"]}'
        # NOTE(review): the feed's "Author" field appears to carry the article
        # link here — confirm against the API response schema.
        url = raw_post["Author"]
        return Post("ff14", text=text, url=url, target_name="最终幻想XIV官方公告")
|
||||
@@ -0,0 +1,192 @@
|
||||
import re
|
||||
import time
|
||||
import traceback
|
||||
|
||||
from bs4 import BeautifulSoup, Tag
|
||||
from httpx import AsyncClient
|
||||
from nonebot.log import logger
|
||||
from nonebot.plugin import require
|
||||
|
||||
from ..post import Post
|
||||
from ..types import Category, RawPost, Target
|
||||
from ..utils import SchedulerConfig, http_client
|
||||
from .platform import CategoryNotRecognize, CategoryNotSupport, NewMessage
|
||||
|
||||
|
||||
class McbbsnewsSchedConf(SchedulerConfig):
    """Scheduler configuration for the MCBBS news platform."""

    name = "mcbbsnews"
    schedule_type = "interval"
    # The forum updates slowly; poll every 30 minutes.
    schedule_setting = {"minutes": 30}
|
||||
|
||||
|
||||
class McbbsNews(NewMessage):
|
||||
categories: dict[int, str] = {
|
||||
1: "Java版资讯",
|
||||
2: "基岩版资讯",
|
||||
3: "块讯",
|
||||
4: "基岩块讯",
|
||||
5: "周边",
|
||||
6: "主机",
|
||||
7: "时评",
|
||||
}
|
||||
enable_tag: bool = False
|
||||
platform_name: str = "mcbbsnews"
|
||||
name: str = "MCBBS幻翼块讯"
|
||||
enabled: bool = True
|
||||
is_common: bool = False
|
||||
scheduler = McbbsnewsSchedConf
|
||||
has_target: bool = False
|
||||
|
||||
_known_cats: dict[int, str] = {
|
||||
1: "Java版资讯",
|
||||
2: "基岩版资讯",
|
||||
3: "块讯",
|
||||
4: "基岩块讯",
|
||||
5: "周边",
|
||||
6: "主机",
|
||||
7: "时评",
|
||||
}
|
||||
|
||||
@classmethod
|
||||
async def get_target_name(cls, client: AsyncClient, target: Target) -> str:
|
||||
return cls.name
|
||||
|
||||
async def get_sub_list(self, _: Target) -> list[RawPost]:
|
||||
url: str = "https://www.mcbbs.net/forum-news-1.html"
|
||||
|
||||
html = await self.client.get(url)
|
||||
soup = BeautifulSoup(html.text, "html.parser")
|
||||
raw_post_list = soup.find_all("tbody", id=re.compile(r"normalthread_[0-9]*"))
|
||||
post_list = self._gen_post_list(raw_post_list)
|
||||
|
||||
return post_list
|
||||
|
||||
def _gen_post_list(self, raw_post_list: list[Tag]) -> list[RawPost]:
|
||||
"""解析生成推文列表"""
|
||||
post_list = []
|
||||
|
||||
for raw_post in raw_post_list:
|
||||
post = {}
|
||||
|
||||
url_tag = raw_post.find("a", class_="s xst")
|
||||
if isinstance(url_tag, Tag):
|
||||
post["url"] = url_tag.get("href")
|
||||
title_tag = raw_post.find("a", class_="s xst")
|
||||
if isinstance(title_tag, Tag):
|
||||
title_string = title_tag.string
|
||||
if isinstance(title_string, str):
|
||||
post["title"] = self._format_text(title_string, "title")
|
||||
post["category"] = raw_post.select("th em a")[0].string
|
||||
post["author"] = raw_post.select("td:nth-of-type(2) cite a")[0].string
|
||||
post["id"] = raw_post["id"]
|
||||
raw_date = (
|
||||
raw_post.select("td:nth-of-type(2) em span span")[0]["title"]
|
||||
if raw_post.select("td:nth-of-type(2) em span span")
|
||||
else raw_post.select("td:nth-of-type(2) em span")[0].string
|
||||
)
|
||||
if isinstance(raw_date, str):
|
||||
post["date"] = self._stamp_date(raw_date)
|
||||
|
||||
post_list.append(post)
|
||||
|
||||
return post_list
|
||||
|
||||
@staticmethod
|
||||
def _format_text(raw_text: str, mode: str) -> str:
|
||||
"""
|
||||
处理BeautifulSoup生成的string中奇怪的回车+连续空格
|
||||
|
||||
参数:
|
||||
title: 处理标题
|
||||
"""
|
||||
match mode:
|
||||
case "title":
|
||||
ftext = re.sub(r"\n\s*", " ", raw_text)
|
||||
case _:
|
||||
raise NotImplementedError("不支持的处理模式: {mode}")
|
||||
|
||||
return ftext
|
||||
|
||||
@staticmethod
|
||||
def _stamp_date(raw_date: str) -> int:
|
||||
"""
|
||||
将时间转化为时间戳:
|
||||
yyyy-mm-dd -> timestamp
|
||||
"""
|
||||
time_stamp = int(time.mktime(time.strptime(raw_date, "%Y-%m-%d")))
|
||||
|
||||
return time_stamp
|
||||
|
||||
def get_id(self, post: RawPost) -> str:
|
||||
return post["id"]
|
||||
|
||||
def get_date(self, _: RawPost) -> int | None:
|
||||
# 获取datetime精度只到日期,故暂时舍弃
|
||||
# return post["date"]
|
||||
return None
|
||||
|
||||
def get_category(self, post: RawPost) -> Category:
|
||||
categoty_name = post["category"]
|
||||
category_keys = list(self.categories.keys())
|
||||
category_values = list(self.categories.values())
|
||||
known_category_values = list(self._known_cats.values())
|
||||
|
||||
if categoty_name in category_values:
|
||||
category_id = category_keys[category_values.index(categoty_name)]
|
||||
elif categoty_name in known_category_values:
|
||||
raise CategoryNotSupport("McbbsNews订阅暂不支持 {}".format(categoty_name))
|
||||
else:
|
||||
raise CategoryNotRecognize("Mcbbsnews订阅尚未识别 {}".format(categoty_name))
|
||||
return category_id
|
||||
|
||||
async def parse(self, post: RawPost) -> Post:
|
||||
"""获取并分配正式推文交由相应的函数渲染"""
|
||||
post_url = "https://www.mcbbs.net/{}".format(post["url"])
|
||||
async with http_client() as client:
|
||||
html = await client.get(post_url)
|
||||
html.raise_for_status()
|
||||
|
||||
soup = BeautifulSoup(html.text, "html.parser")
|
||||
post_body = soup.find("td", id=re.compile(r"postmessage_[0-9]*"))
|
||||
if isinstance(post_body, Tag):
|
||||
post_id = post_body.attrs.get("id")
|
||||
else:
|
||||
post_id = None
|
||||
pics = await self._news_render(post_url, f"#{post_id}")
|
||||
|
||||
return Post(
|
||||
self.name,
|
||||
text="{}\n│\n└由 {} 发表".format(post["title"], post["author"]),
|
||||
url=post_url,
|
||||
pics=list(pics),
|
||||
target_name=post["category"],
|
||||
)
|
||||
|
||||
async def _news_render(self, url: str, selector: str) -> list[bytes]:
|
||||
"""
|
||||
将给定的url网页的指定CSS选择器部分渲染成图片
|
||||
|
||||
注意:
|
||||
一般而言每条新闻的长度都很可观,图片生成时间比较喜人
|
||||
"""
|
||||
require("nonebot_plugin_htmlrender")
|
||||
from nonebot_plugin_htmlrender import capture_element, text_to_pic
|
||||
|
||||
try:
|
||||
assert url
|
||||
pic_data = await capture_element(
|
||||
url,
|
||||
selector,
|
||||
viewport={"width": 1000, "height": 6400},
|
||||
device_scale_factor=3,
|
||||
)
|
||||
assert pic_data
|
||||
except:
|
||||
err_info = traceback.format_exc()
|
||||
logger.warning(f"渲染错误:{err_info}")
|
||||
|
||||
err_pic0 = await text_to_pic("错误发生!")
|
||||
err_pic1 = await text_to_pic(err_info)
|
||||
return [err_pic0, err_pic1]
|
||||
else:
|
||||
return [pic_data]
|
||||
@@ -0,0 +1,142 @@
|
||||
import re
|
||||
from typing import Any, Optional
|
||||
|
||||
from httpx import AsyncClient
|
||||
|
||||
from ..post import Post
|
||||
from ..types import ApiError, RawPost, Target
|
||||
from ..utils import SchedulerConfig
|
||||
from .platform import NewMessage
|
||||
|
||||
|
||||
class NcmSchedConf(SchedulerConfig):
    """Scheduler shared by the NetEase Cloud Music platforms."""

    name = "music.163.com"
    schedule_type = "interval"
    schedule_setting = {"minutes": 1}
|
||||
|
||||
|
||||
class NcmArtist(NewMessage):
    """NetEase Cloud Music artist (网易云-歌手) new-album watcher."""

    categories = {}
    platform_name = "ncm-artist"
    enable_tag = False
    enabled = True
    is_common = True
    scheduler = NcmSchedConf
    name = "网易云-歌手"
    has_target = True
    parse_target_promot = "请输入歌手主页(包含数字ID)的链接"

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Resolve an artist id to the artist's name; raises ApiError on failure."""
        res = await client.get(
            "https://music.163.com/api/artist/albums/{}".format(target),
            headers={"Referer": "https://music.163.com/"},
        )
        res_data = res.json()
        if res_data["code"] != 200:
            raise ApiError(res.request.url)
        return res_data["artist"]["name"]

    @classmethod
    async def parse_target(cls, target_text: str) -> Target:
        """Accept a bare numeric artist id or an artist page URL."""
        if re.match(r"^\d+$", target_text):
            return Target(target_text)
        elif match := re.match(
            r"(?:https?://)?music\.163\.com/#/artist\?id=(\d+)", target_text
        ):
            return Target(match.group(1))
        else:
            raise cls.ParseTargetException()

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        res = await self.client.get(
            "https://music.163.com/api/artist/albums/{}".format(target),
            headers={"Referer": "https://music.163.com/"},
        )
        res_data = res.json()
        if res_data["code"] != 200:
            return []
        else:
            return res_data["hotAlbums"]

    def get_id(self, post: RawPost) -> Any:
        return post["id"]

    def get_date(self, post: RawPost) -> int:
        # publishTime is in milliseconds.
        return post["publishTime"] // 1000

    async def parse(self, raw_post: RawPost) -> Post:
        text = "新专辑发布:{}".format(raw_post["name"])
        target_name = raw_post["artist"]["name"]
        pics = [raw_post["picUrl"]]
        url = "https://music.163.com/#/album?id={}".format(raw_post["id"])
        return Post(
            "ncm-artist", text=text, url=url, pics=pics, target_name=target_name
        )
|
||||
|
||||
|
||||
class NcmRadio(NewMessage):
    """NetEase Cloud Music radio (网易云-电台) new-program watcher."""

    categories = {}
    platform_name = "ncm-radio"
    enable_tag = False
    enabled = True
    is_common = False
    scheduler = NcmSchedConf
    name = "网易云-电台"
    has_target = True
    parse_target_promot = "请输入主播电台主页(包含数字ID)的链接"

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Resolve a radio id to its name; None when the API fails or returns nothing."""
        res = await client.post(
            "http://music.163.com/api/dj/program/byradio",
            headers={"Referer": "https://music.163.com/"},
            data={"radioId": target, "limit": 1000, "offset": 0},
        )
        res_data = res.json()
        # BUGFIX: the empty check used to be `res_data["programs"] == 0`,
        # which is always False for a list, letting an empty result reach the
        # `[0]` subscript below and raise IndexError. Check emptiness instead.
        if res_data["code"] != 200 or not res_data["programs"]:
            return None
        return res_data["programs"][0]["radio"]["name"]

    @classmethod
    async def parse_target(cls, target_text: str) -> Target:
        """Accept a bare numeric radio id or a djradio page URL."""
        if re.match(r"^\d+$", target_text):
            return Target(target_text)
        elif match := re.match(
            r"(?:https?://)?music\.163\.com/#/djradio\?id=(\d+)", target_text
        ):
            return Target(match.group(1))
        else:
            raise cls.ParseTargetException()

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        res = await self.client.post(
            "http://music.163.com/api/dj/program/byradio",
            headers={"Referer": "https://music.163.com/"},
            data={"radioId": target, "limit": 1000, "offset": 0},
        )
        res_data = res.json()
        if res_data["code"] != 200:
            return []
        else:
            return res_data["programs"]

    def get_id(self, post: RawPost) -> Any:
        return post["id"]

    def get_date(self, post: RawPost) -> int:
        # createTime is in milliseconds.
        return post["createTime"] // 1000

    async def parse(self, raw_post: RawPost) -> Post:
        text = "网易云电台更新:{}".format(raw_post["name"])
        target_name = raw_post["radio"]["name"]
        pics = [raw_post["coverUrl"]]
        url = "https://music.163.com/#/program/{}".format(raw_post["id"])
        return Post("ncm-radio", text=text, url=url, pics=pics, target_name=target_name)
|
||||
@@ -0,0 +1,478 @@
|
||||
import json
|
||||
import ssl
|
||||
import time
|
||||
import typing
|
||||
from abc import ABC, abstractmethod
|
||||
from collections import defaultdict
|
||||
from dataclasses import dataclass
|
||||
from typing import Any, Collection, Optional, Type
|
||||
|
||||
import httpx
|
||||
from httpx import AsyncClient
|
||||
from nonebot.log import logger
|
||||
|
||||
from ..plugin_config import plugin_config
|
||||
from ..post import Post
|
||||
from ..types import Category, RawPost, Tag, Target, User, UserSubInfo
|
||||
from ..utils import ProcessContext, SchedulerConfig
|
||||
|
||||
|
||||
class CategoryNotSupport(Exception):
    """Raised by ``get_category`` when the post's category is recognized but
    intentionally unsupported, or its parsing is not implemented yet."""
|
||||
|
||||
|
||||
class CategoryNotRecognize(Exception):
    """Raised by ``get_category`` when the post's category cannot be determined."""
|
||||
|
||||
|
||||
class RegistryMeta(type):
    """Metaclass that records every concrete subclass on its base class.

    Class keyword arguments:
      ``base=True``     — this class roots a fresh ``registry`` list;
      ``abstract=True`` — this class is skipped (not registered).
    """

    def __new__(cls, name, bases, namespace, **kwargs):
        # Strip the custom keywords before delegating to type.__new__,
        # which would otherwise forward them to __init_subclass__.
        return super().__new__(cls, name, bases, namespace)

    def __init__(cls, name, bases, namespace, **kwargs):
        is_base = bool(kwargs.get("base"))
        is_abstract = bool(kwargs.get("abstract"))
        if is_base:
            # Root of a new registration tree.
            cls.registry = []
        elif not is_abstract:
            # Concrete subclass: add it to the inherited registry.
            cls.registry.append(cls)
        super().__init__(name, bases, namespace, **kwargs)
|
||||
|
||||
|
||||
class PlatformMeta(RegistryMeta):
    """Registry metaclass that also equips every platform class with its own
    per-class ``store`` dict and a display-name -> category-id map."""

    categories: dict[Category, str]
    store: dict[Target, Any]

    def __init__(cls, name, bases, namespace, **kwargs):
        # Fresh containers per class so subclasses never share mutable state.
        cls.reverse_category = {}
        cls.store = {}
        if hasattr(cls, "categories") and cls.categories:
            # Invert {category_id: display_name} for name-based lookup.
            for key, val in cls.categories.items():
                cls.reverse_category[val] = key
        super().__init__(name, bases, namespace, **kwargs)
|
||||
|
||||
|
||||
class PlatformABCMeta(PlatformMeta, ABC):
    """Combined metaclass: platform registration plus abstract-method support."""
|
||||
|
||||
|
||||
class Platform(metaclass=PlatformABCMeta, base=True):
    """Abstract base of every subscription platform.

    Subclasses declare class-level metadata (``platform_name``, ``categories``,
    ``scheduler``, …) and implement the abstract fetch/parse hooks; the
    registry metaclass collects every concrete subclass into ``registry``.
    """

    scheduler: Type[SchedulerConfig]
    ctx: ProcessContext
    is_common: bool
    enabled: bool
    name: str
    has_target: bool
    categories: dict[Category, str]
    enable_tag: bool
    platform_name: str
    parse_target_promot: Optional[str] = None
    registry: list[Type["Platform"]]
    client: AsyncClient
    reverse_category: dict[str, Category]

    @classmethod
    @abstractmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Resolve a target id to its human-readable name; None on failure."""
        ...

    @abstractmethod
    async def fetch_new_post(
        self, target: Target, users: list[UserSubInfo]
    ) -> list[tuple[User, list[Post]]]:
        """Fetch new posts of ``target`` and group them per subscribed user."""
        ...

    async def do_fetch_new_post(
        self, target: Target, users: list[UserSubInfo]
    ) -> list[tuple[User, list[Post]]]:
        """Wrapper around ``fetch_new_post`` that degrades network and SSL
        errors to an empty result; JSON decode errors are logged and re-raised."""
        try:
            return await self.fetch_new_post(target, users)
        except httpx.RequestError as err:
            logger.warning(
                "network connection error: {}, url: {}".format(
                    type(err), err.request.url
                )
            )
            return []
        except ssl.SSLError as err:
            logger.warning(f"ssl error: {err}")
            return []
        except json.JSONDecodeError as err:
            logger.warning(f"json error, parsing: {err.doc}")
            raise err

    @abstractmethod
    async def parse(self, raw_post: RawPost) -> Post:
        """Convert one raw post into a sendable Post."""
        ...

    async def do_parse(self, raw_post: RawPost) -> Post:
        "Framework entry point for parsing; subclasses may override to add caching."
        return await self.parse(raw_post)

    def __init__(self, context: ProcessContext, client: AsyncClient):
        super().__init__()
        self.client = client
        self.ctx = context

    class ParseTargetException(Exception):
        # Raised by parse_target when user input cannot be turned into a Target.
        pass

    @classmethod
    async def parse_target(cls, target_string: str) -> Target:
        # Default behavior: accept the raw string verbatim; platforms override
        # this to also accept profile-page URLs.
        return Target(target_string)

    @abstractmethod
    def get_tags(self, raw_post: RawPost) -> Optional[Collection[Tag]]:
        "Return Tag list of given RawPost"

    @classmethod
    def get_stored_data(cls, target: Target) -> Any:
        # Per-class in-memory state; PlatformMeta gives each class its own dict.
        return cls.store.get(target)

    @classmethod
    def set_stored_data(cls, target: Target, data: Any):
        cls.store[target] = data

    def tag_separator(self, stored_tags: list[Tag]) -> tuple[list[Tag], list[Tag]]:
        """Split stored tags into (subscribed, banned); a leading "~" marks a ban."""
        subscribed_tags = []
        banned_tags = []
        for tag in stored_tags:
            if tag.startswith("~"):
                banned_tags.append(tag.lstrip("~"))
            else:
                subscribed_tags.append(tag)
        return subscribed_tags, banned_tags

    def is_banned_post(
        self,
        post_tags: Collection[Tag],
        subscribed_tags: list[Tag],
        banned_tags: list[Tag],
    ) -> bool:
        """Return True when the post must be hidden.

        Any banned tag present -> True (highest priority).
        Otherwise, any subscribed tag present -> False.
        An empty subscribed list -> False (nothing is required).
        """
        # Any banned tag on the post bans it outright.
        if banned_tags:
            for tag in post_tags or []:
                if tag in banned_tags:
                    return True
        # After the ban check: with a subscription list, the post survives
        # only when it carries at least one subscribed tag.
        if subscribed_tags:
            ban_it = True
            for tag in post_tags or []:
                if tag in subscribed_tags:
                    ban_it = False
            return ban_it
        else:
            return False

    async def filter_user_custom(
        self, raw_post_list: list[RawPost], cats: list[Category], tags: list[Tag]
    ) -> list[RawPost]:
        """Apply one user's category and tag preferences to a post list."""
        res: list[RawPost] = []
        for raw_post in raw_post_list:
            if self.categories:
                cat = self.get_category(raw_post)
                # An empty cats list means "all categories".
                if cats and cat not in cats:
                    continue
            if self.enable_tag and tags:
                raw_post_tags = self.get_tags(raw_post)
                if isinstance(raw_post_tags, Collection) and self.is_banned_post(
                    raw_post_tags, *self.tag_separator(tags)
                ):
                    continue
            res.append(raw_post)
        return res

    async def dispatch_user_post(
        self, target: Target, new_posts: list[RawPost], users: list[UserSubInfo]
    ) -> list[tuple[User, list[Post]]]:
        """Filter and parse the new posts once per subscribed user."""
        res: list[tuple[User, list[Post]]] = []
        for user, cats, required_tags in users:
            user_raw_post = await self.filter_user_custom(
                new_posts, cats, required_tags
            )
            user_post: list[Post] = []
            for raw_post in user_raw_post:
                user_post.append(await self.do_parse(raw_post))
            res.append((user, user_post))
        return res

    @abstractmethod
    def get_category(self, post: RawPost) -> Optional[Category]:
        "Return category of given Rawpost"
        raise NotImplementedError()
|
||||
|
||||
|
||||
class MessageProcess(Platform, abstract=True):
    "General message process fetch, parse, filter progress"

    def __init__(self, ctx: ProcessContext, client: AsyncClient):
        super().__init__(ctx, client)
        # Parse results cached per post id for one fetch round; subclasses
        # clear it after dispatching.
        self.parse_cache: dict[Any, Post] = dict()

    @abstractmethod
    def get_id(self, post: RawPost) -> Any:
        "Get post id of given RawPost"

    async def do_parse(self, raw_post: RawPost) -> Post:
        """Parse with per-id caching and a small retry loop (3 attempts)."""
        post_id = self.get_id(raw_post)
        if post_id not in self.parse_cache:
            retry_times = 3
            while retry_times:
                try:
                    self.parse_cache[post_id] = await self.parse(raw_post)
                    break
                except Exception as err:
                    retry_times -= 1
                    if not retry_times:
                        # Out of retries: propagate the last failure.
                        raise err
        return self.parse_cache[post_id]

    @abstractmethod
    async def get_sub_list(self, target: Target) -> list[RawPost]:
        "Get post list of the given target"

    @abstractmethod
    def get_date(self, post: RawPost) -> Optional[int]:
        "Get post timestamp and return, return None if can't get the time"

    async def filter_common(self, raw_post_list: list[RawPost]) -> list[RawPost]:
        """Drop posts that are too old or whose category is unsupported."""
        res = []
        for raw_post in raw_post_list:
            # Skip posts older than two hours when init filtering is enabled.
            if (
                (post_time := self.get_date(raw_post))
                and time.time() - post_time > 2 * 60 * 60
                and plugin_config.bison_init_filter
            ):
                continue
            try:
                self.get_category(raw_post)
            except CategoryNotSupport as e:
                # Known but intentionally unsupported category: skip quietly.
                logger.info("未支持解析的推文类别:" + repr(e) + ",忽略")
                continue
            except CategoryNotRecognize as e:
                # Unknown category: log the recorded requests for diagnosis.
                logger.warning("未知推文类别:" + repr(e))
                msgs = self.ctx.gen_req_records()
                for m in msgs:
                    logger.warning(m)
                continue
            except NotImplementedError:
                # Platform has no category concept; keep the post.
                pass
            res.append(raw_post)
        return res
|
||||
|
||||
|
||||
class NewMessage(MessageProcess, abstract=True):
    "Fetch a list of messages, filter the new messages, dispatch it to different users"

    @dataclass
    class MessageStorage:
        # Per-target de-duplication state kept in the class-level store:
        # inited — whether the first fetch round has completed;
        # exists_posts — ids already seen.
        inited: bool
        exists_posts: set[Any]

    async def filter_common_with_diff(
        self, target: Target, raw_post_list: list[RawPost]
    ) -> list[RawPost]:
        """Apply the common filter, then keep only posts not seen before.

        The first round for a target (with bison_init_filter enabled) only
        records the currently visible ids and returns nothing, so that
        pre-existing posts are not re-broadcast.
        """
        filtered_post = await self.filter_common(raw_post_list)
        store = self.get_stored_data(target) or self.MessageStorage(False, set())
        res = []
        if not store.inited and plugin_config.bison_init_filter:
            # target not init
            for raw_post in filtered_post:
                post_id = self.get_id(raw_post)
                store.exists_posts.add(post_id)
            logger.info(
                "init {}-{} with {}".format(
                    self.platform_name, target, store.exists_posts
                )
            )
            store.inited = True
        else:
            for raw_post in filtered_post:
                post_id = self.get_id(raw_post)
                if post_id in store.exists_posts:
                    continue
                res.append(raw_post)
                store.exists_posts.add(post_id)
        self.set_stored_data(target, store)
        return res

    async def fetch_new_post(
        self, target: Target, users: list[UserSubInfo]
    ) -> list[tuple[User, list[Post]]]:
        """Fetch, de-duplicate, dispatch; clears the parse cache afterwards."""
        post_list = await self.get_sub_list(target)
        new_posts = await self.filter_common_with_diff(target, post_list)
        if not new_posts:
            return []
        else:
            for post in new_posts:
                logger.info(
                    "fetch new post from {} {}: {}".format(
                        self.platform_name,
                        target if self.has_target else "-",
                        self.get_id(post),
                    )
                )
        res = await self.dispatch_user_post(target, new_posts, users)
        self.parse_cache = {}
        return res
|
||||
|
||||
|
||||
class StatusChange(Platform, abstract=True):
    "Watch a status, and fire a post when status changes"

    class FetchError(RuntimeError):
        # Raised by get_status when the status endpoint cannot be read.
        pass

    @abstractmethod
    async def get_status(self, target: Target) -> Any:
        """Fetch the current status snapshot for ``target``."""
        ...

    @abstractmethod
    def compare_status(self, target: Target, old_status, new_status) -> list[RawPost]:
        """Diff two status snapshots; return raw posts describing the change."""
        ...

    @abstractmethod
    async def parse(self, raw_post: RawPost) -> Post:
        ...

    async def fetch_new_post(
        self, target: Target, users: list[UserSubInfo]
    ) -> list[tuple[User, list[Post]]]:
        """Poll the status; when it changed, dispatch the diff to subscribers.

        The very first poll (no stored status) only records the snapshot.
        """
        try:
            new_status = await self.get_status(target)
        except self.FetchError as err:
            logger.warning(f"fetching {self.name}-{target} error: {err}")
            raise
        res = []
        if old_status := self.get_stored_data(target):
            diff = self.compare_status(target, old_status, new_status)
            if diff:
                logger.info(
                    "status changes {} {}: {} -> {}".format(
                        self.platform_name,
                        target if self.has_target else "-",
                        old_status,
                        new_status,
                    )
                )
                res = await self.dispatch_user_post(target, diff, users)
        self.set_stored_data(target, new_status)
        return res
|
||||
|
||||
|
||||
class SimplePost(MessageProcess, abstract=True):
    "Fetch a list of messages, dispatch it to different users"

    async def fetch_new_post(
        self, target: Target, users: list[UserSubInfo]
    ) -> list[tuple[User, list[Post]]]:
        """Dispatch every fetched post without any new/old de-duplication."""
        new_posts = await self.get_sub_list(target)
        if not new_posts:
            return []
        else:
            for post in new_posts:
                logger.info(
                    "fetch new post from {} {}: {}".format(
                        self.platform_name,
                        target if self.has_target else "-",
                        self.get_id(post),
                    )
                )
        res = await self.dispatch_user_post(target, new_posts, users)
        # Drop cached parse results now that this round has been dispatched.
        self.parse_cache = {}
        return res
|
||||
|
||||
|
||||
def make_no_target_group(platform_list: list[Type[Platform]]) -> Type[Platform]:
    """Merge several target-less platforms sharing one platform_name into a
    single synthetic Platform subclass that fans fetches out to all of them.

    Raises RuntimeError when any member has a target, or when names,
    schedulers, or category ids conflict between members.
    """

    if typing.TYPE_CHECKING:
        # Static-only declaration so type checkers know the dynamic class shape.
        class NoTargetGroup(Platform, abstract=True):
            platform_list: list[Type[Platform]]
            platform_obj_list: list[Platform]

    DUMMY_STR = "_DUMMY"

    platform_name = platform_list[0].platform_name
    name = DUMMY_STR
    categories_keys = set()
    categories = {}
    scheduler = platform_list[0].scheduler

    # Validate the members and merge their category tables.
    for platform in platform_list:
        if platform.has_target:
            raise RuntimeError(
                "Platform {} should have no target".format(platform.name)
            )
        if name == DUMMY_STR:
            name = platform.name
        elif name != platform.name:
            raise RuntimeError("Platform name for {} not fit".format(platform_name))
        platform_category_key_set = set(platform.categories.keys())
        if platform_category_key_set & categories_keys:
            raise RuntimeError(
                "Platform categories for {} duplicate".format(platform_name)
            )
        categories_keys |= platform_category_key_set
        categories.update(platform.categories)
        if platform.scheduler != scheduler:
            raise RuntimeError(
                "Platform scheduler for {} not fit".format(platform_name)
            )

    def __init__(self: "NoTargetGroup", ctx: ProcessContext, client: AsyncClient):
        # Instantiate one member platform object per class, sharing ctx/client.
        Platform.__init__(self, ctx, client)
        self.platform_obj_list = []
        for platform_class in self.platform_list:
            self.platform_obj_list.append(platform_class(ctx, client))

    def __str__(self: "NoTargetGroup") -> str:
        return "[" + " ".join(map(lambda x: x.name, self.platform_list)) + "]"

    @classmethod
    async def get_target_name(cls, client: AsyncClient, target: Target):
        # Delegate to the first member; all members share the same name anyway.
        return await platform_list[0].get_target_name(client, target)

    async def fetch_new_post(
        self: "NoTargetGroup", target: Target, users: list[UserSubInfo]
    ):
        # Collect each member's results and merge them per user.
        res = defaultdict(list)
        for platform in self.platform_obj_list:
            platform_res = await platform.fetch_new_post(target=target, users=users)
            for user, posts in platform_res:
                res[user].extend(posts)
        return [[key, val] for key, val in res.items()]

    return type(
        "NoTargetGroup",
        (Platform,),
        {
            "platform_list": platform_list,
            "platform_name": platform_list[0].platform_name,
            "name": name,
            "categories": categories,
            "scheduler": scheduler,
            "is_common": platform_list[0].is_common,
            "enabled": True,
            "has_target": False,
            "enable_tag": False,
            "__init__": __init__,
            # Fix: __str__ was defined above but never placed in the class
            # namespace, so the generated class silently lost it.
            "__str__": __str__,
            "get_target_name": get_target_name,
            "fetch_new_post": fetch_new_post,
        },
        abstract=True,
    )
|
||||
@@ -0,0 +1,62 @@
|
||||
import calendar
|
||||
from typing import Any, Optional
|
||||
|
||||
import feedparser
|
||||
from bs4 import BeautifulSoup as bs
|
||||
from httpx import AsyncClient
|
||||
|
||||
from ..post import Post
|
||||
from ..types import RawPost, Target
|
||||
from ..utils import scheduler
|
||||
from .platform import NewMessage
|
||||
|
||||
|
||||
class Rss(NewMessage):
    """Generic RSS/Atom feed watcher; the subscription target is the feed URL."""

    categories = {}
    enable_tag = False
    platform_name = "rss"
    name = "Rss"
    enabled = True
    is_common = True
    scheduler = scheduler("interval", {"seconds": 30})
    has_target = True

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Use the feed's own title as the target display name."""
        res = await client.get(target, timeout=10.0)
        feed = feedparser.parse(res.text)
        return feed["feed"]["title"]

    def get_date(self, post: RawPost) -> int:
        # feedparser exposes the publish time as a UTC struct_time.
        return calendar.timegm(post.published_parsed)

    def get_id(self, post: RawPost) -> Any:
        return post.id

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Fetch and parse the feed, stamping each entry with the feed title."""
        res = await self.client.get(target, timeout=10.0)
        # Fix: parse the response body, not the httpx Response object
        # (consistent with get_target_name above).
        feed = feedparser.parse(res.text)
        entries = feed.entries
        for entry in entries:
            entry["_target_name"] = feed.feed.title
        return feed.entries

    async def parse(self, raw_post: RawPost) -> Post:
        """Build a Post from a feed entry: title + stripped description text,
        plus any inline <img> and media:content images."""
        text = raw_post.get("title", "") + "\n" if raw_post.get("title") else ""
        soup = bs(raw_post.description, "html.parser")
        text += soup.text.strip()
        pics = list(map(lambda x: x.attrs["src"], soup("img")))
        if raw_post.get("media_content"):
            for media in raw_post["media_content"]:
                if media.get("medium") == "image" and media.get("url"):
                    pics.append(media.get("url"))
        return Post(
            "rss",
            text=text,
            url=raw_post.link,
            pics=pics,
            target_name=raw_post["_target_name"],
        )
|
||||
@@ -0,0 +1,185 @@
|
||||
import json
|
||||
import re
|
||||
from collections.abc import Callable
|
||||
from datetime import datetime
|
||||
from typing import Any, Optional
|
||||
|
||||
from bs4 import BeautifulSoup as bs
|
||||
from httpx import AsyncClient
|
||||
from nonebot.log import logger
|
||||
|
||||
from ..post import Post
|
||||
from ..types import *
|
||||
from ..utils import SchedulerConfig, http_client
|
||||
from .platform import NewMessage
|
||||
|
||||
|
||||
class WeiboSchedConf(SchedulerConfig):
    # Scheduler identity and polling cadence shared by weibo platforms.
    name = "weibo.com"
    schedule_type = "interval"
    schedule_setting = {"seconds": 3}
|
||||
|
||||
|
||||
class Weibo(NewMessage):
    """新浪微博 watcher: polls a user's timeline via the m.weibo.cn API."""

    categories = {
        1: "转发",
        2: "视频",
        3: "图文",
        4: "文字",
    }
    enable_tag = True
    platform_name = "weibo"
    name = "新浪微博"
    enabled = True
    is_common = True
    scheduler = WeiboSchedConf
    has_target = True
    parse_target_promot = "请输入用户主页(包含数字UID)的链接"

    @classmethod
    async def get_target_name(
        cls, client: AsyncClient, target: Target
    ) -> Optional[str]:
        """Resolve a uid to the user's screen name; None when the API refuses."""
        param = {"containerid": "100505" + target}
        res = await client.get(
            "https://m.weibo.cn/api/container/getIndex", params=param
        )
        res_dict = json.loads(res.text)
        if res_dict.get("ok") == 1:
            return res_dict["data"]["userInfo"]["screen_name"]
        else:
            return None

    @classmethod
    async def parse_target(cls, target_text: str) -> Target:
        """Accept a bare numeric uid or a weibo.com/u/<uid> profile URL."""
        # Fix: anchor the pattern; the old r"\d+" accepted any string that
        # merely *started* with digits (e.g. "123abc"), unlike the
        # ^\d+$ check used by the other platforms.
        if re.match(r"^\d+$", target_text):
            return Target(target_text)
        elif match := re.match(r"(?:https?://)?weibo\.com/u/(\d+)", target_text):
            # It's the 2020s, plain http should be gone — but guard anyway.
            return Target(match.group(1))
        else:
            raise cls.ParseTargetException()

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Fetch the user's timeline; keep only card_type 9 (regular posts)."""
        params = {"containerid": "107603" + target}
        res = await self.client.get(
            "https://m.weibo.cn/api/container/getIndex?", params=params, timeout=4.0
        )
        res_data = json.loads(res.text)
        # "这里还没有内容" (no content yet) is a legitimate empty timeline.
        if not res_data["ok"] and res_data["msg"] != "这里还没有内容":
            raise ApiError(res.request.url)
        custom_filter: Callable[[RawPost], bool] = lambda d: d["card_type"] == 9
        return list(filter(custom_filter, res_data["data"]["cards"]))

    def get_id(self, post: RawPost) -> Any:
        return post["mblog"]["id"]

    def filter_platform_custom(self, raw_post: RawPost) -> bool:
        return raw_post["card_type"] == 9

    def get_date(self, raw_post: RawPost) -> float:
        # Example created_at: "Tue Sep 20 14:00:00 +0800 2022"
        created_time = datetime.strptime(
            raw_post["mblog"]["created_at"], "%a %b %d %H:%M:%S %z %Y"
        )
        return created_time.timestamp()

    def get_tags(self, raw_post: RawPost) -> Optional[list[Tag]]:
        "Return Tag list of given RawPost"
        text = raw_post["mblog"]["text"]
        soup = bs(text, "html.parser")
        # Hashtags are rendered as <span class="surl-text">#tag#</span>.
        res = list(
            map(
                lambda x: x[1:-1],
                filter(
                    lambda s: s[0] == "#" and s[-1] == "#",
                    map(lambda x: x.text, soup.find_all("span", class_="surl-text")),
                ),
            )
        )
        # Super-topic cards carry a marker image; append "<topic>超话" if found.
        super_topic_img = soup.find(
            "img", src=re.compile(r"timeline_card_small_super_default")
        )
        if super_topic_img:
            try:
                res.append(
                    super_topic_img.parent.parent.find("span", class_="surl-text").text  # type: ignore
                    + "超话"
                )
            except Exception:
                # Fix: narrow the bare except so KeyboardInterrupt/SystemExit
                # are not swallowed; extraction failure is only logged.
                logger.info("super_topic extract error: {}".format(text))
        return res

    def get_category(self, raw_post: RawPost) -> Category:
        """Classify: 1 repost, 2 video, 3 image post, 4 plain text."""
        if raw_post["mblog"].get("retweeted_status"):
            return Category(1)
        elif (
            raw_post["mblog"].get("page_info")
            and raw_post["mblog"]["page_info"].get("type") == "video"
        ):
            return Category(2)
        elif raw_post["mblog"].get("pics"):
            return Category(3)
        else:
            return Category(4)

    def _get_text(self, raw_text: str) -> str:
        """Strip HTML markup, preserving <br /> as newlines."""
        text = raw_text.replace("<br />", "\n")
        return bs(text, "html.parser").text

    async def parse(self, raw_post: RawPost) -> Post:
        """Build a Post; long posts and >9-picture posts are re-fetched from
        the detail page to get the full text and picture list."""
        header = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
            "accept-language": "zh-CN,zh;q=0.9",
            "authority": "m.weibo.cn",
            "cache-control": "max-age=0",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "same-origin",
            "sec-fetch-site": "same-origin",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
            "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 "
            "Mobile Safari/537.36",
        }
        info = raw_post["mblog"]
        retweeted = False
        if info.get("retweeted_status"):
            retweeted = True
        pic_num = info["retweeted_status"]["pic_num"] if retweeted else info["pic_num"]
        if info["isLongText"] or pic_num > 9:
            # Timeline payload is truncated; scrape the full status JSON
            # embedded in the detail page.
            res = await self.client.get(
                "https://m.weibo.cn/detail/{}".format(info["mid"]), headers=header
            )
            try:
                match = re.search(r'"status": ([\s\S]+),\s+"call"', res.text)
                assert match
                full_json_text = match.group(1)
                info = json.loads(full_json_text)
            except Exception:
                # Fix: narrow the bare except; fall back to the truncated
                # timeline data when the detail page cannot be parsed.
                logger.info(
                    "detail message error: https://m.weibo.cn/detail/{}".format(
                        info["mid"]
                    )
                )
        parsed_text = self._get_text(info["text"])
        raw_pics_list = (
            info["retweeted_status"].get("pics", [])
            if retweeted
            else info.get("pics", [])
        )
        pic_urls = [img["large"]["url"] for img in raw_pics_list]
        pics = []
        if pic_urls:
            # Reuse one client for all pictures instead of opening a fresh
            # connection pool per image.
            async with http_client(headers={"referer": "https://weibo.com"}) as client:
                for pic_url in pic_urls:
                    res = await client.get(pic_url)
                    res.raise_for_status()
                    pics.append(res.content)
        detail_url = "https://weibo.com/{}/{}".format(info["user"]["id"], info["bid"])
        return Post(
            "weibo",
            text=parsed_text,
            url=detail_url,
            pics=pics,
            target_name=info["user"]["screen_name"],
        )
|
||||
Reference in New Issue
Block a user