🐛 B站蹲饼修复 (#525)

*  使用新接口

* ♻️ 调整刷新逻辑

* 🐛 调整刷新逻辑

* ♻️ 将单个哔哩哔哩文件拆开

* 🐛 修修补补边界情况

*  添加UID:xxx匹配

*  调整测试中的导入

*  调整测试的断言

* 🐛 添加unicode字符的escape

*  不再主动刷新cookies

* 🔀 适配新版Site

* 🐛 解析live_rcmd中的json string

* 🚨 make ruff happy

* 🐛 调整并测试bilibili retry函数

*  修正测试

* ♻️ 按review意见调整

* ♻️ 清理一些遗留的复杂写法

* ♻️ 移出函数内的NamedTuple

* 🔇 删除不必要的日志输出

Co-authored-by: felinae98 <731499577@qq.com>

* Update nonebot_bison/platform/bilibili/scheduler.py

* Update scheduler.py

---------

Co-authored-by: felinae98 <731499577@qq.com>
This commit is contained in:
Azide
2024-06-17 13:24:04 +08:00
committed by GitHub
parent 9729d4b776
commit af72b6c3d0
15 changed files with 6387 additions and 7213 deletions
@@ -0,0 +1,7 @@
from .platforms import Bilibili as Bilibili
from .platforms import Bilibililive as Bilibililive
from .scheduler import BilibiliSite as BilibiliSite
from .scheduler import BililiveSite as BililiveSite
from .platforms import BilibiliBangumi as BilibiliBangumi
from .scheduler import BiliBangumiSite as BiliBangumiSite
from .scheduler import BilibiliClientManager as BilibiliClientManager
+394
View File
@@ -0,0 +1,394 @@
from typing import Any, Literal, TypeVar, TypeAlias
from pydantic import BaseModel
from nonebot.compat import PYDANTIC_V2, ConfigDict
from nonebot_bison.compat import model_rebuild
TBaseModel = TypeVar("TBaseModel", bound=type[BaseModel])
# Not usable as a decorator: at decoration time the class being decorated is
# not yet present in the global namespace, so the rebuild would fail.
def model_rebuild_recurse(cls: TBaseModel) -> TBaseModel:
    """Recursively rebuild all BaseModel subclasses in the class."""
    if not PYDANTIC_V2:
        from inspect import isclass, getmembers

        def _is_nested_model(obj: object) -> bool:
            return isclass(obj) and issubclass(obj, BaseModel)

        for _attr_name, nested_model in getmembers(cls, _is_nested_model):
            model_rebuild_recurse(nested_model)
    model_rebuild(cls)
    return cls
class Base(BaseModel):
    """Common base for all Bilibili API models.

    Enables building models from objects via attribute access on both
    pydantic v2 (``from_attributes``) and v1 (``orm_mode``).
    """

    if PYDANTIC_V2:
        model_config = ConfigDict(from_attributes=True)
    else:

        class Config:
            orm_mode = True
class APIBase(Base):
    """Base structure of a Bilibili API response."""

    # Status code: 0 means success; -352 means cookies are required
    # (see the -352 handling in the platform code).
    code: int
    message: str
class UserAPI(APIBase):
    """Response model carrying a user's card info (currently just the name)."""

    class Card(Base):
        name: str

    class Data(Base):
        card: "UserAPI.Card"

    data: Data | None = None
# Type tags of dynamic (feed) items as returned by the web-dynamic API.
DynamicType = Literal[
    "DYNAMIC_TYPE_ARTICLE",
    "DYNAMIC_TYPE_AV",
    "DYNAMIC_TYPE_WORD",
    "DYNAMIC_TYPE_DRAW",
    "DYNAMIC_TYPE_FORWARD",
    "DYNAMIC_TYPE_LIVE",
    "DYNAMIC_TYPE_LIVE_RCMD",
    "DYNAMIC_TYPE_PGC",
    "DYNAMIC_TYPE_PGC_UNION",
    "DYNAMIC_TYPE_NONE",  # a deleted dynamic; normally only seen as the deleted source of a repost
    "DYNAMIC_TYPE_COMMON_SQUARE",
    "DYNAMIC_TYPE_COMMON_VERTICAL",
    "DYNAMIC_TYPE_COURSES_SEASON",
]
# Reference: https://github.com/Yun-Shan/bilibili-dynamic
class PostAPI(APIBase):
    """Response model of the web-dynamic feed API (a user's dynamic list)."""

    class Basic(Base):
        rid_str: str
        """Possibly a "referrer id" -- the ID of the referenced object?
        Known cases: equals the article ID for article dynamics and
        the av number for video dynamics.
        """

    class Modules(Base):
        class Author(Base):
            face: str
            mid: int
            name: str
            jump_url: str
            pub_ts: int
            type: Literal["AUTHOR_TYPE_NORMAL", "AUTHOR_TYPE_PGC"]
            """Author type: normally NORMAL; PGC for bangumi pushes"""

        class Additional(Base):
            type: str
            """ADDITIONAL_TYPE_UGC: pictures attached to a dynamic auto-published with a video
            ADDITIONAL_TYPE_COMMON: related-game card
            ADDITIONAL_TYPE_RESERVE: reservation card
            ADDITIONAL_TYPE_VOTE: vote card
            ADDITIONAL_TYPE_UPOWER_LOTTERY: monthly-charge exclusive lottery card
            ADDITIONAL_TYPE_MATCH: e-sports match card (only replays observed so far;
            in theory live matches should use it too)
            """

        class Desc(Base):
            rich_text_nodes: list[dict[str, Any]]
            """Rich-text nodes of the description, i.e. the pieces a dynamic is made of.
            A probably-universal node structure:
            ```json
            [
                {
                    "jump_url": "//search.bilibili.com/all?keyword=鸣潮公测定档",
                    "orig_text": "#鸣潮公测定档#",
                    "text": "#鸣潮公测定档#",
                    "type": "RICH_TEXT_NODE_TYPE_TOPIC"
                },
                //...
            ]
            ```
            """
            text: str
            """Plain-text content of the description"""

        class Dynamic(Base):
            additional: "PostAPI.Modules.Additional | None" = None
            desc: "PostAPI.Modules.Desc | None" = None
            """Dynamic description; may be absent"""
            major: "Major | None" = None
            """Main content; may be absent"""

        module_author: "PostAPI.Modules.Author"
        module_dynamic: "PostAPI.Modules.Dynamic"

    class Topic(Base):
        id: int
        name: str
        jump_url: str

    class Item(Base):
        basic: "PostAPI.Basic"
        id_str: str
        modules: "PostAPI.Modules"
        # Source dynamic when this item is a repost.
        orig: "PostAPI.Item | None" = None
        topic: "PostAPI.Topic | None" = None
        type: DynamicType

    class DeletedItem(Base):
        """A deleted dynamic; normally only appears as the deleted source of a repost."""

        basic: "PostAPI.Basic"
        id_str: None
        modules: "PostAPI.Modules"
        type: Literal["DYNAMIC_TYPE_NONE"]

    class Data(Base):
        items: "list[PostAPI.Item | PostAPI.DeletedItem] | None" = None

    data: "PostAPI.Data | None" = None
class VideoMajor(Base):
    """Video card (MAJOR_TYPE_ARCHIVE)."""

    class Archive(Base):
        aid: str
        bvid: str
        title: str
        desc: str
        """Video description; truncated when too long"""
        cover: str
        jump_url: str

    type: Literal["MAJOR_TYPE_ARCHIVE"]
    archive: "VideoMajor.Archive"
class LiveRecommendMajor(Base):
    """Live-recommendation card (MAJOR_TYPE_LIVE_RCMD)."""

    class LiveRecommand(Base):
        content: str
        """Card payload: a JSON string parseable as LiveRecommendMajor.Content"""

    class Content(Base):
        type: int
        """Live type"""
        live_play_info: "LiveRecommendMajor.LivePlayInfo"

    class LivePlayInfo(Base):
        uid: int
        """User UID, not the live room number"""
        room_type: int
        """Room type"""
        room_paid_type: int
        """Paid type"""
        play_type: int
        """Play type?"""
        live_status: int
        """Live status"""
        live_screen_type: int
        """Live screen type?"""
        room_id: int
        """Live room number"""
        cover: str
        """Live cover image"""
        title: str
        online: int
        """Streaming duration?"""
        parent_area_id: int
        """Main partition ID"""
        parent_area_name: str
        """Main partition name"""
        area_id: int
        """Partition ID"""
        area_name: str
        """Partition name"""
        live_start_time: int
        """Stream start timestamp"""
        link: str
        """Jump link; protocol-relative (starts with // rather than https://)"""
        live_id: str
        """Live ID; unclear what it is for"""
        watched_show: "LiveRecommendMajor.WatchedShow"

    class WatchedShow(Base):
        num: int
        """Number of viewers"""
        text_small: str
        """Viewer count as short text, e.g. 1.2万"""
        text_large: str
        """Viewer count as long text, e.g. 1.2万人看过"""
        switch: bool
        """Unknown"""
        icon: str
        """Icon shown before the viewer text"""
        icon_web: str
        """Icon shown before the viewer text (web version)"""
        icon_location: str
        """Icon position?"""

    type: Literal["MAJOR_TYPE_LIVE_RCMD"]
    live_rcmd: "LiveRecommendMajor.LiveRecommand"
class LiveMajor(Base):
    """Live-broadcast card (MAJOR_TYPE_LIVE)."""

    class Live(Base):
        id: int
        """Live room number"""
        title: str
        live_state: int
        """Live state: 1 = streaming, 0 = off air"""
        cover: str
        desc_first: str
        """First part of the live info; used to show the partition"""
        desc_second: str
        jump_url: str
        """Jump link; currently protocol-relative (// rather than https://)"""

    type: Literal["MAJOR_TYPE_LIVE"]
    live: "LiveMajor.Live"
class ArticleMajor(Base):
    """Article (column) card (MAJOR_TYPE_ARTICLE)."""

    class Article(Base):
        id: int
        """Article CID"""
        title: str
        desc: str
        """Article summary"""
        covers: list[str]
        """Article covers; usually a single image"""
        jump_url: str

    type: Literal["MAJOR_TYPE_ARTICLE"]
    article: "ArticleMajor.Article"
class DrawMajor(Base):
    """Image-post card (MAJOR_TYPE_DRAW)."""

    class Item(Base):
        width: int
        height: int
        size: float
        """File size; KiB (1024-based)"""
        src: str
        """Image URL"""

    class Draw(Base):
        id: int
        items: "list[DrawMajor.Item]"

    type: Literal["MAJOR_TYPE_DRAW"]
    draw: "DrawMajor.Draw"
class PGCMajor(Base):
    """Bangumi push card (MAJOR_TYPE_PGC)."""

    class PGC(Base):
        title: str
        cover: str
        jump_url: str
        """Usually https://www.bilibili.com/bangumi/play/ep{epid}"""
        epid: int
        season_id: int

    type: Literal["MAJOR_TYPE_PGC"]
    pgc: "PGCMajor.PGC"
class OPUSMajor(Base):
    """Generic image-and-text content (MAJOR_TYPE_OPUS)."""

    class Summary(Base):
        rich_text_nodes: list[dict[str, Any]]
        """Rich-text nodes making up the content"""
        text: str

    class Pic(Base):
        width: int
        height: int
        size: int
        """File size; KiB (1024-based)"""
        url: str
        """Image URL"""

    class Opus(Base):
        jump_url: str
        title: str
        summary: "OPUSMajor.Summary"
        pics: "list[OPUSMajor.Pic]"

    type: Literal["MAJOR_TYPE_OPUS"]
    opus: "OPUSMajor.Opus"
class CommonMajor(Base):
    """Also generic image-and-text content (MAJOR_TYPE_COMMON).

    Mostly tied to special official features, e.g. event pages, the member
    shop, comics, the match center, the game center, the blacklist room,
    the workshop market, outfits, etc.
    """

    class Common(Base):
        cover: str
        """URL of the image on the left side of the card"""
        title: str
        desc: str
        """Content text"""
        jump_url: str

    type: Literal["MAJOR_TYPE_COMMON"]
    common: "CommonMajor.Common"
class CoursesMajor(Base):
    """Course push card (MAJOR_TYPE_COURSES)."""

    class Courses(Base):
        title: str
        sub_title: str
        """Subtitle; usually a short course intro"""
        desc: str
        """Lesson-count info"""
        cover: str
        jump_url: str
        id: int
        """Course ID"""

    type: Literal["MAJOR_TYPE_COURSES"]
    courses: "CoursesMajor.Courses"
class DeletedMajor(Base):
    """Major of a deleted dynamic (MAJOR_TYPE_NONE)."""

    class None_(Base):
        tips: str

    type: Literal["MAJOR_TYPE_NONE"]
    none: "DeletedMajor.None_"
class UnknownMajor(Base):
    """Fallback for major types not modeled above; `type` keeps the raw tag."""

    type: str
# Union of all known major (main-content) shapes, discriminated by `type`.
# UnknownMajor accepts any `type` string, so it is kept last to let the
# concrete majors match first.
Major = (
    VideoMajor
    | LiveRecommendMajor
    | LiveMajor
    | ArticleMajor
    | DrawMajor
    | PGCMajor
    | OPUSMajor
    | CommonMajor
    | CoursesMajor
    | DeletedMajor
    | UnknownMajor
)

# Raw dynamic item as consumed by the platform layer.
DynRawPost: TypeAlias = PostAPI.Item
# Resolve forward references for every API model: the Major members first,
# then the models that reference them.
for _model in (
    VideoMajor,
    LiveRecommendMajor,
    LiveMajor,
    ArticleMajor,
    DrawMajor,
    PGCMajor,
    OPUSMajor,
    CommonMajor,
    CoursesMajor,
    UserAPI,
    PostAPI,
):
    model_rebuild_recurse(_model)
@@ -1,148 +1,86 @@
import re
import json
from copy import deepcopy
from functools import wraps
from enum import Enum, unique
from typing_extensions import Self
from datetime import datetime, timedelta
from typing import Any, TypeVar, TypeAlias, NamedTuple
from typing import TypeVar, NamedTuple
from collections.abc import Callable, Awaitable
from yarl import URL
from nonebot import logger
from httpx import AsyncClient
from nonebot.log import logger
from pydantic import Field, BaseModel
from nonebot.compat import PYDANTIC_V2, ConfigDict, type_validate_json, type_validate_python
from httpx import URL as HttpxURL
from pydantic import Field, BaseModel, ValidationError
from nonebot.compat import type_validate_json, type_validate_python
from ..post import Post
from ..compat import model_rebuild
from ..types import Tag, Target, RawPost, ApiError, Category
from ..utils import Site, ClientManager, http_client, text_similarity
from .platform import NewMessage, StatusChange, CategoryNotSupport, CategoryNotRecognize
from nonebot_bison.post.post import Post
from nonebot_bison.compat import model_rebuild
from nonebot_bison.utils import text_similarity, decode_unicode_escapes
from nonebot_bison.types import Tag, Target, RawPost, ApiError, Category
TBaseModel = TypeVar("TBaseModel", bound=type[BaseModel])
from .scheduler import BilibiliSite, BililiveSite, BiliBangumiSite
from ..platform import NewMessage, StatusChange, CategoryNotSupport, CategoryNotRecognize
from .models import (
PostAPI,
UserAPI,
PGCMajor,
DrawMajor,
LiveMajor,
OPUSMajor,
DynRawPost,
VideoMajor,
CommonMajor,
DynamicType,
ArticleMajor,
CoursesMajor,
UnknownMajor,
LiveRecommendMajor,
)
B = TypeVar("B", bound="Bilibili")
MAX_352_RETRY_COUNT = 3
# 不能当成装饰器用
# 当装饰器用时,global namespace 中还没有被装饰的类,会报错
def model_rebuild_recurse(cls: TBaseModel) -> TBaseModel:
"""Recursively rebuild all BaseModel subclasses in the class."""
if not PYDANTIC_V2:
from inspect import isclass, getmembers
for _, sub_cls in getmembers(cls, lambda x: isclass(x) and issubclass(x, BaseModel)):
model_rebuild_recurse(sub_cls)
model_rebuild(cls)
return cls
class ApiCode352Error(Exception):
def __init__(self, url: HttpxURL) -> None:
msg = f"api {url} error"
super().__init__(msg)
class Base(BaseModel):
if PYDANTIC_V2:
model_config = ConfigDict(from_attributes=True)
else:
def retry_for_352(func: Callable[[B, Target], Awaitable[list[DynRawPost]]]):
retried_times = 0
class Config:
orm_mode = True
class APIBase(Base):
"""Bilibili API返回的基础数据"""
code: int
message: str
class UserAPI(APIBase):
class Card(Base):
name: str
class Data(Base):
card: "UserAPI.Card"
data: Data | None = None
class PostAPI(APIBase):
class Info(Base):
uname: str
class UserProfile(Base):
info: "PostAPI.Info"
class Origin(Base):
uid: int
dynamic_id: int
dynamic_id_str: str
timestamp: int
type: int
rid: int
bvid: str | None = None
class Desc(Base):
dynamic_id: int
dynamic_id_str: str
timestamp: int
type: int
user_profile: "PostAPI.UserProfile"
rid: int
bvid: str | None = None
origin: "PostAPI.Origin | None" = None
class Card(Base):
desc: "PostAPI.Desc"
card: str
class Data(Base):
cards: "list[PostAPI.Card] | None"
data: Data | None = None
DynRawPost: TypeAlias = PostAPI.Card
model_rebuild_recurse(UserAPI)
model_rebuild_recurse(PostAPI)
class BilibiliClient(ClientManager):
_client: AsyncClient
_refresh_time: datetime
cookie_expire_time = timedelta(hours=5)
def __init__(self) -> None:
self._client = http_client()
self._refresh_time = datetime(year=2000, month=1, day=1) # an expired time
async def _init_session(self):
res = await self._client.get("https://www.bilibili.com/")
if res.status_code != 200:
logger.warning("unable to refresh temp cookie")
@wraps(func)
async def wrapper(bls: B, *args, **kwargs):
nonlocal retried_times
try:
res = await func(bls, *args, **kwargs)
except ApiCode352Error as e:
if retried_times < MAX_352_RETRY_COUNT:
retried_times += 1
logger.warning(f"获取动态列表失败,尝试刷新cookie: {retried_times}/{MAX_352_RETRY_COUNT}")
await bls.ctx.refresh_client()
return [] # 返回空列表
else:
raise ApiError(e.args[0])
else:
self._refresh_time = datetime.now()
retried_times = 0
return res
async def _refresh_client(self):
if datetime.now() - self._refresh_time > self.cookie_expire_time:
await self._init_session()
async def get_client(self, target: Target | None) -> AsyncClient:
await self._refresh_client()
return self._client
async def get_query_name_client(self) -> AsyncClient:
await self._refresh_client()
return self._client
return wrapper
class BilibiliSite(Site):
name = "bilibili.com"
schedule_type = "interval"
schedule_setting = {"seconds": 10}
client_mgr = BilibiliClient
class _ProcessedText(NamedTuple):
title: str
content: str
class BililiveSchedConf(Site):
name = "live.bilibili.com"
schedule_type = "interval"
schedule_setting = {"seconds": 3}
client_mgr = BilibiliClient
class _ParsedMojarPost(NamedTuple):
title: str
content: str
pics: list[str]
url: str | None = None
class Bilibili(NewMessage):
@@ -152,6 +90,7 @@ class Bilibili(NewMessage):
3: "视频",
4: "纯文字",
5: "转发",
6: "直播推送",
# 5: "短视频"
}
platform_name = "bilibili"
@@ -176,7 +115,7 @@ class Bilibili(NewMessage):
async def parse_target(cls, target_text: str) -> Target:
if re.match(r"\d+", target_text):
return Target(target_text)
elif re.match(r"UID:\d+", target_text):
elif re.match(r"UID:(\d+)", target_text):
return Target(target_text[4:])
elif m := re.match(r"(?:https?://)?space\.bilibili\.com/(\d+)", target_text):
return Target(m.group(1))
@@ -185,142 +124,202 @@ class Bilibili(NewMessage):
prompt="正确格式:\n1. 用户纯数字id\n2. UID:<用户id>\n3. 用户主页链接: https://space.bilibili.com/xxxx"
)
@retry_for_352
async def get_sub_list(self, target: Target) -> list[DynRawPost]:
client = await self.ctx.get_client()
params = {"host_uid": target, "offset": 0, "need_top": 0}
client = await self.ctx.get_client(target)
params = {"host_mid": target, "timezone_offset": -480, "offset": ""}
res = await client.get(
"https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history",
"https://api.bilibili.com/x/polymer/web-dynamic/v1/feed/space",
params=params,
timeout=4.0,
)
res.raise_for_status()
res_obj = type_validate_json(PostAPI, res.content)
try:
res_obj = type_validate_json(PostAPI, res.content)
except ValidationError as e:
logger.exception("解析B站动态列表失败")
logger.error(res.json())
raise ApiError(res.request.url) from e
# 0: 成功
# -352: 需要cookie
if res_obj.code == 0:
if (data := res_obj.data) and (card := data.cards):
return card
return []
raise ApiError(res.request.url)
if (data := res_obj.data) and (items := data.items):
logger.trace(f"获取用户{target}的动态列表成功,共{len(items)}条动态")
logger.trace(f"用户{target}的动态列表: {':'.join(x.id_str or x.basic.rid_str for x in items)}")
return [item for item in items if item.type != "DYNAMIC_TYPE_NONE"]
def get_id(self, post: DynRawPost) -> int:
return post.desc.dynamic_id
logger.trace(f"获取用户{target}的动态列表成功,但是没有动态")
return []
elif res_obj.code == -352:
raise ApiCode352Error(res.request.url)
else:
raise ApiError(res.request.url)
def get_id(self, post: DynRawPost) -> str:
return post.id_str
def get_date(self, post: DynRawPost) -> int:
return post.desc.timestamp
return post.modules.module_author.pub_ts
def _do_get_category(self, post_type: int) -> Category:
def _do_get_category(self, post_type: DynamicType) -> Category:
match post_type:
case 2:
case "DYNAMIC_TYPE_DRAW" | "DYNAMIC_TYPE_COMMON_VERTICAL" | "DYNAMIC_TYPE_COMMON_SQUARE":
return Category(1)
case 64:
case "DYNAMIC_TYPE_ARTICLE":
return Category(2)
case 8:
case "DYNAMIC_TYPE_AV":
return Category(3)
case 4:
case "DYNAMIC_TYPE_WORD":
return Category(4)
case 1:
case "DYNAMIC_TYPE_FORWARD":
# 转发
return Category(5)
case "DYNAMIC_TYPE_LIVE_RCMD" | "DYNAMIC_TYPE_LIVE":
return Category(6)
case unknown_type:
raise CategoryNotRecognize(unknown_type)
def get_category(self, post: DynRawPost) -> Category:
post_type = post.desc.type
post_type = post.type
return self._do_get_category(post_type)
def get_tags(self, raw_post: DynRawPost) -> list[Tag]:
card_content = json.loads(raw_post.card)
text: str = card_content["item"]["content"]
result: list[str] = re.findall(r"#(.*?)#", text)
return result
tags: list[Tag] = []
if raw_post.topic:
tags.append(raw_post.topic.name)
if desc := raw_post.modules.module_dynamic.desc:
for node in desc.rich_text_nodes:
if (node_type := node.get("type", None)) and node_type == "RICH_TEXT_NODE_TYPE_TOPIC":
tags.append(node["text"].strip("#"))
return tags
def _text_process(self, dynamic: str, desc: str, title: str) -> str:
similarity = 1.0 if len(dynamic) == 0 or len(desc) == 0 else text_similarity(dynamic, desc)
if len(dynamic) == 0 and len(desc) == 0:
text = title
elif similarity > 0.8:
text = title + "\n\n" + desc if len(dynamic) < len(desc) else dynamic + "\n=================\n" + title
def _text_process(self, dynamic: str, desc: str, title: str) -> _ProcessedText:
# 计算视频标题和视频描述相似度
title_similarity = 0.0 if len(title) == 0 or len(desc) == 0 else text_similarity(title, desc[: len(title)])
if title_similarity > 0.9:
desc = desc[len(title) :].lstrip()
# 计算视频描述和动态描述相似度
content_similarity = 0.0 if len(dynamic) == 0 or len(desc) == 0 else text_similarity(dynamic, desc)
if content_similarity > 0.8:
return _ProcessedText(title, desc if len(dynamic) < len(desc) else dynamic) # 选择较长的描述
else:
text = dynamic + "\n=================\n" + title + "\n\n" + desc
return text
return _ProcessedText(title, f"{desc}" + (f"\n=================\n{dynamic}" if dynamic else ""))
def _raw_post_parse(self, raw_post: DynRawPost, in_repost: bool = False):
class ParsedPost(NamedTuple):
text: str
pics: list[str]
url: str | None
repost_owner: str | None = None
repost: "ParsedPost | None" = None
def pre_parse_by_mojar(self, raw_post: DynRawPost) -> _ParsedMojarPost:
dyn = raw_post.modules.module_dynamic
card_content: dict[str, Any] = json.loads(raw_post.card)
repost_owner: str | None = ou["info"]["uname"] if (ou := card_content.get("origin_user")) else None
def extract_url_id(url_template: str, name: str) -> str | None:
if in_repost:
if origin := raw_post.desc.origin:
return url_template.format(getattr(origin, name))
return None
return url_template.format(getattr(raw_post.desc, name))
match self._do_get_category(raw_post.desc.type):
case 1:
# 一般动态
url = extract_url_id("https://t.bilibili.com/{}", "dynamic_id_str")
text: str = card_content["item"]["description"]
pic: list[str] = [img["img_src"] for img in card_content["item"]["pictures"]]
return ParsedPost(text, pic, url, repost_owner)
case 2:
# 专栏文章
url = extract_url_id("https://www.bilibili.com/read/cv{}", "rid")
text = "{} {}".format(card_content["title"], card_content["summary"])
pic = card_content["image_urls"]
return ParsedPost(text, pic, url, repost_owner)
case 3:
# 视频
url = extract_url_id("https://www.bilibili.com/video/{}", "bvid")
dynamic = card_content.get("dynamic", "")
title = card_content["title"]
desc = card_content.get("desc", "")
text = self._text_process(dynamic, desc, title)
pic = [card_content["pic"]]
return ParsedPost(text, pic, url, repost_owner)
case 4:
# 纯文字
url = extract_url_id("https://t.bilibili.com/{}", "dynamic_id_str")
text = card_content["item"]["content"]
pic = []
return ParsedPost(text, pic, url, repost_owner)
case 5:
# 转发
url = extract_url_id("https://t.bilibili.com/{}", "dynamic_id_str")
text = card_content["item"]["content"]
orig_type: int = card_content["item"]["orig_type"]
orig_card: str = card_content["origin"]
orig_post = DynRawPost(desc=raw_post.desc, card=orig_card)
orig_post.desc.type = orig_type
orig_parsed_post = self._raw_post_parse(orig_post, in_repost=True)
return ParsedPost(text, [], url, repost_owner, orig_parsed_post)
case unsupported_type:
raise CategoryNotSupport(unsupported_type)
match raw_post.modules.module_dynamic.major:
case VideoMajor(archive=archive):
desc_text = dyn.desc.text if dyn.desc else ""
parsed = self._text_process(desc_text, archive.desc, archive.title)
return _ParsedMojarPost(
title=parsed.title,
content=parsed.content,
pics=[archive.cover],
url=URL(archive.jump_url).with_scheme("https").human_repr(),
)
case LiveRecommendMajor(live_rcmd=live_rcmd):
live_play_info = type_validate_json(LiveRecommendMajor.Content, live_rcmd.content).live_play_info
return _ParsedMojarPost(
title=live_play_info.title,
content=f"{live_play_info.parent_area_name} {live_play_info.area_name}",
pics=[live_play_info.cover],
url=URL(live_play_info.link).with_scheme("https").with_query(None).human_repr(),
)
case LiveMajor(live=live):
return _ParsedMojarPost(
title=live.title,
content=f"{live.desc_first}\n{live.desc_second}",
pics=[live.cover],
url=URL(live.jump_url).with_scheme("https").human_repr(),
)
case ArticleMajor(article=article):
return _ParsedMojarPost(
title=article.title,
content=article.desc,
pics=article.covers,
url=URL(article.jump_url).with_scheme("https").human_repr(),
)
case DrawMajor(draw=draw):
return _ParsedMojarPost(
title="",
content=dyn.desc.text if dyn.desc else "",
pics=[item.src for item in draw.items],
url=f"https://t.bilibili.com/{raw_post.id_str}",
)
case PGCMajor(pgc=pgc):
return _ParsedMojarPost(
title=pgc.title,
content="",
pics=[pgc.cover],
url=URL(pgc.jump_url).with_scheme("https").human_repr(),
)
case OPUSMajor(opus=opus):
return _ParsedMojarPost(
title=opus.title,
content=opus.summary.text,
pics=[pic.url for pic in opus.pics],
url=URL(opus.jump_url).with_scheme("https").human_repr(),
)
case CommonMajor(common=common):
return _ParsedMojarPost(
title=common.title,
content=common.desc,
pics=[common.cover],
url=URL(common.jump_url).with_scheme("https").human_repr(),
)
case CoursesMajor(courses=courses):
return _ParsedMojarPost(
title=courses.title,
content=f"{courses.sub_title}\n{courses.desc}",
pics=[courses.cover],
url=URL(courses.jump_url).with_scheme("https").human_repr(),
)
case UnknownMajor(type=unknown_type):
raise CategoryNotSupport(unknown_type)
case None: # 没有major的情况
return _ParsedMojarPost(
title="",
content=dyn.desc.text if dyn.desc else "",
pics=[],
url=f"https://t.bilibili.com/{raw_post.id_str}",
)
case _:
raise CategoryNotSupport(f"{raw_post.id_str=}")
async def parse(self, raw_post: DynRawPost) -> Post:
parsed_raw_post = self._raw_post_parse(raw_post)
parsed_raw_post = self.pre_parse_by_mojar(raw_post)
parsed_raw_repost = None
if self._do_get_category(raw_post.type) == Category(5):
if raw_post.orig:
parsed_raw_repost = self.pre_parse_by_mojar(raw_post.orig)
else:
logger.warning(f"转发动态{raw_post.id_str}没有原动态")
post = Post(
self,
parsed_raw_post.text,
url=parsed_raw_post.url,
content=decode_unicode_escapes(parsed_raw_post.content),
title=parsed_raw_post.title,
images=list(parsed_raw_post.pics),
nickname=raw_post.desc.user_profile.info.uname,
timestamp=self.get_date(raw_post),
url=parsed_raw_post.url,
avatar=raw_post.modules.module_author.face,
nickname=raw_post.modules.module_author.name,
)
if rp := parsed_raw_post.repost:
if parsed_raw_repost:
orig = raw_post.orig
assert orig
post.repost = Post(
self,
rp.text,
url=rp.url,
images=list(rp.pics),
nickname=rp.repost_owner,
content=decode_unicode_escapes(parsed_raw_repost.content),
title=parsed_raw_repost.title,
images=list(parsed_raw_repost.pics),
timestamp=self.get_date(orig),
url=parsed_raw_repost.url,
avatar=orig.modules.module_author.face,
nickname=orig.modules.module_author.name,
)
return post
@@ -331,7 +330,7 @@ class Bilibililive(StatusChange):
enable_tag = False
enabled = True
is_common = True
site = BililiveSchedConf
site = BililiveSite
name = "Bilibili直播"
has_target = True
use_batch = True
@@ -481,7 +480,7 @@ class BilibiliBangumi(StatusChange):
enable_tag = False
enabled = True
is_common = True
site = BilibiliSite
site = BiliBangumiSite
name = "Bilibili剧集"
has_target = True
parse_target_promot = "请输入剧集主页"
@@ -0,0 +1,80 @@
from random import randint
from typing_extensions import override
from httpx import AsyncClient
from nonebot import logger, require
from playwright.async_api import Cookie
from nonebot_bison.types import Target
from nonebot_bison.utils import Site, ClientManager, http_client
require("nonebot_plugin_htmlrender")
from nonebot_plugin_htmlrender import get_browser
class BilibiliClientManager(ClientManager):
    """Client manager whose cookies come from a real headless-browser visit.

    Bilibili's dynamic API refuses cookie-less requests (code -352, handled
    in the platform code), so cookies are harvested from a browser page load
    and copied into the shared httpx client.
    """

    _client: AsyncClient
    _inited: bool = False  # whether cookies have been fetched at least once

    def __init__(self) -> None:
        self._client = http_client()

    async def _get_cookies(self) -> list[Cookie]:
        # Visit a random user's dynamic page so the site issues fresh cookies.
        browser = await get_browser()
        async with await browser.new_page() as page:
            await page.goto(f"https://space.bilibili.com/{randint(1, 1000)}/dynamic")
            await page.wait_for_load_state("load")
            cookies = await page.context.cookies()

        return cookies

    async def _reset_client_cookies(self, cookies: list[Cookie]):
        # Copy the browser cookies into the shared httpx client's cookie jar.
        for cookie in cookies:
            self._client.cookies.set(
                name=cookie.get("name", ""),
                value=cookie.get("value", ""),
                domain=cookie.get("domain", ""),
                path=cookie.get("path", "/"),
            )

    @override
    async def refresh_client(self):
        cookies = await self._get_cookies()
        await self._reset_client_cookies(cookies)
        logger.debug("刷新B站客户端的cookie")

    @override
    async def get_client(self, target: Target | None) -> AsyncClient:
        # Lazily fetch cookies on first use instead of at construction time.
        if not self._inited:
            logger.debug("初始化B站客户端")
            await self.refresh_client()
            self._inited = True
        return self._client

    @override
    async def get_client_for_static(self) -> AsyncClient:
        # Static resources get a fresh plain client (no cookies needed).
        return http_client()

    @override
    async def get_query_name_client(self) -> AsyncClient:
        return http_client()
class BilibiliSite(Site):
    """Dynamic polling every 30s; requires a browser to obtain cookies."""

    name = "bilibili.com"
    schedule_setting = {"seconds": 30}
    schedule_type = "interval"
    client_mgr = BilibiliClientManager
    require_browser = True
class BililiveSite(Site):
    """Live-status polling every 5s; no dedicated client manager."""

    name = "live.bilibili.com"
    schedule_setting = {"seconds": 5}
    schedule_type = "interval"
class BiliBangumiSite(Site):
    """Bangumi (episode) polling every 30s; no dedicated client manager."""

    name = "bilibili.com/bangumi"
    schedule_setting = {"seconds": 30}
    schedule_type = "interval"