mirror of
https://github.com/suyiiyii/nonebot-bison.git
synced 2026-05-09 18:27:56 +08:00
🐛 Bilibili调度新增回避策略 (#573)
* 🐛 将Bilibili的调度速度降低到60s * ✨ 增加回避策略 * ✨ 降低轮询间隔,增加回避次数,抛出阶段随机刷新 * ♻️ 更清晰的调度逻辑实现 * 🐛 兼容3.10的NamedTuple多继承 * ♻️ 合并重复逻辑 * ♻️ ctx放入fsm * 🐛 测试并调整逻辑 * 🐛 补全类型标注 * ♻️ 添加Condition和State.on_exit/on_enter,以实现自动状态切换 * ✅ 调整测试 * 🐛 私有化命名方法 * 🔊 调整补充日志 * 🐛 添加测试后清理 * ✏️ fix typing typo
This commit is contained in:
@@ -0,0 +1,168 @@
|
||||
import sys
|
||||
import asyncio
|
||||
import inspect
|
||||
from enum import Enum
|
||||
from dataclasses import dataclass
|
||||
from collections.abc import Set as AbstractSet
|
||||
from collections.abc import Callable, Sequence, Awaitable, AsyncGenerator
|
||||
from typing import TYPE_CHECKING, Any, Generic, TypeVar, Protocol, TypeAlias, TypedDict, NamedTuple, runtime_checkable
|
||||
|
||||
from nonebot import logger
|
||||
|
||||
|
||||
class StrEnum(str, Enum): ...
|
||||
|
||||
|
||||
TAddon = TypeVar("TAddon", contravariant=True)
|
||||
TState = TypeVar("TState", contravariant=True)
|
||||
TEvent = TypeVar("TEvent", contravariant=True)
|
||||
TFSM = TypeVar("TFSM", bound="FSM", contravariant=True)
|
||||
|
||||
|
||||
class StateError(Exception): ...
|
||||
|
||||
|
||||
ActionReturn: TypeAlias = Any
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class SupportStateOnExit(Generic[TAddon], Protocol):
|
||||
async def on_exit(self, addon: TAddon) -> None: ...
|
||||
|
||||
|
||||
@runtime_checkable
|
||||
class SupportStateOnEnter(Generic[TAddon], Protocol):
|
||||
async def on_enter(self, addon: TAddon) -> None: ...
|
||||
|
||||
|
||||
class Action(Generic[TState, TEvent, TAddon], Protocol):
|
||||
async def __call__(self, from_: TState, event: TEvent, to: TState, addon: TAddon) -> ActionReturn: ...
|
||||
|
||||
|
||||
ConditionFunc = Callable[[TAddon], Awaitable[bool]]
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class Condition(Generic[TAddon]):
|
||||
call: ConditionFunc[TAddon]
|
||||
not_: bool = False
|
||||
|
||||
def __repr__(self):
|
||||
if inspect.isfunction(self.call) or inspect.isclass(self.call):
|
||||
call_str = self.call.__name__
|
||||
else:
|
||||
call_str = repr(self.call)
|
||||
return f"Condition(call={call_str})"
|
||||
|
||||
async def __call__(self, addon: TAddon) -> bool:
|
||||
return (await self.call(addon)) ^ self.not_
|
||||
|
||||
|
||||
# FIXME: Python 3.11+ 才支持 NamedTuple和TypedDict使用多继承添加泛型
|
||||
# 所以什么时候 drop 3.10(?
|
||||
if sys.version_info >= (3, 11) or TYPE_CHECKING:
|
||||
|
||||
class Transition(Generic[TState, TEvent, TAddon], NamedTuple):
|
||||
action: Action[TState, TEvent, TAddon]
|
||||
to: TState
|
||||
conditions: AbstractSet[Condition[TAddon]] | None = None
|
||||
|
||||
class StateGraph(Generic[TState, TEvent, TAddon], TypedDict):
|
||||
transitions: dict[
|
||||
TState,
|
||||
dict[
|
||||
TEvent,
|
||||
Transition[TState, TEvent, TAddon] | Sequence[Transition[TState, TEvent, TAddon]],
|
||||
],
|
||||
]
|
||||
initial: TState
|
||||
|
||||
else:
|
||||
|
||||
class Transition(NamedTuple):
|
||||
action: Action
|
||||
to: Any
|
||||
conditions: AbstractSet[Condition] | None = None
|
||||
|
||||
class StateGraph(TypedDict):
|
||||
transitions: dict[Any, dict[Any, Transition]]
|
||||
initial: Any
|
||||
|
||||
|
||||
class FSM(Generic[TState, TEvent, TAddon]):
|
||||
def __init__(self, graph: StateGraph[TState, TEvent, TAddon], addon: TAddon):
|
||||
self.started = False
|
||||
self.graph = graph
|
||||
self.current_state = graph["initial"]
|
||||
self.machine = self._core()
|
||||
self.addon = addon
|
||||
|
||||
async def _core(self) -> AsyncGenerator[ActionReturn, TEvent]:
|
||||
self.current_state = self.graph["initial"]
|
||||
res = None
|
||||
while True:
|
||||
event = yield res
|
||||
|
||||
if not self.started:
|
||||
raise StateError("FSM not started, please call start() first")
|
||||
|
||||
selected_transition = await self.cherry_pick(event)
|
||||
|
||||
logger.trace(f"exit state: {self.current_state}")
|
||||
if isinstance(self.current_state, SupportStateOnExit):
|
||||
logger.trace(f"do {self.current_state}.on_exit")
|
||||
await self.current_state.on_exit(self.addon)
|
||||
|
||||
logger.trace(f"do action: {selected_transition.action}")
|
||||
res = await selected_transition.action(self.current_state, event, selected_transition.to, self.addon)
|
||||
|
||||
logger.trace(f"enter state: {selected_transition.to}")
|
||||
self.current_state = selected_transition.to
|
||||
|
||||
if isinstance(self.current_state, SupportStateOnEnter):
|
||||
logger.trace(f"do {self.current_state}.on_enter")
|
||||
await self.current_state.on_enter(self.addon)
|
||||
|
||||
async def start(self):
|
||||
await anext(self.machine)
|
||||
self.started = True
|
||||
logger.trace(f"FSM started, initial state: {self.current_state}")
|
||||
|
||||
async def cherry_pick(self, event: TEvent) -> Transition[TState, TEvent, TAddon]:
|
||||
transitions = self.graph["transitions"][self.current_state].get(event)
|
||||
if transitions is None:
|
||||
raise StateError(f"Invalid event {event} in state {self.current_state}")
|
||||
|
||||
if isinstance(transitions, Transition):
|
||||
return transitions
|
||||
elif isinstance(transitions, Sequence):
|
||||
no_conds: list[Transition[TState, TEvent, TAddon]] = []
|
||||
for transition in transitions:
|
||||
if not transition.conditions:
|
||||
no_conds.append(transition)
|
||||
continue
|
||||
|
||||
values = await asyncio.gather(*(condition(self.addon) for condition in transition.conditions))
|
||||
|
||||
if all(values):
|
||||
logger.trace(f"conditions {transition.conditions} passed")
|
||||
return transition
|
||||
else:
|
||||
if no_conds:
|
||||
return no_conds.pop()
|
||||
else:
|
||||
raise StateError(f"Invalid event {event} in state {self.current_state}")
|
||||
else:
|
||||
raise TypeError("Invalid transition type: {transitions}, expected Transition or Sequence[Transition]")
|
||||
|
||||
async def emit(self, event: TEvent):
|
||||
return await self.machine.asend(event)
|
||||
|
||||
async def reset(self):
|
||||
await self.machine.aclose()
|
||||
self.started = False
|
||||
|
||||
del self.machine
|
||||
self.machine = self._core()
|
||||
|
||||
logger.trace("FSM closed")
|
||||
@@ -1,16 +1,13 @@
|
||||
import re
|
||||
import json
|
||||
from copy import deepcopy
|
||||
from functools import wraps
|
||||
from enum import Enum, unique
|
||||
from typing import NamedTuple
|
||||
from typing_extensions import Self
|
||||
from typing import TypeVar, NamedTuple
|
||||
from collections.abc import Callable, Awaitable
|
||||
|
||||
from yarl import URL
|
||||
from nonebot import logger
|
||||
from httpx import AsyncClient
|
||||
from httpx import URL as HttpxURL
|
||||
from pydantic import Field, BaseModel, ValidationError
|
||||
from nonebot.compat import type_validate_json, type_validate_python
|
||||
|
||||
@@ -19,6 +16,7 @@ from nonebot_bison.compat import model_rebuild
|
||||
from nonebot_bison.utils import text_similarity, decode_unicode_escapes
|
||||
from nonebot_bison.types import Tag, Target, RawPost, ApiError, Category
|
||||
|
||||
from .retry import ApiCode352Error, retry_for_352
|
||||
from .scheduler import BilibiliSite, BililiveSite, BiliBangumiSite
|
||||
from ..platform import NewMessage, StatusChange, CategoryNotSupport, CategoryNotRecognize
|
||||
from .models import (
|
||||
@@ -38,38 +36,6 @@ from .models import (
|
||||
LiveRecommendMajor,
|
||||
)
|
||||
|
||||
B = TypeVar("B", bound="Bilibili")
|
||||
MAX_352_RETRY_COUNT = 3
|
||||
|
||||
|
||||
class ApiCode352Error(Exception):
|
||||
def __init__(self, url: HttpxURL) -> None:
|
||||
msg = f"api {url} error"
|
||||
super().__init__(msg)
|
||||
|
||||
|
||||
def retry_for_352(func: Callable[[B, Target], Awaitable[list[DynRawPost]]]):
|
||||
retried_times = 0
|
||||
|
||||
@wraps(func)
|
||||
async def wrapper(bls: B, *args, **kwargs):
|
||||
nonlocal retried_times
|
||||
try:
|
||||
res = await func(bls, *args, **kwargs)
|
||||
except ApiCode352Error as e:
|
||||
if retried_times < MAX_352_RETRY_COUNT:
|
||||
retried_times += 1
|
||||
logger.warning(f"获取动态列表失败,尝试刷新cookie: {retried_times}/{MAX_352_RETRY_COUNT}")
|
||||
await bls.ctx.refresh_client()
|
||||
return [] # 返回空列表
|
||||
else:
|
||||
raise ApiError(e.args[0])
|
||||
else:
|
||||
retried_times = 0
|
||||
return res
|
||||
|
||||
return wrapper
|
||||
|
||||
|
||||
class _ProcessedText(NamedTuple):
|
||||
title: str
|
||||
|
||||
@@ -0,0 +1,253 @@
|
||||
import random
|
||||
from enum import Enum
|
||||
from functools import wraps
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime, timedelta
|
||||
from collections.abc import Callable, Awaitable
|
||||
from typing_extensions import override, assert_never
|
||||
from typing import TYPE_CHECKING, Generic, Literal, TypeVar
|
||||
|
||||
from strenum import StrEnum
|
||||
from nonebot.log import logger
|
||||
from httpx import URL as HttpxURL
|
||||
|
||||
from nonebot_bison.types import Target
|
||||
|
||||
from .models import DynRawPost
|
||||
from .fsm import FSM, Condition, StateGraph, Transition, ActionReturn
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .platforms import Bilibili
|
||||
|
||||
# 不用 TypeVar 的话,使用装饰器 Pyright 会报错
|
||||
TBilibili = TypeVar("TBilibili", bound="Bilibili")
|
||||
|
||||
|
||||
class ApiCode352Error(Exception):
|
||||
def __init__(self, url: HttpxURL) -> None:
|
||||
msg = f"api {url} error"
|
||||
super().__init__(msg)
|
||||
|
||||
|
||||
# see https://docs.python.org/zh-cn/3/howto/enum.html#dataclass-support
|
||||
@dataclass(frozen=True)
|
||||
class StateMixin:
|
||||
state: Literal["NORMAL", "REFRESH", "BACKOFF", "RAISE"]
|
||||
enter_func: Callable[["RetryAddon"], Awaitable[None]] | None = None
|
||||
exit_func: Callable[["RetryAddon"], Awaitable[None]] | None = None
|
||||
|
||||
async def on_enter(self, addon: "RetryAddon"):
|
||||
if self.enter_func:
|
||||
await self.enter_func(addon)
|
||||
|
||||
async def on_exit(self, addon: "RetryAddon"):
|
||||
if self.exit_func:
|
||||
await self.exit_func(addon)
|
||||
|
||||
def __str__(self):
|
||||
return f"<retry state {self.state}>"
|
||||
|
||||
|
||||
async def on_normal_enter(addon: "RetryAddon"):
|
||||
addon.reset_all()
|
||||
|
||||
|
||||
async def on_refresh_enter(addon: "RetryAddon"):
|
||||
addon.refresh_count += 1
|
||||
await addon.refresh_client()
|
||||
logger.warning(f"当前刷新次数: {addon.refresh_count}/{addon.max_refresh_count}")
|
||||
|
||||
|
||||
async def on_raise_enter(addon: "RetryAddon"):
|
||||
if random.random() < 0.1236:
|
||||
await addon.refresh_client()
|
||||
logger.warning("触发随机刷新")
|
||||
|
||||
|
||||
class RetryState(StateMixin, Enum):
|
||||
NROMAL = "NORMAL", on_normal_enter
|
||||
REFRESH = "REFRESH", on_refresh_enter
|
||||
BACKOFF = "BACKOFF"
|
||||
RAISE = "RAISE", on_raise_enter
|
||||
|
||||
def __str__(self):
|
||||
return f"<retry state {self.name}>"
|
||||
|
||||
|
||||
class RetryEvent(StrEnum):
|
||||
REQUEST_AND_SUCCESS = "request_and_success"
|
||||
REQUEST_AND_RAISE = "request_and_raise"
|
||||
IN_BACKOFF_TIME = "in_backoff_time"
|
||||
|
||||
def __str__(self):
|
||||
return f"<retry event {self.name}>"
|
||||
|
||||
|
||||
@dataclass
|
||||
class RetryAddon(Generic[TBilibili]):
|
||||
bilibili_platform: TBilibili | None = None
|
||||
refresh_count: int = 0
|
||||
backoff_count: int = 0
|
||||
backoff_finish_time: datetime | None = None
|
||||
|
||||
@property
|
||||
def max_refresh_count(cls):
|
||||
return 3
|
||||
|
||||
@property
|
||||
def max_backoff_count(self):
|
||||
return 3
|
||||
|
||||
@property
|
||||
def backoff_timedelta(self):
|
||||
return timedelta(minutes=5)
|
||||
|
||||
async def refresh_client(self):
|
||||
if self.bilibili_platform:
|
||||
await self.bilibili_platform.ctx.refresh_client()
|
||||
else:
|
||||
raise RuntimeError("未设置 bilibili_platform")
|
||||
|
||||
def reset_all(self):
|
||||
self.refresh_count = 0
|
||||
self.backoff_count = 0
|
||||
self.backoff_finish_time = None
|
||||
|
||||
def record_backoff_finish_time(self):
|
||||
self.backoff_finish_time = (
|
||||
datetime.now()
|
||||
+ self.backoff_timedelta * self.backoff_count**2
|
||||
# + timedelta(seconds=random.randint(1, 60)) # jitter
|
||||
)
|
||||
logger.trace(f"set backoff finish time: {self.backoff_finish_time}")
|
||||
|
||||
def is_in_backoff_time(self):
|
||||
"""是否在指数回避时间内"""
|
||||
# 指数回避
|
||||
if not self.backoff_finish_time:
|
||||
logger.trace("not have backoff_finish_time")
|
||||
return False
|
||||
|
||||
logger.trace(f"now: {datetime.now()}, backoff_finish_time: {self.backoff_finish_time}")
|
||||
return datetime.now() < self.backoff_finish_time
|
||||
|
||||
|
||||
async def action_log(from_: RetryState, event: RetryEvent, to: RetryState, addon: RetryAddon) -> ActionReturn:
|
||||
logger.debug(f"{from_} -> {to}, by {event}")
|
||||
|
||||
|
||||
async def action_up_to_backoff(from_: RetryState, event: RetryEvent, to: RetryState, addon: RetryAddon) -> ActionReturn:
|
||||
addon.refresh_count = 0
|
||||
addon.backoff_count += 1
|
||||
addon.record_backoff_finish_time()
|
||||
logger.warning(
|
||||
f"当前已回避次数: {addon.backoff_count}/{addon.max_backoff_count}, 本次回避时间至 {addon.backoff_finish_time}"
|
||||
)
|
||||
|
||||
|
||||
async def action_back_to_refresh(
|
||||
from_: RetryState, event: RetryEvent, to: RetryState, addon: RetryAddon
|
||||
) -> ActionReturn:
|
||||
addon.backoff_finish_time = None
|
||||
logger.debug("back to refresh state")
|
||||
|
||||
|
||||
async def is_reach_max_refresh(addon: RetryAddon) -> bool:
|
||||
return addon.refresh_count > addon.max_refresh_count - 1
|
||||
|
||||
|
||||
async def is_reach_max_backoff(addon: RetryAddon) -> bool:
|
||||
return addon.backoff_count > addon.max_backoff_count - 1
|
||||
|
||||
|
||||
async def is_out_backoff_time(addon: RetryAddon) -> bool:
|
||||
return not addon.is_in_backoff_time()
|
||||
|
||||
|
||||
RETRY_GRAPH: StateGraph[RetryState, RetryEvent, RetryAddon] = {
|
||||
"transitions": {
|
||||
RetryState.NROMAL: {
|
||||
RetryEvent.REQUEST_AND_SUCCESS: Transition(action_log, RetryState.NROMAL),
|
||||
RetryEvent.REQUEST_AND_RAISE: Transition(action_log, RetryState.REFRESH),
|
||||
},
|
||||
RetryState.REFRESH: {
|
||||
RetryEvent.REQUEST_AND_SUCCESS: Transition(action_log, RetryState.NROMAL),
|
||||
RetryEvent.REQUEST_AND_RAISE: [
|
||||
Transition(action_log, RetryState.REFRESH),
|
||||
Transition(
|
||||
action_up_to_backoff,
|
||||
RetryState.BACKOFF,
|
||||
{
|
||||
Condition(is_reach_max_refresh),
|
||||
Condition(is_reach_max_backoff, not_=True),
|
||||
},
|
||||
),
|
||||
Transition(
|
||||
action_log,
|
||||
RetryState.RAISE,
|
||||
{
|
||||
Condition(is_reach_max_refresh),
|
||||
Condition(is_reach_max_backoff),
|
||||
},
|
||||
),
|
||||
],
|
||||
},
|
||||
RetryState.BACKOFF: {
|
||||
RetryEvent.IN_BACKOFF_TIME: [
|
||||
Transition(action_log, RetryState.BACKOFF),
|
||||
Transition(action_back_to_refresh, RetryState.REFRESH, {Condition(is_out_backoff_time)}),
|
||||
],
|
||||
},
|
||||
RetryState.RAISE: {
|
||||
RetryEvent.REQUEST_AND_SUCCESS: Transition(action_log, RetryState.NROMAL),
|
||||
RetryEvent.REQUEST_AND_RAISE: Transition(action_log, RetryState.RAISE),
|
||||
},
|
||||
},
|
||||
"initial": RetryState.NROMAL,
|
||||
}
|
||||
|
||||
|
||||
class RetryFSM(FSM[RetryState, RetryEvent, RetryAddon[TBilibili]]):
|
||||
@override
|
||||
async def start(self, bls: TBilibili):
|
||||
self.addon.bilibili_platform = bls
|
||||
await super().start()
|
||||
|
||||
@override
|
||||
async def reset(self):
|
||||
self.addon.reset_all()
|
||||
await super().reset()
|
||||
|
||||
|
||||
# FIXME: 拿出来是方便测试了,但全局单例会导致所有被装饰的函数共享状态,有待改进
|
||||
_retry_fsm = RetryFSM(RETRY_GRAPH, RetryAddon["Bilibili"]())
|
||||
|
||||
|
||||
def retry_for_352(api_func: Callable[[TBilibili, Target], Awaitable[list[DynRawPost]]]):
|
||||
# _retry_fsm = RetryFSM(RETRY_GRAPH, RetryAddon[TBilibili]())
|
||||
|
||||
@wraps(api_func)
|
||||
async def wrapper(bls: TBilibili, *args, **kwargs) -> list[DynRawPost]:
|
||||
# nonlocal _retry_fsm
|
||||
if not _retry_fsm.started:
|
||||
await _retry_fsm.start(bls)
|
||||
|
||||
match _retry_fsm.current_state:
|
||||
case RetryState.NROMAL | RetryState.REFRESH | RetryState.RAISE:
|
||||
try:
|
||||
res = await api_func(bls, *args, **kwargs)
|
||||
except ApiCode352Error:
|
||||
logger.error("API 352 错误")
|
||||
await _retry_fsm.emit(RetryEvent.REQUEST_AND_RAISE)
|
||||
return []
|
||||
else:
|
||||
await _retry_fsm.emit(RetryEvent.REQUEST_AND_SUCCESS)
|
||||
return res
|
||||
case RetryState.BACKOFF:
|
||||
logger.warning("回避中,不请求")
|
||||
await _retry_fsm.emit(RetryEvent.IN_BACKOFF_TIME)
|
||||
return []
|
||||
case _:
|
||||
assert_never(_retry_fsm.current_state)
|
||||
|
||||
return wrapper
|
||||
@@ -1,5 +1,6 @@
|
||||
from random import randint
|
||||
import random
|
||||
from typing_extensions import override
|
||||
from typing import TYPE_CHECKING, TypeVar
|
||||
|
||||
from httpx import AsyncClient
|
||||
from nonebot import logger, require
|
||||
@@ -8,9 +9,14 @@ from playwright.async_api import Cookie
|
||||
from nonebot_bison.types import Target
|
||||
from nonebot_bison.utils import Site, ClientManager, http_client
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from .platforms import Bilibili
|
||||
|
||||
require("nonebot_plugin_htmlrender")
|
||||
from nonebot_plugin_htmlrender import get_browser
|
||||
|
||||
B = TypeVar("B", bound="Bilibili")
|
||||
|
||||
|
||||
class BilibiliClientManager(ClientManager):
|
||||
_client: AsyncClient
|
||||
@@ -22,7 +28,7 @@ class BilibiliClientManager(ClientManager):
|
||||
async def _get_cookies(self) -> list[Cookie]:
|
||||
browser = await get_browser()
|
||||
async with await browser.new_page() as page:
|
||||
await page.goto(f"https://space.bilibili.com/{randint(1, 1000)}/dynamic")
|
||||
await page.goto(f"https://space.bilibili.com/{random.randint(1, 1000)}/dynamic")
|
||||
await page.wait_for_load_state("load")
|
||||
cookies = await page.context.cookies()
|
||||
|
||||
@@ -62,7 +68,7 @@ class BilibiliClientManager(ClientManager):
|
||||
|
||||
class BilibiliSite(Site):
|
||||
name = "bilibili.com"
|
||||
schedule_setting = {"seconds": 30}
|
||||
schedule_setting = {"seconds": 50}
|
||||
schedule_type = "interval"
|
||||
client_mgr = BilibiliClientManager
|
||||
require_browser = True
|
||||
|
||||
Reference in New Issue
Block a user