🐛 Bilibili调度新增回避策略 (#573)

* 🐛 将Bilibili的调度速度降低到60s

*  增加回避策略

*  降低轮询间隔,增加回避次数,抛出阶段随机刷新

* ♻️ 更清晰的调度逻辑实现

* 🐛 兼容3.10的NamedTuple多继承

* ♻️ 合并重复逻辑

* ♻️ ctx放入fsm

* 🐛 测试并调整逻辑

* 🐛 补全类型标注

* ♻️ 添加Condition和State.on_exit/on_enter,以实现自动状态切换

*  调整测试

* 🐛 私有化命名方法

* 🔊 调整补充日志

* 🐛 添加测试后清理

* ✏️ fix typing typo
This commit is contained in:
Azide
2024-08-04 18:40:01 +08:00
committed by GitHub
parent 9e6a35585a
commit 38f0edcd25
7 changed files with 562 additions and 238 deletions
+168
View File
@@ -0,0 +1,168 @@
import sys
import asyncio
import inspect
from enum import Enum
from dataclasses import dataclass
from collections.abc import Set as AbstractSet
from collections.abc import Callable, Sequence, Awaitable, AsyncGenerator
from typing import TYPE_CHECKING, Any, Generic, TypeVar, Protocol, TypeAlias, TypedDict, NamedTuple, runtime_checkable
from nonebot import logger
class StrEnum(str, Enum): ...
TAddon = TypeVar("TAddon", contravariant=True)
TState = TypeVar("TState", contravariant=True)
TEvent = TypeVar("TEvent", contravariant=True)
TFSM = TypeVar("TFSM", bound="FSM", contravariant=True)
class StateError(Exception): ...
ActionReturn: TypeAlias = Any
@runtime_checkable
class SupportStateOnExit(Generic[TAddon], Protocol):
async def on_exit(self, addon: TAddon) -> None: ...
@runtime_checkable
class SupportStateOnEnter(Generic[TAddon], Protocol):
async def on_enter(self, addon: TAddon) -> None: ...
class Action(Generic[TState, TEvent, TAddon], Protocol):
async def __call__(self, from_: TState, event: TEvent, to: TState, addon: TAddon) -> ActionReturn: ...
ConditionFunc = Callable[[TAddon], Awaitable[bool]]
@dataclass(frozen=True)
class Condition(Generic[TAddon]):
call: ConditionFunc[TAddon]
not_: bool = False
def __repr__(self):
if inspect.isfunction(self.call) or inspect.isclass(self.call):
call_str = self.call.__name__
else:
call_str = repr(self.call)
return f"Condition(call={call_str})"
async def __call__(self, addon: TAddon) -> bool:
return (await self.call(addon)) ^ self.not_
# FIXME: Python 3.11+ 才支持 NamedTuple和TypedDict使用多继承添加泛型
# 所以什么时候 drop 3.10(?
if sys.version_info >= (3, 11) or TYPE_CHECKING:
class Transition(Generic[TState, TEvent, TAddon], NamedTuple):
action: Action[TState, TEvent, TAddon]
to: TState
conditions: AbstractSet[Condition[TAddon]] | None = None
class StateGraph(Generic[TState, TEvent, TAddon], TypedDict):
transitions: dict[
TState,
dict[
TEvent,
Transition[TState, TEvent, TAddon] | Sequence[Transition[TState, TEvent, TAddon]],
],
]
initial: TState
else:
class Transition(NamedTuple):
action: Action
to: Any
conditions: AbstractSet[Condition] | None = None
class StateGraph(TypedDict):
transitions: dict[Any, dict[Any, Transition]]
initial: Any
class FSM(Generic[TState, TEvent, TAddon]):
def __init__(self, graph: StateGraph[TState, TEvent, TAddon], addon: TAddon):
self.started = False
self.graph = graph
self.current_state = graph["initial"]
self.machine = self._core()
self.addon = addon
async def _core(self) -> AsyncGenerator[ActionReturn, TEvent]:
self.current_state = self.graph["initial"]
res = None
while True:
event = yield res
if not self.started:
raise StateError("FSM not started, please call start() first")
selected_transition = await self.cherry_pick(event)
logger.trace(f"exit state: {self.current_state}")
if isinstance(self.current_state, SupportStateOnExit):
logger.trace(f"do {self.current_state}.on_exit")
await self.current_state.on_exit(self.addon)
logger.trace(f"do action: {selected_transition.action}")
res = await selected_transition.action(self.current_state, event, selected_transition.to, self.addon)
logger.trace(f"enter state: {selected_transition.to}")
self.current_state = selected_transition.to
if isinstance(self.current_state, SupportStateOnEnter):
logger.trace(f"do {self.current_state}.on_enter")
await self.current_state.on_enter(self.addon)
async def start(self):
await anext(self.machine)
self.started = True
logger.trace(f"FSM started, initial state: {self.current_state}")
async def cherry_pick(self, event: TEvent) -> Transition[TState, TEvent, TAddon]:
transitions = self.graph["transitions"][self.current_state].get(event)
if transitions is None:
raise StateError(f"Invalid event {event} in state {self.current_state}")
if isinstance(transitions, Transition):
return transitions
elif isinstance(transitions, Sequence):
no_conds: list[Transition[TState, TEvent, TAddon]] = []
for transition in transitions:
if not transition.conditions:
no_conds.append(transition)
continue
values = await asyncio.gather(*(condition(self.addon) for condition in transition.conditions))
if all(values):
logger.trace(f"conditions {transition.conditions} passed")
return transition
else:
if no_conds:
return no_conds.pop()
else:
raise StateError(f"Invalid event {event} in state {self.current_state}")
else:
raise TypeError("Invalid transition type: {transitions}, expected Transition or Sequence[Transition]")
async def emit(self, event: TEvent):
return await self.machine.asend(event)
async def reset(self):
await self.machine.aclose()
self.started = False
del self.machine
self.machine = self._core()
logger.trace("FSM closed")
+2 -36
View File
@@ -1,16 +1,13 @@
import re
import json
from copy import deepcopy
from functools import wraps
from enum import Enum, unique
from typing import NamedTuple
from typing_extensions import Self
from typing import TypeVar, NamedTuple
from collections.abc import Callable, Awaitable
from yarl import URL
from nonebot import logger
from httpx import AsyncClient
from httpx import URL as HttpxURL
from pydantic import Field, BaseModel, ValidationError
from nonebot.compat import type_validate_json, type_validate_python
@@ -19,6 +16,7 @@ from nonebot_bison.compat import model_rebuild
from nonebot_bison.utils import text_similarity, decode_unicode_escapes
from nonebot_bison.types import Tag, Target, RawPost, ApiError, Category
from .retry import ApiCode352Error, retry_for_352
from .scheduler import BilibiliSite, BililiveSite, BiliBangumiSite
from ..platform import NewMessage, StatusChange, CategoryNotSupport, CategoryNotRecognize
from .models import (
@@ -38,38 +36,6 @@ from .models import (
LiveRecommendMajor,
)
B = TypeVar("B", bound="Bilibili")
MAX_352_RETRY_COUNT = 3
class ApiCode352Error(Exception):
def __init__(self, url: HttpxURL) -> None:
msg = f"api {url} error"
super().__init__(msg)
def retry_for_352(func: Callable[[B, Target], Awaitable[list[DynRawPost]]]):
retried_times = 0
@wraps(func)
async def wrapper(bls: B, *args, **kwargs):
nonlocal retried_times
try:
res = await func(bls, *args, **kwargs)
except ApiCode352Error as e:
if retried_times < MAX_352_RETRY_COUNT:
retried_times += 1
logger.warning(f"获取动态列表失败,尝试刷新cookie: {retried_times}/{MAX_352_RETRY_COUNT}")
await bls.ctx.refresh_client()
return [] # 返回空列表
else:
raise ApiError(e.args[0])
else:
retried_times = 0
return res
return wrapper
class _ProcessedText(NamedTuple):
title: str
+253
View File
@@ -0,0 +1,253 @@
import random
from enum import Enum
from functools import wraps
from dataclasses import dataclass
from datetime import datetime, timedelta
from collections.abc import Callable, Awaitable
from typing_extensions import override, assert_never
from typing import TYPE_CHECKING, Generic, Literal, TypeVar
from strenum import StrEnum
from nonebot.log import logger
from httpx import URL as HttpxURL
from nonebot_bison.types import Target
from .models import DynRawPost
from .fsm import FSM, Condition, StateGraph, Transition, ActionReturn
if TYPE_CHECKING:
from .platforms import Bilibili
# 不用 TypeVar 的话,使用装饰器 Pyright 会报错
TBilibili = TypeVar("TBilibili", bound="Bilibili")
class ApiCode352Error(Exception):
def __init__(self, url: HttpxURL) -> None:
msg = f"api {url} error"
super().__init__(msg)
# see https://docs.python.org/zh-cn/3/howto/enum.html#dataclass-support
@dataclass(frozen=True)
class StateMixin:
state: Literal["NORMAL", "REFRESH", "BACKOFF", "RAISE"]
enter_func: Callable[["RetryAddon"], Awaitable[None]] | None = None
exit_func: Callable[["RetryAddon"], Awaitable[None]] | None = None
async def on_enter(self, addon: "RetryAddon"):
if self.enter_func:
await self.enter_func(addon)
async def on_exit(self, addon: "RetryAddon"):
if self.exit_func:
await self.exit_func(addon)
def __str__(self):
return f"<retry state {self.state}>"
async def on_normal_enter(addon: "RetryAddon"):
addon.reset_all()
async def on_refresh_enter(addon: "RetryAddon"):
addon.refresh_count += 1
await addon.refresh_client()
logger.warning(f"当前刷新次数: {addon.refresh_count}/{addon.max_refresh_count}")
async def on_raise_enter(addon: "RetryAddon"):
if random.random() < 0.1236:
await addon.refresh_client()
logger.warning("触发随机刷新")
class RetryState(StateMixin, Enum):
NROMAL = "NORMAL", on_normal_enter
REFRESH = "REFRESH", on_refresh_enter
BACKOFF = "BACKOFF"
RAISE = "RAISE", on_raise_enter
def __str__(self):
return f"<retry state {self.name}>"
class RetryEvent(StrEnum):
REQUEST_AND_SUCCESS = "request_and_success"
REQUEST_AND_RAISE = "request_and_raise"
IN_BACKOFF_TIME = "in_backoff_time"
def __str__(self):
return f"<retry event {self.name}>"
@dataclass
class RetryAddon(Generic[TBilibili]):
bilibili_platform: TBilibili | None = None
refresh_count: int = 0
backoff_count: int = 0
backoff_finish_time: datetime | None = None
@property
def max_refresh_count(cls):
return 3
@property
def max_backoff_count(self):
return 3
@property
def backoff_timedelta(self):
return timedelta(minutes=5)
async def refresh_client(self):
if self.bilibili_platform:
await self.bilibili_platform.ctx.refresh_client()
else:
raise RuntimeError("未设置 bilibili_platform")
def reset_all(self):
self.refresh_count = 0
self.backoff_count = 0
self.backoff_finish_time = None
def record_backoff_finish_time(self):
self.backoff_finish_time = (
datetime.now()
+ self.backoff_timedelta * self.backoff_count**2
# + timedelta(seconds=random.randint(1, 60)) # jitter
)
logger.trace(f"set backoff finish time: {self.backoff_finish_time}")
def is_in_backoff_time(self):
"""是否在指数回避时间内"""
# 指数回避
if not self.backoff_finish_time:
logger.trace("not have backoff_finish_time")
return False
logger.trace(f"now: {datetime.now()}, backoff_finish_time: {self.backoff_finish_time}")
return datetime.now() < self.backoff_finish_time
async def action_log(from_: RetryState, event: RetryEvent, to: RetryState, addon: RetryAddon) -> ActionReturn:
logger.debug(f"{from_} -> {to}, by {event}")
async def action_up_to_backoff(from_: RetryState, event: RetryEvent, to: RetryState, addon: RetryAddon) -> ActionReturn:
addon.refresh_count = 0
addon.backoff_count += 1
addon.record_backoff_finish_time()
logger.warning(
f"当前已回避次数: {addon.backoff_count}/{addon.max_backoff_count}, 本次回避时间至 {addon.backoff_finish_time}"
)
async def action_back_to_refresh(
from_: RetryState, event: RetryEvent, to: RetryState, addon: RetryAddon
) -> ActionReturn:
addon.backoff_finish_time = None
logger.debug("back to refresh state")
async def is_reach_max_refresh(addon: RetryAddon) -> bool:
return addon.refresh_count > addon.max_refresh_count - 1
async def is_reach_max_backoff(addon: RetryAddon) -> bool:
return addon.backoff_count > addon.max_backoff_count - 1
async def is_out_backoff_time(addon: RetryAddon) -> bool:
return not addon.is_in_backoff_time()
RETRY_GRAPH: StateGraph[RetryState, RetryEvent, RetryAddon] = {
"transitions": {
RetryState.NROMAL: {
RetryEvent.REQUEST_AND_SUCCESS: Transition(action_log, RetryState.NROMAL),
RetryEvent.REQUEST_AND_RAISE: Transition(action_log, RetryState.REFRESH),
},
RetryState.REFRESH: {
RetryEvent.REQUEST_AND_SUCCESS: Transition(action_log, RetryState.NROMAL),
RetryEvent.REQUEST_AND_RAISE: [
Transition(action_log, RetryState.REFRESH),
Transition(
action_up_to_backoff,
RetryState.BACKOFF,
{
Condition(is_reach_max_refresh),
Condition(is_reach_max_backoff, not_=True),
},
),
Transition(
action_log,
RetryState.RAISE,
{
Condition(is_reach_max_refresh),
Condition(is_reach_max_backoff),
},
),
],
},
RetryState.BACKOFF: {
RetryEvent.IN_BACKOFF_TIME: [
Transition(action_log, RetryState.BACKOFF),
Transition(action_back_to_refresh, RetryState.REFRESH, {Condition(is_out_backoff_time)}),
],
},
RetryState.RAISE: {
RetryEvent.REQUEST_AND_SUCCESS: Transition(action_log, RetryState.NROMAL),
RetryEvent.REQUEST_AND_RAISE: Transition(action_log, RetryState.RAISE),
},
},
"initial": RetryState.NROMAL,
}
class RetryFSM(FSM[RetryState, RetryEvent, RetryAddon[TBilibili]]):
@override
async def start(self, bls: TBilibili):
self.addon.bilibili_platform = bls
await super().start()
@override
async def reset(self):
self.addon.reset_all()
await super().reset()
# FIXME: 拿出来是方便测试了,但全局单例会导致所有被装饰的函数共享状态,有待改进
_retry_fsm = RetryFSM(RETRY_GRAPH, RetryAddon["Bilibili"]())
def retry_for_352(api_func: Callable[[TBilibili, Target], Awaitable[list[DynRawPost]]]):
# _retry_fsm = RetryFSM(RETRY_GRAPH, RetryAddon[TBilibili]())
@wraps(api_func)
async def wrapper(bls: TBilibili, *args, **kwargs) -> list[DynRawPost]:
# nonlocal _retry_fsm
if not _retry_fsm.started:
await _retry_fsm.start(bls)
match _retry_fsm.current_state:
case RetryState.NROMAL | RetryState.REFRESH | RetryState.RAISE:
try:
res = await api_func(bls, *args, **kwargs)
except ApiCode352Error:
logger.error("API 352 错误")
await _retry_fsm.emit(RetryEvent.REQUEST_AND_RAISE)
return []
else:
await _retry_fsm.emit(RetryEvent.REQUEST_AND_SUCCESS)
return res
case RetryState.BACKOFF:
logger.warning("回避中,不请求")
await _retry_fsm.emit(RetryEvent.IN_BACKOFF_TIME)
return []
case _:
assert_never(_retry_fsm.current_state)
return wrapper
+9 -3
View File
@@ -1,5 +1,6 @@
from random import randint
import random
from typing_extensions import override
from typing import TYPE_CHECKING, TypeVar
from httpx import AsyncClient
from nonebot import logger, require
@@ -8,9 +9,14 @@ from playwright.async_api import Cookie
from nonebot_bison.types import Target
from nonebot_bison.utils import Site, ClientManager, http_client
if TYPE_CHECKING:
from .platforms import Bilibili
require("nonebot_plugin_htmlrender")
from nonebot_plugin_htmlrender import get_browser
B = TypeVar("B", bound="Bilibili")
class BilibiliClientManager(ClientManager):
_client: AsyncClient
@@ -22,7 +28,7 @@ class BilibiliClientManager(ClientManager):
async def _get_cookies(self) -> list[Cookie]:
browser = await get_browser()
async with await browser.new_page() as page:
await page.goto(f"https://space.bilibili.com/{randint(1, 1000)}/dynamic")
await page.goto(f"https://space.bilibili.com/{random.randint(1, 1000)}/dynamic")
await page.wait_for_load_state("load")
cookies = await page.context.cookies()
@@ -62,7 +68,7 @@ class BilibiliClientManager(ClientManager):
class BilibiliSite(Site):
name = "bilibili.com"
schedule_setting = {"seconds": 30}
schedule_setting = {"seconds": 50}
schedule_type = "interval"
client_mgr = BilibiliClientManager
require_browser = True