Merge pull request #110 from felinae98/bilibili-cookie

feat: 临时解决 bilibili 的反爬机制
This commit is contained in:
felinae98 2022-10-07 01:42:51 +08:00 committed by GitHub
commit 997b711288
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
4 changed files with 111 additions and 56 deletions

View File

@ -1,14 +1,45 @@
import functools
import json import json
import re import re
from datetime import datetime, timedelta
from typing import Any, Optional from typing import Any, Optional
import httpx
from nonebot.log import logger
from ..post import Post from ..post import Post
from ..types import Category, RawPost, Tag, Target from ..types import Category, RawPost, Tag, Target
from ..utils import http_client from ..utils.http import http_args
from .platform import CategoryNotSupport, NewMessage, StatusChange from .platform import CategoryNotSupport, NewMessage, StatusChange
class Bilibili(NewMessage): class _BilibiliClient:
_http_client: httpx.AsyncClient
_client_refresh_time: Optional[datetime]
cookie_expire_time = timedelta(hours=5)
async def _init_session(self):
    """Create a fresh HTTP client and warm it up against the bilibili homepage.

    A new ``httpx.AsyncClient`` is built from the shared ``http_args`` and
    stored on ``self._http_client``; the homepage is then requested once
    (presumably so that cookies set by that response are kept on the client
    for later API calls -- confirm against httpx cookie persistence).

    On a 200 response ``self._client_refresh_time`` is set to the current
    time. On any other status a warning is logged and the refresh time is
    not updated, so the next ``_refresh_client`` check will try again.
    """
    self._http_client = httpx.AsyncClient(**http_args)
    res = await self._http_client.get("https://www.bilibili.com/")
    if res.status_code != 200:
        # Best-effort: never drop into an interactive debugger here -- this
        # runs inside a long-lived bot process with no attached terminal.
        logger.warning("unable to refresh temp cookie")
        return
    self._client_refresh_time = datetime.now()
async def _refresh_client(self):
    """Re-initialise the HTTP session when it is missing or too old.

    ``_init_session`` is awaited when any of the following holds:

    * no refresh time has been recorded yet (attribute unset or ``None``);
    * more than ``cookie_expire_time`` has elapsed since the last refresh;
    * ``self._http_client`` is ``None``.
    """
    last_refresh = getattr(self, "_client_refresh_time", None)
    if last_refresh is None:
        needs_init = True
    elif datetime.now() - last_refresh > self.cookie_expire_time:
        needs_init = True
    else:
        # Only consulted once a refresh time exists, i.e. after a session
        # has been initialised successfully at least once.
        needs_init = self._http_client is None

    if needs_init:
        await self._init_session()
class Bilibili(_BilibiliClient, NewMessage):
categories = { categories = {
1: "一般动态", 1: "一般动态",
@ -28,15 +59,23 @@ class Bilibili(NewMessage):
has_target = True has_target = True
parse_target_promot = "请输入用户主页的链接" parse_target_promot = "请输入用户主页的链接"
def ensure_client(fun):
    """Decorate an async method so the HTTP client is refreshed before it runs.

    The returned coroutine function first awaits ``self._refresh_client()``
    and then awaits ``fun`` with the same arguments, returning its result.
    ``functools.wraps`` copies the metadata of ``fun`` onto the wrapper.
    """

    async def _with_fresh_client(self, *args, **kwargs):
        await self._refresh_client()
        result = await fun(self, *args, **kwargs)
        return result

    return functools.wraps(fun)(_with_fresh_client)
@ensure_client
async def get_target_name(self, target: Target) -> Optional[str]: async def get_target_name(self, target: Target) -> Optional[str]:
async with http_client() as client: res = await self._http_client.get(
res = await client.get( "https://api.bilibili.com/x/space/acc/info", params={"mid": target}
"https://api.bilibili.com/x/space/acc/info", params={"mid": target} )
) res_data = json.loads(res.text)
res_data = json.loads(res.text) if res_data["code"]:
if res_data["code"]: return None
return None return res_data["data"]["name"]
return res_data["data"]["name"]
async def parse_target(self, target_text: str) -> Target: async def parse_target(self, target_text: str) -> Target:
if re.match(r"\d+", target_text): if re.match(r"\d+", target_text):
@ -48,19 +87,19 @@ class Bilibili(NewMessage):
else: else:
raise self.ParseTargetException() raise self.ParseTargetException()
@ensure_client
async def get_sub_list(self, target: Target) -> list[RawPost]: async def get_sub_list(self, target: Target) -> list[RawPost]:
async with http_client() as client: params = {"host_uid": target, "offset": 0, "need_top": 0}
params = {"host_uid": target, "offset": 0, "need_top": 0} res = await self._http_client.get(
res = await client.get( "https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history",
"https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history", params=params,
params=params, timeout=4.0,
timeout=4.0, )
) res_dict = json.loads(res.text)
res_dict = json.loads(res.text) if res_dict["code"] == 0:
if res_dict["code"] == 0: return res_dict["data"].get("cards")
return res_dict["data"].get("cards") else:
else: return []
return []
def get_id(self, post: RawPost) -> Any: def get_id(self, post: RawPost) -> Any:
return post["desc"]["dynamic_id"] return post["desc"]["dynamic_id"]
@ -157,7 +196,7 @@ class Bilibili(NewMessage):
return Post("bilibili", text=text, url=url, pics=pic, target_name=target_name) return Post("bilibili", text=text, url=url, pics=pic, target_name=target_name)
class Bilibililive(StatusChange): class Bilibililive(_BilibiliClient, StatusChange):
# Author : Sichongzou # Author : Sichongzou
# Date : 2022-5-18 8:54 # Date : 2022-5-18 8:54
# Description : bilibili开播提醒 # Description : bilibili开播提醒
@ -172,36 +211,44 @@ class Bilibililive(StatusChange):
name = "Bilibili直播" name = "Bilibili直播"
has_target = True has_target = True
async def get_target_name(self, target: Target) -> Optional[str]: def ensure_client(fun):
async with http_client() as client: @functools.wraps(fun)
res = await client.get( async def wrapped(self, *args, **kwargs):
"https://api.bilibili.com/x/space/acc/info", params={"mid": target} await self._refresh_client()
) return await fun(self, *args, **kwargs)
res_data = json.loads(res.text)
if res_data["code"]:
return None
return res_data["data"]["name"]
return wrapped
@ensure_client
async def get_target_name(self, target: Target) -> Optional[str]:
res = await self._http_client.get(
"https://api.bilibili.com/x/space/acc/info", params={"mid": target}
)
res_data = json.loads(res.text)
if res_data["code"]:
return None
return res_data["data"]["name"]
@ensure_client
async def get_status(self, target: Target): async def get_status(self, target: Target):
async with http_client() as client: params = {"mid": target}
params = {"mid": target} res = await self._http_client.get(
res = await client.get( "https://api.bilibili.com/x/space/acc/info",
"https://api.bilibili.com/x/space/acc/info", params=params,
params=params, timeout=4.0,
timeout=4.0, )
) res_dict = json.loads(res.text)
res_dict = json.loads(res.text) if res_dict["code"] == 0:
if res_dict["code"] == 0: info = {}
info = {} info["uid"] = res_dict["data"]["mid"]
info["uid"] = res_dict["data"]["mid"] info["uname"] = res_dict["data"]["name"]
info["uname"] = res_dict["data"]["name"] info["live_state"] = res_dict["data"]["live_room"]["liveStatus"]
info["live_state"] = res_dict["data"]["live_room"]["liveStatus"] info["room_id"] = res_dict["data"]["live_room"]["roomid"]
info["room_id"] = res_dict["data"]["live_room"]["roomid"] info["title"] = res_dict["data"]["live_room"]["title"]
info["title"] = res_dict["data"]["live_room"]["title"] info["cover"] = res_dict["data"]["live_room"]["cover"]
info["cover"] = res_dict["data"]["live_room"]["cover"] return info
return info else:
else: raise self.FetchError()
raise self.FetchError()
def compare_status(self, target: Target, old_status, new_status) -> list[RawPost]: def compare_status(self, target: Target, old_status, new_status) -> list[RawPost]:
if ( if (

View File

@ -4,8 +4,9 @@ import httpx
from ..plugin_config import plugin_config from ..plugin_config import plugin_config
http_client = functools.partial( http_args = {
httpx.AsyncClient, "proxies": plugin_config.bison_proxy or None,
proxies=plugin_config.bison_proxy or None, "headers": {"user-agent": plugin_config.bison_ua},
headers={"user-agent": plugin_config.bison_ua}, }
)
http_client = functools.partial(httpx.AsyncClient, **http_args)

View File

@ -26,6 +26,10 @@ async def test_fetch_bilibili_live_status(bili_live, dummy_user_subinfo):
"https://api.bilibili.com/x/space/acc/info?mid=13164144" "https://api.bilibili.com/x/space/acc/info?mid=13164144"
) )
bili_live_router.mock(return_value=Response(200, json=mock_bili_live_status)) bili_live_router.mock(return_value=Response(200, json=mock_bili_live_status))
bilibili_main_page_router = respx.get("https://www.bilibili.com/")
bilibili_main_page_router.mock(return_value=Response(200))
target = "13164144" target = "13164144"
res = await bili_live.fetch_new_post(target, [dummy_user_subinfo]) res = await bili_live.fetch_new_post(target, [dummy_user_subinfo])
assert bili_live_router.called assert bili_live_router.called

View File

@ -440,6 +440,9 @@ async def test_add_with_bilibili_target_parser(app: App):
return_value=Response(200, json=get_json("bilibili_arknights_profile.json")) return_value=Response(200, json=get_json("bilibili_arknights_profile.json"))
) )
bilibili_main_page_router = respx.get("https://www.bilibili.com/")
bilibili_main_page_router.mock(return_value=Response(200))
async with app.test_matcher(add_sub_matcher) as ctx: async with app.test_matcher(add_sub_matcher) as ctx:
bot = ctx.create_bot() bot = ctx.create_bot()
event_1 = fake_group_message_event( event_1 = fake_group_message_event(