12 Commits

Author SHA1 Message Date
suyiiyii 75bbbb68e8 支持上报超时的任务 2024-12-16 21:48:22 +08:00
suyiiyii 96573ec86e 添加 request_histogram 和 render_histogram 2024-12-12 19:43:44 +08:00
suyiiyii 4c29cf10e4 添加 cookie_choose_counter 2024-12-12 14:54:12 +08:00
suyiiyii 80f924123d 使用 label 简化 request_counter 2024-12-12 13:08:27 +08:00
suyiiyii 073bd314fc ♻️ 整理代码结构 2024-12-12 11:28:32 +08:00
suyiiyii 63f59ada3c 初步添加 metrics 2024-12-12 11:28:28 +08:00
suyiiyii 9fef8028c5 添加 NewMessage 类型 Platform 的抓取数量日志提示 2024-12-12 11:27:50 +08:00
pre-commit-ci[bot] af1609730c ⬆️ auto update by pre-commit hooks (#660)
test-build / Build Frontend (push) Has been cancelled
test-build / Smoke-test Coverage (macos-latest, 3.10) (push) Has been cancelled
test-build / Smoke-test Coverage (macos-latest, 3.11) (push) Has been cancelled
test-build / Smoke-test Coverage (macos-latest, 3.12) (push) Has been cancelled
test-build / Smoke-test Coverage (ubuntu-latest, 3.10) (push) Has been cancelled
test-build / Smoke-test Coverage (ubuntu-latest, 3.11) (push) Has been cancelled
test-build / Smoke-test Coverage (ubuntu-latest, 3.12) (push) Has been cancelled
test-build / Smoke-test Coverage (windows-latest, 3.10) (push) Has been cancelled
test-build / Smoke-test Coverage (windows-latest, 3.11) (push) Has been cancelled
test-build / Smoke-test Coverage (windows-latest, 3.12) (push) Has been cancelled
test-build / All-test Coverage (macos-latest, 3.10) (push) Has been cancelled
test-build / All-test Coverage (macos-latest, 3.11) (push) Has been cancelled
test-build / All-test Coverage (macos-latest, 3.12) (push) Has been cancelled
test-build / All-test Coverage (ubuntu-latest, 3.10) (push) Has been cancelled
test-build / All-test Coverage (ubuntu-latest, 3.11) (push) Has been cancelled
test-build / All-test Coverage (ubuntu-latest, 3.12) (push) Has been cancelled
test-build / All-test Coverage (windows-latest, 3.10) (push) Has been cancelled
test-build / All-test Coverage (windows-latest, 3.11) (push) Has been cancelled
test-build / All-test Coverage (windows-latest, 3.12) (push) Has been cancelled
pydantic1-compat-test / pydantic1 test (ubuntu-latest, 3.11) (push) Has been cancelled
Ruff Lint / Ruff Lint (push) Has been cancelled
test-build / Docker main (push) Has been cancelled
test-build / Docker main sentry (push) Has been cancelled
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-12-05 18:28:38 +08:00
github-actions[bot] 37958b0850 📝 Update changelog 2024-12-05 08:09:09 +00:00
suyiiyii a48ea0e947 🐛 修复 cookie 模块 type hint (#658) 2024-12-05 16:08:42 +08:00
github-actions[bot] d4048716b2 📝 Update changelog 2024-12-04 12:09:30 +00:00
Azide 22bf05949d 🐛 B站转发动态补充 DeletedItem 类型解析 (#659)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-12-04 20:09:01 +08:00
14 changed files with 376 additions and 36 deletions
+2 -2
View File
@@ -7,7 +7,7 @@ ci:
autoupdate_commit_msg: ":arrow_up: auto update by pre-commit hooks"
repos:
- repo: https://github.com/astral-sh/ruff-pre-commit
rev: v0.7.2
rev: v0.8.1
hooks:
- id: ruff
args: [--fix, --exit-non-zero-on-fix]
@@ -34,7 +34,7 @@ repos:
stages: [pre-commit]
- repo: https://github.com/pre-commit/mirrors-eslint
rev: v9.14.0
rev: v9.16.0
hooks:
- id: eslint
additional_dependencies:
+2
View File
@@ -9,6 +9,8 @@
### Bug 修复
- :bug: 修复 cookie 模块 type hint [@suyiiyii](https://github.com/suyiiyii) ([#658](https://github.com/MountainDash/nonebot-bison/pull/658))
- :bug: B站转发动态补充 DeletedItem 类型解析 [@AzideCupric](https://github.com/AzideCupric) ([#659](https://github.com/MountainDash/nonebot-bison/pull/659))
- :bug: 小刻食堂cdn使用https [@phidiaLam](https://github.com/phidiaLam) ([#650](https://github.com/MountainDash/nonebot-bison/pull/650))
## v0.9.5
+2
View File
@@ -12,6 +12,7 @@ from nonebot.adapters.onebot.v11.event import PrivateMessageEvent
from .api import router as api_router
from ..plugin_config import plugin_config
from .token_manager import token_manager as tm
from ..metrics import metrics_router as metrics_router
if TYPE_CHECKING:
from nonebot.drivers.fastapi import Driver
@@ -46,6 +47,7 @@ def init_fastapi(driver: "Driver"):
description="nonebot-bison webui and api",
)
nonebot_app.include_router(api_router)
nonebot_app.include_router(metrics_router)
nonebot_app.mount("/", SinglePageApplication(directory=static_path), name="bison-frontend")
app = driver.server_app
+42
View File
@@ -0,0 +1,42 @@
"""Prometheus metrics definitions and the ``/api/metrics`` endpoint for nonebot-bison."""
import time
from fastapi import APIRouter
from starlette.responses import Response
from prometheus_client import CONTENT_TYPE_LATEST, Gauge, Counter, Histogram, generate_latest

# Request counter: one increment per fetch attempt, labelled by site/platform/target;
# the `success` label records whether the fetch completed (see scheduler usage).
request_counter = Counter(
    "bison_request_counter", "The number of requests", ["site_name", "platform_name", "target", "success"]
)

# Sent counter: one increment per subscription unit whose messages were dispatched.
sent_counter = Counter("bison_sent_counter", "The number of sent messages", ["site_name", "platform_name", "target"])

# Counts how often each cookie is selected when a client is built for a target.
cookie_choose_counter = Counter(
    "bison_cookie_choose_counter", "The number of cookie choose", ["site_name", "target", "cookie_id"]
)

# Latency (seconds) of fetching new posts from the source platform.
request_histogram = Histogram(
    "bison_request_histogram",
    "The time of platform used to request the source",
    ["site_name", "platform_name"],
    buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60],
)

# Latency (seconds) of rendering and sending the fetched posts.
render_histogram = Histogram(
    "bison_render_histogram",
    "The time of theme used to render",
    ["site_name", "platform_name"],
    buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60],
)

# Process start time as a unix timestamp; set once at import so uptime can be
# derived in Prometheus as `time() - bison_start_time`.
start_time = Gauge("bison_start_time", "The start time of the program")
start_time.set(time.time())

metrics_router = APIRouter(prefix="/api/metrics", tags=["metrics"])


@metrics_router.get("")
async def metrics():
    """Expose every registered metric in the Prometheus text exposition format."""
    return Response(media_type=CONTENT_TYPE_LATEST, content=generate_latest())
+9 -1
View File
@@ -131,7 +131,7 @@ class PostAPI(APIBase):
basic: "PostAPI.Basic"
id_str: str
modules: "PostAPI.Modules"
orig: "PostAPI.Item | None" = None
orig: "PostAPI.Item | PostAPI.DeletedItem | None" = None
topic: "PostAPI.Topic | None" = None
type: DynamicType
@@ -141,6 +141,14 @@ class PostAPI(APIBase):
modules: "PostAPI.Modules"
type: Literal["DYNAMIC_TYPE_NONE"]
def to_item(self) -> "PostAPI.Item":
return PostAPI.Item(
basic=self.basic,
id_str="",
modules=self.modules,
type=self.type,
)
class Data(Base):
items: "list[PostAPI.Item | PostAPI.DeletedItem] | None" = None
+23 -6
View File
@@ -32,6 +32,7 @@ from .models import (
DynamicType,
ArticleMajor,
CoursesMajor,
DeletedMajor,
UnknownMajor,
LiveRecommendMajor,
)
@@ -243,6 +244,13 @@ class Bilibili(NewMessage):
pics=[courses.cover],
url=URL(courses.jump_url).with_scheme("https").human_repr(),
)
case DeletedMajor(none=none):
return _ParsedMojarPost(
title="",
content=none.tips,
pics=[],
url=None,
)
case UnknownMajor(type=unknown_type):
raise CategoryNotSupport(unknown_type)
case None: # 没有major的情况
@@ -259,10 +267,13 @@ class Bilibili(NewMessage):
parsed_raw_post = self.pre_parse_by_mojar(raw_post)
parsed_raw_repost = None
if self._do_get_category(raw_post.type) == Category(5):
if raw_post.orig:
parsed_raw_repost = self.pre_parse_by_mojar(raw_post.orig)
else:
logger.warning(f"转发动态{raw_post.id_str}没有原动态")
match raw_post.orig:
case PostAPI.Item() as orig:
parsed_raw_repost = self.pre_parse_by_mojar(orig)
case PostAPI.DeletedItem() as orig:
parsed_raw_repost = self.pre_parse_by_mojar(orig.to_item())
case None:
logger.warning(f"转发动态{raw_post.id_str}没有原动态")
post = Post(
self,
@@ -275,8 +286,14 @@ class Bilibili(NewMessage):
nickname=raw_post.modules.module_author.name,
)
if parsed_raw_repost:
orig = raw_post.orig
assert orig
match raw_post.orig:
case PostAPI.Item() as orig:
orig = orig
case PostAPI.DeletedItem() as orig:
orig = orig.to_item()
case None:
raise ValueError("转发动态没有原动态")
post.repost = Post(
self,
content=decode_unicode_escapes(parsed_raw_repost.content),
+1
View File
@@ -315,6 +315,7 @@ class NewMessage(MessageProcess, abstract=True):
res.append(raw_post)
store.exists_posts.add(post_id)
self.set_stored_data(target, store)
logger.trace(f"本次抓取 {len(raw_post_list)} 条,过滤后 {len(filtered_post)} 条,新消息 {len(res)}")
return res
async def _handle_new_post(
+2 -1
View File
@@ -2,6 +2,7 @@ import reprlib
from io import BytesIO
from pathlib import Path
from typing import TYPE_CHECKING
from collections.abc import Sequence
from dataclasses import fields, dataclass
from nonebot.log import logger
@@ -30,7 +31,7 @@ class Post(AbstractPost, PlainContentSupport):
"""文本内容"""
title: str | None = None
"""标题"""
images: list[str | bytes | Path | BytesIO] | None = None
images: Sequence[str | bytes | Path | BytesIO] | None = None
"""图片列表"""
timestamp: float | None = None
"""发布/获取时间戳, 秒"""
+53 -24
View File
@@ -1,11 +1,14 @@
from dataclasses import dataclass
from collections import defaultdict
from collections.abc import Callable
from nonebot.log import logger
from nonebot_plugin_apscheduler import scheduler
from apscheduler.events import EVENT_JOB_MAX_INSTANCES
from nonebot_plugin_saa.utils.exceptions import NoBotFound
from nonebot_bison.utils import ClientManager
from nonebot_bison.metrics import sent_counter, request_counter, render_histogram, request_histogram
from ..config import config
from ..send import send_msgs
@@ -23,6 +26,15 @@ class Schedulable:
use_batch: bool = False
def handle_time_exceeded(event):
# event.job_id 是该任务在 apscheduler 的 id, 进而可以获得该任务的函数,再获取该函数绑定的对象
logger.warning(f"{scheduler.get_job(event.job_id).func.__self__.name} 抓取执行超时")
scheduler.get_job(event.job_id).func.__self__.metrics_report(False)
scheduler.add_listener(handle_time_exceeded, EVENT_JOB_MAX_INSTANCES)
class Scheduler:
schedulable_list: list[Schedulable] # for load weigth from db
batch_api_target_cache: dict[str, dict[Target, list[Target]]] # platform_name -> (target -> [target])
@@ -55,6 +67,7 @@ class Scheduler:
self.platform_name_list = platform_name_list
self.pre_weight_val = 0 # 轮调度中“本轮”增加权重和的初值
self.metrics_report: Callable[[bool], None] | None = None # 作为函数变量,允许外部调用来上报此次抓取是否成功
logger.info(
f"register scheduler for {self.name} with "
f"{self.scheduler_config.schedule_type} {self.scheduler_config.schedule_setting}"
@@ -94,20 +107,32 @@ class Scheduler:
context = ProcessContext(self.client_mgr)
success_flag = False
platform_obj = platform_manager[schedulable.platform_name](context)
# 通过闭包的形式,将此次抓取任务的信息保存为函数变量,允许在该任务无法正常结束时由外部上报
self.metrics_report = lambda x: request_counter.labels(
platform_name=schedulable.platform_name,
site_name=platform_obj.site.name,
target=schedulable.target,
success=x,
).inc()
try:
platform_obj = platform_manager[schedulable.platform_name](context)
if schedulable.use_batch:
batch_targets = self.batch_api_target_cache[schedulable.platform_name][schedulable.target]
sub_units = []
for batch_target in batch_targets:
userinfo = await config.get_platform_target_subscribers(schedulable.platform_name, batch_target)
sub_units.append(SubUnit(batch_target, userinfo))
to_send = await platform_obj.do_batch_fetch_new_post(sub_units)
else:
send_userinfo_list = await config.get_platform_target_subscribers(
schedulable.platform_name, schedulable.target
)
to_send = await platform_obj.do_fetch_new_post(SubUnit(schedulable.target, send_userinfo_list))
with request_histogram.labels(
platform_name=schedulable.platform_name, site_name=platform_obj.site.name
).time():
if schedulable.use_batch:
batch_targets = self.batch_api_target_cache[schedulable.platform_name][schedulable.target]
sub_units = []
for batch_target in batch_targets:
userinfo = await config.get_platform_target_subscribers(schedulable.platform_name, batch_target)
sub_units.append(SubUnit(batch_target, userinfo))
to_send = await platform_obj.do_batch_fetch_new_post(sub_units)
else:
send_userinfo_list = await config.get_platform_target_subscribers(
schedulable.platform_name, schedulable.target
)
to_send = await platform_obj.do_fetch_new_post(SubUnit(schedulable.target, send_userinfo_list))
success_flag = True
except SkipRequestException as err:
logger.debug(f"skip request: {err}")
except Exception as err:
@@ -117,19 +142,23 @@ class Scheduler:
err.args += (records,)
raise
self.metrics_report(success_flag)
if not to_send:
return
for user, send_list in to_send:
for send_post in send_list:
logger.info(f"send to {user}: {send_post}")
try:
await send_msgs(
user,
await send_post.generate_messages(),
)
except NoBotFound:
logger.warning("no bot connected")
sent_counter.labels(
platform_name=schedulable.platform_name, site_name=platform_obj.site.name, target=schedulable.target
).inc()
with render_histogram.labels(platform_name=schedulable.platform_name, site_name=platform_obj.site.name).time():
for user, send_list in to_send:
for send_post in send_list:
logger.info(f"send to {user}: {send_post}")
try:
await send_msgs(
user,
await send_post.generate_messages(),
)
except NoBotFound:
logger.warning("no bot connected")
def insert_new_schedulable(self, platform_name: str, target: Target):
self.pre_weight_val += 1000
+2
View File
@@ -13,6 +13,7 @@ from ..types import Target
from ..config import config
from .http import http_client
from ..config.db_model import Cookie
from ..metrics import cookie_choose_counter
class ClientManager(ABC):
@@ -131,6 +132,7 @@ class CookieClientManager(ClientManager):
"""获取 client,根据 target 选择 cookie"""
client = http_client()
cookie = await self._choose_cookie(target)
cookie_choose_counter.labels(site_name=self._site_name, target=target, cookie_id=cookie.id).inc()
if cookie.is_universal:
logger.trace(f"平台 {self._site_name} 未获取到用户cookie, 使用匿名cookie")
else:
Generated
+21 -2
View File
@@ -1,4 +1,4 @@
# This file is automatically @generated by Poetry 1.8.3 and should not be changed by hand.
# This file is automatically @generated by Poetry 1.8.2 and should not be changed by hand.
[[package]]
name = "aiodns"
@@ -3207,6 +3207,25 @@ type = "legacy"
url = "https://pypi.org/simple"
reference = "offical-source"
[[package]]
name = "prometheus-client"
version = "0.21.0"
description = "Python client for the Prometheus monitoring system."
optional = false
python-versions = ">=3.8"
files = [
{file = "prometheus_client-0.21.0-py3-none-any.whl", hash = "sha256:4fa6b4dd0ac16d58bb587c04b1caae65b8c5043e85f778f42f5f632f6af2e166"},
{file = "prometheus_client-0.21.0.tar.gz", hash = "sha256:96c83c606b71ff2b0a433c98889d275f51ffec6c5e267de37c7a2b5c9aa9233e"},
]
[package.extras]
twisted = ["twisted"]
[package.source]
type = "legacy"
url = "https://pypi.org/simple"
reference = "offical-source"
[[package]]
name = "prompt-toolkit"
version = "3.0.47"
@@ -5166,4 +5185,4 @@ yaml = []
[metadata]
lock-version = "2.0"
python-versions = ">=3.10,<4.0.0"
content-hash = "3d3bd947b91b8053fc5fed4873b6d0ed4017a5be118611cd93d30ffa265e04fb"
content-hash = "5e4ea27ea11e18451d1ad0d4bbf3b44da9334c20728889041a4c908addbfdda8"
+1
View File
@@ -42,6 +42,7 @@ yarl = ">=1.11.1"
hishel = "^0.0.30"
expiringdictx = "^1.1.0"
rapidfuzz = "^3.9.7"
prometheus-client = "^0.21.0"
[tool.poetry.group.dev.dependencies]
black = ">=24.8.0,<25.0"
+204
View File
@@ -4053,6 +4053,210 @@
},
"type": "DYNAMIC_TYPE_DRAW",
"visible": true
},
{
"basic": {
"comment_id_str": "965806534205374473",
"comment_type": 17,
"like_icon": {
"action_url": "https://i0.hdslb.com/bfs/garb/item/8860c7c01179f9984f88fb61bc55cab9dc1d28cb.bin",
"end_url": "",
"id": 33772,
"start_url": ""
},
"rid_str": "965806534205374473"
},
"id_str": "965806534205374473",
"modules": {
"module_author": {
"avatar": {
"container_size": {
"height": 1.35,
"width": 1.35
},
"fallback_layers": {
"is_critical_group": true,
"layers": [
{
"general_spec": {
"pos_spec": {
"axis_x": 0.675,
"axis_y": 0.675,
"coordinate_pos": 2
},
"render_spec": {
"opacity": 1
},
"size_spec": {
"height": 1,
"width": 1
}
},
"layer_config": {
"is_critical": true,
"tags": {
"AVATAR_LAYER": {},
"GENERAL_CFG": {
"config_type": 1,
"general_config": {
"web_css_style": {
"borderRadius": "50%"
}
}
}
}
},
"resource": {
"res_image": {
"image_src": {
"placeholder": 6,
"remote": {
"bfs_style": "widget-layer-avatar",
"url": "https://i0.hdslb.com/bfs/face/5d255a47deae6c5214f93cdbbf3b01f23cac4a5e.jpg"
},
"src_type": 1
}
},
"res_type": 3
},
"visible": true
}
]
},
"mid": "6050499"
},
"face": "https://i0.hdslb.com/bfs/face/5d255a47deae6c5214f93cdbbf3b01f23cac4a5e.jpg",
"face_nft": false,
"following": null,
"jump_url": "//space.bilibili.com/6050499/dynamic",
"label": "",
"mid": 6050499,
"name": "血毒嘿咻",
"official_verify": {
"desc": "",
"type": -1
},
"pendant": {
"expire": 0,
"image": "",
"image_enhance": "",
"image_enhance_frame": "",
"n_pid": 0,
"name": "",
"pid": 0
},
"pub_action": "",
"pub_location_text": "",
"pub_time": "08月15日",
"pub_ts": 1723707757,
"type": "AUTHOR_TYPE_NORMAL",
"vip": {
"avatar_subscript": 0,
"avatar_subscript_url": "",
"due_date": 1648224000000,
"label": {
"bg_color": "",
"bg_style": 0,
"border_color": "",
"img_label_uri_hans": "",
"img_label_uri_hans_static": "https://i0.hdslb.com/bfs/vip/d7b702ef65a976b20ed854cbd04cb9e27341bb79.png",
"img_label_uri_hant": "",
"img_label_uri_hant_static": "https://i0.hdslb.com/bfs/activity-plat/static/20220614/e369244d0b14644f5e1a06431e22a4d5/KJunwh19T5.png",
"label_theme": "",
"path": "",
"text": "",
"text_color": "",
"use_img_label": true
},
"nickname_color": "",
"status": 0,
"theme_type": 0,
"type": 1
}
},
"module_dynamic": {
"additional": null,
"desc": {
"rich_text_nodes": [
{
"orig_text": "转发动态",
"text": "转发动态",
"type": "RICH_TEXT_NODE_TYPE_TEXT"
}
],
"text": "转发动态"
},
"major": null,
"topic": null
},
"module_more": {
"three_point_items": [
{
"label": "举报",
"type": "THREE_POINT_REPORT"
}
]
},
"module_stat": {
"comment": {
"count": 0,
"forbidden": false
},
"forward": {
"count": 0,
"forbidden": false
},
"like": {
"count": 0,
"forbidden": false,
"status": false
}
}
},
"orig": {
"basic": {
"comment_id_str": "",
"comment_type": 0,
"like_icon": {
"action_url": "",
"end_url": "",
"id": 0,
"start_url": ""
},
"rid_str": ""
},
"id_str": null,
"modules": {
"module_author": {
"face": "",
"face_nft": false,
"following": false,
"jump_url": "",
"label": "",
"mid": 0,
"name": "",
"pub_action": "",
"pub_time": "",
"pub_ts": 0,
"type": "AUTHOR_TYPE_NORMAL"
},
"module_dynamic": {
"additional": null,
"desc": null,
"major": {
"none": {
"tips": "源动态已被作者删除"
},
"type": "MAJOR_TYPE_NONE"
},
"topic": null
}
},
"type": "DYNAMIC_TYPE_NONE",
"visible": true
},
"type": "DYNAMIC_TYPE_FORWARD",
"visible": true
}
],
"offset": "915793667264872453",
+12
View File
@@ -407,6 +407,18 @@ async def test_dynamic_forward(bilibili: "Bilibili", bing_dy_list: list):
assert rp.url == "https://t.bilibili.com/915793667264872453"
async def test_dynamic_forword_deleted(bilibili: "Bilibili", bing_dy_list: list):
    # NOTE(review): "forword" in the name looks like a typo for "forward" — rename
    # in a follow-up (safe for pytest collection, but out of scope for this doc pass).
    """A forwarded dynamic whose original post was deleted still parses cleanly.

    Fixture index 12 is a DYNAMIC_TYPE_FORWARD whose ``orig`` is a
    DYNAMIC_TYPE_NONE (DeletedItem) placeholder; the parser is expected to
    surface the deletion notice as the repost content instead of failing.
    """
    from nonebot_bison.post import Post

    post: Post = await bilibili.parse(bing_dy_list[12])
    # The outer forwarding dynamic keeps its own text and permalink.
    assert post.content == "转发动态"
    assert post.url == "https://t.bilibili.com/965806534205374473"
    # The repost comes from the deleted-item placeholder: no url, no title,
    # and its content is the deletion notice string returned by the API.
    assert (repost := post.repost)
    assert repost.url is None
    assert not repost.title
    assert repost.content == "源动态已被作者删除"
@pytest.mark.asyncio
@respx.mock
async def test_fetch_new_without_dynamic(bilibili, dummy_user_subinfo, without_dynamic):