12 Commits

Author SHA1 Message Date
suyiiyii 75bbbb68e8 支持上报超时的任务 2024-12-16 21:48:22 +08:00
suyiiyii 96573ec86e 添加 request_histogram 和 render_histogram 2024-12-12 19:43:44 +08:00
suyiiyii 4c29cf10e4 添加 cookie_choose_counter 2024-12-12 14:54:12 +08:00
suyiiyii 80f924123d 使用 label 简化 request_counter 2024-12-12 13:08:27 +08:00
suyiiyii 073bd314fc ♻️ 整理代码结构 2024-12-12 11:28:32 +08:00
suyiiyii 63f59ada3c 初步添加 metrics 2024-12-12 11:28:28 +08:00
suyiiyii 9fef8028c5 添加 NewMessage 类型 Platform 的抓取数量日志提示 2024-12-12 11:27:50 +08:00
pre-commit-ci[bot] af1609730c ⬆️ auto update by pre-commit hooks (#660)
test-build / Build Frontend (push) Has been cancelled
test-build / Smoke-test Coverage (macos-latest, 3.10) (push) Has been cancelled
test-build / Smoke-test Coverage (macos-latest, 3.11) (push) Has been cancelled
test-build / Smoke-test Coverage (macos-latest, 3.12) (push) Has been cancelled
test-build / Smoke-test Coverage (ubuntu-latest, 3.10) (push) Has been cancelled
test-build / Smoke-test Coverage (ubuntu-latest, 3.11) (push) Has been cancelled
test-build / Smoke-test Coverage (ubuntu-latest, 3.12) (push) Has been cancelled
test-build / Smoke-test Coverage (windows-latest, 3.10) (push) Has been cancelled
test-build / Smoke-test Coverage (windows-latest, 3.11) (push) Has been cancelled
test-build / Smoke-test Coverage (windows-latest, 3.12) (push) Has been cancelled
test-build / All-test Coverage (macos-latest, 3.10) (push) Has been cancelled
test-build / All-test Coverage (macos-latest, 3.11) (push) Has been cancelled
test-build / All-test Coverage (macos-latest, 3.12) (push) Has been cancelled
test-build / All-test Coverage (ubuntu-latest, 3.10) (push) Has been cancelled
test-build / All-test Coverage (ubuntu-latest, 3.11) (push) Has been cancelled
test-build / All-test Coverage (ubuntu-latest, 3.12) (push) Has been cancelled
test-build / All-test Coverage (windows-latest, 3.10) (push) Has been cancelled
test-build / All-test Coverage (windows-latest, 3.11) (push) Has been cancelled
test-build / All-test Coverage (windows-latest, 3.12) (push) Has been cancelled
pydantic1-compat-test / pydantic1 test (ubuntu-latest, 3.11) (push) Has been cancelled
Ruff Lint / Ruff Lint (push) Has been cancelled
test-build / Docker main (push) Has been cancelled
test-build / Docker main sentry (push) Has been cancelled
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-12-05 18:28:38 +08:00
github-actions[bot] 37958b0850 📝 Update changelog 2024-12-05 08:09:09 +00:00
suyiiyii a48ea0e947 🐛 修复 cookie 模块 type hint (#658) 2024-12-05 16:08:42 +08:00
github-actions[bot] d4048716b2 📝 Update changelog 2024-12-04 12:09:30 +00:00
Azide 22bf05949d 🐛 B站转发动态补充 DeletedItem 类型解析 (#659)
Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
2024-12-04 20:09:01 +08:00
3 changed files with 86 additions and 32 deletions
+30 -5
View File
@@ -1,14 +1,39 @@
import time
from fastapi import APIRouter from fastapi import APIRouter
from starlette.responses import Response from starlette.responses import Response
from prometheus_client import CONTENT_TYPE_LATEST, Counter, generate_latest from prometheus_client import CONTENT_TYPE_LATEST, Gauge, Counter, Histogram, generate_latest
# Request counter # Request counter
request_counter = Counter("bison_request_counter", "The number of requests") request_counter = Counter(
# Success counter "bison_request_counter", "The number of requests", ["site_name", "platform_name", "target", "success"]
success_counter = Counter("bison_success_counter", "The number of successful requests") )
# Sent counter # Sent counter
sent_counter = Counter("bison_sent_counter", "The number of sent messages") sent_counter = Counter("bison_sent_counter", "The number of sent messages", ["site_name", "platform_name", "target"])
cookie_choose_counter = Counter(
"bison_cookie_choose_counter", "The number of cookie choose", ["site_name", "target", "cookie_id"]
)
request_histogram = Histogram(
"bison_request_histogram",
"The time of platform used to request the source",
["site_name", "platform_name"],
buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60],
)
render_histogram = Histogram(
"bison_render_histogram",
"The time of theme used to render",
["site_name", "platform_name"],
buckets=[0.1, 0.5, 1, 2, 5, 10, 30, 60],
)
start_time = Gauge("bison_start_time", "The start time of the program")
start_time.set(time.time())
metrics_router = APIRouter(prefix="/api/metrics", tags=["metrics"]) metrics_router = APIRouter(prefix="/api/metrics", tags=["metrics"])
+54 -27
View File
@@ -1,12 +1,14 @@
from dataclasses import dataclass from dataclasses import dataclass
from collections import defaultdict from collections import defaultdict
from collections.abc import Callable
from nonebot.log import logger from nonebot.log import logger
from nonebot_plugin_apscheduler import scheduler from nonebot_plugin_apscheduler import scheduler
from apscheduler.events import EVENT_JOB_MAX_INSTANCES
from nonebot_plugin_saa.utils.exceptions import NoBotFound from nonebot_plugin_saa.utils.exceptions import NoBotFound
from nonebot_bison.utils import ClientManager from nonebot_bison.utils import ClientManager
from nonebot_bison.metrics import sent_counter, request_counter, success_counter from nonebot_bison.metrics import sent_counter, request_counter, render_histogram, request_histogram
from ..config import config from ..config import config
from ..send import send_msgs from ..send import send_msgs
@@ -24,6 +26,15 @@ class Schedulable:
use_batch: bool = False use_batch: bool = False
def handle_time_exceeded(event):
# event.job_id 是该任务在 apscheduler 的 id, 进而可以获得该任务的函数,再获取该函数绑定的对象
logger.warning(f"{scheduler.get_job(event.job_id).func.__self__.name} 抓取执行超时")
scheduler.get_job(event.job_id).func.__self__.metrics_report(False)
scheduler.add_listener(handle_time_exceeded, EVENT_JOB_MAX_INSTANCES)
class Scheduler: class Scheduler:
schedulable_list: list[Schedulable] # for load weigth from db schedulable_list: list[Schedulable] # for load weigth from db
batch_api_target_cache: dict[str, dict[Target, list[Target]]] # platform_name -> (target -> [target]) batch_api_target_cache: dict[str, dict[Target, list[Target]]] # platform_name -> (target -> [target])
@@ -56,6 +67,7 @@ class Scheduler:
self.platform_name_list = platform_name_list self.platform_name_list = platform_name_list
self.pre_weight_val = 0 # 轮调度中“本轮”增加权重和的初值 self.pre_weight_val = 0 # 轮调度中“本轮”增加权重和的初值
self.metrics_report: Callable[[bool], None] | None = None # 作为函数变量,允许外部调用来上报此次抓取是否成功
logger.info( logger.info(
f"register scheduler for {self.name} with " f"register scheduler for {self.name} with "
f"{self.scheduler_config.schedule_type} {self.scheduler_config.schedule_setting}" f"{self.scheduler_config.schedule_type} {self.scheduler_config.schedule_setting}"
@@ -94,22 +106,33 @@ class Scheduler:
logger.trace(f"scheduler {self.name} fetching next target: [{schedulable.platform_name}]{schedulable.target}") logger.trace(f"scheduler {self.name} fetching next target: [{schedulable.platform_name}]{schedulable.target}")
context = ProcessContext(self.client_mgr) context = ProcessContext(self.client_mgr)
request_counter.inc()
success_flag = False
platform_obj = platform_manager[schedulable.platform_name](context)
# 通过闭包的形式,将此次抓取任务的信息保存为函数变量,允许在该任务无法正常结束时由外部上报
self.metrics_report = lambda x: request_counter.labels(
platform_name=schedulable.platform_name,
site_name=platform_obj.site.name,
target=schedulable.target,
success=x,
).inc()
try: try:
platform_obj = platform_manager[schedulable.platform_name](context) with request_histogram.labels(
if schedulable.use_batch: platform_name=schedulable.platform_name, site_name=platform_obj.site.name
batch_targets = self.batch_api_target_cache[schedulable.platform_name][schedulable.target] ).time():
sub_units = [] if schedulable.use_batch:
for batch_target in batch_targets: batch_targets = self.batch_api_target_cache[schedulable.platform_name][schedulable.target]
userinfo = await config.get_platform_target_subscribers(schedulable.platform_name, batch_target) sub_units = []
sub_units.append(SubUnit(batch_target, userinfo)) for batch_target in batch_targets:
to_send = await platform_obj.do_batch_fetch_new_post(sub_units) userinfo = await config.get_platform_target_subscribers(schedulable.platform_name, batch_target)
else: sub_units.append(SubUnit(batch_target, userinfo))
send_userinfo_list = await config.get_platform_target_subscribers( to_send = await platform_obj.do_batch_fetch_new_post(sub_units)
schedulable.platform_name, schedulable.target else:
) send_userinfo_list = await config.get_platform_target_subscribers(
to_send = await platform_obj.do_fetch_new_post(SubUnit(schedulable.target, send_userinfo_list)) schedulable.platform_name, schedulable.target
)
to_send = await platform_obj.do_fetch_new_post(SubUnit(schedulable.target, send_userinfo_list))
success_flag = True
except SkipRequestException as err: except SkipRequestException as err:
logger.debug(f"skip request: {err}") logger.debug(f"skip request: {err}")
except Exception as err: except Exception as err:
@@ -118,20 +141,24 @@ class Scheduler:
logger.warning("API request record: " + record) logger.warning("API request record: " + record)
err.args += (records,) err.args += (records,)
raise raise
success_counter.inc()
self.metrics_report(success_flag)
if not to_send: if not to_send:
return return
sent_counter.inc() sent_counter.labels(
for user, send_list in to_send: platform_name=schedulable.platform_name, site_name=platform_obj.site.name, target=schedulable.target
for send_post in send_list: ).inc()
logger.info(f"send to {user}: {send_post}") with render_histogram.labels(platform_name=schedulable.platform_name, site_name=platform_obj.site.name).time():
try: for user, send_list in to_send:
await send_msgs( for send_post in send_list:
user, logger.info(f"send to {user}: {send_post}")
await send_post.generate_messages(), try:
) await send_msgs(
except NoBotFound: user,
logger.warning("no bot connected") await send_post.generate_messages(),
)
except NoBotFound:
logger.warning("no bot connected")
def insert_new_schedulable(self, platform_name: str, target: Target): def insert_new_schedulable(self, platform_name: str, target: Target):
self.pre_weight_val += 1000 self.pre_weight_val += 1000
+2
View File
@@ -13,6 +13,7 @@ from ..types import Target
from ..config import config from ..config import config
from .http import http_client from .http import http_client
from ..config.db_model import Cookie from ..config.db_model import Cookie
from ..metrics import cookie_choose_counter
class ClientManager(ABC): class ClientManager(ABC):
@@ -131,6 +132,7 @@ class CookieClientManager(ClientManager):
"""获取 client,根据 target 选择 cookie""" """获取 client,根据 target 选择 cookie"""
client = http_client() client = http_client()
cookie = await self._choose_cookie(target) cookie = await self._choose_cookie(target)
cookie_choose_counter.labels(site_name=self._site_name, target=target, cookie_id=cookie.id).inc()
if cookie.is_universal: if cookie.is_universal:
logger.trace(f"平台 {self._site_name} 未获取到用户cookie, 使用匿名cookie") logger.trace(f"平台 {self._site_name} 未获取到用户cookie, 使用匿名cookie")
else: else: