move files

This commit is contained in:
felinae98
2021-11-17 15:59:19 +08:00
parent 7f46d87c3f
commit e9de860058
17 changed files with 0 additions and 0 deletions
+10
View File
@@ -0,0 +1,10 @@
import nonebot
from . import config_manager
from . import config
from . import scheduler
from . import send
from . import post
from . import platform
from . import types
from . import utils
+136
View File
@@ -0,0 +1,136 @@
from collections import defaultdict
from os import path
import os
from typing import DefaultDict, Mapping
import nonebot
from tinydb import Query, TinyDB
from .plugin_config import plugin_config
from .types import Target, User
from .utils import Singleton
from .platform import platform_manager
supported_target_type = platform_manager.keys()
def get_config_path() -> str:
if plugin_config.hk_reporter_config_path:
data_dir = plugin_config.hk_reporter_config_path
else:
working_dir = os.getcwd()
data_dir = path.join(working_dir, 'data')
if not path.isdir(data_dir):
os.makedirs(data_dir)
return path.join(data_dir, 'hk_reporter.json')
class NoSuchUserException(Exception):
pass
class NoSuchSubscribeException(Exception):
pass
class Config(metaclass=Singleton):
migrate_version = 2
def __init__(self):
self.db = TinyDB(get_config_path(), encoding='utf-8')
self.kv_config = self.db.table('kv')
self.user_target = self.db.table('user_target')
self.target_user_cache: dict[str, defaultdict[Target, list[User]]] = {}
self.target_user_cat_cache = {}
self.target_user_tag_cache = {}
self.target_list = {}
self.next_index: DefaultDict[str, int] = defaultdict(lambda: 0)
def add_subscribe(self, user, user_type, target, target_name, target_type, cats, tags):
user_query = Query()
query = (user_query.user == user) & (user_query.user_type == user_type)
if (user_data := self.user_target.get(query)):
# update
subs: list = user_data.get('subs', [])
subs.append({"target": target, "target_type": target_type, 'target_name': target_name, 'cats': cats, 'tags': tags})
self.user_target.update({"subs": subs}, query)
else:
# insert
self.user_target.insert({
'user': user, 'user_type': user_type,
'subs': [{'target': target, 'target_type': target_type, 'target_name': target_name, 'cats': cats, 'tags': tags }]
})
self.update_send_cache()
def list_subscribe(self, user, user_type):
query = Query()
if user_sub := self.user_target.get((query.user == user) & (query.user_type ==user_type)):
return user_sub['subs']
return []
def del_subscribe(self, user, user_type, target, target_type):
user_query = Query()
query = (user_query.user == user) & (user_query.user_type == user_type)
if not (query_res := self.user_target.get(query)):
raise NoSuchUserException()
subs = query_res.get('subs', [])
for idx, sub in enumerate(subs):
if sub.get('target') == target and sub.get('target_type') == target_type:
subs.pop(idx)
self.user_target.update({'subs': subs}, query)
self.update_send_cache()
return
raise NoSuchSubscribeException()
def update_send_cache(self):
res = {target_type: defaultdict(list) for target_type in supported_target_type}
cat_res = {target_type: defaultdict(lambda: defaultdict(list)) for target_type in supported_target_type}
tag_res = {target_type: defaultdict(lambda: defaultdict(list)) for target_type in supported_target_type}
# res = {target_type: defaultdict(lambda: defaultdict(list)) for target_type in supported_target_type}
for user in self.user_target.all():
for sub in user.get('subs', []):
if not sub.get('target_type') in supported_target_type:
continue
res[sub['target_type']][sub['target']].append(User(user['user'], user['user_type']))
cat_res[sub['target_type']][sub['target']]['{}-{}'.format(user['user_type'], user['user'])] = sub['cats']
tag_res[sub['target_type']][sub['target']]['{}-{}'.format(user['user_type'], user['user'])] = sub['tags']
self.target_user_cache = res
self.target_user_cat_cache = cat_res
self.target_user_tag_cache = tag_res
for target_type in self.target_user_cache:
self.target_list[target_type] = list(self.target_user_cache[target_type].keys())
def get_sub_category(self, target_type, target, user_type, user):
return self.target_user_cat_cache[target_type][target]['{}-{}'.format(user_type, user)]
def get_sub_tags(self, target_type, target, user_type, user):
return self.target_user_tag_cache[target_type][target]['{}-{}'.format(user_type, user)]
def get_next_target(self, target_type):
# FIXME 插入或删除target后对队列的影响(但是并不是大问题
if not self.target_list[target_type]:
return None
self.next_index[target_type] %= len(self.target_list[target_type])
res = self.target_list[target_type][self.next_index[target_type]]
self.next_index[target_type] += 1
return res
def start_up():
config = Config()
if not (search_res := config.kv_config.search(Query().name=="version")):
config.kv_config.insert({"name": "version", "value": config.migrate_version})
elif search_res[0].get("value") < config.migrate_version:
query = Query()
version_query = (query.name == 'version')
cur_version = search_res[0].get("value")
if cur_version == 1:
cur_version = 2
for user_conf in config.user_target.all():
conf_id = user_conf.doc_id
subs = user_conf['subs']
for sub in subs:
sub['cats'] = []
sub['tags'] = []
config.user_target.update({'subs': subs}, doc_ids=[conf_id])
config.kv_config.update({"value": config.migrate_version}, version_query)
# do migration
config.update_send_cache()
nonebot.get_driver().on_startup(start_up)
+186
View File
@@ -0,0 +1,186 @@
from typing import Type
from nonebot import logger, on_command
from nonebot.adapters.cqhttp import Bot, Event, GroupMessageEvent
from nonebot.adapters.cqhttp.message import Message
from nonebot.adapters.cqhttp.permission import GROUP_ADMIN, GROUP_MEMBER, GROUP_OWNER
from nonebot.permission import Permission, SUPERUSER
from nonebot.rule import to_me
from nonebot.typing import T_State
from nonebot.matcher import Matcher
from .config import Config, NoSuchSubscribeException
from .platform import platform_manager, check_sub_target
from .utils import parse_text
from .types import Target
def _gen_prompt_template(prompt: str):
if hasattr(Message, 'template'):
return Message.template(prompt)
return prompt
common_platform = [p.platform_name for p in \
filter(lambda platform: platform.enabled and platform.is_common,
platform_manager.values())
]
help_match = on_command('help', rule=to_me(), priority=5)
@help_match.handle()
async def send_help(bot: Bot, event: Event, state: T_State):
message = '使用方法:\n@bot 添加订阅(仅管理员)\n@bot 查询订阅\n@bot 删除订阅(仅管理员)'
await help_match.finish(Message(await parse_text(message)))
def do_add_sub(add_sub: Type[Matcher]):
@add_sub.handle()
async def init_promote(bot: Bot, event: Event, state: T_State):
state['_prompt'] = '请输入想要订阅的平台,目前支持,请输入冒号左边的名称:\n' + \
''.join(['{}{}\n'.format(platform_name, platform_manager[platform_name].name) \
for platform_name in common_platform]) + \
'要查看全部平台请输入:“全部”'
async def parse_platform(bot: Bot, event: Event, state: T_State) -> None:
platform = str(event.get_message()).strip()
if platform == '全部':
message = '全部平台\n' + \
'\n'.join(['{}{}'.format(platform_name, platform.name) \
for platform_name, platform in platform_manager.items()])
await add_sub.reject(message)
elif platform in platform_manager:
state['platform'] = platform
else:
await add_sub.reject('平台输入错误')
@add_sub.got('platform', _gen_prompt_template('{_prompt}'), parse_platform)
@add_sub.handle()
async def init_id(bot: Bot, event: Event, state: T_State):
if platform_manager[state['platform']].has_target:
state['_prompt'] = '请输入订阅用户的id,详情查阅https://nonebot-hk-reporter.vercel.app/usage/#%E6%89%80%E6%94%AF%E6%8C%81%E5%B9%B3%E5%8F%B0%E7%9A%84uid'
else:
state['id'] = 'default'
state['name'] = await platform_manager[state['platform']].get_target_name(Target(''))
async def parse_id(bot: Bot, event: Event, state: T_State):
target = str(event.get_message()).strip()
name = await check_sub_target(state['platform'], target)
if not name:
await add_sub.reject('id输入错误')
state['id'] = target
state['name'] = name
@add_sub.got('id', _gen_prompt_template('{_prompt}'), parse_id)
@add_sub.handle()
async def init_cat(bot: Bot, event: Event, state: T_State):
if not platform_manager[state['platform']].categories:
state['cats'] = []
return
state['_prompt'] = '请输入要订阅的类别,以空格分隔,支持的类别有:{}'.format(
' '.join(list(platform_manager[state['platform']].categories.values())))
async def parser_cats(bot: Bot, event: Event, state: T_State):
res = []
for cat in str(event.get_message()).strip().split():
if cat not in platform_manager[state['platform']].reverse_category:
await add_sub.reject('不支持 {}'.format(cat))
res.append(platform_manager[state['platform']].reverse_category[cat])
state['cats'] = res
@add_sub.got('cats', _gen_prompt_template('{_prompt}'), parser_cats)
@add_sub.handle()
async def init_tag(bot: Bot, event: Event, state: T_State):
if not platform_manager[state['platform']].enable_tag:
state['tags'] = []
return
state['_prompt'] = '请输入要订阅的tag,订阅所有tag输入"全部标签"'
async def parser_tags(bot: Bot, event: Event, state: T_State):
if str(event.get_message()).strip() == '全部标签':
state['tags'] = []
else:
state['tags'] = str(event.get_message()).strip().split()
@add_sub.got('tags', _gen_prompt_template('{_prompt}'), parser_tags)
@add_sub.handle()
async def add_sub_process(bot: Bot, event: Event, state: T_State):
config = Config()
config.add_subscribe(state.get('_user_id') or event.group_id, user_type='group',
target=state['id'],
target_name=state['name'], target_type=state['platform'],
cats=state.get('cats', []), tags=state.get('tags', []))
await add_sub.finish('添加 {} 成功'.format(state['name']))
def do_query_sub(query_sub: Type[Matcher]):
@query_sub.handle()
async def _(bot: Bot, event: Event, state: T_State):
config: Config = Config()
sub_list = config.list_subscribe(state.get('_user_id') or event.group_id, "group")
res = '订阅的帐号为:\n'
for sub in sub_list:
res += '{} {} {}'.format(sub['target_type'], sub['target_name'], sub['target'])
platform = platform_manager[sub['target_type']]
if platform.categories:
res += ' [{}]'.format(', '.join(map(lambda x: platform.categories[x], sub['cats'])))
if platform.enable_tag:
res += ' {}'.format(', '.join(sub['tags']))
res += '\n'
await query_sub.finish(Message(await parse_text(res)))
def do_del_sub(del_sub: Type[Matcher]):
@del_sub.handle()
async def send_list(bot: Bot, event: Event, state: T_State):
config: Config = Config()
sub_list = config.list_subscribe(state.get('_user_id') or event.group_id, "group")
res = '订阅的帐号为:\n'
state['sub_table'] = {}
for index, sub in enumerate(sub_list, 1):
state['sub_table'][index] = {'target_type': sub['target_type'], 'target': sub['target']}
res += '{} {} {} {}\n'.format(index, sub['target_type'], sub['target_name'], sub['target'])
platform = platform_manager[sub['target_type']]
if platform.categories:
res += ' [{}]'.format(', '.join(map(lambda x: platform.categories[x], sub['cats'])))
if platform.enable_tag:
res += ' {}'.format(', '.join(sub['tags']))
res += '\n'
res += '请输入要删除的订阅的序号'
await bot.send(event=event, message=Message(await parse_text(res)))
@del_sub.receive()
async def do_del(bot, event: Event, state: T_State):
try:
index = int(str(event.get_message()).strip())
config = Config()
config.del_subscribe(state.get('_user_id') or event.group_id, 'group', **state['sub_table'][index])
except Exception as e:
await del_sub.reject('删除错误')
logger.warning(e)
else:
await del_sub.finish('删除成功')
async def parse_group_number(bot: Bot, event: Event, state: T_State):
state[state["_current_key"]] = int(str(event.get_message()))
add_sub_matcher = on_command("添加订阅", rule=to_me(), permission=GROUP_ADMIN | GROUP_OWNER | SUPERUSER, priority=5)
do_add_sub(add_sub_matcher)
manage_add_sub_mather = on_command('管理-添加订阅', permission=SUPERUSER, priority=5)
@manage_add_sub_mather.got('_user_id', "群号", parse_group_number)
async def handle(bot: Bot, event: Event, state: T_State):
pass
do_add_sub(manage_add_sub_mather)
query_sub_macher = on_command("查询订阅", rule=to_me(), priority=5)
do_query_sub(query_sub_macher)
manage_query_sub_mather = on_command('管理-查询订阅', permission=SUPERUSER, priority=5)
@manage_query_sub_mather.got('_user_id', "群号", parse_group_number)
async def handle(bot: Bot, event: Event, state: T_State):
pass
do_query_sub(manage_query_sub_mather)
del_sub_macher = on_command("删除订阅", rule=to_me(), permission=GROUP_ADMIN | GROUP_OWNER | SUPERUSER, priority=5)
do_del_sub(del_sub_macher)
manage_del_sub_mather = on_command('管理-删除订阅', permission=SUPERUSER, priority=5)
@manage_del_sub_mather.got('_user_id', "群号", parse_group_number)
async def handle(bot: Bot, event: Event, state: T_State):
pass
do_del_sub(manage_del_sub_mather)
@@ -0,0 +1,28 @@
from collections import defaultdict
from .platform import Platform, NoTargetGroup
from pkgutil import iter_modules
from pathlib import Path
from importlib import import_module
_package_dir = str(Path(__file__).resolve().parent)
for (_, module_name, _) in iter_modules([_package_dir]):
import_module(f'{__name__}.{module_name}')
async def check_sub_target(target_type, target):
return await platform_manager[target_type].get_target_name(target)
_platform_list = defaultdict(list)
for _platform in Platform.registory:
if not _platform.enabled:
continue
_platform_list[_platform.platform_name].append(_platform)
platform_manager: dict[str, Platform] = dict()
for name, platform_list in _platform_list.items():
if len(platform_list) == 1:
platform_manager[name] = platform_list[0]()
else:
platform_manager[name] = NoTargetGroup([_platform() for _platform in platform_list])
@@ -0,0 +1,147 @@
import json
from typing import Any
from bs4 import BeautifulSoup as bs
import httpx
from ..post import Post
from ..types import Category, RawPost, Target
from ..utils import Render
from .platform import CategoryNotSupport, NewMessage, NoTargetMixin, StatusChange
class Arknights(NewMessage, NoTargetMixin):
categories = {1: '游戏公告'}
platform_name = 'arknights'
name = '明日方舟游戏信息'
enable_tag = False
enabled = True
is_common = False
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
async def get_target_name(self, _: Target) -> str:
return '明日方舟游戏信息'
async def get_sub_list(self, _) -> list[RawPost]:
async with httpx.AsyncClient() as client:
raw_data = await client.get('https://ak-conf.hypergryph.com/config/prod/announce_meta/IOS/announcement.meta.json')
return json.loads(raw_data.text)['announceList']
def get_id(self, post: RawPost) -> Any:
return post['announceId']
def get_date(self, _: RawPost) -> None:
return None
def get_category(self, _) -> Category:
return Category(1)
async def parse(self, raw_post: RawPost) -> Post:
announce_url = raw_post['webUrl']
text = ''
async with httpx.AsyncClient() as client:
raw_html = await client.get(announce_url)
soup = bs(raw_html, 'html.parser')
pics = []
if soup.find("div", class_="standerd-container"):
# 图文
render = Render()
viewport = {'width': 320, 'height': 6400, 'deviceScaleFactor': 3}
pic_data = await render.render(announce_url, viewport=viewport, target='div.main')
if pic_data:
pics.append(pic_data)
else:
text = '图片渲染失败'
elif (pic := soup.find('img', class_='banner-image')):
pics.append(pic['src'])
else:
raise CategoryNotSupport()
return Post('arknights', text=text, url='', target_name="明日方舟游戏内公告", pics=pics, compress=True, override_use_pic=False)
class AkVersion(NoTargetMixin, StatusChange):
categories = {2: '更新信息'}
platform_name = 'arknights'
name = '明日方舟游戏信息'
enable_tag = False
enabled = True
is_common = False
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
async def get_target_name(self, _: Target) -> str:
return '明日方舟游戏信息'
async def get_status(self, _):
async with httpx.AsyncClient() as client:
res_ver = await client.get('https://ak-conf.hypergryph.com/config/prod/official/IOS/version')
res_preanounce = await client.get('https://ak-conf.hypergryph.com/config/prod/announce_meta/IOS/preannouncement.meta.json')
res = res_ver.json()
res.update(res_preanounce.json())
return res
def compare_status(self, _, old_status, new_status):
res = []
if old_status.get('preAnnounceType') == 2 and new_status.get('preAnnounceType') == 0:
res.append(Post('arknights',
text='登录界面维护公告上线(大概是开始维护了)',
target_name='明日方舟更新信息'))
elif old_status.get('preAnnounceType') == 0 and new_status.get('preAnnounceType') == 2:
res.append(Post('arknights',
text='登录界面维护公告下线(大概是开服了,冲!)',
target_name='明日方舟更新信息'))
if old_status.get('clientVersion') != new_status.get('clientVersion'):
res.append(Post('arknights', text='游戏本体更新(大更新)', target_name='明日方舟更新信息'))
if old_status.get('resVersion') != new_status.get('resVersion'):
res.append(Post('arknights', text='游戏资源更新(小更新)', target_name='明日方舟更新信息'))
return res
def get_category(self, _):
return Category(2)
async def parse(self, raw_post):
return raw_post
class MonsterSiren(NewMessage, NoTargetMixin):
categories = {3: '塞壬唱片新闻'}
platform_name = 'arknights'
name = '明日方舟游戏信息'
enable_tag = False
enabled = True
is_common = False
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
async def get_target_name(self, _: Target) -> str:
return '明日方舟游戏信息'
async def get_sub_list(self, _) -> list[RawPost]:
async with httpx.AsyncClient() as client:
raw_data = await client.get('https://monster-siren.hypergryph.com/api/news')
return raw_data.json()['data']['list']
def get_id(self, post: RawPost) -> Any:
return post['cid']
def get_date(self, _) -> None:
return None
def get_category(self, _) -> Category:
return Category(3)
async def parse(self, raw_post: RawPost) -> Post:
url = f'https://monster-siren.hypergryph.com/info/{raw_post["cid"]}'
async with httpx.AsyncClient() as client:
res = await client.get(f'https://monster-siren.hypergryph.com/api/news/{raw_post["cid"]}')
raw_data = res.json()
content = raw_data['data']['content']
content = content.replace('</p>', '</p>\n')
soup = bs(content, 'html.parser')
imgs = list(map(lambda x: x['src'], soup('img')))
text = f'{raw_post["title"]}\n{soup.text.strip()}'
return Post('monster-siren', text=text, pics=imgs,
url=url, target_name="塞壬唱片新闻", compress=True,
override_use_pic=False)
@@ -0,0 +1,126 @@
import json
from typing import Any, Optional
import httpx
from ..post import Post
from ..types import Category, RawPost, Tag, Target
from .platform import NewMessage, TargetMixin, CategoryNotSupport
class Bilibili(NewMessage, TargetMixin):
categories = {
1: "一般动态",
2: "专栏文章",
3: "视频",
4: "纯文字",
5: "转发"
# 5: "短视频"
}
platform_name = 'bilibili'
enable_tag = True
enabled = True
is_common = True
schedule_type = 'interval'
schedule_kw = {'seconds': 10}
name = 'B站'
async def get_target_name(self, target: Target) -> Optional[str]:
async with httpx.AsyncClient() as client:
res = await client.get('https://api.bilibili.com/x/space/acc/info', params={'mid': target})
res_data = json.loads(res.text)
if res_data['code']:
return None
return res_data['data']['name']
async def get_sub_list(self, target: Target) -> list[RawPost]:
async with httpx.AsyncClient() as client:
params = {'host_uid': target, 'offset': 0, 'need_top': 0}
res = await client.get('https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history', params=params, timeout=4.0)
res_dict = json.loads(res.text)
if res_dict['code'] == 0:
return res_dict['data']['cards']
else:
return []
def get_id(self, post: RawPost) -> Any:
return post['desc']['dynamic_id']
def get_date(self, post: RawPost) -> int:
return post['desc']['timestamp']
def _do_get_category(self, post_type: int) -> Category:
if post_type == 2:
return Category(1)
elif post_type == 64:
return Category(2)
elif post_type == 8:
return Category(3)
elif post_type == 4:
return Category(4)
elif post_type == 1:
# 转发
return Category(5)
raise CategoryNotSupport()
def get_category(self, post: RawPost) -> Category:
post_type = post['desc']['type']
return self._do_get_category(post_type)
def get_tags(self, raw_post: RawPost) -> list[Tag]:
return [*map(lambda tp: tp['topic_name'], raw_post['display']['topic_info']['topic_details'])]
def _get_info(self, post_type: Category, card) -> tuple[str, list]:
if post_type == 1:
# 一般动态
text = card['item']['description']
pic = [img['img_src'] for img in card['item']['pictures']]
elif post_type == 2:
# 专栏文章
text = '{} {}'.format(card['title'], card['summary'])
pic = card['image_urls']
elif post_type == 3:
# 视频
text = card['dynamic']
pic = [card['pic']]
elif post_type == 4:
# 纯文字
text = card['item']['content']
pic = []
else:
raise CategoryNotSupport()
return text, pic
async def parse(self, raw_post: RawPost) -> Post:
card_content = json.loads(raw_post['card'])
post_type = self.get_category(raw_post)
target_name = raw_post['desc']['user_profile']['info']['uname']
if post_type >= 1 and post_type < 5:
url = ''
if post_type == 1:
# 一般动态
url = 'https://t.bilibili.com/{}'.format(raw_post['desc']['dynamic_id_str'])
elif post_type == 2:
# 专栏文章
url = 'https://www.bilibili.com/read/cv{}'.format(raw_post['desc']['rid'])
elif post_type == 3:
# 视频
url = 'https://www.bilibili.com/video/{}'.format(raw_post['desc']['bvid'])
elif post_type == 4:
# 纯文字
url = 'https://t.bilibili.com/{}'.format(raw_post['desc']['dynamic_id_str'])
text, pic = self._get_info(post_type, card_content)
elif post_type == 5:
# 转发
url = 'https://t.bilibili.com/{}'.format(raw_post['desc']['dynamic_id_str'])
text = card_content['item']['content']
orig_type = card_content['item']['orig_type']
orig = json.loads(card_content['origin'])
orig_text, _ = self._get_info(self._do_get_category(orig_type), orig)
text += '\n--------------\n'
text += orig_text
pic = []
else:
raise CategoryNotSupport(post_type)
return Post('bilibili', text=text, url=url, pics=pic, target_name=target_name)
@@ -0,0 +1,53 @@
from typing import Any, Optional
import httpx
from ..post import Post
from ..types import RawPost, Target
from .platform import TargetMixin, NewMessage
class NcmArtist(TargetMixin, NewMessage):
categories = {}
platform_name = 'ncm-artist'
enable_tag = False
enabled = True
is_common = True
schedule_type = 'interval'
schedule_kw = {'minutes': 10}
name = "网易云-歌手"
async def get_target_name(self, target: Target) -> Optional[str]:
async with httpx.AsyncClient() as client:
res = await client.get(
"https://music.163.com/api/artist/albums/{}".format(target),
headers={'Referer': 'https://music.163.com/'}
)
res_data = res.json()
if res_data['code'] != 200:
return
return res_data['artist']['name']
async def get_sub_list(self, target: Target) -> list[RawPost]:
async with httpx.AsyncClient() as client:
res = await client.get(
"https://music.163.com/api/artist/albums/{}".format(target),
headers={'Referer': 'https://music.163.com/'}
)
res_data = res.json()
if res_data['code'] != 200:
return []
else:
return res_data['hotAlbums']
def get_id(self, post: RawPost) -> Any:
return post['id']
def get_date(self, post: RawPost) -> int:
return post['publishTime'] // 1000
async def parse(self, raw_post: RawPost) -> Post:
text = '新专辑发布:{}'.format(raw_post['name'])
target_name = raw_post['artist']['name']
pics = [raw_post['picUrl']]
url = "https://music.163.com/#/album?id={}".format(raw_post['id'])
return Post('ncm-artist', text=text, url=url, pics=pics, target_name=target_name)
@@ -0,0 +1,384 @@
from abc import abstractmethod, ABC
from collections import defaultdict
from dataclasses import dataclass
from functools import reduce
import time
from typing import Any, Collection, Optional, Literal
import httpx
from nonebot import logger
from ..plugin_config import plugin_config
from ..post import Post
from ..types import Category, RawPost, Tag, Target, User, UserSubInfo
class CategoryNotSupport(Exception):
"raise in get_category, when post category is not supported"
class RegistryMeta(type):
def __new__(cls, name, bases, namespace, **kwargs):
return super().__new__(cls, name, bases, namespace)
def __init__(cls, name, bases, namespace, **kwargs):
if kwargs.get('base'):
# this is the base class
cls.registory = []
elif not kwargs.get('abstract'):
# this is the subclass
cls.registory.append(cls)
super().__init__(name, bases, namespace, **kwargs)
class RegistryABCMeta(RegistryMeta, ABC):
...
class StorageMixinProto(metaclass=RegistryABCMeta, abstract=True):
has_target: bool
@abstractmethod
def get_stored_data(self, target: Target) -> Any:
...
@abstractmethod
def set_stored_data(self, target: Target, data: Any):
...
class TargetMixin(StorageMixinProto, abstract=True):
has_target = True
def __init__(self):
super().__init__()
self.store: dict[Target, Any] = dict()
def get_stored_data(self, target: Target) -> Any:
return self.store.get(target)
def set_stored_data(self, target: Target, data: Any):
self.store[target] = data
class NoTargetMixin(StorageMixinProto, abstract=True):
has_target = False
def __init__(self):
super().__init__()
self.store = None
def get_stored_data(self, _: Target) -> Any:
return self.store
def set_stored_data(self, _: Target, data: Any):
self.store = data
class PlatformNameMixin(metaclass=RegistryABCMeta, abstract=True):
platform_name: str
class CategoryMixin(metaclass=RegistryABCMeta, abstract=True):
@abstractmethod
def get_category(self, post: RawPost) -> Optional[Category]:
"Return category of given Rawpost"
raise NotImplementedError()
class ParsePostMixin(metaclass=RegistryABCMeta, abstract=True):
@abstractmethod
async def parse(self, raw_post: RawPost) -> Post:
"parse RawPost into post"
...
class MessageProcessMixin(PlatformNameMixin, CategoryMixin, ParsePostMixin, abstract=True):
"General message process fetch, parse, filter progress"
def __init__(self):
super().__init__()
self.parse_cache: dict[Any, Post] = dict()
@abstractmethod
def get_id(self, post: RawPost) -> Any:
"Get post id of given RawPost"
async def _parse_with_cache(self, raw_post: RawPost) -> Post:
post_id = self.get_id(raw_post)
if post_id not in self.parse_cache:
retry_times = 3
while retry_times:
try:
self.parse_cache[post_id] = await self.parse(raw_post)
break
except Exception as err:
retry_times -= 1
if not retry_times:
raise err
return self.parse_cache[post_id]
@abstractmethod
async def get_sub_list(self, target: Target) -> list[RawPost]:
"Get post list of the given target"
@abstractmethod
def get_date(self, post: RawPost) -> Optional[int]:
"Get post timestamp and return, return None if can't get the time"
async def filter_common(self, raw_post_list: list[RawPost]) -> list[RawPost]:
res = []
for raw_post in raw_post_list:
# post_id = self.get_id(raw_post)
# if post_id in exists_posts_set:
# continue
if (post_time := self.get_date(raw_post)) and time.time() - post_time > 2 * 60 * 60 and \
plugin_config.hk_reporter_init_filter:
continue
try:
self.get_category(raw_post)
except CategoryNotSupport:
continue
except NotImplementedError:
pass
res.append(raw_post)
return res
class NewMessageProcessMixin(StorageMixinProto, MessageProcessMixin, abstract=True):
"General message process, fetch, parse, filter, and only returns the new Post"
@dataclass
class MessageStorage():
inited: bool
exists_posts: set[Any]
async def filter_common_with_diff(self, target: Target, raw_post_list: list[RawPost]) -> list[RawPost]:
filtered_post = await self.filter_common(raw_post_list)
store = self.get_stored_data(target) or self.MessageStorage(False, set())
res = []
if not store.inited and plugin_config.hk_reporter_init_filter:
# target not init
for raw_post in filtered_post:
post_id = self.get_id(raw_post)
store.exists_posts.add(post_id)
logger.info('init {}-{} with {}'.format(self.platform_name, target, store.exists_posts))
store.inited = True
else:
for raw_post in filtered_post:
post_id = self.get_id(raw_post)
if post_id in store.exists_posts:
continue
res.append(raw_post)
store.exists_posts.add(post_id)
self.set_stored_data(target, store)
return res
class UserCustomFilterMixin(CategoryMixin, ParsePostMixin, abstract=True):
categories: dict[Category, str]
enable_tag: bool
def __init__(self):
super().__init__()
self.reverse_category = {}
for key, val in self.categories.items():
self.reverse_category[val] = key
@abstractmethod
def get_tags(self, raw_post: RawPost) -> Optional[Collection[Tag]]:
"Return Tag list of given RawPost"
async def filter_user_custom(self, raw_post_list: list[RawPost], cats: list[Category], tags: list[Tag]) -> list[RawPost]:
res: list[RawPost] = []
for raw_post in raw_post_list:
if self.categories:
cat = self.get_category(raw_post)
if cats and cat not in cats:
continue
if self.enable_tag and tags:
flag = False
post_tags = self.get_tags(raw_post)
for tag in post_tags or []:
if tag in tags:
flag = True
break
if not flag:
continue
res.append(raw_post)
return res
async def dispatch_user_post(self, target: Target, new_posts: list[RawPost], users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
res: list[tuple[User, list[Post]]] = []
for user, category_getter, tag_getter in users:
required_tags = tag_getter(target) if self.enable_tag else []
cats = category_getter(target)
user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags)
user_post: list[Post] = []
for raw_post in user_raw_post:
if isinstance(self, MessageProcessMixin):
user_post.append(await self._parse_with_cache(raw_post))
else:
user_post.append(await self.parse(raw_post))
res.append((user, user_post))
return res
class Platform(PlatformNameMixin, UserCustomFilterMixin, base=True):
# schedule_interval: int
schedule_type: Literal['date', 'interval', 'cron']
schedule_kw: dict
is_common: bool
enabled: bool
name: str
@abstractmethod
async def get_target_name(self, target: Target) -> Optional[str]:
...
@abstractmethod
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
...
class NewMessage(
Platform,
NewMessageProcessMixin,
UserCustomFilterMixin,
abstract=True
):
"Fetch a list of messages, filter the new messages, dispatch it to different users"
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
try:
post_list = await self.get_sub_list(target)
new_posts = await self.filter_common_with_diff(target, post_list)
if not new_posts:
return []
else:
for post in new_posts:
logger.info('fetch new post from {} {}: {}'.format(
self.platform_name,
target if self.has_target else '-',
self.get_id(post)))
res = await self.dispatch_user_post(target, new_posts, users)
self.parse_cache = {}
return res
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []
class StatusChange(
Platform,
StorageMixinProto,
UserCustomFilterMixin,
abstract=True
):
"Watch a status, and fire a post when status changes"
@abstractmethod
async def get_status(self, target: Target) -> Any:
...
@abstractmethod
def compare_status(self, target: Target, old_status, new_status) -> list[RawPost]:
...
@abstractmethod
async def parse(self, raw_post: RawPost) -> Post:
...
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
try:
new_status = await self.get_status(target)
res = []
if old_status := self.get_stored_data(target):
diff = self.compare_status(target, old_status, new_status)
if diff:
logger.info("status changes {} {}: {} -> {}".format(
self.platform_name,
target if self.has_target else '-',
old_status, new_status
))
res = await self.dispatch_user_post(target, diff, users)
self.set_stored_data(target, new_status)
return res
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []
class SimplePost(
Platform,
MessageProcessMixin,
UserCustomFilterMixin,
StorageMixinProto,
abstract=True
):
"Fetch a list of messages, dispatch it to different users"
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
try:
new_posts = await self.get_sub_list(target)
if not new_posts:
return []
else:
for post in new_posts:
logger.info('fetch new post from {} {}: {}'.format(
self.platform_name,
target if self.has_target else '-',
self.get_id(post)))
res = await self.dispatch_user_post(target, new_posts, users)
self.parse_cache = {}
return res
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []
class NoTargetGroup(
Platform,
NoTargetMixin,
UserCustomFilterMixin,
abstract=True
):
enable_tag = False
DUMMY_STR = '_DUMMY'
enabled = True
class PlatformProto(Platform, NoTargetMixin, UserCustomFilterMixin, abstract=True):
...
def __init__(self, platform_list: list[PlatformProto]):
self.platform_list = platform_list
name = self.DUMMY_STR
self.categories = {}
categories_keys = set()
self.schedule_type = platform_list[0].schedule_type
self.schedule_kw = platform_list[0].schedule_kw
for platform in platform_list:
if name == self.DUMMY_STR:
name = platform.name
elif name != platform.name:
raise RuntimeError('Platform name for {} not fit'.format(self.platform_name))
platform_category_key_set = set(platform.categories.keys())
if platform_category_key_set & categories_keys:
raise RuntimeError('Platform categories for {} duplicate'.format(self.platform_name))
categories_keys |= platform_category_key_set
self.categories.update(platform.categories)
if platform.schedule_kw != self.schedule_kw or platform.schedule_type != self.schedule_type:
raise RuntimeError('Platform scheduler for {} not fit'.format(self.platform_name))
self.name = name
self.is_common = platform_list[0].is_common
super().__init__()
def __str__(self):
return '[' + ' '.join(map(lambda x: x.name, self.platform_list)) + ']'
async def get_target_name(self, _):
return await self.platform_list[0].get_target_name(_)
async def fetch_new_post(self, target, users):
res = defaultdict(list)
for platform in self.platform_list:
platform_res = await platform.fetch_new_post(target=target, users=users)
for user, posts in platform_res:
res[user].extend(posts)
return [[key, val] for key, val in res.items()]
+49
View File
@@ -0,0 +1,49 @@
import calendar
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import feedparser
import httpx
from ..post import Post
from ..types import RawPost, Target
from .platform import NewMessage, TargetMixin
class Rss(NewMessage, TargetMixin):
categories = {}
enable_tag = False
platform_name = 'rss'
name = "Rss"
enabled = True
is_common = True
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
async def get_target_name(self, target: Target) -> Optional[str]:
async with httpx.AsyncClient() as client:
res = await client.get(target, timeout=10.0)
feed = feedparser.parse(res.text)
return feed['feed']['title']
def get_date(self, post: RawPost) -> int:
return calendar.timegm(post.published_parsed)
def get_id(self, post: RawPost) -> Any:
return post.id
async def get_sub_list(self, target: Target) -> list[RawPost]:
async with httpx.AsyncClient() as client:
res = await client.get(target, timeout=10.0)
feed = feedparser.parse(res)
entries = feed.entries
for entry in entries:
entry['_target_name'] = feed.feed.title
return feed.entries
async def parse(self, raw_post: RawPost) -> Post:
text = raw_post.get('title', '') + '\n' if raw_post.get('title') else ''
soup = bs(raw_post.description, 'html.parser')
text += soup.text.strip()
pics = list(map(lambda x: x.attrs['src'], soup('img')))
return Post('rss', text=text, url=raw_post.link, pics=pics, target_name=raw_post['_target_name'])
@@ -0,0 +1,78 @@
from datetime import datetime
import hashlib
import json
import re
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import httpx
from ..post import Post
from ..types import *
# from .platform import Platform
# class Wechat(Platform):
# categories = {}
# enable_tag = False
# platform_name = 'wechat'
# enabled = False
# is_common = False
# name = '微信公众号'
# @classmethod
# def _get_query_url(cls, target: Target):
# return 'https://weixin.sogou.com/weixin?type=1&s_from=input&query={}&ie=utf8&_sug_=n&_sug_type_='.format(target)
# @classmethod
# async def _get_target_soup(cls, target: Target) -> Optional[bs]:
# target_url = cls._get_query_url(target)
# async with httpx.AsyncClient() as client:
# res = await client.get(target_url)
# soup = bs(res.text, 'html.parser')
# blocks = soup.find(class_='news-list2').find_all('li',recursive=False)
# for block in blocks:
# if block.find(string=[target]):
# return block
# @classmethod
# async def get_account_name(cls, target: Target) -> Optional[str]:
# if not (block := await cls._get_target_soup(target)):
# return None
# return block.find('p', class_='tit').find('a').text
# async def get_sub_list(self, target: Target) -> list[RawPost]:
# block = await self._get_target_soup(target)
# if (last_post_dt := block.find('dt', string='最近文章:')):
# post = {
# 'title': last_post_dt.find_parent().find('a').text,
# 'target': target,
# 'page_url': self._get_query_url(target),
# 'name': block.find('p', class_='tit').find('a').text
# }
# return [post]
# else:
# return []
# def get_id(self, post: RawPost) -> Any:
# return post['title']
# def get_date(self, post: RawPost):
# return None
# def get_tags(self, post: RawPost):
# return None
# def get_category(self, post: RawPost):
# return None
# async def parse(self, raw_post: RawPost) -> Post:
# # TODO get content of post
# return Post(target_type='wechat',
# text='{}\n详细内容请自行查看公众号'.format(raw_post['title']),
# target_name=raw_post['name'],
# pics=[],
# url=''
# )
+120
View File
@@ -0,0 +1,120 @@
from datetime import datetime
import json
import re
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import httpx
from nonebot import logger
from ..post import Post
from ..types import *
from .platform import NewMessage, TargetMixin
class Weibo(NewMessage, TargetMixin):
categories = {
1: '转发',
2: '视频',
3: '图文',
4: '文字',
}
enable_tag = True
platform_name = 'weibo'
name = '新浪微博'
enabled = True
is_common = True
schedule_type = 'interval'
schedule_kw = {'seconds': 3}
async def get_target_name(self, target: Target) -> Optional[str]:
async with httpx.AsyncClient() as client:
param = {'containerid': '100505' + target}
res = await client.get('https://m.weibo.cn/api/container/getIndex', params=param)
res_dict = json.loads(res.text)
if res_dict.get('ok') == 1:
return res_dict['data']['userInfo']['screen_name']
else:
return None
async def get_sub_list(self, target: Target) -> list[RawPost]:
async with httpx.AsyncClient() as client:
params = { 'containerid': '107603' + target}
res = await client.get('https://m.weibo.cn/api/container/getIndex?', params=params, timeout=4.0)
res_data = json.loads(res.text)
if not res_data['ok']:
return []
custom_filter: Callable[[RawPost], bool] = lambda d: d['card_type'] == 9
return list(filter(custom_filter, res_data['data']['cards']))
def get_id(self, post: RawPost) -> Any:
return post['mblog']['id']
def filter_platform_custom(self, raw_post: RawPost) -> bool:
return raw_post['card_type'] == 9
def get_date(self, raw_post: RawPost) -> float:
created_time = datetime.strptime(raw_post['mblog']['created_at'], '%a %b %d %H:%M:%S %z %Y')
return created_time.timestamp()
def get_tags(self, raw_post: RawPost) -> Optional[list[Tag]]:
"Return Tag list of given RawPost"
text = raw_post['mblog']['text']
soup = bs(text, 'html.parser')
res = list(map(
lambda x: x[1:-1],
filter(
lambda s: s[0] == '#' and s[-1] == '#',
map(lambda x:x.text, soup.find_all('span', class_='surl-text'))
)
))
super_topic_img = soup.find('img', src=re.compile(r'timeline_card_small_super_default'))
if super_topic_img:
try:
res.append(super_topic_img.parent.parent.find('span', class_='surl-text').text + '超话')
except:
logger.info('super_topic extract error: {}'.format(text))
return res
def get_category(self, raw_post: RawPost) -> Category:
if raw_post['mblog'].get('retweeted_status'):
return Category(1)
elif raw_post['mblog'].get('page_info') and raw_post['mblog']['page_info'].get('type') == 'video':
return Category(2)
elif raw_post['mblog'].get('pics'):
return Category(3)
else:
return Category(4)
def _get_text(self, raw_text: str) -> str:
text = raw_text.replace('<br />', '\n')
return bs(text, 'html.parser').text
async def parse(self, raw_post: RawPost) -> Post:
header = {
'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'accept-language': 'zh-CN,zh;q=0.9',
'authority': 'm.weibo.cn',
'cache-control': 'max-age=0',
'sec-fetch-dest': 'empty',
'sec-fetch-mode': 'same-origin',
'sec-fetch-site': 'same-origin',
'upgrade-insecure-requests': '1',
'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 '
'Mobile Safari/537.36'}
info = raw_post['mblog']
if info['isLongText'] or info['pic_num'] > 9:
async with httpx.AsyncClient() as client:
res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid']), headers=header)
try:
full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1)
info = json.loads(full_json_text)
except:
logger.info('detail message error: https://m.weibo.cn/detail/{}'.format(info['mid']))
parsed_text = self._get_text(info['text'])
pic_urls = [img['large']['url'] for img in info.get('pics', [])]
detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
# return parsed_text, detail_url, pic_urls
return Post('weibo', text=parsed_text, url=detail_url, pics=pic_urls, target_name=info['user']['screen_name'])
@@ -0,0 +1,21 @@
from pydantic import BaseSettings
import warnings
import nonebot
class PlugConfig(BaseSettings):
hk_reporter_config_path: str = ""
hk_reporter_use_pic: bool = False
hk_reporter_use_local: bool = False
hk_reporter_browser: str = ''
hk_reporter_init_filter: bool = True
hk_reporter_use_queue: bool = True
class Config:
extra = 'ignore'
global_config = nonebot.get_driver().config
plugin_config = PlugConfig(**global_config.dict())
if plugin_config.hk_reporter_use_local:
warnings.warn('HK_REPORTER_USE_LOCAL is deprecated, please use HK_REPORTER_BROWSER')
+146
View File
@@ -0,0 +1,146 @@
import base64
from dataclasses import dataclass, field
from functools import reduce
from io import BytesIO
from typing import Optional, Union
from PIL import Image
import httpx
from nonebot import logger
from nonebot.adapters.cqhttp.message import Message, MessageSegment
from .plugin_config import plugin_config
from .utils import parse_text
@dataclass
class Post:
target_type: str
text: str
url: Optional[str] = None
target_name: Optional[str] = None
compress: bool = False
override_use_pic: Optional[bool] = None
pics: Union[list[Union[str,bytes]], list[str], list[bytes]] = field(default_factory=list)
extra_msg: list[Message] = field(default_factory=list)
_message: Optional[list] = None
def _use_pic(self):
if not self.override_use_pic is None:
return self.override_use_pic
return plugin_config.hk_reporter_use_pic
async def _pic_url_to_image(self, data: Union[str, bytes]) -> Image.Image:
pic_buffer = BytesIO()
if isinstance(data, str):
async with httpx.AsyncClient() as client:
res = await client.get(data)
pic_buffer.write(res.content)
else:
pic_buffer.write(data)
return Image.open(pic_buffer)
def _check_image_square(self, size: tuple[int, int]) -> bool:
return abs(size[0] - size[1]) / size[0] < 0.01
async def _pic_merge(self) -> None:
if len(self.pics) < 3:
return
first_image = await self._pic_url_to_image(self.pics[0])
if not self._check_image_square(first_image.size):
return
images: list[Image.Image] = [first_image]
# first row
for i in range(1, 3):
cur_img = await self._pic_url_to_image(self.pics[i])
if not self._check_image_square(cur_img.size):
return
if cur_img.size[1] != images[0].size[1]: # height not equal
return
images.append(cur_img)
_tmp = 0
x_coord = [0]
for i in range(3):
_tmp += images[i].size[0]
x_coord.append(_tmp)
y_coord = [0, first_image.size[1]]
async def process_row(row: int) -> bool:
if len(self.pics) < (row + 1) * 3:
return False
row_first_img = await self._pic_url_to_image(self.pics[row * 3])
if not self._check_image_square(row_first_img.size):
return False
if row_first_img.size[0] != images[0].size[0]:
return False
image_row: list[Image.Image] = [row_first_img]
for i in range(row * 3 + 1, row * 3 + 3):
cur_img = await self._pic_url_to_image(self.pics[i])
if not self._check_image_square(cur_img.size):
return False
if cur_img.size[1] != row_first_img.size[1]:
return False
if cur_img.size[0] != images[i % 3].size[0]:
return False
image_row.append(cur_img)
images.extend(image_row)
y_coord.append(y_coord[-1] + row_first_img.size[1])
return True
if await process_row(1):
matrix = (3,2)
else:
matrix = (3,1)
if await process_row(2):
matrix = (3,3)
logger.info('trigger merge image')
target = Image.new('RGB', (x_coord[-1], y_coord[-1]))
for y in range(matrix[1]):
for x in range(matrix[0]):
target.paste(images[y * matrix[0] + x], (
x_coord[x], y_coord[y], x_coord[x+1], y_coord[y+1]
))
target_io = BytesIO()
target.save(target_io, 'JPEG')
self.pics = self.pics[matrix[0] * matrix[1]: ]
self.pics.insert(0, target_io.getvalue())
async def generate_messages(self):
if self._message is None:
await self._pic_merge()
msgs = []
text = ''
if self.text:
if self._use_pic():
text += '{}'.format(self.text)
else:
text += '{}'.format(self.text if len(self.text) < 500 else self.text[:500] + '...')
text += '\n来源: {}'.format(self.target_type)
if self.target_name:
text += ' {}'.format(self.target_name)
if self._use_pic():
msgs.append(await parse_text(text))
if not self.target_type == 'rss' and self.url:
msgs.append(MessageSegment.text(self.url))
else:
if self.url:
text += ' \n详情: {}'.format(self.url)
msgs.append(MessageSegment.text(text))
for pic in self.pics:
# if isinstance(pic, bytes):
# pic = 'base64://' + base64.b64encode(pic).decode()
# msgs.append(Message("[CQ:image,file={url}]".format(url=pic)))
msgs.append(MessageSegment.image(pic))
if self.compress:
msgs = [reduce(lambda x, y: x.append(y), msgs, Message())]
msgs.extend(self.extra_msg)
self._message = msgs
return self._message
def __str__(self):
return 'type: {}\nfrom: {}\ntext: {}\nurl: {}\npic: {}'.format(
self.target_type,
self.target_name,
self.text if len(self.text) < 500 else self.text[:500] + '...',
self.url,
', '.join(map(lambda x: 'b64img' if isinstance(x, bytes) or x.startswith('base64') else x, self.pics))
)
+69
View File
@@ -0,0 +1,69 @@
import logging
from apscheduler.schedulers.asyncio import AsyncIOScheduler
import logging
import nonebot
from nonebot import get_driver, logger
from nonebot.log import LoguruHandler
from .config import Config
from .platform import platform_manager
from .send import do_send_msgs
from .send import send_msgs
from .types import UserSubInfo
from .plugin_config import plugin_config
scheduler = AsyncIOScheduler()
@get_driver().on_startup
async def _start():
scheduler.configure({"apscheduler.timezone": "Asia/Shanghai"})
scheduler.start()
# get_driver().on_startup(_start)
async def fetch_and_send(target_type: str):
config = Config()
target = config.get_next_target(target_type)
if not target:
return
logger.debug('try to fecth new posts from {}, target: {}'.format(target_type, target))
send_user_list = config.target_user_cache[target_type][target]
send_userinfo_list = list(map(
lambda user: UserSubInfo(
user,
lambda target: config.get_sub_category(target_type, target, user.user_type, user.user),
lambda target: config.get_sub_tags(target_type, target, user.user_type, user.user)
), send_user_list))
bot_list = list(nonebot.get_bots().values())
bot = bot_list[0] if bot_list else None
to_send = await platform_manager[target_type].fetch_new_post(target, send_userinfo_list)
for user, send_list in to_send:
for send_post in send_list:
logger.info('send to {}: {}'.format(user, send_post))
if not bot:
logger.warning('no bot connected')
else:
await send_msgs(bot, user.user, user.user_type, await send_post.generate_messages())
for platform_name, platform in platform_manager.items():
if platform.schedule_type in ['cron', 'interval', 'date']:
logger.info(f'start scheduler for {platform_name} with {platform.schedule_type} {platform.schedule_kw}')
scheduler.add_job(
fetch_and_send, platform.schedule_type, **platform.schedule_kw,
args=(platform_name,))
if plugin_config.hk_reporter_use_queue:
scheduler.add_job(do_send_msgs, 'interval', seconds=0.3, coalesce=True)
class SchedulerLogFilter(logging.Filter):
def filter(self, record: logging.LogRecord) -> bool:
logger.debug("logRecord", record, record.getMessage())
return not (record.name == "apscheduler" and 'skipped: maximum number of running instances reached' in record.getMessage())
aps_logger = logging.getLogger("apscheduler")
aps_logger.setLevel(30)
aps_logger.addFilter(SchedulerLogFilter())
aps_logger.handlers.clear()
aps_logger.addHandler(LoguruHandler())
+43
View File
@@ -0,0 +1,43 @@
import time
from nonebot import logger
from nonebot.adapters.cqhttp.bot import Bot
from .plugin_config import plugin_config
QUEUE = []
LAST_SEND_TIME = time.time()
async def _do_send(bot: 'Bot', user: str, user_type: str, msg):
if user_type == 'group':
await bot.call_api('send_group_msg', group_id=user, message=msg)
elif user_type == 'private':
await bot.call_api('send_private_msg', user_id=user, message=msg)
async def do_send_msgs():
global LAST_SEND_TIME
if time.time() - LAST_SEND_TIME < 1.5:
return
if QUEUE:
bot, user, user_type, msg, retry_time = QUEUE.pop(0)
try:
await _do_send(bot, user, user_type, msg)
except Exception as e:
if retry_time > 0:
QUEUE.insert(0, (bot, user, user_type, msg, retry_time - 1))
else:
msg_str = str(msg)
if len(msg_str) > 50:
msg_str = msg_str[:50] + '...'
logger.warning(f'send msg err {e} {msg_str}')
LAST_SEND_TIME = time.time()
async def send_msgs(bot, user, user_type, msgs):
if plugin_config.hk_reporter_use_queue:
for msg in msgs:
QUEUE.append((bot, user, user_type, msg, 2))
else:
for msg in msgs:
await _do_send(bot, user, user_type, msg)
+17
View File
@@ -0,0 +1,17 @@
from typing import Any, Callable, NamedTuple, NewType
from dataclasses import dataclass
RawPost = NewType('RawPost', Any)
Target = NewType('Target', str)
Category = NewType('Category', int)
Tag = NewType('Tag', str)
@dataclass(eq=True, frozen=True)
class User:
user: str
user_type: str
class UserSubInfo(NamedTuple):
user: User
category_getter: Callable[[Target], list[Category]]
tag_getter: Callable[[Target], list[Tag]]
+138
View File
@@ -0,0 +1,138 @@
import asyncio
import base64
from html import escape
import os
from time import asctime
import re
from typing import Awaitable, Callable, Optional
from nonebot.adapters.cqhttp.message import MessageSegment
from nonebot.log import logger
from pyppeteer import connect, launch
from pyppeteer.browser import Browser
from pyppeteer.chromium_downloader import check_chromium, download_chromium
from pyppeteer.page import Page
from bs4 import BeautifulSoup as bs
from .plugin_config import plugin_config
class Singleton(type):
_instances = {}
def __call__(cls, *args, **kwargs):
if cls not in cls._instances:
cls._instances[cls] = super(Singleton, cls).__call__(*args, **kwargs)
return cls._instances[cls]
if not plugin_config.hk_reporter_browser and not plugin_config.hk_reporter_use_local \
and not check_chromium():
os.environ['PYPPETEER_DOWNLOAD_HOST'] = 'http://npm.taobao.org/mirrors'
download_chromium()
class Render(metaclass=Singleton):
def __init__(self):
self.lock = asyncio.Lock()
self.browser: Browser
self.interval_log = ''
self.remote_browser = False
async def get_browser(self) -> Browser:
if plugin_config.hk_reporter_browser:
if plugin_config.hk_reporter_browser.startswith('local:'):
path = plugin_config.hk_reporter_browser.split('local:', 1)[1]
return await launch(executablePath=path, args=['--no-sandbox'])
if plugin_config.hk_reporter_browser.startswith('ws:'):
self.remote_browser = True
return await connect(browserWSEndpoint=plugin_config.hk_reporter_browser)
raise RuntimeError('HK_REPORTER_BROWSER error')
if plugin_config.hk_reporter_use_local:
return await launch(executablePath='/usr/bin/chromium', args=['--no-sandbox'])
return await launch(args=['--no-sandbox'])
async def close_browser(self):
if not self.remote_browser:
await self.browser.close()
async def render(self, url: str, viewport: Optional[dict] = None, target: Optional[str] = None,
operation: Optional[Callable[[Page], Awaitable[None]]] = None) -> Optional[bytes]:
retry_times = 0
while retry_times < 3:
try:
return await asyncio.wait_for(self.do_render(url, viewport, target, operation), 20)
except asyncio.TimeoutError:
retry_times += 1
logger.warning("render error {}\n".format(retry_times) + self.interval_log)
self.interval_log = ''
# if self.browser:
# await self.browser.close()
# self.lock.release()
def _inter_log(self, message: str) -> None:
self.interval_log += asctime() + '' + message + '\n'
async def do_render(self, url: str, viewport: Optional[dict] = None, target: Optional[str] = None,
operation: Optional[Callable[[Page], Awaitable[None]]] = None) -> Optional[bytes]:
async with self.lock:
self.browser = await self.get_browser()
self._inter_log('open browser')
page = await self.browser.newPage()
if operation:
await operation(page)
else:
await page.goto(url)
self._inter_log('open page')
if viewport:
await page.setViewport(viewport)
self._inter_log('set viewport')
if target:
target_ele = await page.querySelector(target)
if not target_ele:
return None
data = await target_ele.screenshot(type='jpeg', encoding='binary')
else:
data = await page.screenshot(type='jpeg', encoding='binary')
self._inter_log('screenshot')
await page.close()
self._inter_log('close page')
await self.close_browser()
self._inter_log('close browser')
assert(isinstance(data, bytes))
return data
async def text_to_pic(self, text: str) -> Optional[bytes]:
lines = text.split('\n')
parsed_lines = list(map(lambda x: '<p>{}</p>'.format(escape(x)), lines))
html_text = '<div style="width:17em;padding:1em">{}</div>'.format(''.join(parsed_lines))
url = 'data:text/html;charset=UTF-8;base64,{}'.format(base64.b64encode(html_text.encode()).decode())
data = await self.render(url, target='div')
return data
async def text_to_pic_cqcode(self, text:str) -> MessageSegment:
data = await self.text_to_pic(text)
# logger.debug('file size: {}'.format(len(data)))
if data:
# logger.debug(code)
return MessageSegment.image(data)
else:
return MessageSegment.text('生成图片错误')
async def parse_text(text: str) -> MessageSegment:
'return raw text if don\'t use pic, otherwise return rendered opcode'
if plugin_config.hk_reporter_use_pic:
render = Render()
return await render.text_to_pic_cqcode(text)
else:
return MessageSegment.text(text)
def html_to_text(html: str, query_dict: dict = {}) -> str:
html = re.sub(r'<br\s*/?>', '<br>\n', html)
html = html.replace('</p>', '</p>\n')
soup = bs(html, 'html.parser')
if query_dict:
node = soup.find(**query_dict)
else:
node = soup
assert node is not None
return node.text.strip()