diff --git a/src/plugins/hk_reporter/config.py b/src/plugins/hk_reporter/config.py index 1b15d16..84ea072 100644 --- a/src/plugins/hk_reporter/config.py +++ b/src/plugins/hk_reporter/config.py @@ -1,12 +1,14 @@ -from .utils import Singleton, supported_target_type -from .types import User -from .plugin_config import plugin_config -from os import path -import nonebot -from tinydb import TinyDB, Query from collections import defaultdict -from typing import DefaultDict +from os import path import os +from typing import DefaultDict + +import nonebot +from tinydb import Query, TinyDB + +from .plugin_config import plugin_config +from .types import User +from .utils import Singleton, supported_target_type def get_config_path() -> str: diff --git a/src/plugins/hk_reporter/config_manager.py b/src/plugins/hk_reporter/config_manager.py index b97a65d..70cef4a 100644 --- a/src/plugins/hk_reporter/config_manager.py +++ b/src/plugins/hk_reporter/config_manager.py @@ -1,16 +1,16 @@ -from nonebot.rule import to_me -from nonebot.typing import T_State +from nonebot import logger, on_command from nonebot.adapters.cqhttp import Bot, Event, GroupMessageEvent from nonebot.adapters.cqhttp.message import Message -from nonebot.permission import Permission, SUPERUSER from nonebot.adapters.cqhttp.permission import GROUP_ADMIN, GROUP_MEMBER, GROUP_OWNER -from nonebot import on_command, logger +from nonebot.permission import Permission, SUPERUSER +from nonebot.rule import to_me +from nonebot.typing import T_State -from .platform.utils import check_sub_target -from .platform import platform_manager from .config import Config, NoSuchSubscribeException -from .utils import parse_text +from .platform import platform_manager +from .platform.utils import check_sub_target from .send import send_msgs +from .utils import parse_text add_sub = on_command("添加订阅", rule=to_me(), permission=GROUP_ADMIN | GROUP_OWNER | SUPERUSER, priority=5) @add_sub.got('platform', '请输入想要订阅的平台,目前支持:{}'.format(', '.join(platform_manager.keys()))) diff --git a/src/plugins/hk_reporter/platform/bilibili.py b/src/plugins/hk_reporter/platform/bilibili.py index 7f65979..ba5bdba 100644 --- a/src/plugins/hk_reporter/platform/bilibili.py +++ b/src/plugins/hk_reporter/platform/bilibili.py @@ -1,97 +1,12 @@ +from collections import defaultdict +import json from typing import Any, Optional -from ..types import Category, RawPost, Tag, Target -from ..utils import Singleton -from ..post import Post -from collections import defaultdict -from nonebot import logger import httpx -import json -import time -from .platform import Platform, CategoryNotSupport -class Bilibili_(metaclass=Singleton): - - def __init__(self): - self.exists_posts = defaultdict(set) - self.inited = defaultdict(lambda: False) - - async def get_user_post_list(self, user_id): - async with httpx.AsyncClient() as client: - params = {'host_uid': user_id, 'offset': 0, 'need_top': 0} - res = await client.get('https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history', params=params, timeout=4.0) - res_dict = json.loads(res.text) - if res_dict['code'] == 0: - return res_dict['data'] - - def filter(self, data, target, init=False) -> list[Post]: - cards = data['cards'] - res: list[Post] = [] - for card in cards: - dynamic_id = card['desc']['dynamic_id'] - if init: - self.exists_posts[target].add(dynamic_id) - continue - if dynamic_id in self.exists_posts[target]: - continue - if time.time() - card['desc']['timestamp'] > 60 * 60 * 2: - continue - res.append(self.parse(card, target)) - if None in res: - res.remove(None) - return res - - def parse(self, card, target) -> Post: - card_content = json.loads(card['card']) - dynamic_id = card['desc']['dynamic_id'] - self.exists_posts[target].add(dynamic_id) - if card['desc']['type'] == 2: - # 一般动态 - text = card_content['item']['description'] - url = 'https://t.bilibili.com/{}'.format(card['desc']['dynamic_id']) - pic = [img['img_src'] for img in card_content['item']['pictures']] - elif card['desc']['type'] == 64: - # 专栏文章 - text = '{} {}'.format(card_content['title'], card_content['summary']) - url = 'https://www.bilibili.com/read/cv{}'.format(card['desc']['rid']) - pic = card_content['image_urls'] - elif card['desc']['type'] == 8: - # 视频 - text = card_content['dynamic'] - url = 'https://www.bilibili.com/video/{}'.format(card['desc']['bvid']) - pic = [card_content['pic']] - elif card['desc']['type'] == 4: - # 纯文字 - text = card_content['item']['content'] - url = 'https://t.bilibili.com/{}'.format(card['desc']['dynamic_id']) - pic = [] - else: - logger.error(card) - return None - return Post('bilibili', text, url, pic) - - async def fetch_new_post(self, target) -> list[Post]: - try: - post_list_data = await self.get_user_post_list(target) - if self.inited[target]: - return self.filter(post_list_data, target) - else: - self.filter(post_list_data, target, True) - logger.info('bilibili init {} success'.format(target)) - logger.info('post list: {}'.format(self.exists_posts[target])) - self.inited[target] = True - return [] - except httpx.RequestError as err: - logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url)) - return [] - -async def get_user_info(mid): - async with httpx.AsyncClient() as client: - res = await client.get('https://api.bilibili.com/x/space/acc/info', params={'mid': mid}) - res_data = json.loads(res.text) - if res_data['code']: - return None - return res_data['data']['name'] +from ..post import Post +from ..types import Category, RawPost, Tag, Target +from .platform import CategoryNotSupport, Platform class Bilibili(Platform): diff --git a/src/plugins/hk_reporter/platform/platform.py b/src/plugins/hk_reporter/platform/platform.py index 51788fc..3c023b6 100644 --- a/src/plugins/hk_reporter/platform/platform.py +++ b/src/plugins/hk_reporter/platform/platform.py @@ -1,13 +1,14 @@ import time from collections import defaultdict -from typing import Any, Literal, Optional, Protocol +from typing import Any, Optional +import httpx from nonebot import logger from ..config import Config from ..post import Post +from ..types import Category, RawPost, Tag, Target, User from ..utils import Singleton -from ..types import Category, Tag, RawPost, Target, User class CategoryNotSupport(Exception): @@ -82,7 +83,7 @@ class Platform(PlatformProto): return self.cache[post_id] async def filter_common(self, target: Target, raw_post_list: list[RawPost]) -> list[RawPost]: - if False and not self.inited.get(target, False): + if not self.inited.get(target, False): # target not init for raw_post in raw_post_list: post_id = self.get_id(raw_post) @@ -95,8 +96,8 @@ class Platform(PlatformProto): post_id = self.get_id(raw_post) if post_id in self.exists_posts[target]: continue - # if (post_time := self.get_date(raw_post)) and time.time() - post_time > 2 * 60 * 60: - # continue + if (post_time := self.get_date(raw_post)) and time.time() - post_time > 2 * 60 * 60: + continue try: if not self.filter_platform_custom(raw_post): continue @@ -132,25 +133,30 @@ class Platform(PlatformProto): return res async def fetch_new_post(self, target: Target, users: list[User]) -> list[tuple[User, list[Post]]]: - config = Config() - post_list = await self.get_sub_list(target) - new_posts = await self.filter_common(target, post_list) - res: list[tuple[User, list[Post]]] = [] - if not new_posts: + try: + config = Config() + post_list = await self.get_sub_list(target) + new_posts = await self.filter_common(target, post_list) + res: list[tuple[User, list[Post]]] = [] + if not new_posts: + return [] + else: + for post in new_posts: + logger.info('fetch new post from {} {}: {}'.format(self.platform_name, target, self.get_id(post))) + for user in users: + required_tags = config.get_sub_tags(self.platform_name, target, user.user_type, user.user) if self.enable_tag else [] + cats = config.get_sub_category(self.platform_name, target, user.user_type, user.user) + user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags) + user_post: list[Post] = [] + for raw_post in user_raw_post: + user_post.append(await self._parse_with_cache(raw_post)) + res.append((user, user_post)) + self.cache = {} + return res + except httpx.RequestError as err: + logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url)) return [] - else: - for post in new_posts: - logger.info('fetch new post from {} {}: {}'.format(self.platform_name, target, self.get_id(post))) - for user in users: - required_tags = config.get_sub_tags(self.platform_name, target, user.user_type, user.user) if self.enable_tag else [] - cats = config.get_sub_category(self.platform_name, target, user.user_type, user.user) - user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags) - user_post: list[Post] = [] - for raw_post in user_raw_post: - user_post.append(await self._parse_with_cache(raw_post)) - res.append((user, user_post)) - self.cache = {} - return res + class PlatformNoTarget(PlatformProto): diff --git a/src/plugins/hk_reporter/platform/rss.py b/src/plugins/hk_reporter/platform/rss.py index 626fc43..f5c1ac5 100644 --- a/src/plugins/hk_reporter/platform/rss.py +++ b/src/plugins/hk_reporter/platform/rss.py @@ -1,15 +1,13 @@ +import calendar from typing import Any, Optional -from ..types import RawPost, Target -from ..utils import Singleton -from ..post import Post -from .platform import Platform -from collections import defaultdict + from bs4 import BeautifulSoup as bs -from nonebot import logger import feedparser import httpx -import time -import calendar + +from ..post import Post +from ..types import RawPost, Target +from .platform import Platform class Rss(Platform): diff --git a/src/plugins/hk_reporter/platform/utils.py b/src/plugins/hk_reporter/platform/utils.py index 7ff39ed..53449d5 100644 --- a/src/plugins/hk_reporter/platform/utils.py +++ b/src/plugins/hk_reporter/platform/utils.py @@ -28,12 +28,11 @@ async def fetch_and_send(target_type: str): send_list = config.target_user_cache[target_type][target] bot_list = list(nonebot.get_bots().values()) bot = bot_list[0] if bot_list else None - if target_type == 'rss': - to_send = await platform_manager[target_type].fetch_new_post(target, send_list) - for user, send_list in to_send: - for send_post in send_list: - logger.debug('send to {}: {}'.format(user, send_post)) - if not bot: - logger.warning('no bot connected') - else: - send_msgs(bot, user.user, user.user_type, await send_post.generate_messages()) + to_send = await platform_manager[target_type].fetch_new_post(target, send_list) + for user, send_list in to_send: + for send_post in send_list: + logger.debug('send to {}: {}'.format(user, send_post)) + if not bot: + logger.warning('no bot connected') + else: + send_msgs(bot, user.user, user.user_type, await send_post.generate_messages()) diff --git a/src/plugins/hk_reporter/platform/weibo.py b/src/plugins/hk_reporter/platform/weibo.py index de20788..676dd72 100644 --- a/src/plugins/hk_reporter/platform/weibo.py +++ b/src/plugins/hk_reporter/platform/weibo.py @@ -1,81 +1,18 @@ -import httpx +from collections import defaultdict +from datetime import datetime import json import time -from collections import defaultdict -from bs4 import BeautifulSoup as bs -from datetime import datetime -from nonebot import logger from typing import Any, Optional -from ..utils import Singleton +from bs4 import BeautifulSoup as bs +import httpx +from nonebot import logger + from ..post import Post from ..types import * +from ..utils import Singleton from .platform import Platform -class Weibo_(metaclass=Singleton): - - def __init__(self): - self.exists_posts = defaultdict(set) - self.inited = defaultdict(lambda: False) - - async def get_user_post_list(self, weibo_id: str): - async with httpx.AsyncClient() as client: - params = { 'containerid': '107603' + weibo_id} - res = await client.get('https://m.weibo.cn/api/container/getIndex?', params=params, timeout=4.0) - return res.text - - def filter_weibo(self, weibo_raw_text, target, init=False): - weibo_dict = json.loads(weibo_raw_text) - weibos = weibo_dict['data']['cards'] - res: list[Post] = [] - for weibo in weibos: - if weibo['card_type'] != 9: - continue - info = weibo['mblog'] - if init: - self.exists_posts[target].add(info['id']) - continue - if info['id'] in self.exists_posts[target]: - continue - created_time = datetime.strptime(info['created_at'], '%a %b %d %H:%M:%S %z %Y') - if time.time() - created_time.timestamp() > 60 * 60 * 2: - continue - res.append(self.parse_weibo(weibo, target)) - return res - - def parse_weibo(self, weibo_dict, target): - info = weibo_dict['mblog'] - parsed_text = bs(info['text'], 'html.parser').text - pic_urls = [img['large']['url'] for img in info.get('pics', [])] - self.exists_posts[target].add(info['id']) - detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid']) - # return parsed_text, detail_url, pic_urls - return Post('weibo', parsed_text, detail_url, pic_urls) - - async def fetch_new_post(self, target): - try: - post_list = await self.get_user_post_list(target) - if not self.inited[target]: - self.filter_weibo(post_list, target, True) - logger.info('weibo init {} success'.format(target)) - logger.info('post list: {}'.format(self.exists_posts[target])) - self.inited[target] = True - return [] - return self.filter_weibo(post_list, target) - except httpx.RequestError as err: - logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url)) - return [] - -async def get_user_info(id): - async with httpx.AsyncClient() as client: - param = {'containerid': '100505' + id} - res = await client.get('https://m.weibo.cn/api/container/getIndex', params=param) - res_dict = json.loads(res.text) - if res_dict.get('ok') == 1: - return res_dict['data']['userInfo']['screen_name'] - else: - return None - class Weibo(Platform): categories = {