Merge branch 'dev'

This commit is contained in:
felinae98
2021-06-29 19:43:26 +08:00
19 changed files with 873 additions and 235 deletions
@@ -1,4 +1,4 @@
from .platform import PlatformProto
from .platform import Platform
from pkgutil import iter_modules
from pathlib import Path
from importlib import import_module
@@ -9,9 +9,9 @@ for (_, module_name, _) in iter_modules([_package_dir]):
async def check_sub_target(target_type, target):
return await platform_manager[target_type].get_account_name(target)
return await platform_manager[target_type].get_target_name(target)
platform_manager: dict[str, PlatformProto] = {
platform_manager: dict[str, Platform] = {
obj.platform_name: obj() for obj in \
filter(lambda platform: platform.enabled, PlatformProto.registory)
filter(lambda platform: platform.enabled, Platform.registory)
}
@@ -1,21 +1,17 @@
from typing import Any
import httpx
import json
import time
from collections import defaultdict
from bs4 import BeautifulSoup as bs
from datetime import datetime
from nonebot import logger
from ..types import Category, RawPost, Tag, Target
from ..types import RawPost, Target
from .platform import PlatformNoTarget, CategoryNotSupport
from .platform import NewMessage, NoTargetMixin, CategoryNotSupport
from ..utils import Singleton, Render
from ..utils import Render
from ..post import Post
class Arknights(PlatformNoTarget):
class Arknights(NewMessage, NoTargetMixin):
categories = {}
platform_name = 'arknights'
@@ -23,13 +19,14 @@ class Arknights(PlatformNoTarget):
enable_tag = False
enabled = True
is_common = False
schedule_interval = 30
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
@staticmethod
async def get_account_name(_: Target) -> str:
async def get_target_name(_: Target) -> str:
return '明日方舟游戏内公告'
async def get_sub_list(self) -> list[RawPost]:
async def get_sub_list(self, _) -> list[RawPost]:
async with httpx.AsyncClient() as client:
raw_data = await client.get('http://ak-fs.hypergryph.com/announce/IOS/announcement.meta.json')
return json.loads(raw_data.text)['announceList']
@@ -37,7 +34,7 @@ class Arknights(PlatformNoTarget):
def get_id(self, post: RawPost) -> Any:
return post['announceId']
def get_date(self, post: RawPost) -> None:
def get_date(self, _: RawPost) -> None:
return None
async def parse(self, raw_post: RawPost) -> Post:
@@ -5,9 +5,9 @@ import httpx
from ..post import Post
from ..types import Category, RawPost, Tag, Target
from .platform import CategoryNotSupport, Platform
from .platform import NewMessage, TargetMixin, CategoryNotSupport
class Bilibili(Platform):
class Bilibili(NewMessage, TargetMixin):
categories = {
1: "一般动态",
@@ -20,11 +20,12 @@ class Bilibili(Platform):
enable_tag = True
enabled = True
is_common = True
schedule_interval = 10
schedule_type = 'interval'
schedule_kw = {'seconds': 10}
name = 'B站'
@staticmethod
async def get_account_name(target: Target) -> Optional[str]:
async def get_target_name(target: Target) -> Optional[str]:
async with httpx.AsyncClient() as client:
res = await client.get('https://api.bilibili.com/x/space/acc/info', params={'mid': target})
res_data = json.loads(res.text)
@@ -1,27 +1,26 @@
from typing import Any
import httpx
import json
from .platform import PlatformNoTarget
from ..utils import Singleton
from .platform import NewMessage, NoTargetMixin
from ..types import RawPost
from ..post import Post
class MonsterSiren(PlatformNoTarget):
class MonsterSiren(NewMessage, NoTargetMixin):
categories = {}
platform_name = 'monster-siren'
enable_tag = False
enabled = True
is_common = False
schedule_interval = 30
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
name = '塞壬唱片官网新闻'
@staticmethod
async def get_account_name(_) -> str:
async def get_target_name(_) -> str:
return '塞壬唱片新闻'
async def get_sub_list(self) -> list[RawPost]:
async def get_sub_list(self, _) -> list[RawPost]:
async with httpx.AsyncClient() as client:
raw_data = await client.get('https://monster-siren.hypergryph.com/api/news')
return raw_data.json()['data']['list']
@@ -1,7 +1,7 @@
from abc import abstractmethod
from abc import abstractmethod, ABC
from dataclasses import dataclass
import time
from collections import defaultdict
from typing import Any, Collection, Optional
from typing import Any, Collection, Optional, Literal
import httpx
from nonebot import logger
@@ -18,98 +18,122 @@ class CategoryNotSupport(Exception):
class RegistryMeta(type):
def __new__(cls, name, bases, namespace, **kwargs):
if name not in ['PlatformProto', 'Platform', 'PlatformNoTarget'] and \
'platform_name' not in namespace:
raise TypeError('Platform has no `platform_name`')
return super().__new__(cls, name, bases, namespace, **kwargs)
return super().__new__(cls, name, bases, namespace)
def __init__(cls, name, bases, namespace, **kwargs):
if not hasattr(cls, 'registory'):
if kwargs.get('base'):
# this is the base class
cls.registory = []
elif name not in ['Platform', 'PlatformNoTarget']:
elif not kwargs.get('abstract'):
# this is the subclass
cls.registory.append(cls)
super().__init__(name, bases, namespace, **kwargs)
class RegistryABCMeta(RegistryMeta, ABC):
...
class PlatformProto(metaclass=RegistryMeta):
categories: dict[Category, str]
reverse_category: dict[str, Category]
class StorageMixinProto(metaclass=RegistryABCMeta, abstract=True):
has_target: bool
platform_name: str
name: str
enable_tag: bool
cache: dict[Any, Post]
enabled: bool
is_common: bool
schedule_interval: int
@abstractmethod
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
def get_stored_data(self, target: Target) -> Any:
...
@staticmethod
@abstractmethod
async def get_account_name(target: Target) -> Optional[str]:
"return the username(name) of the target"
def set_stored_data(self, target: Target, data: Any):
...
@abstractmethod
def get_id(self, post: RawPost) -> Any:
"Get post id of given RawPost"
class TargetMixin(StorageMixinProto, abstract=True):
@abstractmethod
def get_date(self, post: RawPost) -> Optional[int]:
"Get post timestamp and return, return None if can't get the time"
has_target = True
def __init__(self):
super().__init__()
self.store: dict[Target, Any] = dict()
def get_stored_data(self, target: Target) -> Any:
return self.store.get(target)
def set_stored_data(self, target: Target, data: Any):
self.store[target] = data
class NoTargetMixin(StorageMixinProto, abstract=True):
has_target = False
def __init__(self):
super().__init__()
self.store = None
def get_stored_data(self, _: Target) -> Any:
return self.store
def set_stored_data(self, _: Target, data: Any):
self.store = data
class PlaformNameMixin(metaclass=RegistryABCMeta, abstract=True):
platform_name: str
class CategoryMixin(metaclass=RegistryABCMeta, abstract=True):
@abstractmethod
def get_category(self, post: RawPost) -> Optional[Category]:
"Return category of given Rawpost"
raise NotImplementedError()
@abstractmethod
def get_tags(self, raw_post: RawPost) -> Optional[Collection[Tag]]:
"Return Tag list of given RawPost"
class ParsePostMixin(metaclass=RegistryABCMeta, abstract=True):
@abstractmethod
async def parse(self, raw_post: RawPost) -> Post:
"parse RawPost into post"
...
class MessageProcessMixin(PlaformNameMixin, CategoryMixin, ParsePostMixin, abstract=True):
"General message process fetch, parse, filter progress"
def __init__(self):
super().__init__()
self.parse_cache: dict[Any, Post] = dict()
@abstractmethod
def filter_platform_custom(self, post: RawPost) -> bool:
"a customed filter"
raise NotImplementedError()
def get_id(self, post: RawPost) -> Any:
"Get post id of given RawPost"
async def _parse_with_cache(self, post: RawPost) -> Post:
post_id = self.get_id(post)
if post_id not in self.cache:
async def _parse_with_cache(self, raw_post: RawPost) -> Post:
post_id = self.get_id(raw_post)
if post_id not in self.parse_cache:
retry_times = 3
while retry_times:
try:
self.cache[post_id] = await self.parse(post)
self.parse_cache[post_id] = await self.parse(raw_post)
break
except Exception as err:
if not retry_times:
raise err
retry_times -= 1
return self.cache[post_id]
return self.parse_cache[post_id]
def _do_filter_common(self, raw_post_list: list[RawPost], exists_posts_set: set) -> list[RawPost]:
@abstractmethod
async def get_sub_list(self, target: Target) -> list[RawPost]:
"Get post list of the given target"
@abstractmethod
def get_date(self, post: RawPost) -> Optional[int]:
"Get post timestamp and return, return None if can't get the time"
async def filter_common(self, raw_post_list: list[RawPost]) -> list[RawPost]:
res = []
for raw_post in raw_post_list:
post_id = self.get_id(raw_post)
if post_id in exists_posts_set:
continue
# post_id = self.get_id(raw_post)
# if post_id in exists_posts_set:
# continue
if (post_time := self.get_date(raw_post)) and time.time() - post_time > 2 * 60 * 60 and \
plugin_config.hk_reporter_init_filter:
continue
try:
if not self.filter_platform_custom(raw_post):
continue
except NotImplementedError:
pass
try:
self.get_category(raw_post)
except CategoryNotSupport:
@@ -117,9 +141,45 @@ class PlatformProto(metaclass=RegistryMeta):
except NotImplementedError:
pass
res.append(raw_post)
exists_posts_set.add(post_id)
return res
class NewMessageProcessMixin(StorageMixinProto, MessageProcessMixin, abstract=True):
"General message process, fetch, parse, filter, and only returns the new Post"
@dataclass
class MessageStorage():
inited: bool
exists_posts: set[Any]
async def filter_common_with_diff(self, target: Target, raw_post_list: list[RawPost]) -> list[RawPost]:
filtered_post = await self.filter_common(raw_post_list)
store = self.get_stored_data(target) or self.MessageStorage(False, set())
res = []
if not store.inited and plugin_config.hk_reporter_init_filter:
# target not init
for raw_post in filtered_post:
post_id = self.get_id(raw_post)
store.exists_posts.add(post_id)
logger.info('init {}-{} with {}'.format(self.platform_name, target, store.exists_posts))
store.inited = True
else:
for raw_post in filtered_post:
post_id = self.get_id(raw_post)
if post_id in store.exists_posts:
continue
res.append(raw_post)
self.set_stored_data(target, store)
return res
class UserCustomFilterMixin(CategoryMixin, ParsePostMixin, abstract=True):
categories: dict[Category, str]
enable_tag: bool
@abstractmethod
def get_tags(self, raw_post: RawPost) -> Optional[Collection[Tag]]:
"Return Tag list of given RawPost"
async def filter_user_custom(self, raw_post_list: list[RawPost], cats: list[Category], tags: list[Tag]) -> list[RawPost]:
res: list[RawPost] = []
for raw_post in raw_post_list:
@@ -139,112 +199,96 @@ class PlatformProto(metaclass=RegistryMeta):
res.append(raw_post)
return res
async def dispatch_user_post(self, target: Target, new_posts: list[RawPost], users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
res: list[tuple[User, list[Post]]] = []
for user, category_getter, tag_getter in users:
required_tags = tag_getter(target) if self.enable_tag else []
cats = category_getter(target)
user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags)
user_post: list[Post] = []
for raw_post in user_raw_post:
if isinstance(self, MessageProcessMixin):
user_post.append(await self._parse_with_cache(raw_post))
else:
user_post.append(await self.parse(raw_post))
res.append((user, user_post))
return res
class Platform(PlatformProto):
"platform with target(account), like weibo, bilibili"
class Platform(metaclass=RegistryABCMeta, base=True):
# schedule_interval: int
schedule_type: Literal['date', 'interval', 'cron']
schedule_kw: dict
is_common: bool
enabled: bool
name: str
categories: dict[Category, str]
has_target: bool = True
platform_name: str
enable_tag: bool
def __init__(self):
self.exists_posts = defaultdict(set)
self.inited = dict()
self.reverse_category = {}
self.cache: dict[Any, Post] = {}
for key, val in self.categories.items():
self.reverse_category[val] = key
@staticmethod
@abstractmethod
async def get_target_name(target: Target) -> Optional[str]:
...
@abstractmethod
async def get_sub_list(self, target: Target) -> list[RawPost]:
"Get post list of the given target"
async def filter_common(self, target: Target, raw_post_list: list[RawPost]) -> list[RawPost]:
if not self.inited.get(target, False) and plugin_config.hk_reporter_init_filter:
# target not init
for raw_post in raw_post_list:
post_id = self.get_id(raw_post)
self.exists_posts[target].add(post_id)
logger.info('init {}-{} with {}'.format(self.platform_name, target, self.exists_posts[target]))
self.inited[target] = True
return []
return self._do_filter_common(raw_post_list, self.exists_posts[target])
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
...
class NewMessage(
Platform,
NewMessageProcessMixin,
UserCustomFilterMixin,
abstract=True
):
"Fetch a list of messages, filter the new messages, dispatch it to different users"
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
try:
post_list = await self.get_sub_list(target)
new_posts = await self.filter_common(target, post_list)
res: list[tuple[User, list[Post]]] = []
new_posts = await self.filter_common_with_diff(target, post_list)
if not new_posts:
return []
else:
for post in new_posts:
logger.info('fetch new post from {} {}: {}'.format(self.platform_name, target, self.get_id(post)))
for user, category_getter, tag_getter in users:
required_tags = tag_getter(target) if self.enable_tag else []
cats = category_getter(target)
user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags)
user_post: list[Post] = []
for raw_post in user_raw_post:
user_post.append(await self._parse_with_cache(raw_post))
res.append((user, user_post))
self.cache = {}
logger.info('fetch new post from {} {}: {}'.format(
self.platform_name,
target if self.has_target else '-',
self.get_id(post)))
res = await self.dispatch_user_post(target, new_posts, users)
self.parse_cache = {}
return res
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []
class StatusChange(
Platform,
StorageMixinProto,
PlaformNameMixin,
UserCustomFilterMixin,
abstract=True
):
"Watch a status, and fire a post when status changes"
class PlatformNoTarget(PlatformProto):
@abstractmethod
async def get_status(self, target: Target) -> Any:
...
categories: dict[Category, str]
has_target = False
platform_name: str
enable_tag: bool
@abstractmethod
def compare_status(self, target: Target, old_status, new_status) -> Optional[RawPost]:
...
async def get_sub_list(self) -> list[RawPost]:
"Get post list of the given target"
raise NotImplementedError()
@abstractmethod
async def parse(self, raw_post: RawPost) -> Post:
...
def __init__(self):
self.exists_posts = set()
self.inited = False
self.reverse_category = {}
self.cache: dict[Any, Post] = {}
for key, val in self.categories.items():
self.reverse_category[val] = key
async def filter_common(self, raw_post_list: list[RawPost]) -> list[RawPost]:
if not self.inited and plugin_config.hk_reporter_init_filter:
# target not init
for raw_post in raw_post_list:
post_id = self.get_id(raw_post)
self.exists_posts.add(post_id)
logger.info('init {} with {}'.format(self.platform_name, self.exists_posts))
self.inited = True
return []
return self._do_filter_common(raw_post_list, self.exists_posts)
async def fetch_new_post(self, _: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
try:
post_list = await self.get_sub_list()
new_posts = await self.filter_common(post_list)
res: list[tuple[User, list[Post]]] = []
if not new_posts:
return []
else:
for post in new_posts:
logger.info('fetch new post from {}: {}'.format(self.platform_name, self.get_id(post)))
for user, category_getter, tag_getter in users:
required_tags = tag_getter(Target('default'))
cats = category_getter(Target('default'))
user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags)
user_post: list[Post] = []
for raw_post in user_raw_post:
user_post.append(await self._parse_with_cache(raw_post))
res.append((user, user_post))
self.cache = {}
new_status = await self.get_status(target)
res = []
if old_status := self.get_stored_data(target):
diff = self.compare_status(target, old_status, new_status)
if diff:
res = await self.dispatch_user_post(target, [diff], users)
self.set_stored_data(target, new_status)
return res
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
@@ -7,9 +7,9 @@ import httpx
from ..post import Post
from ..types import RawPost, Target
from .platform import Platform
from .platform import NewMessage, TargetMixin
class Rss(Platform):
class Rss(NewMessage, TargetMixin):
categories = {}
enable_tag = False
@@ -17,10 +17,11 @@ class Rss(Platform):
name = "Rss"
enabled = True
is_common = True
schedule_interval = 30
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
@staticmethod
async def get_account_name(target: Target) -> Optional[str]:
async def get_target_name(target: Target) -> Optional[str]:
async with httpx.AsyncClient() as client:
res = await client.get(target, timeout=10.0)
feed = feedparser.parse(res.text)
@@ -9,70 +9,70 @@ import httpx
from ..post import Post
from ..types import *
from .platform import Platform
# from .platform import Platform
class Wechat(Platform):
# class Wechat(Platform):
categories = {}
enable_tag = False
platform_name = 'wechat'
enabled = False
is_common = False
name = '微信公众号'
# categories = {}
# enable_tag = False
# platform_name = 'wechat'
# enabled = False
# is_common = False
# name = '微信公众号'
@classmethod
def _get_query_url(cls, target: Target):
return 'https://weixin.sogou.com/weixin?type=1&s_from=input&query={}&ie=utf8&_sug_=n&_sug_type_='.format(target)
# @classmethod
# def _get_query_url(cls, target: Target):
# return 'https://weixin.sogou.com/weixin?type=1&s_from=input&query={}&ie=utf8&_sug_=n&_sug_type_='.format(target)
@classmethod
async def _get_target_soup(cls, target: Target) -> Optional[bs]:
target_url = cls._get_query_url(target)
async with httpx.AsyncClient() as client:
res = await client.get(target_url)
soup = bs(res.text, 'html.parser')
blocks = soup.find(class_='news-list2').find_all('li',recursive=False)
for block in blocks:
if block.find(string=[target]):
return block
# @classmethod
# async def _get_target_soup(cls, target: Target) -> Optional[bs]:
# target_url = cls._get_query_url(target)
# async with httpx.AsyncClient() as client:
# res = await client.get(target_url)
# soup = bs(res.text, 'html.parser')
# blocks = soup.find(class_='news-list2').find_all('li',recursive=False)
# for block in blocks:
# if block.find(string=[target]):
# return block
@classmethod
async def get_account_name(cls, target: Target) -> Optional[str]:
if not (block := await cls._get_target_soup(target)):
return None
return block.find('p', class_='tit').find('a').text
# @classmethod
# async def get_account_name(cls, target: Target) -> Optional[str]:
# if not (block := await cls._get_target_soup(target)):
# return None
# return block.find('p', class_='tit').find('a').text
async def get_sub_list(self, target: Target) -> list[RawPost]:
block = await self._get_target_soup(target)
if (last_post_dt := block.find('dt', string='最近文章:')):
post = {
'title': last_post_dt.find_parent().find('a').text,
'target': target,
'page_url': self._get_query_url(target),
'name': block.find('p', class_='tit').find('a').text
}
return [post]
else:
return []
# async def get_sub_list(self, target: Target) -> list[RawPost]:
# block = await self._get_target_soup(target)
# if (last_post_dt := block.find('dt', string='最近文章:')):
# post = {
# 'title': last_post_dt.find_parent().find('a').text,
# 'target': target,
# 'page_url': self._get_query_url(target),
# 'name': block.find('p', class_='tit').find('a').text
# }
# return [post]
# else:
# return []
def get_id(self, post: RawPost) -> Any:
return post['title']
# def get_id(self, post: RawPost) -> Any:
# return post['title']
def get_date(self, post: RawPost):
return None
# def get_date(self, post: RawPost):
# return None
def get_tags(self, post: RawPost):
return None
# def get_tags(self, post: RawPost):
# return None
def get_category(self, post: RawPost):
return None
# def get_category(self, post: RawPost):
# return None
async def parse(self, raw_post: RawPost) -> Post:
# TODO get content of post
return Post(target_type='wechat',
text='{}\n详细内容请自行查看公众号'.format(raw_post['title']),
target_name=raw_post['name'],
pics=[],
url=''
)
# async def parse(self, raw_post: RawPost) -> Post:
# # TODO get content of post
# return Post(target_type='wechat',
# text='{}\n详细内容请自行查看公众号'.format(raw_post['title']),
# target_name=raw_post['name'],
# pics=[],
# url=''
# )
@@ -9,9 +9,9 @@ from nonebot import logger
from ..post import Post
from ..types import *
from .platform import Platform
from .platform import NewMessage, TargetMixin
class Weibo(Platform):
class Weibo(NewMessage, TargetMixin):
categories = {
1: '转发',
@@ -23,14 +23,11 @@ class Weibo(Platform):
name = '新浪微博'
enabled = True
is_common = True
schedule_interval = 10
def __init__(self):
self.top : dict[Target, RawPost] = dict()
super().__init__()
schedule_type = 'interval'
schedule_kw = {'seconds': 10}
@staticmethod
async def get_account_name(target: Target) -> Optional[str]:
async def get_target_name(target: Target) -> Optional[str]:
async with httpx.AsyncClient() as client:
param = {'containerid': '100505' + target}
res = await client.get('https://m.weibo.cn/api/container/getIndex', params=param)
@@ -47,7 +44,8 @@ class Weibo(Platform):
res_data = json.loads(res.text)
if not res_data['ok']:
return []
return res_data['data']['cards']
custom_filter: Callable[[RawPost], bool] = lambda d: d['card_type'] == 9
return list(filter(custom_filter, res_data['data']['cards']))
def get_id(self, post: RawPost) -> Any:
return post['mblog']['id']
+5 -4
View File
@@ -10,11 +10,12 @@ from .types import UserSubInfo
scheduler = AsyncIOScheduler()
@get_driver().on_startup
async def _start():
scheduler.configure({"apscheduler.timezone": "Asia/Shanghai"})
scheduler.start()
get_driver().on_startup(_start)
# get_driver().on_startup(_start)
async def fetch_and_send(target_type: str):
config = Config()
@@ -41,10 +42,10 @@ async def fetch_and_send(target_type: str):
send_msgs(bot, user.user, user.user_type, await send_post.generate_messages())
for platform_name, platform in platform_manager.items():
if isinstance(platform.schedule_interval, int):
logger.info(f'start scheduler for {platform_name} with interval {platform.schedule_interval}')
if platform.schedule_type in ['cron', 'interval', 'date']:
logger.info(f'start scheduler for {platform_name} with {platform.schedule_type} {platform.schedule_kw}')
scheduler.add_job(
fetch_and_send, 'interval', seconds=platform.schedule_interval,
fetch_and_send, platform.schedule_type, **platform.schedule_kw,
args=(platform_name,))
scheduler.add_job(do_send_msgs, 'interval', seconds=0.3)