add pre-commit and format files

This commit is contained in:
hemengyang
2022-02-09 21:05:14 +08:00
parent 51c31637d5
commit 547be7608f
43 changed files with 2242 additions and 1365 deletions
@@ -1,18 +1,19 @@
from pathlib import Path
from pkgutil import iter_modules
from collections import defaultdict
from importlib import import_module
from .platform import Platform, NoTargetGroup
from pkgutil import iter_modules
from pathlib import Path
from importlib import import_module
# Import every sibling module found in this package's directory. Importing a
# module executes its class statements, which is what fills Platform.registry.
_package_dir = str(Path(__file__).resolve().parent)
for (_, module_name, _) in iter_modules([_package_dir]):
    import_module(f"{__name__}.{module_name}")
async def check_sub_target(target_type, target):
    """Return the name the platform reports for ``target``.

    Looks up the entry stored under ``target_type`` in ``platform_manager``
    and passes back whatever its ``get_target_name(target)`` returns.
    """
    return await platform_manager[target_type].get_target_name(target)
_platform_list = defaultdict(list)
for _platform in Platform.registry:
if not _platform.enabled:
@@ -24,5 +25,6 @@ for name, platform_list in _platform_list.items():
if len(platform_list) == 1:
platform_manager[name] = platform_list[0]()
else:
platform_manager[name] = NoTargetGroup([_platform() for _platform in platform_list])
platform_manager[name] = NoTargetGroup(
[_platform() for _platform in platform_list]
)
+90 -58
View File
@@ -1,37 +1,39 @@
import json
from typing import Any
from bs4 import BeautifulSoup as bs
import httpx
from bs4 import BeautifulSoup as bs
from ..post import Post
from ..types import Category, RawPost, Target
from ..utils import Render
from .platform import CategoryNotSupport, NewMessage, StatusChange
from ..types import Target, RawPost, Category
from .platform import NewMessage, StatusChange, CategoryNotSupport
class Arknights(NewMessage):
categories = {1: '游戏公告'}
platform_name = 'arknights'
name = '明日方舟游戏信息'
categories = {1: "游戏公告"}
platform_name = "arknights"
name = "明日方舟游戏信息"
enable_tag = False
enabled = True
is_common = False
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
schedule_type = "interval"
schedule_kw = {"seconds": 30}
has_target = False
async def get_target_name(self, _: Target) -> str:
    """Return the fixed display name; the target argument is ignored."""
    return "明日方舟游戏信息"
async def get_sub_list(self, _) -> list[RawPost]:
    """Fetch the announcement meta document and return its ``announceList``.

    The target argument is ignored; the URL is fixed.
    """
    async with httpx.AsyncClient() as client:
        raw_data = await client.get(
            "https://ak-conf.hypergryph.com/config/prod/announce_meta/IOS/announcement.meta.json"
        )
    return json.loads(raw_data.text)["announceList"]
def get_id(self, post: RawPost) -> Any:
    """Return the ``announceId`` field of a raw announcement entry."""
    return post["announceId"]
def get_date(self, _: RawPost) -> None:
    """Always return ``None``; no date is derived from the raw post."""
    return None
@@ -40,64 +42,85 @@ class Arknights(NewMessage):
return Category(1)
async def parse(self, raw_post: RawPost) -> Post:
    """Turn one announcement entry into a ``Post``.

    Downloads the page at ``raw_post["webUrl"]`` and inspects its markup:

    * a ``div.standerd-container`` element: the page's ``div.main`` is passed
      through ``Render`` and the result becomes the only picture; if the
      renderer returns nothing, the text is set to a failure notice;
    * otherwise an ``img.banner-image`` element: its ``src`` is the picture;
    * neither: ``CategoryNotSupport`` is raised.
    """
    announce_url = raw_post["webUrl"]
    text = ""
    async with httpx.AsyncClient() as client:
        raw_html = await client.get(announce_url)
    # NOTE(review): the response object itself (not its .text) is handed to
    # BeautifulSoup here — confirm this is intended.
    soup = bs(raw_html, "html.parser")
    pics = []
    if soup.find("div", class_="standerd-container"):
        # Announcement with mixed text and images: render the page instead.
        render = Render()
        viewport = {"width": 320, "height": 6400, "deviceScaleFactor": 3}
        pic_data = await render.render(
            announce_url, viewport=viewport, target="div.main"
        )
        if pic_data:
            pics.append(pic_data)
        else:
            text = "图片渲染失败"
    elif pic := soup.find("img", class_="banner-image"):
        pics.append(pic["src"])
    else:
        raise CategoryNotSupport()
    return Post(
        "arknights",
        text=text,
        url="",
        target_name="明日方舟游戏内公告",
        pics=pics,
        compress=True,
        override_use_pic=False,
    )
class AkVersion(StatusChange):
categories = {2: '更新信息'}
platform_name = 'arknights'
name = '明日方舟游戏信息'
categories = {2: "更新信息"}
platform_name = "arknights"
name = "明日方舟游戏信息"
enable_tag = False
enabled = True
is_common = False
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
schedule_type = "interval"
schedule_kw = {"seconds": 30}
has_target = False
async def get_target_name(self, _: Target) -> str:
    """Return the fixed display name; the target argument is ignored."""
    return "明日方舟游戏信息"
async def get_status(self, _):
    """Fetch and merge the version and pre-announcement documents.

    Both endpoints are read as JSON; the pre-announcement keys are merged
    into (and on conflict override) the version document. The target
    argument is ignored.
    """
    async with httpx.AsyncClient() as client:
        res_ver = await client.get(
            "https://ak-conf.hypergryph.com/config/prod/official/IOS/version"
        )
        res_preanounce = await client.get(
            "https://ak-conf.hypergryph.com/config/prod/announce_meta/IOS/preannouncement.meta.json"
        )
    res = res_ver.json()
    res.update(res_preanounce.json())
    return res
def compare_status(self, _, old_status, new_status):
    """Return the posts describing what changed between two status dicts.

    Checks, in order: a ``preAnnounceType`` transition 2 -> 0 or 0 -> 2,
    a changed ``clientVersion``, and a changed ``resVersion``. Each hit
    appends one ``Post``; an empty list means nothing relevant changed.
    """

    def _notice(text):
        # Every notice shares the platform id and target name.
        return Post("arknights", text=text, target_name="明日方舟更新信息")

    res = []
    old_type = old_status.get("preAnnounceType")
    new_type = new_status.get("preAnnounceType")
    if old_type == 2 and new_type == 0:
        res.append(_notice("登录界面维护公告上线(大概是开始维护了)"))
    elif old_type == 0 and new_type == 2:
        res.append(_notice("登录界面维护公告下线(大概是开服了,冲!)"))
    if old_status.get("clientVersion") != new_status.get("clientVersion"):
        res.append(_notice("游戏本体更新(大更新)"))
    if old_status.get("resVersion") != new_status.get("resVersion"):
        res.append(_notice("游戏资源更新(小更新)"))
    return res
def get_category(self, _):
@@ -106,28 +129,29 @@ class AkVersion(StatusChange):
async def parse(self, raw_post):
    """Return ``raw_post`` unchanged."""
    return raw_post
class MonsterSiren(NewMessage):
categories = {3: '塞壬唱片新闻'}
platform_name = 'arknights'
name = '明日方舟游戏信息'
categories = {3: "塞壬唱片新闻"}
platform_name = "arknights"
name = "明日方舟游戏信息"
enable_tag = False
enabled = True
is_common = False
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
schedule_type = "interval"
schedule_kw = {"seconds": 30}
has_target = False
async def get_target_name(self, _: Target) -> str:
    """Return the fixed display name; the target argument is ignored."""
    return "明日方舟游戏信息"
async def get_sub_list(self, _) -> list[RawPost]:
    """Fetch the news index and return the list at ``data.list``.

    The target argument is ignored; the URL is fixed.
    """
    async with httpx.AsyncClient() as client:
        raw_data = await client.get("https://monster-siren.hypergryph.com/api/news")
    return raw_data.json()["data"]["list"]
def get_id(self, post: RawPost) -> Any:
    """Return the ``cid`` field of a raw news entry."""
    return post["cid"]
def get_date(self, _) -> None:
    """Always return ``None``; no date is derived from the raw post."""
    return None
@@ -138,13 +162,21 @@ class MonsterSiren(NewMessage):
async def parse(self, raw_post: RawPost) -> Post:
    """Build a ``Post`` from one news index entry.

    Fetches the full article for ``raw_post["cid"]``, inserts a newline
    after every ``</p>`` so paragraphs stay separated once the markup is
    stripped, and collects the ``src`` of every ``<img>`` as pictures. The
    text is the entry title followed by the stripped article text.
    """
    url = f'https://monster-siren.hypergryph.com/info/{raw_post["cid"]}'
    async with httpx.AsyncClient() as client:
        res = await client.get(
            f'https://monster-siren.hypergryph.com/api/news/{raw_post["cid"]}'
        )
    raw_data = res.json()
    content = raw_data["data"]["content"]
    content = content.replace("</p>", "</p>\n")
    soup = bs(content, "html.parser")
    imgs = [img["src"] for img in soup("img")]
    text = f'{raw_post["title"]}\n{soup.text.strip()}'
    return Post(
        "monster-siren",
        text=text,
        pics=imgs,
        url=url,
        target_name="塞壬唱片新闻",
        compress=True,
        override_use_pic=False,
    )
+64 -45
View File
@@ -4,51 +4,58 @@ from typing import Any, Optional
import httpx
from ..post import Post
from ..types import Category, RawPost, Tag, Target
from ..types import Tag, Target, RawPost, Category
from .platform import NewMessage, CategoryNotSupport
class Bilibili(NewMessage):
categories = {
1: "一般动态",
2: "专栏文章",
3: "视频",
4: "纯文字",
5: "转发"
# 5: "短视频"
}
platform_name = 'bilibili'
1: "一般动态",
2: "专栏文章",
3: "视频",
4: "纯文字",
5: "转发"
# 5: "短视频"
}
platform_name = "bilibili"
enable_tag = True
enabled = True
is_common = True
schedule_type = 'interval'
schedule_kw = {'seconds': 10}
name = 'B站'
schedule_type = "interval"
schedule_kw = {"seconds": 10}
name = "B站"
has_target = True
async def get_target_name(self, target: Target) -> Optional[str]:
    """Look up the account name for the user id ``target``.

    Returns ``data.name`` from the account-info response, or ``None`` when
    the response carries a non-zero ``code``.
    """
    async with httpx.AsyncClient() as client:
        res = await client.get(
            "https://api.bilibili.com/x/space/acc/info", params={"mid": target}
        )
    res_data = json.loads(res.text)
    if res_data["code"]:
        return None
    return res_data["data"]["name"]
async def get_sub_list(self, target: Target) -> list[RawPost]:
    """Fetch the space-history feed of user ``target``.

    Returns ``data.cards`` when the response ``code`` is 0, otherwise an
    empty list. The request uses a 4 second timeout.
    """
    async with httpx.AsyncClient() as client:
        params = {"host_uid": target, "offset": 0, "need_top": 0}
        res = await client.get(
            "https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history",
            params=params,
            timeout=4.0,
        )
    res_dict = json.loads(res.text)
    if res_dict["code"] == 0:
        return res_dict["data"]["cards"]
    return []
def get_id(self, post: RawPost) -> Any:
    """Return ``desc.dynamic_id`` of a raw card."""
    return post["desc"]["dynamic_id"]
def get_date(self, post: RawPost) -> int:
    """Return ``desc.timestamp`` of a raw card."""
    return post["desc"]["timestamp"]
def _do_get_category(self, post_type: int) -> Category:
if post_type == 2:
@@ -65,63 +72,75 @@ class Bilibili(NewMessage):
raise CategoryNotSupport()
def get_category(self, post: RawPost) -> Category:
    """Map the card's ``desc.type`` to a category via ``_do_get_category``."""
    post_type = post["desc"]["type"]
    return self._do_get_category(post_type)
def get_tags(self, raw_post: RawPost) -> list[Tag]:
    """Return the ``topic_name`` of every entry in ``display.topic_info.topic_details``."""
    topics = raw_post["display"]["topic_info"]["topic_details"]
    return [topic["topic_name"] for topic in topics]
def _get_info(self, post_type: Category, card) -> tuple[str, list]:
    """Extract ``(text, pictures)`` from a decoded card for categories 1-4.

    Raises ``CategoryNotSupport`` for any other ``post_type``.
    """
    if post_type == 1:
        # Regular dynamic
        text = card["item"]["description"]
        pic = [img["img_src"] for img in card["item"]["pictures"]]
    elif post_type == 2:
        # Column article
        text = "{} {}".format(card["title"], card["summary"])
        pic = card["image_urls"]
    elif post_type == 3:
        # Video
        text = card["dynamic"]
        pic = [card["pic"]]
    elif post_type == 4:
        # Plain text
        text = card["item"]["content"]
        pic = []
    else:
        raise CategoryNotSupport()
    return text, pic
async def parse(self, raw_post: RawPost) -> Post:
    """Build a ``Post`` from a raw card.

    Categories 1-4 get a category-specific URL and their text/pictures
    from ``_get_info``. Category 5 (repost) uses the repost's own text
    followed by a separator and the text of the original card, with no
    pictures. Any other category raises ``CategoryNotSupport``.
    """
    card_content = json.loads(raw_post["card"])
    post_type = self.get_category(raw_post)
    target_name = raw_post["desc"]["user_profile"]["info"]["uname"]
    if 1 <= post_type < 5:
        url = ""
        if post_type in (1, 4):
            # Regular dynamic / plain text: both link to the dynamic page.
            url = "https://t.bilibili.com/{}".format(
                raw_post["desc"]["dynamic_id_str"]
            )
        elif post_type == 2:
            # Column article
            url = "https://www.bilibili.com/read/cv{}".format(
                raw_post["desc"]["rid"]
            )
        elif post_type == 3:
            # Video
            url = "https://www.bilibili.com/video/{}".format(
                raw_post["desc"]["bvid"]
            )
        text, pic = self._get_info(post_type, card_content)
    elif post_type == 5:
        # Repost
        url = "https://t.bilibili.com/{}".format(raw_post["desc"]["dynamic_id_str"])
        text = card_content["item"]["content"]
        orig_type = card_content["item"]["orig_type"]
        # The original card is itself a JSON string nested in the repost.
        orig = json.loads(card_content["origin"])
        orig_text, _ = self._get_info(self._do_get_category(orig_type), orig)
        text += "\n--------------\n"
        text += orig_text
        pic = []
    else:
        raise CategoryNotSupport(post_type)
    return Post("bilibili", text=text, url=url, pics=pic, target_name=target_name)
@@ -1,54 +1,58 @@
from typing import Any, Optional
import httpx
from ..post import Post
from ..types import RawPost, Target
from .platform import NewMessage
from ..types import Target, RawPost
class NcmArtist(NewMessage):
    """Platform that watches the album list of a NetEase Cloud Music artist."""

    categories = {}
    platform_name = "ncm-artist"
    enable_tag = False
    enabled = True
    is_common = True
    schedule_type = "interval"
    schedule_kw = {"minutes": 1}
    name = "网易云-歌手"
    has_target = True

    async def get_target_name(self, target: Target) -> Optional[str]:
        """Return the artist name for id ``target``, or ``None`` if ``code`` is not 200."""
        async with httpx.AsyncClient() as client:
            res = await client.get(
                "https://music.163.com/api/artist/albums/{}".format(target),
                headers={"Referer": "https://music.163.com/"},
            )
        res_data = res.json()
        if res_data["code"] != 200:
            return None
        return res_data["artist"]["name"]

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Return the ``hotAlbums`` list of the artist, or ``[]`` if ``code`` is not 200."""
        async with httpx.AsyncClient() as client:
            res = await client.get(
                "https://music.163.com/api/artist/albums/{}".format(target),
                headers={"Referer": "https://music.163.com/"},
            )
        res_data = res.json()
        if res_data["code"] != 200:
            return []
        return res_data["hotAlbums"]

    def get_id(self, post: RawPost) -> Any:
        """Return the album ``id``."""
        return post["id"]

    def get_date(self, post: RawPost) -> int:
        """Return ``publishTime`` floor-divided by 1000."""
        return post["publishTime"] // 1000

    async def parse(self, raw_post: RawPost) -> Post:
        """Build a ``Post`` announcing the album, with its cover and album URL."""
        text = "新专辑发布:{}".format(raw_post["name"])
        target_name = raw_post["artist"]["name"]
        pics = [raw_post["picUrl"]]
        url = "https://music.163.com/#/album?id={}".format(raw_post["id"])
        return Post(
            "ncm-artist", text=text, url=url, pics=pics, target_name=target_name
        )
+25 -23
View File
@@ -1,56 +1,58 @@
from typing import Any, Optional
import httpx
from ..post import Post
from ..types import RawPost, Target
from .platform import NewMessage
from ..types import Target, RawPost
class NcmRadio(NewMessage):
    """Platform that watches the program list of a NetEase Cloud Music radio."""

    categories = {}
    platform_name = "ncm-radio"
    enable_tag = False
    enabled = True
    is_common = False
    schedule_type = "interval"
    schedule_kw = {"minutes": 10}
    name = "网易云-电台"
    has_target = True

    async def get_target_name(self, target: Target) -> Optional[str]:
        """Return the radio name for id ``target``.

        Returns ``None`` when ``code`` is not 200 or the program list is
        empty (the name is read from the first program).
        """
        async with httpx.AsyncClient() as client:
            res = await client.post(
                "http://music.163.com/api/dj/program/byradio",
                headers={"Referer": "https://music.163.com/"},
                data={"radioId": target, "limit": 1000, "offset": 0},
            )
        res_data = res.json()
        # The list was previously compared with ``== 0``, which is never true
        # for a list, so an empty list went on to raise IndexError below.
        if res_data["code"] != 200 or not res_data["programs"]:
            return None
        return res_data["programs"][0]["radio"]["name"]

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Return the ``programs`` list of the radio, or ``[]`` if ``code`` is not 200."""
        async with httpx.AsyncClient() as client:
            res = await client.post(
                "http://music.163.com/api/dj/program/byradio",
                headers={"Referer": "https://music.163.com/"},
                data={"radioId": target, "limit": 1000, "offset": 0},
            )
        res_data = res.json()
        if res_data["code"] != 200:
            return []
        return res_data["programs"]

    def get_id(self, post: RawPost) -> Any:
        """Return the program ``id``."""
        return post["id"]

    def get_date(self, post: RawPost) -> int:
        """Return ``createTime`` floor-divided by 1000."""
        return post["createTime"] // 1000

    async def parse(self, raw_post: RawPost) -> Post:
        """Build a ``Post`` announcing the program, with its cover and program URL."""
        text = "网易云电台更新:{}".format(raw_post["name"])
        target_name = raw_post["radio"]["name"]
        pics = [raw_post["coverUrl"]]
        url = "https://music.163.com/#/program/{}".format(raw_post["id"])
        return Post("ncm-radio", text=text, url=url, pics=pics, target_name=target_name)
+109 -51
View File
@@ -1,15 +1,15 @@
from abc import abstractmethod, ABC
from collections import defaultdict
from dataclasses import dataclass
import time
from typing import Any, Collection, Optional, Literal
from dataclasses import dataclass
from abc import ABC, abstractmethod
from collections import defaultdict
from typing import Any, Literal, Optional, Collection
import httpx
from nonebot import logger
from ..plugin_config import plugin_config
from ..post import Post
from ..types import Category, RawPost, Tag, Target, User, UserSubInfo
from ..plugin_config import plugin_config
from ..types import Tag, User, Target, RawPost, Category, UserSubInfo
class CategoryNotSupport(Exception):
@@ -17,26 +17,27 @@ class CategoryNotSupport(Exception):
class RegistryMeta(type):
    """Metaclass that records concrete subclasses in a shared ``registry`` list.

    Class keywords steer it: ``base=True`` marks the root class and gives it
    a fresh empty ``registry``; ``abstract=True`` keeps a class out of the
    registry; any other class is appended to the ``registry`` it inherits.
    """

    def __new__(cls, name, bases, namespace, **kwargs):
        # The class keywords are consumed here and not forwarded to type.__new__.
        return super().__new__(cls, name, bases, namespace)

    def __init__(cls, name, bases, namespace, **kwargs):
        if kwargs.get("base"):
            # this is the base class
            cls.registry = []
        elif not kwargs.get("abstract"):
            # this is the subclass
            cls.registry.append(cls)
        super().__init__(name, bases, namespace, **kwargs)
class RegistryABCMeta(RegistryMeta, ABC):
    """Registry metaclass that additionally inherits from ``ABC``.

    NOTE(review): this mixes in ``ABC`` rather than ``ABCMeta`` — confirm
    whether ``@abstractmethod`` enforcement on classes using this metaclass
    is expected.
    """

    ...
class Platform(metaclass=RegistryABCMeta, base=True):
schedule_type: Literal['date', 'interval', 'cron']
schedule_type: Literal["date", "interval", "cron"]
schedule_kw: dict
is_common: bool
enabled: bool
@@ -52,7 +53,9 @@ class Platform(metaclass=RegistryABCMeta, base=True):
...
@abstractmethod
async def fetch_new_post(
    self, target: Target, users: list[UserSubInfo]
) -> list[tuple[User, list[Post]]]:
    """Fetch what is new for ``target`` and pair each user with their posts.

    Abstract; subclasses supply the fetching strategy.
    """
    ...
@abstractmethod
@@ -67,7 +70,7 @@ class Platform(metaclass=RegistryABCMeta, base=True):
super().__init__()
self.reverse_category = {}
for key, val in self.categories.items():
self.reverse_category[val] = key
self.reverse_category[val] = key
self.store = dict()
@abstractmethod
@@ -75,12 +78,14 @@ class Platform(metaclass=RegistryABCMeta, base=True):
"Return Tag list of given RawPost"
def get_stored_data(self, target: Target) -> Any:
    """Return the data stored for ``target``, or ``None`` if there is none."""
    return self.store.get(target)
def set_stored_data(self, target: Target, data: Any):
    """Store ``data`` for ``target``, replacing any previous value."""
    self.store[target] = data
async def filter_user_custom(self, raw_post_list: list[RawPost], cats: list[Category], tags: list[Tag]) -> list[RawPost]:
async def filter_user_custom(
self, raw_post_list: list[RawPost], cats: list[Category], tags: list[Tag]
) -> list[RawPost]:
res: list[RawPost] = []
for raw_post in raw_post_list:
if self.categories:
@@ -99,12 +104,16 @@ class Platform(metaclass=RegistryABCMeta, base=True):
res.append(raw_post)
return res
async def dispatch_user_post(self, target: Target, new_posts: list[RawPost], users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
async def dispatch_user_post(
self, target: Target, new_posts: list[RawPost], users: list[UserSubInfo]
) -> list[tuple[User, list[Post]]]:
res: list[tuple[User, list[Post]]] = []
for user, category_getter, tag_getter in users:
required_tags = tag_getter(target) if self.enable_tag else []
cats = category_getter(target)
user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags)
user_raw_post = await self.filter_user_custom(
new_posts, cats, required_tags
)
user_post: list[Post] = []
for raw_post in user_raw_post:
user_post.append(await self.do_parse(raw_post))
@@ -116,6 +125,7 @@ class Platform(metaclass=RegistryABCMeta, base=True):
"Return category of given Rawpost"
raise NotImplementedError()
class MessageProcess(Platform, abstract=True):
"General message process fetch, parse, filter progress"
@@ -127,7 +137,6 @@ class MessageProcess(Platform, abstract=True):
def get_id(self, post: RawPost) -> Any:
"Get post id of given RawPost"
async def do_parse(self, raw_post: RawPost) -> Post:
post_id = self.get_id(raw_post)
if post_id not in self.parse_cache:
@@ -156,8 +165,11 @@ class MessageProcess(Platform, abstract=True):
# post_id = self.get_id(raw_post)
# if post_id in exists_posts_set:
# continue
if (post_time := self.get_date(raw_post)) and time.time() - post_time > 2 * 60 * 60 and \
plugin_config.bison_init_filter:
if (
(post_time := self.get_date(raw_post))
and time.time() - post_time > 2 * 60 * 60
and plugin_config.bison_init_filter
):
continue
try:
self.get_category(raw_post)
@@ -168,15 +180,18 @@ class MessageProcess(Platform, abstract=True):
res.append(raw_post)
return res
class NewMessage(MessageProcess, abstract=True):
"Fetch a list of messages, filter the new messages, dispatch it to different users"
@dataclass
class MessageStorage:
    # Per-target bookkeeping kept between polls.
    # inited: switched to True once the first fetch has seeded exists_posts.
    inited: bool
    # exists_posts: ids (as returned by get_id) of posts already seen.
    exists_posts: set[Any]
async def filter_common_with_diff(self, target: Target, raw_post_list: list[RawPost]) -> list[RawPost]:
async def filter_common_with_diff(
self, target: Target, raw_post_list: list[RawPost]
) -> list[RawPost]:
filtered_post = await self.filter_common(raw_post_list)
store = self.get_stored_data(target) or self.MessageStorage(False, set())
res = []
@@ -185,7 +200,11 @@ class NewMessage(MessageProcess, abstract=True):
for raw_post in filtered_post:
post_id = self.get_id(raw_post)
store.exists_posts.add(post_id)
logger.info('init {}-{} with {}'.format(self.platform_name, target, store.exists_posts))
logger.info(
"init {}-{} with {}".format(
self.platform_name, target, store.exists_posts
)
)
store.inited = True
else:
for raw_post in filtered_post:
@@ -197,8 +216,9 @@ class NewMessage(MessageProcess, abstract=True):
self.set_stored_data(target, store)
return res
async def fetch_new_post(self, target: Target, users: list[UserSubInfo]) -> list[tuple[User, list[Post]]]:
async def fetch_new_post(
self, target: Target, users: list[UserSubInfo]
) -> list[tuple[User, list[Post]]]:
try:
post_list = await self.get_sub_list(target)
new_posts = await self.filter_common_with_diff(target, post_list)
@@ -206,17 +226,25 @@ class NewMessage(MessageProcess, abstract=True):
return []
else:
for post in new_posts:
logger.info('fetch new post from {} {}: {}'.format(
self.platform_name,
target if self.has_target else '-',
self.get_id(post)))
logger.info(
"fetch new post from {} {}: {}".format(
self.platform_name,
target if self.has_target else "-",
self.get_id(post),
)
)
res = await self.dispatch_user_post(target, new_posts, users)
self.parse_cache = {}
return res
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
logger.warning(
"network connection error: {}, url: {}".format(
type(err), err.request.url
)
)
return []
class StatusChange(Platform, abstract=True):
"Watch a status, and fire a post when status changes"
@@ -232,49 +260,69 @@ class StatusChange(Platform, abstract=True):
async def parse(self, raw_post: RawPost) -> Post:
...
async def fetch_new_post(
    self, target: Target, users: list[UserSubInfo]
) -> list[tuple[User, list[Post]]]:
    """Poll the status of ``target`` and dispatch posts for any change.

    The new status is compared with the stored one only when a (truthy)
    stored status exists, so the first successful poll just records the
    status. The stored status is replaced after every successful poll.
    A network error is logged and yields an empty list.
    """
    try:
        new_status = await self.get_status(target)
        res = []
        if old_status := self.get_stored_data(target):
            diff = self.compare_status(target, old_status, new_status)
            if diff:
                logger.info(
                    "status changes {} {}: {} -> {}".format(
                        self.platform_name,
                        target if self.has_target else "-",
                        old_status,
                        new_status,
                    )
                )
                res = await self.dispatch_user_post(target, diff, users)
        self.set_stored_data(target, new_status)
        return res
    except httpx.RequestError as err:
        logger.warning(
            "network connection error: {}, url: {}".format(
                type(err), err.request.url
            )
        )
        return []
class SimplePost(MessageProcess, abstract=True):
    "Fetch a list of messages, dispatch it to different users"

    async def fetch_new_post(
        self, target: Target, users: list[UserSubInfo]
    ) -> list[tuple[User, list[Post]]]:
        """Fetch the current list for ``target`` and dispatch all of it.

        Returns an empty list when nothing was fetched or when a network
        error occurred (the error is logged). The parse cache is reset
        after each dispatch.
        """
        try:
            new_posts = await self.get_sub_list(target)
            if not new_posts:
                return []
            for post in new_posts:
                logger.info(
                    "fetch new post from {} {}: {}".format(
                        self.platform_name,
                        target if self.has_target else "-",
                        self.get_id(post),
                    )
                )
            res = await self.dispatch_user_post(target, new_posts, users)
            self.parse_cache = {}
            return res
        except httpx.RequestError as err:
            logger.warning(
                "network connection error: {}, url: {}".format(
                    type(err), err.request.url
                )
            )
            return []
class NoTargetGroup(Platform, abstract=True):
enable_tag = False
DUMMY_STR = '_DUMMY'
DUMMY_STR = "_DUMMY"
enabled = True
has_target = False
@@ -287,24 +335,35 @@ class NoTargetGroup(Platform, abstract=True):
self.schedule_kw = platform_list[0].schedule_kw
for platform in platform_list:
if platform.has_target:
raise RuntimeError('Platform {} should have no target'.format(platform.name))
raise RuntimeError(
"Platform {} should have no target".format(platform.name)
)
if name == self.DUMMY_STR:
name = platform.name
elif name != platform.name:
raise RuntimeError('Platform name for {} not fit'.format(self.platform_name))
raise RuntimeError(
"Platform name for {} not fit".format(self.platform_name)
)
platform_category_key_set = set(platform.categories.keys())
if platform_category_key_set & categories_keys:
raise RuntimeError('Platform categories for {} duplicate'.format(self.platform_name))
raise RuntimeError(
"Platform categories for {} duplicate".format(self.platform_name)
)
categories_keys |= platform_category_key_set
self.categories.update(platform.categories)
if platform.schedule_kw != self.schedule_kw or platform.schedule_type != self.schedule_type:
raise RuntimeError('Platform scheduler for {} not fit'.format(self.platform_name))
if (
platform.schedule_kw != self.schedule_kw
or platform.schedule_type != self.schedule_type
):
raise RuntimeError(
"Platform scheduler for {} not fit".format(self.platform_name)
)
self.name = name
self.is_common = platform_list[0].is_common
super().__init__()
def __str__(self):
    """Return the member platform names, space-separated, in square brackets."""
    return "[" + " ".join(platform.name for platform in self.platform_list) + "]"
async def get_target_name(self, _):
    """Delegate to ``get_target_name`` of the first platform in the group."""
    return await self.platform_list[0].get_target_name(_)
@@ -316,4 +375,3 @@ class NoTargetGroup(Platform, abstract=True):
for user, posts in platform_res:
res[user].extend(posts)
return [[key, val] for key, val in res.items()]
+19 -12
View File
@@ -1,31 +1,32 @@
import calendar
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import feedparser
import httpx
import feedparser
from bs4 import BeautifulSoup as bs
from ..post import Post
from ..types import RawPost, Target
from .platform import NewMessage
from ..types import Target, RawPost
class Rss(NewMessage):
categories = {}
enable_tag = False
platform_name = 'rss'
platform_name = "rss"
name = "Rss"
enabled = True
is_common = True
schedule_type = 'interval'
schedule_kw = {'seconds': 30}
schedule_type = "interval"
schedule_kw = {"seconds": 30}
has_target = True
async def get_target_name(self, target: Target) -> Optional[str]:
    """Download the feed at URL ``target`` and return its feed-level title.

    The request uses a 10 second timeout.
    """
    async with httpx.AsyncClient() as client:
        res = await client.get(target, timeout=10.0)
    feed = feedparser.parse(res.text)
    return feed["feed"]["title"]
def get_date(self, post: RawPost) -> int:
    """Convert the entry's ``published_parsed`` time tuple (read as UTC) to epoch seconds."""
    return calendar.timegm(post.published_parsed)
@@ -39,12 +40,18 @@ class Rss(NewMessage):
feed = feedparser.parse(res)
entries = feed.entries
for entry in entries:
entry['_target_name'] = feed.feed.title
entry["_target_name"] = feed.feed.title
return feed.entries
async def parse(self, raw_post: RawPost) -> Post:
    """Build a ``Post`` from a feed entry.

    The text is the entry title (when present and non-empty) on its own
    line, followed by the entry description with markup stripped. Pictures
    are the ``src`` of every ``<img>`` in the description.
    """
    title = raw_post.get("title")
    text = title + "\n" if title else ""
    soup = bs(raw_post.description, "html.parser")
    text += soup.text.strip()
    pics = [img.attrs["src"] for img in soup("img")]
    return Post(
        "rss",
        text=text,
        url=raw_post.link,
        pics=pics,
        target_name=raw_post["_target_name"],
    )
+6 -6
View File
@@ -1,14 +1,15 @@
from datetime import datetime
import hashlib
import json
import re
import json
import hashlib
from datetime import datetime
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import httpx
from bs4 import BeautifulSoup as bs
from ..post import Post
from ..types import *
from ..post import Post
# from .platform import Platform
@@ -75,4 +76,3 @@ from ..types import *
# pics=[],
# url=''
# )
+94 -63
View File
@@ -1,121 +1,152 @@
from datetime import datetime
import json
import re
import json
from datetime import datetime
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import httpx
from nonebot import logger
from bs4 import BeautifulSoup as bs
from ..post import Post
from ..types import *
from ..post import Post
from .platform import NewMessage
class Weibo(NewMessage):
categories = {
1: '转发',
2: '视频',
3: '图文',
4: '文字',
}
1: "转发",
2: "视频",
3: "图文",
4: "文字",
}
enable_tag = True
platform_name = 'weibo'
name = '新浪微博'
platform_name = "weibo"
name = "新浪微博"
enabled = True
is_common = True
schedule_type = 'interval'
schedule_kw = {'seconds': 3}
schedule_type = "interval"
schedule_kw = {"seconds": 3}
has_target = True
async def get_target_name(self, target: Target) -> Optional[str]:
    """Look up the screen name of the Weibo user identified by ``target``.

    Queries the m.weibo.cn ``getIndex`` endpoint with the container id
    ``"100505" + target``.

    Returns:
        The user's ``screen_name`` when the response's ``ok`` field equals 1,
        otherwise ``None``.
    """
    query = {"containerid": "100505" + target}
    async with httpx.AsyncClient() as client:
        response = await client.get(
            "https://m.weibo.cn/api/container/getIndex", params=query
        )
    payload = json.loads(response.text)
    if payload.get("ok") != 1:
        return None
    return payload["data"]["userInfo"]["screen_name"]
async def get_sub_list(self, target: Target) -> list[RawPost]:
    """Fetch the timeline cards of the Weibo user identified by ``target``.

    Queries the m.weibo.cn ``getIndex`` endpoint with the container id
    ``"107603" + target`` and a 4 second timeout.

    Returns:
        The cards whose ``card_type`` is 9, or an empty list when the
        response does not report a truthy ``ok`` field.
    """
    params = {"containerid": "107603" + target}
    async with httpx.AsyncClient() as client:
        res = await client.get(
            "https://m.weibo.cn/api/container/getIndex?", params=params, timeout=4.0
        )
    res_data = json.loads(res.text)
    # ``.get`` rather than indexing: a payload without "ok" is handled like
    # a failed request instead of raising KeyError (same check style as
    # get_target_name).
    if not res_data.get("ok"):
        return []
    # Cards lacking "card_type" are dropped rather than aborting the poll.
    return [card for card in res_data["data"]["cards"] if card.get("card_type") == 9]
def get_id(self, post: RawPost) -> Any:
    """Return the identifier of a timeline card, i.e. its ``mblog.id`` value."""
    mblog = post["mblog"]
    return mblog["id"]
def filter_platform_custom(self, raw_post: RawPost) -> bool:
    """Tell whether a timeline card should be kept: only ``card_type`` 9 is."""
    card_type = raw_post["card_type"]
    return card_type == 9
def get_date(self, raw_post: RawPost) -> float:
    """Return the creation time of a card as a POSIX timestamp.

    ``mblog.created_at`` is parsed with the format
    ``%a %b %d %H:%M:%S %z %Y`` (for example
    ``Wed Feb 09 21:05:14 +0800 2022``), so the result is timezone-aware.
    """
    created_at = raw_post["mblog"]["created_at"]
    moment = datetime.strptime(created_at, "%a %b %d %H:%M:%S %z %Y")
    return moment.timestamp()
def get_tags(self, raw_post: RawPost) -> Optional[list[Tag]]:
    """Return the tag list of the given raw post.

    Tags are taken from the ``<span class="surl-text">`` elements of
    ``mblog.text`` whose text has the form ``#topic#``; the surrounding
    ``#`` characters are removed. Span texts that are empty or contain
    nothing between the two ``#`` are ignored.

    When the HTML contains the super-topic icon (an ``<img>`` whose ``src``
    matches ``timeline_card_small_super_default``), the text of the
    ``surl-text`` span next to it, suffixed with "超话", is appended as well.
    A failure to locate that span is logged and otherwise ignored.
    """
    text = raw_post["mblog"]["text"]
    soup = bs(text, "html.parser")
    span_texts = (span.text for span in soup.find_all("span", class_="surl-text"))
    # ``len(s) > 2`` guards against empty span texts (indexing them would
    # raise IndexError) and against "#"/"##", which would yield empty tags.
    res = [
        s[1:-1]
        for s in span_texts
        if len(s) > 2 and s.startswith("#") and s.endswith("#")
    ]
    super_topic_img = soup.find(
        "img", src=re.compile(r"timeline_card_small_super_default")
    )
    if super_topic_img:
        try:
            res.append(
                super_topic_img.parent.parent.find("span", class_="surl-text").text
                + "超话"
            )
        except AttributeError:
            # A missing parent or missing span shows up as attribute access
            # on None; anything else is a real bug and must propagate.
            logger.info("super_topic extract error: {}".format(text))
    return res
def get_category(self, raw_post: RawPost) -> Category:
    """Classify a card into one of the ids listed in ``categories``.

    The checks run in priority order:
        1 - the post has a ``retweeted_status``;
        2 - the post has a ``page_info`` whose ``type`` is ``"video"``;
        3 - the post has ``pics``;
        4 - anything else.
    """
    mblog = raw_post["mblog"]
    if mblog.get("retweeted_status"):
        return Category(1)
    page_info = mblog.get("page_info")
    if page_info and page_info.get("type") == "video":
        return Category(2)
    if mblog.get("pics"):
        return Category(3)
    return Category(4)
def _get_text(self, raw_text: str) -> str:
    """Convert the HTML of a post into plain text.

    Line-break tags are turned into newlines before the markup is stripped,
    because extracting the text alone would drop them silently. All common
    spellings are handled (``<br />``, ``<br/>``, ``<br>``, any letter case),
    not only the literal ``<br />``.

    Args:
        raw_text: HTML fragment of the post body.

    Returns:
        The text content with tags removed.
    """
    text = re.sub(r"<br\s*/?>", "\n", raw_text, flags=re.IGNORECASE)
    return bs(text, "html.parser").text
async def parse(self, raw_post: RawPost) -> Post:
    """Build a ``Post`` for the ``"weibo"`` platform from a timeline card.

    For posts flagged ``isLongText`` or having more than nine pictures, the
    detail page ``https://m.weibo.cn/detail/<mid>`` is fetched and the JSON
    embedded after ``"status":`` replaces the timeline data (presumably
    because the timeline payload is abridged for such posts - confirm).
    If that JSON cannot be extracted or decoded, the failure is logged and
    the timeline data is used as a best-effort fallback.

    Args:
        raw_post: A timeline card containing an ``mblog`` mapping.

    Returns:
        A ``Post`` carrying the plain text, the weibo.com permalink, the
        large picture URLs and the author's screen name.
    """
    header = {
        "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9",
        "accept-language": "zh-CN,zh;q=0.9",
        "authority": "m.weibo.cn",
        "cache-control": "max-age=0",
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "same-origin",
        "sec-fetch-site": "same-origin",
        "upgrade-insecure-requests": "1",
        "user-agent": "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) "
        "AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 "
        "Mobile Safari/537.36",
    }
    info = raw_post["mblog"]
    # ``.get`` keeps cards that lack these optional fields on the short path
    # instead of failing with KeyError.
    if info.get("isLongText") or info.get("pic_num", 0) > 9:
        detail_page_url = "https://m.weibo.cn/detail/{}".format(info["mid"])
        async with httpx.AsyncClient() as client:
            res = await client.get(detail_page_url, headers=header)
        matched = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text)
        try:
            info = json.loads(matched.group(1))
        except (AttributeError, ValueError):
            # AttributeError: the pattern was not found (``matched`` is None).
            # ValueError: the captured text is not valid JSON.
            # Either way ``info`` still holds the timeline data.
            logger.info("detail message error: {}".format(detail_page_url))
    parsed_text = self._get_text(info["text"])
    pic_urls = [img["large"]["url"] for img in info.get("pics", [])]
    detail_url = "https://weibo.com/{}/{}".format(info["user"]["id"], info["bid"])
    return Post(
        "weibo",
        text=parsed_text,
        url=detail_url,
        pics=pic_urls,
        target_name=info["user"]["screen_name"],
    )