This commit is contained in:
felinae98 2021-02-16 20:43:03 +08:00
parent dca94671a6
commit 6bdcdc3242
No known key found for this signature in database
GPG Key ID: 00C8B010587FF610
7 changed files with 71 additions and 214 deletions

View File

@ -1,12 +1,14 @@
from .utils import Singleton, supported_target_type
from .types import User
from .plugin_config import plugin_config
from os import path
import nonebot
from tinydb import TinyDB, Query
from collections import defaultdict
from typing import DefaultDict
from os import path
import os
from typing import DefaultDict
import nonebot
from tinydb import Query, TinyDB
from .plugin_config import plugin_config
from .types import User
from .utils import Singleton, supported_target_type
def get_config_path() -> str:

View File

@ -1,16 +1,16 @@
from nonebot.rule import to_me
from nonebot.typing import T_State
from nonebot import logger, on_command
from nonebot.adapters.cqhttp import Bot, Event, GroupMessageEvent
from nonebot.adapters.cqhttp.message import Message
from nonebot.permission import Permission, SUPERUSER
from nonebot.adapters.cqhttp.permission import GROUP_ADMIN, GROUP_MEMBER, GROUP_OWNER
from nonebot import on_command, logger
from nonebot.permission import Permission, SUPERUSER
from nonebot.rule import to_me
from nonebot.typing import T_State
from .platform.utils import check_sub_target
from .platform import platform_manager
from .config import Config, NoSuchSubscribeException
from .utils import parse_text
from .platform import platform_manager
from .platform.utils import check_sub_target
from .send import send_msgs
from .utils import parse_text
add_sub = on_command("添加订阅", rule=to_me(), permission=GROUP_ADMIN | GROUP_OWNER | SUPERUSER, priority=5)
@add_sub.got('platform', '请输入想要订阅的平台,目前支持:{}'.format(', '.join(platform_manager.keys())))

View File

@ -1,97 +1,12 @@
from collections import defaultdict
import json
from typing import Any, Optional
from ..types import Category, RawPost, Tag, Target
from ..utils import Singleton
from ..post import Post
from collections import defaultdict
from nonebot import logger
import httpx
import json
import time
from .platform import Platform, CategoryNotSupport
class Bilibili_(metaclass=Singleton):
    """Poll Bilibili user dynamics and convert unseen ones into ``Post`` objects.

    State is kept per target (the Bilibili user id being followed):
    ``exists_posts`` holds the dynamic ids that were already handled and
    ``inited`` records whether the first, silent fetch has been done.
    """

    def __init__(self):
        # target -> set of dynamic ids already seen or parsed
        self.exists_posts = defaultdict(set)
        # target -> True once the initial fetch has filled exists_posts
        self.inited = defaultdict(lambda: False)

    async def get_user_post_list(self, user_id):
        """Fetch the space-history payload for ``user_id``.

        Returns the ``data`` member of the JSON response when its ``code``
        is 0, otherwise ``None``.
        """
        async with httpx.AsyncClient() as client:
            params = {'host_uid': user_id, 'offset': 0, 'need_top': 0}
            res = await client.get('https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history', params=params, timeout=4.0)
            res_dict = json.loads(res.text)
            if res_dict['code'] == 0:
                return res_dict['data']
            return None

    def filter(self, data, target, init=False) -> list[Post]:
        """Select the cards in ``data`` that should be pushed for ``target``.

        With ``init`` set, every dynamic id is only recorded as seen and an
        empty list is returned. Otherwise cards that were already seen, are
        older than two hours, or have an unsupported type are skipped.
        """
        # Tolerate a payload that carries no 'cards' entry.
        cards = data.get('cards') or []
        res: list[Post] = []
        for card in cards:
            dynamic_id = card['desc']['dynamic_id']
            if init:
                self.exists_posts[target].add(dynamic_id)
                continue
            if dynamic_id in self.exists_posts[target]:
                continue
            if time.time() - card['desc']['timestamp'] > 60 * 60 * 2:
                continue
            post = self.parse(card, target)
            # parse() yields None for unsupported card types; drop every such
            # result (list.remove(None) would only have dropped the first).
            if post is not None:
                res.append(post)
        return res

    def parse(self, card, target) -> Optional[Post]:
        """Build a ``Post`` from one card and mark its dynamic id as seen.

        Returns ``None`` (after logging the card) when the card type is not
        one of the handled values 2, 64, 8 or 4.
        """
        card_content = json.loads(card['card'])
        dynamic_id = card['desc']['dynamic_id']
        # Recorded before the type check, so an unsupported card is only
        # logged once.
        self.exists_posts[target].add(dynamic_id)
        if card['desc']['type'] == 2:
            # regular dynamic (with pictures)
            text = card_content['item']['description']
            url = 'https://t.bilibili.com/{}'.format(card['desc']['dynamic_id'])
            pic = [img['img_src'] for img in card_content['item']['pictures']]
        elif card['desc']['type'] == 64:
            # column article
            text = '{} {}'.format(card_content['title'], card_content['summary'])
            url = 'https://www.bilibili.com/read/cv{}'.format(card['desc']['rid'])
            pic = card_content['image_urls']
        elif card['desc']['type'] == 8:
            # video
            text = card_content['dynamic']
            url = 'https://www.bilibili.com/video/{}'.format(card['desc']['bvid'])
            pic = [card_content['pic']]
        elif card['desc']['type'] == 4:
            # text only
            text = card_content['item']['content']
            url = 'https://t.bilibili.com/{}'.format(card['desc']['dynamic_id'])
            pic = []
        else:
            logger.error(card)
            return None
        return Post('bilibili', text, url, pic)

    async def fetch_new_post(self, target) -> list[Post]:
        """Return the new posts of ``target``.

        The first successful fetch for a target only records the existing
        dynamic ids and returns an empty list. Network errors and API error
        responses are logged and also produce an empty list.
        """
        try:
            post_list_data = await self.get_user_post_list(target)
        except httpx.RequestError as err:
            logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
            return []
        if post_list_data is None:
            # The API answered with a non-zero code; leave `inited` untouched
            # so that initialisation is retried on the next poll.
            logger.warning('bilibili fetch {} failed: api returned an error code'.format(target))
            return []
        if self.inited[target]:
            return self.filter(post_list_data, target)
        self.filter(post_list_data, target, True)
        logger.info('bilibili init {} success'.format(target))
        logger.info('post list: {}'.format(self.exists_posts[target]))
        self.inited[target] = True
        return []
async def get_user_info(mid):
    """Look up the display name of the Bilibili account ``mid``.

    Returns the ``name`` field of the account-info response, or ``None``
    when the response carries a truthy (non-zero) ``code``.
    """
    async with httpx.AsyncClient() as session:
        query = {'mid': mid}
        response = await session.get('https://api.bilibili.com/x/space/acc/info', params=query)
        payload = json.loads(response.text)
        if not payload['code']:
            return payload['data']['name']
        return None
from ..post import Post
from ..types import Category, RawPost, Tag, Target
from .platform import CategoryNotSupport, Platform
class Bilibili(Platform):

View File

@ -1,13 +1,14 @@
import time
from collections import defaultdict
from typing import Any, Literal, Optional, Protocol
from typing import Any, Optional
import httpx
from nonebot import logger
from ..config import Config
from ..post import Post
from ..types import Category, RawPost, Tag, Target, User
from ..utils import Singleton
from ..types import Category, Tag, RawPost, Target, User
class CategoryNotSupport(Exception):
@ -82,7 +83,7 @@ class Platform(PlatformProto):
return self.cache[post_id]
async def filter_common(self, target: Target, raw_post_list: list[RawPost]) -> list[RawPost]:
if False and not self.inited.get(target, False):
if not self.inited.get(target, False):
# target not init
for raw_post in raw_post_list:
post_id = self.get_id(raw_post)
@ -95,8 +96,8 @@ class Platform(PlatformProto):
post_id = self.get_id(raw_post)
if post_id in self.exists_posts[target]:
continue
# if (post_time := self.get_date(raw_post)) and time.time() - post_time > 2 * 60 * 60:
# continue
if (post_time := self.get_date(raw_post)) and time.time() - post_time > 2 * 60 * 60:
continue
try:
if not self.filter_platform_custom(raw_post):
continue
@ -132,25 +133,30 @@ class Platform(PlatformProto):
return res
async def fetch_new_post(self, target: Target, users: list[User]) -> list[tuple[User, list[Post]]]:
config = Config()
post_list = await self.get_sub_list(target)
new_posts = await self.filter_common(target, post_list)
res: list[tuple[User, list[Post]]] = []
if not new_posts:
try:
config = Config()
post_list = await self.get_sub_list(target)
new_posts = await self.filter_common(target, post_list)
res: list[tuple[User, list[Post]]] = []
if not new_posts:
return []
else:
for post in new_posts:
logger.info('fetch new post from {} {}: {}'.format(self.platform_name, target, self.get_id(post)))
for user in users:
required_tags = config.get_sub_tags(self.platform_name, target, user.user_type, user.user) if self.enable_tag else []
cats = config.get_sub_category(self.platform_name, target, user.user_type, user.user)
user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags)
user_post: list[Post] = []
for raw_post in user_raw_post:
user_post.append(await self._parse_with_cache(raw_post))
res.append((user, user_post))
self.cache = {}
return res
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []
else:
for post in new_posts:
logger.info('fetch new post from {} {}: {}'.format(self.platform_name, target, self.get_id(post)))
for user in users:
required_tags = config.get_sub_tags(self.platform_name, target, user.user_type, user.user) if self.enable_tag else []
cats = config.get_sub_category(self.platform_name, target, user.user_type, user.user)
user_raw_post = await self.filter_user_custom(new_posts, cats, required_tags)
user_post: list[Post] = []
for raw_post in user_raw_post:
user_post.append(await self._parse_with_cache(raw_post))
res.append((user, user_post))
self.cache = {}
return res
class PlatformNoTarget(PlatformProto):

View File

@ -1,15 +1,13 @@
import calendar
from typing import Any, Optional
from ..types import RawPost, Target
from ..utils import Singleton
from ..post import Post
from .platform import Platform
from collections import defaultdict
from bs4 import BeautifulSoup as bs
from nonebot import logger
import feedparser
import httpx
import time
import calendar
from ..post import Post
from ..types import RawPost, Target
from .platform import Platform
class Rss(Platform):

View File

@ -28,12 +28,11 @@ async def fetch_and_send(target_type: str):
send_list = config.target_user_cache[target_type][target]
bot_list = list(nonebot.get_bots().values())
bot = bot_list[0] if bot_list else None
if target_type == 'rss':
to_send = await platform_manager[target_type].fetch_new_post(target, send_list)
for user, send_list in to_send:
for send_post in send_list:
logger.debug('send to {}: {}'.format(user, send_post))
if not bot:
logger.warning('no bot connected')
else:
send_msgs(bot, user.user, user.user_type, await send_post.generate_messages())
to_send = await platform_manager[target_type].fetch_new_post(target, send_list)
for user, send_list in to_send:
for send_post in send_list:
logger.debug('send to {}: {}'.format(user, send_post))
if not bot:
logger.warning('no bot connected')
else:
send_msgs(bot, user.user, user.user_type, await send_post.generate_messages())

View File

@ -1,81 +1,18 @@
import httpx
from collections import defaultdict
from datetime import datetime
import json
import time
from collections import defaultdict
from bs4 import BeautifulSoup as bs
from datetime import datetime
from nonebot import logger
from typing import Any, Optional
from ..utils import Singleton
from bs4 import BeautifulSoup as bs
import httpx
from nonebot import logger
from ..post import Post
from ..types import *
from ..utils import Singleton
from .platform import Platform
class Weibo_(metaclass=Singleton):
    """Poll a Weibo user's timeline and convert unseen entries into ``Post`` objects.

    ``exists_posts`` maps a target to the ids already handled; ``inited``
    maps a target to whether its first, silent fetch has happened.
    """

    def __init__(self):
        self.inited = defaultdict(lambda: False)
        self.exists_posts = defaultdict(set)

    async def get_user_post_list(self, weibo_id: str):
        """Return the raw response text of the container index for ``weibo_id``."""
        async with httpx.AsyncClient() as session:
            query = {'containerid': '107603' + weibo_id}
            response = await session.get('https://m.weibo.cn/api/container/getIndex?', params=query, timeout=4.0)
            return response.text

    def filter_weibo(self, weibo_raw_text, target, init=False):
        """Pick the cards from the raw JSON text that should be pushed.

        Only cards with ``card_type`` 9 are considered. With ``init`` set the
        ids are merely recorded and nothing is returned; otherwise already
        seen entries and entries older than two hours are left out.
        """
        max_age = 60 * 60 * 2
        fresh: list[Post] = []
        for card in json.loads(weibo_raw_text)['data']['cards']:
            if card['card_type'] == 9:
                mblog = card['mblog']
                post_id = mblog['id']
                if init:
                    self.exists_posts[target].add(post_id)
                elif post_id not in self.exists_posts[target]:
                    posted_at = datetime.strptime(mblog['created_at'], '%a %b %d %H:%M:%S %z %Y')
                    if time.time() - posted_at.timestamp() <= max_age:
                        fresh.append(self.parse_weibo(card, target))
        return fresh

    def parse_weibo(self, weibo_dict, target):
        """Build a ``Post`` from one card and record its id as seen."""
        mblog = weibo_dict['mblog']
        # The text field is HTML; keep only its text content.
        plain_text = bs(mblog['text'], 'html.parser').text
        images = []
        for pic in mblog.get('pics', []):
            images.append(pic['large']['url'])
        self.exists_posts[target].add(mblog['id'])
        link = 'https://weibo.com/{}/{}'.format(mblog['user']['id'], mblog['bid'])
        return Post('weibo', plain_text, link, images)

    async def fetch_new_post(self, target):
        """Return the new posts of ``target``.

        The first fetch for a target only records the existing ids and yields
        an empty list; an ``httpx.RequestError`` is logged and also yields an
        empty list.
        """
        try:
            raw_text = await self.get_user_post_list(target)
            if self.inited[target]:
                return self.filter_weibo(raw_text, target)
            self.filter_weibo(raw_text, target, True)
            logger.info('weibo init {} success'.format(target))
            logger.info('post list: {}'.format(self.exists_posts[target]))
            self.inited[target] = True
            return []
        except httpx.RequestError as err:
            logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
            return []
async def get_user_info(id):
    """Look up the screen name of the Weibo account ``id``.

    Returns ``data.userInfo.screen_name`` from the response when its ``ok``
    field equals 1, otherwise ``None``.
    """
    async with httpx.AsyncClient() as session:
        query = {'containerid': '100505' + id}
        response = await session.get('https://m.weibo.cn/api/container/getIndex', params=query)
        payload = json.loads(response.text)
        if payload.get('ok') != 1:
            return None
        return payload['data']['userInfo']['screen_name']
class Weibo(Platform):
categories = {