init proj

This commit is contained in:
felinae98 2021-02-03 16:58:49 +08:00
commit 180c606950
No known key found for this signature in database
GPG Key ID: 00C8B010587FF610
16 changed files with 2435 additions and 0 deletions

182
.gitignore vendored Normal file
View File

@ -0,0 +1,182 @@
# Created by https://www.toptal.com/developers/gitignore/api/python,linux,vim
# Edit at https://www.toptal.com/developers/gitignore?templates=python,linux,vim
### Linux ###
*~
# temporary files which can be created if a process still has a handle open of a deleted file
.fuse_hidden*
# KDE directory preferences
.directory
# Linux trash folder which might appear on any partition or disk
.Trash-*
# .nfs files are created when an open file is removed but is still being accessed
.nfs*
### Python ###
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
pytestdebug.log
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
doc/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
pythonenv*
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
# pytype static type analyzer
.pytype/
# profiling data
.prof
### Vim ###
# Swap
[._]*.s[a-v][a-z]
!*.svg # comment out if you don't need vector files
[._]*.sw[a-p]
[._]s[a-rt-v][a-z]
[._]ss[a-gi-z]
[._]sw[a-p]
# Session
Session.vim
Sessionx.vim
# Temporary
.netrwhist
# Auto-generated tag files
tags
# Persistent undo
[._]*.un~
# End of https://www.toptal.com/developers/gitignore/api/python,linux,vim
data/*
.env.*

16
bot.py Normal file
View File

@ -0,0 +1,16 @@
import nonebot
from nonebot.adapters.cqhttp import Bot as CQHTTPBot
nonebot.init(command_start=[""])
app = nonebot.get_asgi()
driver = nonebot.get_driver()
driver.register_adapter('cqhttp', CQHTTPBot)
nonebot.load_builtin_plugins()
nonebot.load_plugins('src/plugins')
nonebot.load_plugin("nonebot_plugin_apscheduler")
nonebot.load_plugin('nonebot_plugin_test')
if __name__ == "__main__":
nonebot.run(app="bot:app")

1658
poetry.lock generated Normal file

File diff suppressed because it is too large Load Diff

30
pyproject.toml Normal file
View File

@ -0,0 +1,30 @@
[tool.poetry]
name = "nonebot-hk-reporter"
version = "0.1.0"
description = "Subscribe message from social medias"
authors = ["felinae98 <felinae225@qq.com>"]
license = "MIT"
[tool.poetry.dependencies]
python = "^3.9"
nonebot2 = "^2.0.0-alpha.8"
httpx = "^0.16.1"
bs4 = "^0.0.1"
tinydb = "^4.3.0"
nonebot_plugin_apscheduler = "^0.1.2"
feedparser = "^6.0.2"
[tool.poetry.dev-dependencies]
pylint = "^2.6.0"
jedi = "^0.18.0"
nb-cli = "^0.3.2"
ipdb = "^0.13.4"
[build-system]
requires = ["poetry>=0.12"]
build-backend = "poetry.masonry.api"
[[tool.poetry.source]]
name = "aliyun"
url = "https://mirrors.aliyun.com/pypi/simple/"
default = true

View File

@ -0,0 +1,6 @@
from . import config_manager
from . import config
from . import scheduler
from . import send
import nonebot

View File

@ -0,0 +1,96 @@
from .utils import Singleton, supported_target_type
from os import path
import nonebot
from tinydb import TinyDB, Query
from collections import defaultdict
import os
def get_config_path() -> str:
working_dir = os.getcwd()
data_dir = path.join(working_dir, 'data')
if not path.isdir(data_dir):
os.makedirs(data_dir)
return path.join(data_dir, 'hk_reporter.json')
class NoSuchUserException(Exception):
pass
class NoSuchSubscribeException(Exception):
pass
@Singleton
class Config():
migrate_version = 1
def __init__(self):
self.db = TinyDB(get_config_path(), encoding='utf-8')
self.kv_config = self.db.table('kv')
self.user_target = self.db.table('user_target')
self.target_user_cache = {}
self.target_list = {}
self.next_index = defaultdict(lambda: 0)
def add_subscribe(self, user, user_type, target, target_name, target_type):
user_query = Query()
query = (user_query.user == user) & (user_query.user_type == user_type)
if (user_data := self.user_target.get(query)):
# update
subs: list = user_data.get('subs', [])
subs.append({"target": target, "target_type": target_type, 'target_name': target_name})
self.user_target.update({"subs": subs}, query)
else:
# insert
self.user_target.insert({'user': user, 'user_type': user_type, 'subs': [{'target': target, 'target_type': target_type, 'target_name': target_name}]})
self.update_send_cache()
def list_subscribe(self, user, user_type):
query = Query()
return self.user_target.get((query.user == user) & (query.user_type ==user_type))['subs']
def del_subscribe(self, user, user_type, target, target_type):
user_query = Query()
query = (user_query.user == user) & (user_query.user_type == user_type)
if not (query_res := self.user_target.get(query)):
raise NoSuchUserException()
subs = query_res.get('subs', [])
for idx, sub in enumerate(subs):
if sub.get('target') == target and sub.get('target_type') == target_type:
subs.pop(idx)
self.user_target.update({'subs': subs}, query)
self.update_send_cache()
return
raise NoSuchSubscribeException()
def update_send_cache(self):
res = {target_type: defaultdict(list) for target_type in supported_target_type}
# res = {target_type: defaultdict(lambda: defaultdict(list)) for target_type in supported_target_type}
for user in self.user_target.all():
for sub in user.get('subs', []):
if not sub.get('target_type') in supported_target_type:
continue
res[sub['target_type']][sub['target']].append({"user": user['user'], "user_type": user['user_type']})
self.target_user_cache = res
for target_type in self.target_user_cache:
self.target_list[target_type] = list(self.target_user_cache[target_type].keys())
def get_next_target(self, target_type):
# FIXME 插入或删除target后对队列的影响但是并不是大问题
if not self.target_list[target_type]:
return None
self.next_index[target_type] %= len(self.target_list[target_type])
res = self.target_list[target_type][self.next_index[target_type]]
self.next_index[target_type] += 1
return res
def start_up():
config = Config()
if not (search_res := config.kv_config.search(Query().name=="version")):
config.kv_config.insert({"name": "version", "value": config.migrate_version})
elif search_res[0].get("value") < config.migrate_version:
pass
# do migration
config.update_send_cache()
nonebot.get_driver().on_startup(start_up)

View File

@ -0,0 +1,55 @@
from nonebot.rule import to_me
from nonebot.typing import T_State
from nonebot.adapters.cqhttp import Bot, Event, GroupMessageEvent
from nonebot.permission import Permission
from nonebot.adapters.cqhttp.permission import GROUP_ADMIN, GROUP_MEMBER, GROUP_OWNER
from nonebot import on_command
from .platform.utils import check_sub_target
from .config import Config, NoSuchSubscribeException
async def check_is_owner_or_admin(bot: Bot, event: Event):
return await (GROUP_ADMIN | GROUP_OWNER)(bot, event)
# add_sub = on_command("添加订阅", rule=to_me(), permission=Permission(check_is_owner_or_admin), priority=5)
add_sub = on_command("添加订阅", rule=to_me(), priority=5)
# add_sub = on_command("添加订阅", rule=to_me() & check_is_owner_or_admin, priority=5)
@add_sub.handle()
async def _(bot: Bot, event: Event, state: T_State):
args = str(event.get_message()).strip().split()
if len(args) != 2:
await add_sub.reject("使用方法为: 添加订阅 平台 id")
return
target_type, target = args
if (name := await check_sub_target(target_type, target)):
config: Config = Config()
config.add_subscribe(event.group_id, "group", target, name, target_type)
await add_sub.finish("成功添加 {}".format(name))
else:
await add_sub.reject("平台或者id不存在")
query_sub = on_command("查询订阅", rule=to_me(), priority=5)
@query_sub.handle()
async def _(bot: Bot, event: Event, state: T_State):
config: Config = Config()
sub_list = config.list_subscribe(event.group_id, "group")
res = '订阅的帐号为:\n'
for sub in sub_list:
res += '{} {} {}\n'.format(sub['target_type'], sub['target_name'], sub['target'])
await query_sub.finish(res)
del_sub = on_command("删除订阅", rule=to_me(), priority=5)
@del_sub.handle()
async def _(bot: Bot, event: Event, state: T_State):
args = str(event.get_message()).strip().split()
if len(args) != 2:
await del_sub.reject("使用方法为: 删除订阅 平台 id")
return
target_type, target = args
config = Config()
try:
config.del_subscribe(event.group_id, "group", target, target_type)
except NoSuchSubscribeException:
await del_sub.reject('平台或id不存在')
await del_sub.finish('删除成功')

View File

@ -0,0 +1,92 @@
from ..utils import Singleton
from ..post import Post
from collections import defaultdict
from nonebot import logger
import httpx
import json
import time
@Singleton
class Bilibili:
def __init__(self):
self.exists_posts = defaultdict(set)
self.inited = defaultdict(lambda: False)
async def get_user_post_list(self, user_id):
async with httpx.AsyncClient() as client:
params = {'host_uid': user_id, 'offset': 0, 'need_top': 0}
res = await client.get('https://api.vc.bilibili.com/dynamic_svr/v1/dynamic_svr/space_history', params=params, timeout=4.0)
res_dict = json.loads(res.text)
if res_dict['code'] == 0:
return res_dict['data']
def filter(self, data, target, init=False) -> list[Post]:
cards = data['cards']
res: list[Post] = []
for card in cards:
dynamic_id = card['desc']['dynamic_id']
if init:
self.exists_posts[target].add(dynamic_id)
continue
if dynamic_id in self.exists_posts[target]:
continue
if time.time() - card['desc']['timestamp'] > 60 * 60 * 2:
continue
res.append(self.parse(card, target))
if None in res:
res.remove(None)
return res
def parse(self, card, target) -> Post:
card_content = json.loads(card['card'])
dynamic_id = card['desc']['dynamic_id']
self.exists_posts[target].add(dynamic_id)
if card['desc']['type'] == 2:
# 一般动态
text = card_content['item']['description']
url = 'https://t.bilibili.com/{}'.format(card['desc']['dynamic_id'])
pic = [img['img_src'] for img in card_content['item']['pictures']]
elif card['desc']['type'] == 64:
# 专栏文章
text = '{} {}'.format(card_content['title'], card_content['summary'])
url = 'https://www.bilibili.com/read/cv{}'.format(card['desc']['rid'])
pic = card_content['image_urls']
elif card['desc']['type'] == 8:
# 视频
text = card_content['dynamic']
url = 'https://www.bilibili.com/video/{}'.format(card['desc']['bvid'])
pic = [card_content['pic']]
elif card['desc']['type'] == 4:
# 纯文字
text = card_content['item']['content']
url = 'https://t.bilibili.com/{}'.format(card['desc']['dynamic_id'])
pic = []
else:
logger.error(card)
return None
return Post('bilibili', text, url, pic)
async def fetch_new_post(self, target) -> list[Post]:
try:
post_list_data = await self.get_user_post_list(target)
if self.inited[target]:
return self.filter(post_list_data, target)
else:
self.filter(post_list_data, target, True)
logger.info('bilibili init {} success'.format(target))
logger.info('post list: {}'.format(self.exists_posts[target]))
self.inited[target] = True
return []
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []
async def get_user_info(mid):
async with httpx.AsyncClient() as client:
res = await client.get('https://api.bilibili.com/x/space/acc/info', params={'mid': mid})
res_data = json.loads(res.text)
if res_data['code']:
return None
return res['data']['name']

View File

@ -0,0 +1,63 @@
from ..utils import Singleton
from ..post import Post
from collections import defaultdict
from bs4 import BeautifulSoup as bs
from nonebot import logger
import feedparser
import httpx
import time
import calendar
async def get_rss_raw_data(url) -> str:
async with httpx.AsyncClient() as client:
res = await client.get(url, timeout=10.0)
return res.text
async def get_rss_info(url) -> str:
data = await get_rss_raw_data(url)
return data.feed.title
@Singleton
class Rss:
def __init__(self):
self.exists_posts = defaultdict(set)
self.inited = defaultdict(lambda: False)
def filter(self, data, target, init=False) -> list[Post]:
feed = feedparser.parse(data)
entries = feed.entries
res = []
for entry in entries:
entry_id = entry.id
if init:
self.exists_posts[target].add(entry_id)
continue
if entry_id in self.exists_posts[target]:
continue
# if time.time() - calendar.timegm(entry.published_parsed) > 2 * 60 * 60:
# continue
res.append(self.parse(entry, target))
return res
def parse(self, entry, target) -> Post:
soup = bs(entry.description, 'html.parser')
text = soup.text
pics = list(map(lambda x: x.attrs['src'], soup('img')))
self.exists_posts[target].add(entry.id)
return Post('rss', text, entry.link, pics)
async def fetch_new_post(self, target) -> list[Post]:
try:
raw_data = await get_rss_raw_data(target)
if self.inited[target]:
return self.filter(raw_data, target)
else:
self.filter(raw_data, target, True)
logger.info('rss init {} success'.format(target))
logger.info('post list: {}'.format(self.exists_posts[target]))
self.inited[target] = True
return []
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []

View File

@ -0,0 +1,26 @@
import time
import asyncio
from collections import defaultdict
from .weibo import Weibo, get_user_info as weibo_user_info
from .bilibili import get_user_info as bilibili_user_info
from .rss import get_rss_info as rss_info
async def check_sub_target(target_type, target):
if target_type == 'weibo':
return await weibo_user_info(target)
elif target_type == 'bilibili':
return await bilibili_user_info(target)
elif target_type == 'rss':
return await rss_info(target)
else:
return None
scheduler_last_run = defaultdict(lambda: 0)
async def scheduler(fun, target_type):
platform_interval = {
'weibo': 3
}
if (wait_time := time.time() - scheduler_last_run[target_type]) < platform_interval[target_type]:
await asyncio.sleep(wait_time)
await fun()

View File

@ -0,0 +1,75 @@
import httpx
import json
import time
from collections import defaultdict
from bs4 import BeautifulSoup as bs
from datetime import datetime
from nonebot import logger
from ..utils import Singleton
from ..post import Post
@Singleton
class Weibo:
def __init__(self):
self.exists_posts = defaultdict(set)
self.inited = defaultdict(lambda: False)
async def get_user_post_list(self, weibo_id: str):
async with httpx.AsyncClient() as client:
params = { 'containerid': '107603' + weibo_id}
res = await client.get('https://m.weibo.cn/api/container/getIndex?', params=params, timeout=4.0)
return res.text
def filter_weibo(self, weibo_raw_text, target, init=False):
weibo_dict = json.loads(weibo_raw_text)
weibos = weibo_dict['data']['cards']
res: list[Post] = []
for weibo in weibos:
if weibo['card_type'] != 9:
continue
info = weibo['mblog']
if init:
self.exists_posts[target].add(info['id'])
continue
if info['id'] in self.exists_posts[target]:
continue
created_time = datetime.strptime(info['created_at'], '%a %b %d %H:%M:%S %z %Y')
if time.time() - created_time.timestamp() > 60 * 60 * 2:
continue
res.append(self.parse_weibo(weibo, target))
return res
def parse_weibo(self, weibo_dict, target):
info = weibo_dict['mblog']
parsed_text = bs(info['text'], 'html.parser').text
pic_urls = [img['large']['url'] for img in info.get('pics', [])]
self.exists_posts[target].add(info['id'])
detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
# return parsed_text, detail_url, pic_urls
return Post('weibo', parsed_text, detail_url, pic_urls)
async def fetch_new_post(self, target):
try:
post_list = await self.get_user_post_list(target)
if not self.inited[target]:
self.filter_weibo(post_list, target, True)
logger.info('weibo init {} success'.format(target))
logger.info('post list: {}'.format(self.exists_posts[target]))
self.inited[target] = True
return []
return self.filter_weibo(post_list, target)
except httpx.RequestError as err:
logger.warning("network connection error: {}, url: {}".format(type(err), err.request.url))
return []
async def get_user_info(id):
async with httpx.AsyncClient() as client:
param = {'containerid': '100505' + id}
res = await client.get('https://m.weibo.cn/api/container/getIndex', params=param)
res_dict = json.loads(res.text)
if res_dict.get('ok') == 1:
return res_dict['data']['userInfo']['screen_name']
else:
return None

View File

@ -0,0 +1,22 @@
class Post:
target_type: str
text: str
url: str
pics: list[str]
def generate_messages(self):
first_msg = '来源: {}\n{}\n详情:{}'.format(self.target_type, self.text, self.url)
res = [first_msg]
for pic in self.pics:
res.append("[CQ:image,file={url}]".format(url=pic))
return res
def __init__(self, target_type, text, url, pics=[]):
self.target_type = target_type
self.text = text
self.url = url
self.pics = pics
def __str__(self):
return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text, self.url, ','.join(self.pics))

View File

@ -0,0 +1,76 @@
import nonebot
from .config import Config
from nonebot import require
from nonebot import logger
from .platform.bilibili import Bilibili
from .platform.weibo import Weibo
from .platform.rss import Rss
from .post import Post
from .send import send_msgs, do_send_msgs
from apscheduler.schedulers.asyncio import AsyncIOScheduler
scheduler: AsyncIOScheduler = require('nonebot_plugin_apscheduler').scheduler
config: Config = Config()
weibo: Weibo = Weibo()
bilibili: Bilibili = Bilibili()
rss: Rss = Rss()
@scheduler.scheduled_job('interval', seconds=10)
async def weibo_check():
target = config.get_next_target('weibo')
if not target:
return
logger.debug('try to fecth new posts from weibo, target: {}'.format(target))
new_weibos: list[Post] = await weibo.fetch_new_post(target)
send_list = config.target_user_cache['weibo'][target]
bot_list = list(nonebot.get_bots().values())
bot = bot_list[0] if bot_list else None
for new_weibo in new_weibos:
logger.warning('get new weibo post: {}'.format(new_weibo.url))
if not bot:
logger.warning('no bot connected')
else:
for to_send in send_list:
send_msgs(bot, to_send['user'], to_send['user_type'], new_weibo.generate_messages())
@scheduler.scheduled_job('interval', seconds=10)
async def bilibili_check():
target = config.get_next_target('bilibili')
if not target:
return
logger.debug('try to fecth new posts from bilibili, target: {}'.format(target))
new_posts: list[Post] = await bilibili.fetch_new_post(target)
send_list = config.target_user_cache['bilibili'][target]
bot_list = list(nonebot.get_bots().values())
bot = bot_list[0] if bot_list else None
for new_post in new_posts:
logger.warning('get new bilibili dynamic: {}'.format(new_post.url))
logger.warning(new_post)
if not bot:
logger.warning('no bot connected')
else:
for to_send in send_list:
send_msgs(bot, to_send['user'], to_send['user_type'], new_post.generate_messages())
@scheduler.scheduled_job('interval', seconds=30)
async def rss_check():
target = config.get_next_target('rss')
if not target:
return
logger.debug('try to fecth new posts from rss, target: {}'.format(target))
new_posts: list[Post] = await rss.fetch_new_post(target)
send_list = config.target_user_cache['rss'][target]
bot_list = list(nonebot.get_bots().values())
bot = bot_list[0] if bot_list else None
for new_post in new_posts:
logger.warning('get new rss entry: {}'.format(new_post.url))
logger.warning(new_post)
if not bot:
logger.warning('no bot connected')
else:
for to_send in send_list:
send_msgs(bot, to_send['user'], to_send['user_type'], new_post.generate_messages())
@scheduler.scheduled_job('interval', seconds=1)
async def _():
await do_send_msgs()

View File

@ -0,0 +1,26 @@
from nonebot.adapters.cqhttp import Bot
import nonebot
import time
import asyncio
QUEUE = []
LAST_SEND_TIME = time.time()
async def do_send_msgs():
global LAST_SEND_TIME
if time.time() - LAST_SEND_TIME < 1.4:
return
if QUEUE:
bot, user, user_type, msg = QUEUE.pop(0)
if user_type == 'group':
await bot.call_api('send_group_msg', group_id=user, message=msg)
elif user_type == 'private':
await bot.call_api('send_private_msg', user_id=user, message=msg)
LAST_SEND_TIME = time.time()
def send_msgs(bot, user, user_type, msgs):
for msg in msgs:
QUEUE.append((bot, user, user_type, msg))

View File

@ -0,0 +1,12 @@
class Singleton(object):
def __init__(self, cls):
self._cls = cls
self._instance = {}
def __call__(self):
if self._cls not in self._instance:
self._instance[self._cls] = self._cls()
return self._instance[self._cls]
supported_target_type = ('weibo', 'bilibili', 'rss')