2021-03-03 20:36:48 +08:00

136 lines
5.7 KiB
Python

from datetime import datetime
import json
import re
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import httpx
from nonebot import logger
from ..post import Post
from ..types import *
from .platform import Platform
class Weibo(Platform):
    """Platform implementation that polls Weibo through the m.weibo.cn API.

    Besides ordinary posts (cards whose ``card_type`` is 9), this class
    tracks the pinned ("top") post of every target and emits a synthetic
    raw post ``{'_type': 50, 'target': <screen name>}`` when the pinned post
    disappears or is replaced by a different one.
    """

    # Category id -> display label.  Ids 1-3 are assigned by get_category();
    # id 50 is the synthetic "pinned post removed" event built in
    # filter_common().  Labels mean: repost, video, image/text, unpinned.
    categories = {
        1: '转发',
        2: '视频',
        3: '图文',
        50: '撤置顶'
    }
    enable_tag = False
    platform_name = 'weibo'

    # Browser-like request headers sent when fetching a post's detail page in
    # parse().  Kept as a class constant so the dict is built only once.
    _DETAIL_HEADERS = {
        'accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
        'accept-language': 'zh-CN,zh;q=0.9',
        'authority': 'm.weibo.cn',
        'cache-control': 'max-age=0',
        'sec-fetch-dest': 'empty',
        'sec-fetch-mode': 'same-origin',
        'sec-fetch-site': 'same-origin',
        'upgrade-insecure-requests': '1',
        'user-agent': 'Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) '
                      'AppleWebKit/537.36 (KHTML, like Gecko) Chrome/89.0.4389.72 '
                      'Mobile Safari/537.36',
    }

    def __init__(self):
        """Create the per-target pinned-post cache, then initialise the base class."""
        # Last pinned raw post seen for each target; None when nothing is pinned.
        self.top: dict[Target, Optional[RawPost]] = dict()
        super().__init__()

    @staticmethod
    async def get_account_name(target: Target) -> Optional[str]:
        """Look up the screen name of the account identified by ``target``.

        Returns:
            The ``screen_name`` reported by the API, or ``None`` when the
            response's ``ok`` field is not 1.
        """
        async with httpx.AsyncClient() as client:
            param = {'containerid': '100505' + target}
            res = await client.get('https://m.weibo.cn/api/container/getIndex', params=param)
            res_dict = json.loads(res.text)
            if res_dict.get('ok') == 1:
                return res_dict['data']['userInfo']['screen_name']
            return None

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Fetch the current list of cards for ``target``.

        Returns:
            The ``data.cards`` list from the API response, or an empty list
            when the response's ``ok`` field is missing or falsy.
        """
        async with httpx.AsyncClient() as client:
            params = {'containerid': '107603' + target}
            res = await client.get('https://m.weibo.cn/api/container/getIndex?', params=params, timeout=4.0)
            res_data = json.loads(res.text)
            # .get() keeps a response without an 'ok' field from raising
            # KeyError; it is treated like a failed request instead.
            if not res_data.get('ok'):
                return []
            return res_data['data']['cards']

    def get_id(self, post: RawPost) -> Any:
        """Return the post id, or ``None`` for synthetic posts carrying ``_type``."""
        if post.get('_type'):
            return None
        return post['mblog']['id']

    def filter_platform_custom(self, raw_post: RawPost) -> bool:
        """Keep only cards whose ``card_type`` is 9."""
        return raw_post['card_type'] == 9

    def get_date(self, raw_post: RawPost) -> float:
        """Return the post's ``created_at`` field as a POSIX timestamp."""
        created_time = datetime.strptime(raw_post['mblog']['created_at'], '%a %b %d %H:%M:%S %z %Y')
        return created_time.timestamp()

    def get_tags(self, raw_post: RawPost) -> Optional[list[Tag]]:
        """Return the tag list of the given raw post (always ``None`` here)."""
        return None

    def get_category(self, raw_post: RawPost) -> Category:
        """Classify a raw post into one of the ids listed in ``categories``.

        Synthetic posts carry their category in ``_type``.  Otherwise a post
        with ``retweeted_status`` is category 1, a post whose ``page_info``
        has type ``'video'`` is category 2, and anything else is category 3.
        """
        if (custom_cat := raw_post.get('_type')):
            return Category(custom_cat)
        mblog = raw_post['mblog']
        if mblog.get('retweeted_status'):
            return Category(1)
        page_info = mblog.get('page_info')
        if page_info and page_info.get('type') == 'video':
            return Category(2)
        return Category(3)

    def _get_text(self, raw_text: str) -> str:
        """Convert post HTML to plain text, turning ``<br />`` into newlines."""
        text = raw_text.replace('<br />', '\n')
        return bs(text, 'html.parser').text

    def _get_top(self, raw_post_list: list[RawPost]) -> Optional[RawPost]:
        """Return the first card of type 9 flagged ``isTop``, or ``None``."""
        for raw_post in raw_post_list:
            if raw_post['card_type'] == 9 and raw_post['mblog'].get('isTop'):
                return raw_post
        return None

    async def filter_common(self, target: Target, raw_post_list: list[RawPost]) -> list[RawPost]:
        """Filter new posts and append a cancel-top event when the pin changed.

        On the first call for a target (``self.inited`` not yet set for it)
        the pinned post is recorded, the base-class filter is run for its
        side effects, and nothing is reported.  On later calls the
        base-class result is returned, extended by a synthetic
        ``{'_type': 50, 'target': ...}`` post if the previously pinned post
        is gone or has been replaced by a different one.
        """
        if not self.inited.get(target, False):
            self.top[target] = self._get_top(raw_post_list)
            await super().filter_common(target, raw_post_list)
            return []

        # An empty list is returned unchanged and the cached pin is kept —
        # presumably so a failed fetch is not mistaken for an unpin
        # (TODO confirm against get_sub_list's callers).
        if not raw_post_list:
            return []

        new_post = self._get_top(raw_post_list)
        res = await super().filter_common(target, raw_post_list)
        old_post = self.top.get(target)
        pin_changed = old_post is not None and (
            new_post is None or self.get_id(old_post) != self.get_id(new_post)
        )
        if pin_changed:
            if new_post:
                logger.info('cancel top: {}'.format(new_post))
            else:
                logger.info('cancel top: {}'.format(raw_post_list))
            res.append({'_type': 50, 'target': old_post['mblog']['user']['screen_name']})
        self.top[target] = new_post
        return res

    async def parse(self, raw_post: RawPost) -> Post:
        """Build a ``Post`` from a raw card or from a synthetic cancel-top event.

        For posts flagged ``isLongText`` or reporting more than nine
        pictures, the detail page is fetched and the JSON embedded in it
        replaces the card's ``mblog`` data.  If that JSON cannot be
        extracted, the data already present in the card is used instead.
        """
        if raw_post.get('_type') == 50:
            # cancel top
            return Post('weibo', text="撤置顶", url='', pics=[], target_name=raw_post['target'], override_use_pic=False)

        info = raw_post['mblog']
        # Missing fields are treated as "not long" / "no pictures" rather
        # than raising KeyError.
        if info.get('isLongText') or (info.get('pic_num') or 0) > 9:
            async with httpx.AsyncClient() as client:
                res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid']), headers=self._DETAIL_HEADERS)
            try:
                # The status JSON is whatever sits between `"status": ` and
                # the following `"hotScheme"` key in the page source.
                full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1)
                info = json.loads(full_json_text)
            except (AttributeError, json.JSONDecodeError):
                # AttributeError: the pattern did not match (search() gave
                # None).  JSONDecodeError: the captured text is not valid
                # JSON.  Either way `info` still holds the card's mblog.
                logger.info('detail message error: https://m.weibo.cn/detail/{}'.format(info['mid']))

        parsed_text = self._get_text(info['text'])
        pic_urls = [img['large']['url'] for img in info.get('pics', [])]
        detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
        return Post('weibo', text=parsed_text, url=detail_url, pics=pic_urls, target_name=info['user']['screen_name'])