mirror of
https://github.com/suyiiyii/nonebot-bison.git
synced 2025-06-04 02:26:11 +08:00
optimise weibo post
This commit is contained in:
parent
59dc3290e0
commit
e43c4edea4
@ -1,6 +1,7 @@
|
||||
from collections import defaultdict
|
||||
from datetime import datetime
|
||||
import json
|
||||
import re
|
||||
import time
|
||||
from typing import Any, Optional
|
||||
|
||||
@ -64,10 +65,19 @@ class Weibo(Platform):
|
||||
return Category(2)
|
||||
else:
|
||||
return Category(3)
|
||||
|
||||
def _get_text(self, raw_text: str) -> str:
    """Convert an HTML fragment of a post into plain text.

    Line-break tags are turned into newline characters first, because the
    HTML parser would otherwise drop them and glue adjacent lines together.
    The result is then parsed with ``bs`` and its ``.text`` is returned.

    Args:
        raw_text: HTML fragment as delivered in the post's ``text`` field.

    Returns:
        The text content of the fragment with markup removed.
    """
    # Accept every common spelling of the tag (<br>, <br/>, <br />, any case),
    # not only the exact '<br />' literal.
    text = re.sub(r'<br\s*/?>', '\n', raw_text, flags=re.IGNORECASE)
    # Name the parser explicitly, as the other bs(...) call in parse() does,
    # so the output does not depend on which optional parsers are installed.
    # NOTE(review): assumes `bs` is BeautifulSoup - confirm against the import.
    return bs(text, 'html.parser').text
|
||||
|
||||
async def parse(self, raw_post: RawPost) -> Post:
|
||||
info = raw_post['mblog']
|
||||
parsed_text = bs(info['text'], 'html.parser').text
|
||||
if info['isLongText'] or info['pic_num'] > 9:
|
||||
async with httpx.AsyncClient() as client:
|
||||
res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid']))
|
||||
full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1)
|
||||
info = json.loads(full_json_text)
|
||||
parsed_text = self._get_text(info['text'])
|
||||
pic_urls = [img['large']['url'] for img in info.get('pics', [])]
|
||||
detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
|
||||
# return parsed_text, detail_url, pic_urls
|
||||
|
@ -25,4 +25,4 @@ class Post:
|
||||
return res
|
||||
|
||||
def __str__(self):
|
||||
return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text[:50], self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)))
|
||||
return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text, self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)))
|
||||
|
Loading…
x
Reference in New Issue
Block a user