optimise weibo post

This commit is contained in:
felinae98 2021-02-17 22:55:57 +08:00
parent 59dc3290e0
commit e43c4edea4
No known key found for this signature in database
GPG Key ID: 00C8B010587FF610
2 changed files with 12 additions and 2 deletions

View File

@ -1,6 +1,7 @@
from collections import defaultdict
from datetime import datetime
import json
import re
import time
from typing import Any, Optional
@ -64,10 +65,19 @@ class Weibo(Platform):
return Category(2)
else:
return Category(3)
def _get_text(self, raw_text: str) -> str:
    """Convert a post's HTML body into plain text.

    Args:
        raw_text: HTML markup of the post body.

    Returns:
        The text content with tags removed; each ``<br />`` in the input
        becomes a newline.
    """
    # Swap <br /> for a real newline before parsing: extracting .text from
    # the parsed tree would otherwise drop the line breaks entirely.
    text = raw_text.replace('<br />', '\n')
    # Name the parser explicitly (the same one used elsewhere in this
    # module) so the result does not depend on which optional parsers are
    # installed and Beautiful Soup does not emit GuessedAtParserWarning.
    return bs(text, 'html.parser').text
async def parse(self, raw_post: RawPost) -> Post:
info = raw_post['mblog']
parsed_text = bs(info['text'], 'html.parser').text
if info['isLongText'] or info['pic_num'] > 9:
async with httpx.AsyncClient() as client:
res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid']))
full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1)
info = json.loads(full_json_text)
parsed_text = self._get_text(info['text'])
pic_urls = [img['large']['url'] for img in info.get('pics', [])]
detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
# return parsed_text, detail_url, pic_urls

View File

@ -25,4 +25,4 @@ class Post:
return res
def __str__(self):
return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text[:50], self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)))
return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text, self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)))