optimise weibo post

This commit is contained in:
felinae98 2021-02-17 22:55:57 +08:00
parent 59dc3290e0
commit e43c4edea4
No known key found for this signature in database
GPG Key ID: 00C8B010587FF610
2 changed files with 12 additions and 2 deletions

View File

@@ -1,6 +1,7 @@
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
import json import json
import re
import time import time
from typing import Any, Optional from typing import Any, Optional
@@ -64,10 +65,19 @@ class Weibo(Platform):
return Category(2) return Category(2)
else: else:
return Category(3) return Category(3)
def _get_text(self, raw_text: str) -> str:
    """Convert the HTML body of a post into plain text.

    ``<br />`` tags are replaced with newlines first so that line breaks
    survive the tag stripping; the remaining markup is then dropped and
    only the text content is returned.

    Args:
        raw_text: HTML fragment to convert.

    Returns:
        The text content of ``raw_text`` with markup removed.
    """
    text = raw_text.replace('<br />', '\n')
    # Pass the parser explicitly, as the previous inline call did.
    # NOTE(review): `bs` is presumably bs4.BeautifulSoup; without an explicit
    # parser it picks one based on what is installed, so results could vary
    # between environments -- confirm against the file's imports.
    return bs(text, 'html.parser').text
async def parse(self, raw_post: RawPost) -> Post: async def parse(self, raw_post: RawPost) -> Post:
info = raw_post['mblog'] info = raw_post['mblog']
parsed_text = bs(info['text'], 'html.parser').text if info['isLongText'] or info['pic_num'] > 9:
async with httpx.AsyncClient() as client:
res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid']))
full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1)
info = json.loads(full_json_text)
parsed_text = self._get_text(info['text'])
pic_urls = [img['large']['url'] for img in info.get('pics', [])] pic_urls = [img['large']['url'] for img in info.get('pics', [])]
detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid']) detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
# return parsed_text, detail_url, pic_urls # return parsed_text, detail_url, pic_urls

View File

@@ -25,4 +25,4 @@ class Post:
return res return res
def __str__(self): def __str__(self):
return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text[:50], self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics))) return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text, self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)))