optimise weibo post

This commit is contained in:
felinae98
2021-02-17 22:55:57 +08:00
parent 59dc3290e0
commit e43c4edea4
2 changed files with 12 additions and 2 deletions
+11 -1
View File
@@ -1,6 +1,7 @@
from collections import defaultdict from collections import defaultdict
from datetime import datetime from datetime import datetime
import json import json
import re
import time import time
from typing import Any, Optional from typing import Any, Optional
@@ -65,9 +66,18 @@ class Weibo(Platform):
else: else:
return Category(3) return Category(3)
def _get_text(self, raw_text: str) -> str:
    """Convert a post's HTML body into plain text.

    ``<br />`` tags are turned into newlines before the markup is
    stripped, so line breaks in the post survive the conversion.

    Args:
        raw_text: HTML fragment taken from the post's ``text`` field.

    Returns:
        The text content of the fragment with all tags removed.
    """
    text = raw_text.replace('<br />', '\n')
    # Name the parser explicitly: without it BeautifulSoup picks whichever
    # parser happens to be installed (and warns), so the extracted text
    # could vary between environments.
    return bs(text, 'html.parser').text
async def parse(self, raw_post: RawPost) -> Post: async def parse(self, raw_post: RawPost) -> Post:
info = raw_post['mblog'] info = raw_post['mblog']
parsed_text = bs(info['text'], 'html.parser').text if info['isLongText'] or info['pic_num'] > 9:
async with httpx.AsyncClient() as client:
res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid']))
full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1)
info = json.loads(full_json_text)
parsed_text = self._get_text(info['text'])
pic_urls = [img['large']['url'] for img in info.get('pics', [])] pic_urls = [img['large']['url'] for img in info.get('pics', [])]
detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid']) detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
# return parsed_text, detail_url, pic_urls # return parsed_text, detail_url, pic_urls
+1 -1
View File
@@ -25,4 +25,4 @@ class Post:
return res return res
def __str__(self): def __str__(self):
return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text[:50], self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics))) return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text, self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)))