From e43c4edea411fd2c63d7460d727410a7e90dde31 Mon Sep 17 00:00:00 2001
From: felinae98 <731499577@qq.com>
Date: Wed, 17 Feb 2021 22:55:57 +0800
Subject: [PATCH] optimise weibo post

---
 src/plugins/hk_reporter/platform/weibo.py | 12 +++++++++++-
 src/plugins/hk_reporter/post.py           |  2 +-
 2 files changed, 12 insertions(+), 2 deletions(-)
diff --git a/src/plugins/hk_reporter/platform/weibo.py b/src/plugins/hk_reporter/platform/weibo.py
index 676dd72..07a0392 100644
--- a/src/plugins/hk_reporter/platform/weibo.py
+++ b/src/plugins/hk_reporter/platform/weibo.py
@@ -1,6 +1,7 @@
 from collections import defaultdict
 from datetime import datetime
 import json
+import re
 import time
 from typing import Any, Optional
 
@@ -64,10 +65,28 @@ class Weibo(Platform):
             return Category(2)
         else:
             return Category(3)
+
+    def _get_text(self, raw_text: str) -> str:
+        """Return the plain text of an HTML fragment, keeping '<br />' as newlines."""
+        # Swap the tags for newlines before parsing so the breaks survive as text.
+        text = raw_text.replace('<br />', '\n')
+        # Name the parser explicitly so the result does not depend on which
+        # optional parsers happen to be installed.
+        return bs(text, 'html.parser').text
  
     async def parse(self, raw_post: RawPost) -> Post:
         info = raw_post['mblog']
-        parsed_text = bs(info['text'], 'html.parser').text
+        if info['isLongText'] or info['pic_num'] > 9:
+            # Re-read the post from its detail page and use the "status" object
+            # embedded there; presumably the payload above is truncated for long
+            # posts and for posts with more than 9 pictures -- TODO confirm.
+            async with httpx.AsyncClient() as client:
+                res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid']))
+            # NOTE(review): re.search returns None when the pattern is absent, so
+            # .group(1) raises AttributeError if the page layout ever changes.
+            full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1)
+            info = json.loads(full_json_text)
+        parsed_text = self._get_text(info['text'])
         pic_urls = [img['large']['url'] for img in info.get('pics', [])]
         detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid'])
         # return parsed_text, detail_url, pic_urls
diff --git a/src/plugins/hk_reporter/post.py b/src/plugins/hk_reporter/post.py
index 63cc46c..ed74300 100644
--- a/src/plugins/hk_reporter/post.py
+++ b/src/plugins/hk_reporter/post.py
@@ -25,4 +25,8 @@ class Post:
         return res
 
     def __str__(self):
-        return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text[:50], self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)))
+        """Render the post's type, text, URL and pictures as a multi-line string."""
+        # Pictures whose string starts with 'base64' are shown as the placeholder 'b64img'.
+        pics = ('b64img' if pic.startswith('base64') else pic for pic in self.pics)
+        return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(
+            self.target_type, self.text, self.url, ','.join(pics))