From 59dc3290e0456b818c93d3e517bfe37075c00fc9 Mon Sep 17 00:00:00 2001 From: felinae98 <731499577@qq.com> Date: Wed, 17 Feb 2021 17:21:47 +0800 Subject: [PATCH 1/3] to 0.2.2 --- pyproject.toml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pyproject.toml b/pyproject.toml index 462ce0d..24b08db 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "nonebot-hk-reporter" -version = "0.2.1" +version = "0.2.2" description = "Subscribe message from social medias" authors = ["felinae98 "] license = "MIT" From e43c4edea411fd2c63d7460d727410a7e90dde31 Mon Sep 17 00:00:00 2001 From: felinae98 <731499577@qq.com> Date: Wed, 17 Feb 2021 22:55:57 +0800 Subject: [PATCH 2/3] optimise weibo post --- src/plugins/hk_reporter/platform/weibo.py | 12 +++++++++++- src/plugins/hk_reporter/post.py | 2 +- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/src/plugins/hk_reporter/platform/weibo.py b/src/plugins/hk_reporter/platform/weibo.py index 676dd72..07a0392 100644 --- a/src/plugins/hk_reporter/platform/weibo.py +++ b/src/plugins/hk_reporter/platform/weibo.py @@ -1,6 +1,7 @@ from collections import defaultdict from datetime import datetime import json +import re import time from typing import Any, Optional @@ -64,10 +65,19 @@ class Weibo(Platform): return Category(2) else: return Category(3) + + def _get_text(self, raw_text: str) -> str: + text = raw_text.replace('
', '\n') + return bs(text).text async def parse(self, raw_post: RawPost) -> Post: info = raw_post['mblog'] - parsed_text = bs(info['text'], 'html.parser').text + if info['isLongText'] or info['pic_num'] > 9: + async with httpx.AsyncClient() as client: + res = await client.get('https://m.weibo.cn/detail/{}'.format(info['mid'])) + full_json_text = re.search(r'"status": ([\s\S]+),\s+"hotScheme"', res.text).group(1) + info = json.loads(full_json_text) + parsed_text = self._get_text(info['text']) pic_urls = [img['large']['url'] for img in info.get('pics', [])] detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid']) # return parsed_text, detail_url, pic_urls diff --git a/src/plugins/hk_reporter/post.py b/src/plugins/hk_reporter/post.py index 63cc46c..ed74300 100644 --- a/src/plugins/hk_reporter/post.py +++ b/src/plugins/hk_reporter/post.py @@ -25,4 +25,4 @@ class Post: return res def __str__(self): - return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text[:50], self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics))) + return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text, self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics))) From 30447f129948720b7de1e72130a5e8b0bb634344 Mon Sep 17 00:00:00 2001 From: felinae98 <731499577@qq.com> Date: Thu, 18 Feb 2021 01:05:04 +0800 Subject: [PATCH 3/3] update post --- src/plugins/hk_reporter/platform/bilibili.py | 3 +- src/plugins/hk_reporter/platform/rss.py | 2 +- src/plugins/hk_reporter/platform/weibo.py | 4 +-- src/plugins/hk_reporter/post.py | 35 ++++++++++++++------ 4 files changed, 29 insertions(+), 15 deletions(-) diff --git a/src/plugins/hk_reporter/platform/bilibili.py b/src/plugins/hk_reporter/platform/bilibili.py index ba5bdba..77fe41a 100644 --- a/src/plugins/hk_reporter/platform/bilibili.py +++ b/src/plugins/hk_reporter/platform/bilibili.py @@ -66,6 +66,7 @@ class Bilibili(Platform): async def parse(self, raw_post: RawPost) -> Post: card_content = json.loads(raw_post['card']) post_type = self.get_category(raw_post) + target_name = raw_post['desc']['user_profile']['info']['uname'] if post_type == 1: # 一般动态 text = card_content['item']['description'] @@ -88,5 +89,5 @@ class Bilibili(Platform): pic = [] else: raise CategoryNotSupport(post_type) - return Post('bilibili', text, url, pic) + return Post('bilibili', text=text, url=url, pics=pic, target_name=target_name) diff --git a/src/plugins/hk_reporter/platform/rss.py b/src/plugins/hk_reporter/platform/rss.py index f5c1ac5..caf76d2 100644 --- a/src/plugins/hk_reporter/platform/rss.py +++ b/src/plugins/hk_reporter/platform/rss.py @@ -38,4 +38,4 @@ class Rss(Platform): soup = bs(raw_post.description, 'html.parser') text = soup.text pics = list(map(lambda x: x.attrs['src'], soup('img'))) - return Post('rss', text, raw_post.link, pics) + return Post('rss', text=text, url=raw_post.link, pics=pics) diff --git a/src/plugins/hk_reporter/platform/weibo.py b/src/plugins/hk_reporter/platform/weibo.py index 07a0392..078c4e1 100644 --- a/src/plugins/hk_reporter/platform/weibo.py +++ b/src/plugins/hk_reporter/platform/weibo.py @@ -69,7 +69,7 @@ class Weibo(Platform): def _get_text(self, raw_text: str) -> str: text = raw_text.replace('
', '\n') return bs(text).text - + async def parse(self, raw_post: RawPost) -> Post: info = raw_post['mblog'] if info['isLongText'] or info['pic_num'] > 9: @@ -81,4 +81,4 @@ class Weibo(Platform): pic_urls = [img['large']['url'] for img in info.get('pics', [])] detail_url = 'https://weibo.com/{}/{}'.format(info['user']['id'], info['bid']) # return parsed_text, detail_url, pic_urls - return Post('weibo', parsed_text, detail_url, pic_urls) + return Post('weibo', text=parsed_text, url=detail_url, pics=pic_urls, target_name=info['user']['screen_name']) diff --git a/src/plugins/hk_reporter/post.py b/src/plugins/hk_reporter/post.py index ed74300..819c8fb 100644 --- a/src/plugins/hk_reporter/post.py +++ b/src/plugins/hk_reporter/post.py @@ -1,4 +1,5 @@ from dataclasses import dataclass, field +from typing import Optional from .plugin_config import plugin_config from .utils import parse_text @@ -8,21 +9,33 @@ class Post: target_type: str text: str url: str + target_name: Optional[str] = None + show_text: bool = True pics: list[str] = field(default_factory=list) async def generate_messages(self): - if plugin_config.hk_reporter_use_pic: - text_msg = '来源: {}\n{}'.format(self.target_type, self.text) - if self.target_type == 'rss': - res = [await parse_text(text_msg)] + msgs = [] + if self.show_text: + text = '来源: {}'.format(self.target_type) + if self.target_name: + text += '\n{}'.format(self.target_name) + if self.text: + text += '\n{}'.format(self.text) + if plugin_config.hk_reporter_use_pic: + msgs.append(await parse_text(text)) + if not self.target_type == 'rss': + msgs.append(self.url) else: - res = [await parse_text(text_msg), self.url] - else: - first_msg = '来源: {}\n{}\n详情:{}'.format(self.target_type, self.text, self.url) - res = [first_msg] + text += '详情: {}'.format(self.url) + msgs.append(text) for pic in self.pics: - res.append("[CQ:image,file={url}]".format(url=pic)) - return res + msgs.append("[CQ:image,file={url}]".format(url=pic)) + return msgs def __str__(self): - return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format(self.target_type, self.text, self.url, ','.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics))) + return 'type: {}\ntext: {}\nurl: {}\npic: {}'.format( + self.target_type, + self.text, + self.url, + ', '.join(map(lambda x: 'b64img' if x.startswith('base64') else x, self.pics)) + )