mirror of
https://github.com/suyiiyii/nonebot-bison.git
synced 2026-05-09 10:17:56 +08:00
✨ 实现 Post.content 相关扩展协议 (#553)
* ✨ `post` 新增 `get_content()` 方法 * 🚨 make linter happy * 💄 auto fix by pre-commit hooks * 🐛 fix: 调整函数使用 * 💄 auto fix by pre-commit hooks * ✨ 转用函数处理文本 * 💄 auto fix by pre-commit hooks * 🔨 使用`Dict`存储`content_handlers` * 💄 auto fix by pre-commit hooks * 🎨 fix * :arts: 简化函数使用 * 🐛 移除`Theme`的过时参数 * 🗑️ 复用 `self.plain_content` * 💄 auto fix by pre-commit hooks * ✨ 注册式装饰器写法 * 💄 auto fix by pre-commit hooks * 🐛 fix * 💄 auto fix by pre-commit hooks * :feat: 通用纯文本处理函数 * 💄 auto fix by pre-commit hooks * :downgrade: 复用`==`处理标题 * 🎨 简化(?)写法 * ✅ 测试修复 * ♻️ via ContentSupport extensions * 🐛 fix test * 💄 auto fix by pre-commit hooks * 🐛 for clean text * 🐛 fix * 💄 auto fix by pre-commit hooks * fix: for xml * 💄 auto fix by pre-commit hooks * chore: art * 💄 auto fix by pre-commit hooks --------- Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com>
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
import re
|
||||
import html
|
||||
from typing import Any
|
||||
from functools import partial
|
||||
|
||||
@@ -10,6 +12,7 @@ from nonebot.compat import type_validate_python
|
||||
from ..post import Post
|
||||
from ..utils import Site
|
||||
from ..types import Target, RawPost, Category
|
||||
from ..post.protocol import HTMLContentSupport
|
||||
from .platform import NewMessage, StatusChange
|
||||
|
||||
|
||||
@@ -58,6 +61,39 @@ class ArknightsSite(Site):
|
||||
schedule_setting = {"seconds": 30}
|
||||
|
||||
|
||||
class ArknightsPost(Post, HTMLContentSupport):
    """Post whose ``content`` is an HTML fragment.

    The raw markup is exposed through ``get_html_content`` and a tag-stripped
    rendering through ``get_plain_content``.
    """

    def _cleantext(self, text: str, old_split="\n", new_split="\n") -> str:
        """Clean up text: drop all redundant spaces and line breaks.

        Args:
            text: Text to normalize.
            old_split: Separator used to cut ``text`` into lines.
            new_split: Separator used to join the surviving lines.

        Returns:
            Every line of ``text`` with surrounding whitespace removed, with
            empty and whitespace-only lines dropped, joined by ``new_split``.
        """
        stripped_lines = (line.strip() for line in text.strip().split(old_split))
        # Filter AFTER stripping: testing the raw line would let a line made
        # only of spaces through, leaving a blank line in the result.
        return new_split.join(line for line in stripped_lines if line)

    async def get_html_content(self) -> str:
        """Return the stored content unchanged (it is already HTML)."""
        return self.content

    async def get_plain_content(self) -> str:
        """Return the content with the known HTML wrappers removed.

        Centered "heading" paragraphs are rewritten as ``==text==``, links are
        replaced by their target URL, line breaks become newlines and image
        wrappers are removed. The result is finally normalized by
        ``_cleantext``.
        """
        content = html.unescape(self.content)  # decode HTML entities
        content = re.sub(
            r'\<p style="text-align:center;"\>(.*?)\<strong\>(.*?)\<span style=(.*?)\>(.*?)\<\/span\>(.*?)\<\/strong\>(.*?)<\/p\>',  # noqa: E501
            r"==\4==\n",
            content,
            flags=re.DOTALL,
        )  # "heading-style" <p>: keep only the span text, wrapped in ==
        content = re.sub(
            r'\<p style="text-align:(left|right);"?\>(.*?)\<\/p\>',
            r"\2\n",
            content,
            flags=re.DOTALL,
        )  # left/right aligned <p>
        content = re.sub(r"\<p\>(.*?)\</p\>", r"\1\n", content, flags=re.DOTALL)  # plain <p>
        content = re.sub(
            r'\<a href="(.*?)" target="_blank">(.*?)\<\/a\>', r"\1", content, flags=re.DOTALL
        )  # <a>: keep the URL, drop the link text
        # Accept <br>, <br/> and <br /> alike.
        content = re.sub(r"<br\s*/?>", "\n", content)
        # The two patterns below run after <br> has become a newline, so they
        # need DOTALL to still match a tag pair that encloses a line break.
        content = re.sub(r"\<strong\>(.*?)\</strong\>", r"\1", content, flags=re.DOTALL)  # <strong>
        content = re.sub(
            r'<span style="color:(#.*?)">(.*?)</span>', r"\2", content, flags=re.DOTALL
        )  # colored <span>
        content = re.sub(r'<div class="media-wrap image-wrap">(.*?)</div>', "", content)  # image wrapper
        return self._cleantext(content)
|
||||
|
||||
|
||||
class Arknights(NewMessage):
|
||||
categories = {1: "游戏公告"}
|
||||
platform_name = "arknights"
|
||||
@@ -108,7 +144,7 @@ class Arknights(NewMessage):
|
||||
# 只有一张图片
|
||||
title = title_escape(data.title)
|
||||
|
||||
return Post(
|
||||
return ArknightsPost(
|
||||
self,
|
||||
content=data.content,
|
||||
title=title,
|
||||
@@ -205,7 +241,7 @@ class MonsterSiren(NewMessage):
|
||||
text = f'{raw_post["title"]}\n{soup.text.strip()}'
|
||||
return Post(
|
||||
self,
|
||||
text,
|
||||
content=text,
|
||||
images=imgs,
|
||||
url=url,
|
||||
nickname="塞壬唱片新闻",
|
||||
@@ -246,7 +282,7 @@ class TerraHistoricusComic(NewMessage):
|
||||
url = f'https://terra-historicus.hypergryph.com/comic/{raw_post["comicCid"]}/episode/{raw_post["episodeCid"]}'
|
||||
return Post(
|
||||
self,
|
||||
raw_post["subtitle"],
|
||||
content=raw_post["subtitle"],
|
||||
title=f'{raw_post["title"]} - {raw_post["episodeShortTitle"]}',
|
||||
images=[raw_post["coverUrl"]],
|
||||
url=url,
|
||||
|
||||
@@ -465,7 +465,7 @@ class Bilibililive(StatusChange):
|
||||
target_name = f"{raw_post.uname} {raw_post.area_name}"
|
||||
return Post(
|
||||
self,
|
||||
"",
|
||||
content="",
|
||||
title=title,
|
||||
url=url,
|
||||
images=list(pic),
|
||||
@@ -550,7 +550,7 @@ class BilibiliBangumi(StatusChange):
|
||||
title = lastest_episode["share_copy"]
|
||||
return Post(
|
||||
self,
|
||||
content,
|
||||
content=content,
|
||||
title=title,
|
||||
url=url,
|
||||
images=list(pic),
|
||||
|
||||
@@ -44,4 +44,4 @@ class FF14(NewMessage):
|
||||
title = raw_post["Title"]
|
||||
text = raw_post["Summary"]
|
||||
url = raw_post["Author"]
|
||||
return Post(self, text, title=title, url=url, nickname="最终幻想XIV官方公告")
|
||||
return Post(self, content=text, title=title, url=url, nickname="最终幻想XIV官方公告")
|
||||
|
||||
@@ -69,7 +69,7 @@ class NcmArtist(NewMessage):
|
||||
target_name = raw_post["artist"]["name"]
|
||||
pics = [raw_post["picUrl"]]
|
||||
url = "https://music.163.com/#/album?id={}".format(raw_post["id"])
|
||||
return Post(self, text, url=url, images=pics, nickname=target_name)
|
||||
return Post(self, content=text, url=url, images=pics, nickname=target_name)
|
||||
|
||||
|
||||
class NcmRadio(NewMessage):
|
||||
@@ -130,4 +130,4 @@ class NcmRadio(NewMessage):
|
||||
target_name = raw_post["radio"]["name"]
|
||||
pics = [raw_post["coverUrl"]]
|
||||
url = "https://music.163.com/#/program/{}".format(raw_post["id"])
|
||||
return Post(self, text, url=url, images=pics, nickname=target_name)
|
||||
return Post(self, content=text, url=url, images=pics, nickname=target_name)
|
||||
|
||||
@@ -9,7 +9,7 @@ from bs4 import BeautifulSoup as bs
|
||||
from ..post import Post
|
||||
from .platform import NewMessage
|
||||
from ..types import Target, RawPost
|
||||
from ..utils import Site, text_similarity
|
||||
from ..utils import Site, text_fletten, text_similarity
|
||||
|
||||
|
||||
class RssSite(Site):
|
||||
@@ -18,6 +18,23 @@ class RssSite(Site):
|
||||
schedule_setting = {"seconds": 30}
|
||||
|
||||
|
||||
class RssPost(Post):
    """Post whose ``content`` is parsed as HTML to produce its plain-text form."""

    async def get_plain_content(self) -> str:
        """Return the content as text, with images and breaks made explicit.

        Each ``<img>`` becomes the literal ``[图片]``, each ``<br>`` becomes a
        newline, and a newline is inserted after every ``<p>`` before the text
        is extracted and passed through ``text_fletten``.
        """
        document = bs(self.content, "html.parser")

        image_tags = document.find_all("img")
        for image_tag in image_tags:
            image_tag.replace_with("[图片]")

        break_tags = document.find_all("br")
        for break_tag in break_tags:
            break_tag.replace_with("\n")

        paragraph_tags = document.find_all("p")
        for paragraph_tag in paragraph_tags:
            paragraph_tag.insert_after("\n")

        extracted_text = document.get_text()
        return text_fletten(extracted_text)
|
||||
|
||||
|
||||
class Rss(NewMessage):
|
||||
categories = {}
|
||||
enable_tag = False
|
||||
@@ -72,9 +89,9 @@ class Rss(NewMessage):
|
||||
for media in raw_post["media_content"]:
|
||||
if media.get("medium") == "image" and media.get("url"):
|
||||
pics.append(media.get("url"))
|
||||
return Post(
|
||||
return RssPost(
|
||||
self,
|
||||
desc,
|
||||
content=desc,
|
||||
title=title,
|
||||
url=raw_post.link,
|
||||
images=pics,
|
||||
|
||||
@@ -183,7 +183,13 @@ class Weibo(NewMessage):
|
||||
res.raise_for_status()
|
||||
pics.append(res.content)
|
||||
detail_url = f"https://weibo.com/{info['user']['id']}/{info['bid']}"
|
||||
return Post(self, parsed_text, url=detail_url, images=pics, nickname=info["user"]["screen_name"])
|
||||
return Post(
|
||||
self,
|
||||
content=parsed_text,
|
||||
url=detail_url,
|
||||
images=pics,
|
||||
nickname=info["user"]["screen_name"],
|
||||
)
|
||||
|
||||
async def parse(self, raw_post: RawPost) -> Post:
|
||||
info = raw_post["mblog"]
|
||||
|
||||
@@ -10,6 +10,7 @@ from nonebot_plugin_saa import MessageSegmentFactory
|
||||
from ..theme import theme_manager
|
||||
from .abstract_post import AbstractPost
|
||||
from ..plugin_config import plugin_config
|
||||
from .protocol import PlainContentSupport
|
||||
from ..theme.types import ThemeRenderError, ThemeRenderUnsupportError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -17,7 +18,7 @@ if TYPE_CHECKING:
|
||||
|
||||
|
||||
@dataclass
|
||||
class Post(AbstractPost):
|
||||
class Post(AbstractPost, PlainContentSupport):
|
||||
"""最通用的Post,理论上包含所有常用的数据
|
||||
|
||||
对于更特殊的需要,可以考虑另外实现一个Post
|
||||
@@ -62,6 +63,12 @@ class Post(AbstractPost):
|
||||
themes_by_priority.append("basic")
|
||||
return themes_by_priority
|
||||
|
||||
async def get_content(self):
    """Return this post's ``content`` attribute as stored."""
    stored_content = self.content
    return stored_content
|
||||
|
||||
async def get_plain_content(self):
    """Return this post's ``content`` attribute as stored, with no conversion."""
    stored_content = self.content
    return stored_content
|
||||
|
||||
async def generate(self) -> list[MessageSegmentFactory]:
|
||||
"""生成消息"""
|
||||
themes = self.get_priority_themes()
|
||||
@@ -95,12 +102,13 @@ class Post(AbstractPost):
|
||||
来源: <Platform {self.platform.platform_name}>
|
||||
"""
|
||||
post_format += "附加信息:\n"
|
||||
for field in fields(self):
|
||||
if field.name in ("content", "platform", "repost"):
|
||||
for cls_field in fields(self):
|
||||
if cls_field.name in ("content", "platform", "repost"):
|
||||
continue
|
||||
value = getattr(self, field.name)
|
||||
if value is not None:
|
||||
post_format += f"- {field.name}: {aRepr.repr(value)}\n"
|
||||
else:
|
||||
value = getattr(self, cls_field.name)
|
||||
if value is not None:
|
||||
post_format += f"- {cls_field.name}: {aRepr.repr(value)}\n"
|
||||
|
||||
if self.repost:
|
||||
post_format += "\n转发:\n"
|
||||
|
||||
@@ -0,0 +1,16 @@
|
||||
from typing import Protocol, runtime_checkable
|
||||
|
||||
|
||||
@runtime_checkable
class PlainContentSupport(Protocol):
    """Structural type for objects exposing an async ``get_plain_content``.

    Being ``runtime_checkable``, it can be used with ``isinstance``; that check
    only verifies the method exists, not its signature or return type.
    """

    async def get_plain_content(self) -> str:
        """Return the content as a ``str``; implementers supply the body."""
|
||||
|
||||
|
||||
@runtime_checkable
class HTMLContentSupport(Protocol):
    """Structural type for objects exposing an async ``get_html_content``.

    Being ``runtime_checkable``, it can be used with ``isinstance``; that check
    only verifies the method exists, not its signature or return type.
    """

    async def get_html_content(self) -> str:
        """Return the content as a ``str``; implementers supply the body."""
|
||||
|
||||
|
||||
@runtime_checkable
class MarkdownContentSupport(Protocol):
    """Structural type for objects exposing an async ``get_markdown_content``.

    Being ``runtime_checkable``, it can be used with ``isinstance``; that check
    only verifies the method exists, not its signature or return type.
    """

    async def get_markdown_content(self) -> str:
        """Return the content as a ``str``; implementers supply the body."""
|
||||
@@ -10,7 +10,7 @@ from nonebot_bison.theme.utils import web_embed_image
|
||||
from nonebot_bison.theme import Theme, ThemeRenderError, ThemeRenderUnsupportError
|
||||
|
||||
if TYPE_CHECKING:
|
||||
from nonebot_bison.post import Post
|
||||
from nonebot_bison.platform.arknights import ArknightsPost
|
||||
|
||||
|
||||
@dataclass
|
||||
@@ -32,7 +32,7 @@ class ArknightsTheme(Theme):
|
||||
template_path: Path = Path(__file__).parent / "templates"
|
||||
template_name: str = "announce.html.jinja"
|
||||
|
||||
async def render(self, post: "Post"):
|
||||
async def render(self, post: "ArknightsPost"):
|
||||
from nonebot_plugin_htmlrender import template_to_pic
|
||||
|
||||
if not post.title:
|
||||
@@ -49,10 +49,9 @@ class ArknightsTheme(Theme):
|
||||
raise ThemeRenderUnsupportError(
|
||||
f"图片类型错误, 期望 str | Path | bytes | BytesIO | None, 实际为 {type(banner)}"
|
||||
)
|
||||
|
||||
ark_data = ArkData(
|
||||
announce_title=text_fletten(post.title),
|
||||
content=post.content,
|
||||
content=await post.get_content(),
|
||||
banner_image_url=banner,
|
||||
)
|
||||
|
||||
|
||||
@@ -24,12 +24,15 @@ class BasicTheme(Theme):
|
||||
|
||||
text += f"{post.title}\n\n" if post.title else ""
|
||||
|
||||
text += post.content if len(post.content) < 500 else f"{post.content[:500]}..."
|
||||
content = await post.get_plain_content()
|
||||
text += content if len(content) < 500 else f"{content[:500]}..."
|
||||
|
||||
if rp := post.repost:
|
||||
text += f"\n--------------\n转发自 {rp.nickname or ''}:\n"
|
||||
text += f"{rp.title}\n\n" if rp.title else ""
|
||||
text += rp.content if len(rp.content) < 500 else f"{rp.content[:500]}..."
|
||||
rp_content = await rp.get_plain_content()
|
||||
|
||||
text += rp_content if len(rp_content) < 500 else f"{rp_content[:500]}..."
|
||||
|
||||
text += "\n--------------\n"
|
||||
|
||||
|
||||
@@ -95,7 +95,7 @@ class CeobeCanteenTheme(Theme):
|
||||
if post.images:
|
||||
images = await self.merge_pics(post.images, http_client)
|
||||
|
||||
content = CeoboContent(text=post.content)
|
||||
content = CeoboContent(text=await post.get_content())
|
||||
|
||||
retweet: CeoboRetweet | None = None
|
||||
if post.repost:
|
||||
@@ -106,7 +106,9 @@ class CeobeCanteenTheme(Theme):
|
||||
images.extend(repost_images)
|
||||
|
||||
repost_nickname = f"@{post.repost.nickname}:" if post.repost.nickname else ""
|
||||
retweet = CeoboRetweet(image=repost_head_pic, content=post.repost.content, author=repost_nickname)
|
||||
retweet = CeoboRetweet(
|
||||
image=repost_head_pic, content=await post.repost.get_content(), author=repost_nickname
|
||||
)
|
||||
|
||||
return (
|
||||
CeobeCard(
|
||||
|
||||
@@ -5,6 +5,7 @@ from typing import TYPE_CHECKING, Literal
|
||||
from nonebot_plugin_saa import Text, Image, MessageSegmentFactory
|
||||
|
||||
from nonebot_bison.theme import Theme, ThemeRenderError
|
||||
from nonebot_bison.post.protocol import HTMLContentSupport
|
||||
from nonebot_bison.utils import pic_merge, is_pics_mergable
|
||||
|
||||
if TYPE_CHECKING:
|
||||
@@ -30,16 +31,28 @@ class Ht2iTheme(Theme):
|
||||
raise ThemeRenderError(f"渲染文本失败: {e}")
|
||||
|
||||
async def render(self, post: "Post"):
|
||||
|
||||
md_text = ""
|
||||
|
||||
md_text += f"## {post.title}\n\n" if post.title else ""
|
||||
|
||||
md_text += post.content if len(post.content) < 500 else f"{post.content[:500]}..."
|
||||
if isinstance(post, HTMLContentSupport):
|
||||
content = await post.get_html_content()
|
||||
else:
|
||||
content = await post.get_content()
|
||||
md_text += content if len(content) < 500 else f"{content[:500]}..."
|
||||
md_text += "\n\n"
|
||||
if rp := post.repost:
|
||||
md_text += f"> 转发自 {f'**{rp.nickname}**' if rp.nickname else ''}: \n"
|
||||
md_text += f"> {rp.title} \n" if rp.title else ""
|
||||
md_text += "> \n> " + rp.content if len(rp.content) < 500 else f"{rp.content[:500]}..." + " \n"
|
||||
if isinstance(rp, HTMLContentSupport):
|
||||
rp_content = await rp.get_html_content()
|
||||
else:
|
||||
rp_content = await rp.get_content()
|
||||
|
||||
md_text += (
|
||||
"> \n> " + rp_content if len(rp_content) < 500 else f"{rp_content[:500]}..." + " \n" # noqa: E501
|
||||
) # noqa: E501
|
||||
md_text += "\n\n"
|
||||
|
||||
md_text += f"###### 来源: {post.platform.name} {post.nickname or ''}\n"
|
||||
|
||||
@@ -38,7 +38,6 @@ class Theme(ABC, BaseModel):
|
||||
raise ThemeRenderUnsupportError(f"Theme [{self.name}] does not support render {post} by support check")
|
||||
|
||||
await self.prepare()
|
||||
|
||||
return await self.render(post)
|
||||
|
||||
def check_htmlrender_plugin_enable(self):
|
||||
|
||||
Reference in New Issue
Block a user