🐛 Rss 不再删除格式化字符

This commit is contained in:
2024-08-19 14:40:16 +08:00
committed by felinae98
parent 7d80b44d2a
commit cf38500be7
2 changed files with 21 additions and 7 deletions
+3 -3
View File
@@ -9,7 +9,7 @@ from bs4 import BeautifulSoup as bs
from ..post import Post
from .platform import NewMessage
from ..types import Target, RawPost
-from ..utils import Site, text_fletten, text_similarity
+from ..utils import Site, text_similarity
class RssSite(Site):
@@ -32,7 +32,7 @@ class RssPost(Post):
for p in soup.find_all("p"):
p.insert_after("\n")
-return text_fletten(soup.get_text())
+return soup.get_text()
class Rss(NewMessage):
@@ -82,7 +82,7 @@ class Rss(NewMessage):
async def parse(self, raw_post: RawPost) -> Post:
title = raw_post.get("title", "")
soup = bs(raw_post.description, "html.parser")
-desc = soup.text.strip()
+desc = raw_post.description
title, desc = self._text_process(title, desc)
pics = [x.attrs["src"] for x in soup("img")]
if raw_post.get("media_content"):