2022-07-23 16:03:48 +08:00

67 lines
2.0 KiB
Python

import calendar
from typing import Any, Optional
import feedparser
from bs4 import BeautifulSoup as bs
from ..post import Post
from ..types import RawPost, Target
from ..utils import SchedulerConfig, http_client
from .platform import NewMessage
class RssSchedConf(SchedulerConfig, name="rss"):
schedule_type = "interval"
schedule_setting = {"seconds": 30}
class Rss(NewMessage):
categories = {}
enable_tag = False
platform_name = "rss"
name = "Rss"
enabled = True
is_common = True
scheduler_class = "rss"
has_target = True
async def get_target_name(self, target: Target) -> Optional[str]:
async with http_client() as client:
res = await client.get(target, timeout=10.0)
feed = feedparser.parse(res.text)
return feed["feed"]["title"]
def get_date(self, post: RawPost) -> int:
return calendar.timegm(post.published_parsed)
def get_id(self, post: RawPost) -> Any:
return post.id
async def get_sub_list(self, target: Target) -> list[RawPost]:
async with http_client() as client:
res = await client.get(target, timeout=10.0)
feed = feedparser.parse(res)
entries = feed.entries
for entry in entries:
entry["_target_name"] = feed.feed.title
return feed.entries
async def parse(self, raw_post: RawPost) -> Post:
text = raw_post.get("title", "") + "\n" if raw_post.get("title") else ""
soup = bs(raw_post.description, "html.parser")
text += soup.text.strip()
pics = list(map(lambda x: x.attrs["src"], soup("img")))
if raw_post.get("media_content"):
for media in raw_post["media_content"]:
if media.get("medium") == "image" and media.get("url"):
pics.append(media.get("url"))
return Post(
"rss",
text=text,
url=raw_post.link,
pics=pics,
target_name=raw_post["_target_name"],
)