mirror of
https://github.com/suyiiyii/nonebot-bison.git
synced 2025-06-06 03:46:10 +08:00
50 lines
1.6 KiB
Python
50 lines
1.6 KiB
Python
import calendar
|
|
from typing import Any, Optional
|
|
|
|
from bs4 import BeautifulSoup as bs
|
|
import feedparser
|
|
import httpx
|
|
|
|
from ..post import Post
|
|
from ..types import RawPost, Target
|
|
from .platform import NewMessage, TargetMixin
|
|
|
|
class Rss(NewMessage, TargetMixin):
    """RSS/Atom feed platform.

    Downloads a feed URL (the ``target``) with httpx, parses it with
    feedparser and turns each feed entry into a ``Post``.
    """

    # Platform configuration. These attributes are presumably read by the
    # NewMessage / TargetMixin base classes and the scheduler -- their exact
    # semantics are defined outside this file (TODO confirm there).
    categories = {}
    enable_tag = False
    platform_name = 'rss'
    name = "Rss"
    enabled = True
    is_common = True
    # NOTE(review): looks like an interval schedule firing every 30 seconds;
    # confirm against the scheduler that consumes these two values.
    schedule_type = 'interval'
    schedule_kw = {'seconds': 30}

    async def get_target_name(self, target: Target) -> Optional[str]:
        """Return the title of the feed located at ``target``.

        Args:
            target: URL of the feed; it is passed directly to ``client.get``.

        Returns:
            The feed title, or ``None`` when the parsed feed has no title.
        """
        async with httpx.AsyncClient() as client:
            res = await client.get(target, timeout=10.0)
            feed = feedparser.parse(res.text)
            # A feed without a <title> used to raise KeyError here; the
            # Optional[str] return annotation allows reporting it as None.
            return feed['feed'].get('title')

    def get_date(self, post: RawPost) -> int:
        """Return the entry's publication time as a UTC epoch timestamp.

        ``calendar.timegm`` interprets ``post.published_parsed`` as a UTC
        time tuple.
        """
        # NOTE(review): assumes every entry carries a published date;
        # attribute access fails on entries without one -- confirm how the
        # caller wants undated entries handled before changing this.
        return calendar.timegm(post.published_parsed)

    def get_id(self, post: RawPost) -> Any:
        """Return the identifier of a feed entry (its ``id`` attribute)."""
        return post.id

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Fetch the feed at ``target`` and return its entries.

        Every returned entry is tagged with a ``'_target_name'`` key holding
        the feed title, which ``parse`` later reads back.
        """
        async with httpx.AsyncClient() as client:
            res = await client.get(target, timeout=10.0)
            # NOTE(review): the response object itself (not ``res.text`` as in
            # get_target_name) is handed to feedparser here -- kept as-is;
            # confirm this difference is intended.
            feed = feedparser.parse(res)
            entries = feed.entries
            for entry in entries:
                entry['_target_name'] = feed.feed.title
            return feed.entries

    async def parse(self, raw_post: RawPost) -> Post:
        """Convert one feed entry into a ``Post``.

        The text is the entry title (when present) on its own line, followed
        by the description with its HTML markup stripped. Pictures are the
        ``src`` values of the ``<img>`` tags found in the description.

        Args:
            raw_post: An entry previously returned by ``get_sub_list`` (it
                must carry the ``'_target_name'`` key set there).
        """
        title = raw_post.get('title')
        text = title + '\n' if title else ''
        # Read the description defensively, like the title: entries without
        # one used to raise AttributeError and abort the whole parse.
        soup = bs(raw_post.get('description') or '', 'html.parser')
        text += soup.text.strip()
        # Skip <img> tags that have no src attribute instead of raising
        # KeyError on them.
        pics = [img.attrs['src'] for img in soup('img') if 'src' in img.attrs]
        return Post('rss', text=text, url=raw_post.link, pics=pics, target_name=raw_post['_target_name'])
|