2021-02-27 14:03:46 +08:00

76 lines
2.3 KiB
Python

from datetime import datetime
import hashlib
import json
import re
from typing import Any, Optional
from bs4 import BeautifulSoup as bs
import httpx
from ..post import Post
from ..types import *
from .platform import Platform
class Wechat(Platform):
    """Platform adapter that follows WeChat official accounts through Sogou search.

    The account is looked up on ``weixin.sogou.com``; the matching search
    result block supplies both the account's display name and the title of
    its most recent article. Only the title is available from that page, so
    produced posts carry no body text, pictures, date, tags or category.
    """

    # Class-level configuration; presumably consumed by the Platform base
    # class -- confirm against platform.py.
    categories = {}
    enable_tag = False
    platform_name = 'wechat'

    @classmethod
    def _get_query_url(cls, target: Target):
        """Return the Sogou WeChat account-search URL for ``target``."""
        return 'https://weixin.sogou.com/weixin?type=1&s_from=input&query={}&ie=utf8&_sug_=n&_sug_type_='.format(target)

    @classmethod
    async def _get_target_soup(cls, target: Target) -> Optional[bs]:
        """Fetch the search page and return the result block matching ``target``.

        Returns:
            The first direct ``<li>`` child of the ``news-list2`` element that
            contains a text node exactly equal to ``target``, or ``None`` when
            the result list is missing from the page or no entry matches.
        """
        target_url = cls._get_query_url(target)
        async with httpx.AsyncClient() as client:
            res = await client.get(target_url)
        soup = bs(res.text, 'html.parser')
        result_list = soup.find(class_='news-list2')
        if result_list is None:
            # The page has no result list (e.g. no search results or an
            # unexpected page layout); report "not found" instead of raising
            # AttributeError on None.
            return None
        for block in result_list.find_all('li', recursive=False):
            if block.find(string=[target]):
                return block
        return None

    @classmethod
    async def get_account_name(cls, target: Target) -> Optional[str]:
        """Return the display name of the account ``target``, or ``None`` if not found."""
        block = await cls._get_target_soup(target)
        if block is None:
            return None
        return block.find('p', class_='tit').find('a').text

    async def get_sub_list(self, target: Target) -> list[RawPost]:
        """Return the account's latest article as a one-element list of raw posts.

        Returns:
            ``[post]`` where ``post`` has the keys ``title``, ``target``,
            ``page_url`` and ``name``; or an empty list when the account
            cannot be found on the search page or the result block has no
            recent-article entry.
        """
        block = await self._get_target_soup(target)
        if block is None:
            # Account not present in the search results: nothing to report.
            return []
        # '最近文章:' ("Recent article:") is the label of the <dt> that precedes
        # the link to the latest article inside the result block.
        last_post_dt = block.find('dt', string='最近文章:')
        if last_post_dt is None:
            return []
        post = {
            'title': last_post_dt.find_parent().find('a').text,
            'target': target,
            'page_url': self._get_query_url(target),
            'name': block.find('p', class_='tit').find('a').text,
        }
        return [post]

    def get_id(self, post: RawPost) -> Any:
        """Return the identifier of ``post``: its title."""
        return post['title']

    def get_date(self, post: RawPost):
        """Return ``None``; no publication date is extracted for WeChat posts."""
        return None

    def get_tags(self, post: RawPost):
        """Return ``None``; no tags are extracted for WeChat posts."""
        return None

    def get_category(self, post: RawPost):
        """Return ``None``; no category is extracted for WeChat posts."""
        return None

    async def parse(self, raw_post: RawPost) -> Post:
        """Build a ``Post`` from ``raw_post`` containing the title and a fixed notice.

        The text is the article title followed by a line telling the reader to
        open the official account for details; ``pics`` and ``url`` are empty.
        """
        # TODO get content of post
        return Post(
            target_type='wechat',
            text='{}\n详细内容请自行查看公众号'.format(raw_post['title']),
            target_name=raw_post['name'],
            pics=[],
            url='',
        )