add weibo text category, support super topic

This commit is contained in:
felinae98
2021-07-06 13:30:27 +08:00
parent 9f08e3cf16
commit 6b81e2e7c1
3 changed files with 28 additions and 2 deletions
@@ -17,6 +17,7 @@ class Weibo(NewMessage, TargetMixin):
1: '转发',
2: '视频',
3: '图文',
4: '文字',
}
enable_tag = True
platform_name = 'weibo'
@@ -61,21 +62,30 @@ class Weibo(NewMessage, TargetMixin):
"Return Tag list of given RawPost"
text = raw_post['mblog']['text']
soup = bs(text, 'html.parser')
return list(map(
res = list(map(
lambda x: x[1:-1],
filter(
lambda s: s[0] == '#' and s[-1] == '#',
map(lambda x:x.text, soup.find_all('span', class_='surl-text'))
)
))
super_topic_img = soup.find('img', src=re.compile(r'timeline_card_small_super_default'))
if super_topic_img:
try:
res.append(super_topic_img.parent.parent.find('span', class_='surl-text').text + '超话')
except:
logger.info('super_topic extract error: {}'.format(text))
return res
def get_category(self, raw_post: RawPost) -> Category:
    """Classify a raw Weibo post into one of the numeric categories.

    The checks are ordered and the first match wins:

    * 1 ('转发') -- ``mblog`` has a truthy ``retweeted_status``.
    * 2 ('视频') -- ``mblog`` has a truthy ``page_info`` whose ``type``
      is ``'video'``.
    * 3 ('图文') -- ``mblog`` has a truthy ``pics`` entry.
    * 4 ('文字') -- none of the above matched.

    Returns:
        ``Category`` wrapping the matching id.
    """
    mblog = raw_post['mblog']
    if mblog.get('retweeted_status'):
        return Category(1)
    # page_info may be absent or empty; only inspect its type when present.
    page_info = mblog.get('page_info')
    if page_info and page_info.get('type') == 'video':
        return Category(2)
    if mblog.get('pics'):
        return Category(3)
    # Fallback: no retweet, video or pictures were detected.
    return Category(4)
def _get_text(self, raw_text: str) -> str:
text = raw_text.replace('<br />', '\n')