add weibo text category, support super topic

This commit is contained in:
felinae98 2021-07-06 13:30:27 +08:00
parent 9f08e3cf16
commit 6b81e2e7c1
No known key found for this signature in database
GPG Key ID: 00C8B010587FF610
3 changed files with 28 additions and 2 deletions

View File

@ -17,6 +17,7 @@ class Weibo(NewMessage, TargetMixin):
1: '转发',
2: '视频',
3: '图文',
4: '文字',
}
enable_tag = True
platform_name = 'weibo'
@ -61,21 +62,30 @@ class Weibo(NewMessage, TargetMixin):
"Return Tag list of given RawPost"
text = raw_post['mblog']['text']
soup = bs(text, 'html.parser')
return list(map(
res = list(map(
lambda x: x[1:-1],
filter(
lambda s: s[0] == '#' and s[-1] == '#',
map(lambda x:x.text, soup.find_all('span', class_='surl-text'))
)
))
super_topic_img = soup.find('img', src=re.compile(r'timeline_card_small_super_default'))
if super_topic_img:
try:
res.append(super_topic_img.parent.parent.find('span', class_='surl-text').text + '超话')
except:
logger.info('super_topic extract error: {}'.format(text))
return res
def get_category(self, raw_post: RawPost) -> Category:
    """Classify a raw Weibo card into one of the platform's categories.

    The checks run in priority order, so a repost that also carries a
    video or pictures is still reported as a repost.

    Args:
        raw_post: Raw card dict; the post itself is read from its
            ``'mblog'`` key.

    Returns:
        ``Category(1)`` when ``retweeted_status`` is present and truthy
        (repost), ``Category(2)`` when ``page_info`` has type ``'video'``,
        ``Category(3)`` when ``pics`` is present and non-empty
        (pictures with text), and ``Category(4)`` otherwise (text only).

    Raises:
        KeyError: If ``raw_post`` has no ``'mblog'`` key.
    """
    mblog = raw_post['mblog']
    if mblog.get('retweeted_status'):
        return Category(1)
    page_info = mblog.get('page_info')
    if page_info and page_info.get('type') == 'video':
        return Category(2)
    if mblog.get('pics'):
        return Category(3)
    # Nothing but text is attached to the post.
    return Category(4)
def _get_text(self, raw_text: str) -> str:
text = raw_text.replace('<br />', '\n')

View File

@ -62,9 +62,12 @@ async def test_classification(weibo):
tuwen = mock_data['data']['cards'][1]
retweet = mock_data['data']['cards'][3]
video = mock_data['data']['cards'][0]
mock_data_ys = get_json('weibo_ys_list_0.json')
text = mock_data_ys['data']['cards'][2]
assert(weibo.get_category(retweet) == 1)
assert(weibo.get_category(video) == 2)
assert(weibo.get_category(tuwen) == 3)
assert(weibo.get_category(text) == 4)
@pytest.mark.asyncio
@respx.mock
@ -93,3 +96,15 @@ async def test_rsshub_compare(weibo):
for entry in feedres.entries[:5]:
# print(entry)
assert(entry.link in url_set)
# Raw post passed to get_tags by test_chaohua_tag. Its HTML "text" holds one
# '#...#' hashtag link and one super-topic link, i.e. an anchor whose icon
# image src contains 'timeline_card_small_super_default'.
test_post = {
"mblog": {
"text": "<a href=\"https://m.weibo.cn/search?containerid=231522type%3D1%26t%3D10%26q%3D%23%E5%88%9A%E5%87%BA%E7%94%9F%E7%9A%84%E5%B0%8F%E7%BE%8A%E9%A9%BC%E9%95%BF%E5%95%A5%E6%A0%B7%23&extparam=%23%E5%88%9A%E5%87%BA%E7%94%9F%E7%9A%84%E5%B0%8F%E7%BE%8A%E9%A9%BC%E9%95%BF%E5%95%A5%E6%A0%B7%23&luicode=10000011&lfid=1076036003966749\" data-hide=\"\"><span class=\"surl-text\">#刚出生的小羊驼长啥样#</span></a> <br />小羊驼三三来也<span class=\"url-icon\"><img alt=[好喜欢] src=\"https://h5.sinaimg.cn/m/emoticon/icon/lxh/lxh_haoxihuan-51860b62e6.png\" style=\"width:1em; height:1em;\" /></span><br /><a href=\"https://m.weibo.cn/p/index?extparam=%E5%B0%8F%E7%BE%8A%E9%A9%BC%E4%B8%89%E4%B8%89&containerid=1008085ae16d2046db677de1b8491d2b708597&luicode=10000011&lfid=1076036003966749\" data-hide=\"\"><span class='url-icon'><img style='width: 1rem;height: 1rem' src='https://n.sinaimg.cn/photo/5213b46e/20180926/timeline_card_small_super_default.png'></span><span class=\"surl-text\">小羊驼三三</span></a> ",
"bid": "KnssqeqKK"
}
}
def test_chaohua_tag(weibo):
    """get_tags returns both the '#...#' hashtag and the super-topic tag.

    The super-topic name is expected with the '超话' suffix appended.
    """
    extracted = weibo.get_tags(test_post)
    for expected_tag in ('刚出生的小羊驼长啥样', '小羊驼三三超话'):
        assert expected_tag in extracted

File diff suppressed because one or more lines are too long