🐛 download weibo pics in parse

This commit is contained in:
felinae98
2022-11-22 22:01:06 +08:00
parent 02c03bf80d
commit e249a8d685
5 changed files with 211 additions and 180 deletions
+28 -25
View File
@@ -41,16 +41,15 @@ class Weibo(NewMessage):
async def get_target_name(
cls, client: AsyncClient, target: Target
) -> Optional[str]:
async with http_client() as client:
param = {"containerid": "100505" + target}
res = await client.get(
"https://m.weibo.cn/api/container/getIndex", params=param
)
res_dict = json.loads(res.text)
if res_dict.get("ok") == 1:
return res_dict["data"]["userInfo"]["screen_name"]
else:
return None
param = {"containerid": "100505" + target}
res = await client.get(
"https://m.weibo.cn/api/container/getIndex", params=param
)
res_dict = json.loads(res.text)
if res_dict.get("ok") == 1:
return res_dict["data"]["userInfo"]["screen_name"]
else:
return None
@classmethod
async def parse_target(cls, target_text: str) -> Target:
@@ -63,16 +62,15 @@ class Weibo(NewMessage):
raise cls.ParseTargetException()
async def get_sub_list(self, target: Target) -> list[RawPost]:
async with http_client() as client:
params = {"containerid": "107603" + target}
res = await client.get(
"https://m.weibo.cn/api/container/getIndex?", params=params, timeout=4.0
)
res_data = json.loads(res.text)
if not res_data["ok"]:
return []
custom_filter: Callable[[RawPost], bool] = lambda d: d["card_type"] == 9
return list(filter(custom_filter, res_data["data"]["cards"]))
params = {"containerid": "107603" + target}
res = await self.client.get(
"https://m.weibo.cn/api/container/getIndex?", params=params, timeout=4.0
)
res_data = json.loads(res.text)
if not res_data["ok"]:
return []
custom_filter: Callable[[RawPost], bool] = lambda d: d["card_type"] == 9
return list(filter(custom_filter, res_data["data"]["cards"]))
def get_id(self, post: RawPost) -> Any:
return post["mblog"]["id"]
@@ -149,10 +147,9 @@ class Weibo(NewMessage):
retweeted = True
pic_num = info["retweeted_status"]["pic_num"] if retweeted else info["pic_num"]
if info["isLongText"] or pic_num > 9:
async with http_client() as client:
res = await client.get(
"https://m.weibo.cn/detail/{}".format(info["mid"]), headers=header
)
res = await self.client.get(
"https://m.weibo.cn/detail/{}".format(info["mid"]), headers=header
)
try:
match = re.search(r'"status": ([\s\S]+),\s+"call"', res.text)
assert match
@@ -171,12 +168,18 @@ class Weibo(NewMessage):
else info.get("pics", [])
)
pic_urls = [img["large"]["url"] for img in raw_pics_list]
pics = []
for pic_url in pic_urls:
async with http_client(headers={"referer": "https://weibo.com"}) as client:
res = await client.get(pic_url)
res.raise_for_status()
pics.append(res.content)
detail_url = "https://weibo.com/{}/{}".format(info["user"]["id"], info["bid"])
# return parsed_text, detail_url, pic_urls
return Post(
"weibo",
text=parsed_text,
url=detail_url,
pics=pic_urls,
pics=pics,
target_name=info["user"]["screen_name"],
)
+14 -2
View File
@@ -6,7 +6,19 @@ from ..plugin_config import plugin_config
http_args = {
"proxies": plugin_config.bison_proxy or None,
"headers": {"user-agent": plugin_config.bison_ua},
}
http_headers = {"user-agent": plugin_config.bison_ua}
http_client = functools.partial(httpx.AsyncClient, **http_args)
@functools.wraps(httpx.AsyncClient)
def http_client(*args, **kwargs):
if headers := kwargs.get("headers"):
new_headers = http_headers.copy()
new_headers.update(headers)
kwargs["headers"] = new_headers
else:
kwargs["headers"] = http_headers
return httpx.AsyncClient(*args, **kwargs)
http_client = functools.partial(http_client, **http_args)