diff --git a/docs/代理使用.md b/docs/代理使用.md
index cc9c729..528aeb2 100644
--- a/docs/代理使用.md
+++ b/docs/代理使用.md
@@ -1,5 +1,6 @@
 ## 代理 IP 使用说明
-> 还是得跟大家再次强调下,不要对一些自媒体平台进行大规模爬虫或其他非法行为,要踩缝纫机的哦🤣
+> 还是得跟大家再次强调下,不要对一些自媒体平台进行大规模爬虫或其他非法行为,要踩缝纫机的哦🤣
+> 另外如果要是用代理功能,请安装Redis并设置一个密码, 从下面的流程图讲解了redis在这个缓存功能中起到的作用
 
 ### 简易的流程图
 ![代理 IP 使用流程图](../static/images/代理IP%20流程图.drawio.png)
diff --git a/media_platform/bilibili/core.py b/media_platform/bilibili/core.py
index 7c3e571..730b464 100644
--- a/media_platform/bilibili/core.py
+++ b/media_platform/bilibili/core.py
@@ -225,7 +225,7 @@ class BilibiliCrawler(AbstractCrawler):
             "password": ip_proxy_info.password,
         }
         httpx_proxy = {
-            f"{ip_proxy_info.protocol}{ip_proxy_info.ip}": f"{ip_proxy_info.protocol}{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
+            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
         }
         return playwright_proxy, httpx_proxy
 
diff --git a/media_platform/douyin/core.py b/media_platform/douyin/core.py
index 2770bb9..4e1ee56 100644
--- a/media_platform/douyin/core.py
+++ b/media_platform/douyin/core.py
@@ -160,7 +160,7 @@ class DouYinCrawler(AbstractCrawler):
             "password": ip_proxy_info.password,
         }
         httpx_proxy = {
-            f"{ip_proxy_info.protocol}{ip_proxy_info.ip}": f"{ip_proxy_info.protocol}{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
+            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
         }
         return playwright_proxy, httpx_proxy
 
diff --git a/media_platform/kuaishou/core.py b/media_platform/kuaishou/core.py
index d5cc928..28585c3 100644
--- a/media_platform/kuaishou/core.py
+++ b/media_platform/kuaishou/core.py
@@ -191,7 +191,7 @@ class KuaishouCrawler(AbstractCrawler):
             "password": ip_proxy_info.password,
         }
         httpx_proxy = {
-            f"{ip_proxy_info.protocol}{ip_proxy_info.ip}": f"{ip_proxy_info.protocol}{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
+            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
         }
         return playwright_proxy, httpx_proxy
 
diff --git a/media_platform/weibo/core.py b/media_platform/weibo/core.py
index 0ebd89a..44d630f 100644
--- a/media_platform/weibo/core.py
+++ b/media_platform/weibo/core.py
@@ -243,7 +243,7 @@ class WeiboCrawler(AbstractCrawler):
             "password": ip_proxy_info.password,
         }
         httpx_proxy = {
-            f"{ip_proxy_info.protocol}{ip_proxy_info.ip}": f"{ip_proxy_info.protocol}{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
+            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
         }
         return playwright_proxy, httpx_proxy
 
diff --git a/media_platform/xhs/core.py b/media_platform/xhs/core.py
index a734a62..56dd010 100644
--- a/media_platform/xhs/core.py
+++ b/media_platform/xhs/core.py
@@ -185,7 +185,7 @@ class XiaoHongShuCrawler(AbstractCrawler):
             "password": ip_proxy_info.password,
         }
         httpx_proxy = {
-            f"{ip_proxy_info.protocol}{ip_proxy_info.ip}":f"{ip_proxy_info.protocol}{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
+            f"{ip_proxy_info.protocol}": f"http://{ip_proxy_info.user}:{ip_proxy_info.password}@{ip_proxy_info.ip}:{ip_proxy_info.port}"
         }
         return playwright_proxy, httpx_proxy
 
diff --git a/proxy/proxy_ip_pool.py b/proxy/proxy_ip_pool.py
index dd41e70..be5e79f 100644
--- a/proxy/proxy_ip_pool.py
+++ b/proxy/proxy_ip_pool.py
@@ -38,12 +38,10 @@ class ProxyIpPool:
 
         utils.logger.info(f"[ProxyIpPool.is_valid_proxy] testing {proxy.ip} is it valid ")
         try:
-            httpx_proxy = f"{proxy.protocol}{proxy.ip}:{proxy.port}"
-            proxy_auth = httpx.BasicAuth(proxy.user, proxy.password)
-            proxies = {
-                f"{proxy.protocol}{proxy.ip}": httpx_proxy
+            httpx_proxy = {
+                f"{proxy.protocol}": f"http://{proxy.user}:{proxy.password}@{proxy.ip}:{proxy.port}"
             }
-            async with httpx.AsyncClient(proxies=proxies, auth=proxy_auth) as client:
+            async with httpx.AsyncClient(proxies=httpx_proxy) as client:
                 response = await client.get(self.valid_ip_url)
                 if response.status_code == 200:
                     return True
diff --git a/requirements.txt b/requirements.txt
index d16c91b..46712e0 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -5,7 +5,7 @@ tenacity==8.2.2
 tornado==6.3.2
 PyExecJS==1.5.1
 opencv-python==4.7.0.72
-tortoise-orm
+tortoise-orm==0.20.0
 aiomysql==0.2.0
 aerich==0.7.2
 numpy~=1.24.4
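
For anyone reading the patch without the surrounding code: the recurring change in every `format_proxy_info` and in `ProxyIpPool.is_valid_proxy` is the shape of the `proxies` mapping handed to httpx. httpx keys that mapping by URL scheme (e.g. `"http://"`) and expects a full proxy URL with the credentials embedded, whereas the old code keyed it by `"<protocol><ip>"` and passed a separate `httpx.BasicAuth`, which httpx applies to the target request rather than to the proxy. The sketch below shows the corrected pattern in isolation; the function name, test URL, and credentials are illustrative placeholders, not values taken from the repository.

```python
import asyncio

import httpx


async def check_proxy(protocol: str, user: str, password: str, ip: str, port: int) -> bool:
    """Return True if the proxy answers a simple GET (minimal sketch of the patched pattern)."""
    # Key the mapping by scheme (e.g. "http://"), not by "<scheme><ip>",
    # and embed the credentials in the proxy URL instead of passing auth=.
    proxies = {
        f"{protocol}": f"http://{user}:{password}@{ip}:{port}"
    }
    async with httpx.AsyncClient(proxies=proxies) as client:
        # Any reachable URL works for the check; the project keeps its own
        # endpoint in ProxyIpPool.valid_ip_url.
        response = await client.get("https://httpbin.org/ip")
        return response.status_code == 200


if __name__ == "__main__":
    # Placeholder credentials/host for illustration only.
    print(asyncio.run(check_proxy("http://", "user", "pass", "127.0.0.1", 7890)))
```

A single `"all://"` key would also work in httpx when every request should go through the proxy; the patch instead keeps the per-scheme key carried by `IpInfoModel.protocol`, which the removed playwright/httpx lines imply already includes the `://` separator.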