0.2.0
This commit is contained in:
@@ -1,3 +1,5 @@
|
||||
# 0.2.0
|
||||
- 优化自动化代码 提高容错
|
||||
# 0.1.9
|
||||
- 新增禁跑时间窗口:每日 18:30~20:00 不运行脚本
|
||||
- 在循环入口添加休眠至 20:00 的逻辑,避免创建任务
|
||||
|
||||
@@ -1,4 +1,3 @@
|
||||
from tkinter import N
|
||||
import requests
|
||||
from loguru import logger
|
||||
import csv
|
||||
|
||||
@@ -1,7 +1,4 @@
|
||||
import os
|
||||
import time
|
||||
import aiohttp
|
||||
import asyncio
|
||||
import requests
|
||||
from loguru import logger
|
||||
from functools import wraps
|
||||
@@ -311,10 +308,11 @@ def main2():
|
||||
bit = BitBrowser()
|
||||
browser_id = '5ba9eb974c7c45e2bb086585c75f70e8'
|
||||
# 关闭浏览器
|
||||
res = bit.bit_browser_close(browser_id)
|
||||
print(res)
|
||||
# res = bit.bit_browser_close(browser_id)
|
||||
# res = bit.bit_browser_get()
|
||||
# print(res)
|
||||
|
||||
# if __name__ == '__main__':
|
||||
# main2()
|
||||
# main2()
|
||||
|
||||
bit_browser = BitBrowser()
|
||||
502
spider/main.py
502
spider/main.py
@@ -1,9 +1,6 @@
|
||||
from math import log
|
||||
import random
|
||||
from re import S
|
||||
import time
|
||||
from datetime import datetime
|
||||
from tkinter import N
|
||||
from DrissionPage import Chromium
|
||||
from loguru import logger
|
||||
from work import get_random_canada_info
|
||||
@@ -11,6 +8,9 @@ from mail_ import mail_
|
||||
from bit_browser import bit_browser
|
||||
from api import api
|
||||
from proxys import proxy_list
|
||||
import asyncio
|
||||
import threading
|
||||
from concurrent.futures import ThreadPoolExecutor, as_completed
|
||||
|
||||
|
||||
|
||||
@@ -523,60 +523,148 @@ class Auto:
|
||||
jc += 1
|
||||
|
||||
|
||||
# 取对应城市的代理
|
||||
def get_proxy(city: str):
    """Look up the hard-coded proxy entry for a supported city.

    Args:
        city: City name, compared by equality against "Calgary", "Edmonton",
            "Vancouver", "Halifax" and "Toronto".

    Returns:
        The matching colon-separated proxy entry split on ':' into a list of
        strings, or None when the city has no entry.
    """
    # NOTE(review): proxy credentials are embedded in source; consider moving
    # them into configuration.
    city_proxies = (
        ("Calgary", "us.novproxy.io:1000:ozua8623-region-CA-st-Alberta-city-Calgary:6wdcv4gq"),
        ('Edmonton', 'us.novproxy.io:1000:ozua8623-region-CA-st-Alberta-city-Edmonton:6wdcv4gq'),
        ('Vancouver', 'us.novproxy.io:1000:ozua8623-region-CA-st-British Columbia-city-Vancouver:6wdcv4gq'),
        ('Halifax', 'us.novproxy.io:1000:ozua8623-region-CA-st-Nova Scotia-city-Halifax:6wdcv4gq'),
        ('Toronto', 'us.novproxy.io:1000:ozua8623-region-CA-st-Ontario-city-Toronto:6wdcv4gq'),
    )
    for name, entry in city_proxies:
        if city == name:
            return entry.split(':')
    return None
|
||||
|
||||
|
||||
def get_random_proxy() -> list[str] | None:
|
||||
def parse_proxy(proxy: str) -> tuple[str, int, str, str] | None:
|
||||
"""
|
||||
随机选择一个代理配置(按指纹浏览器数量随机取 IP)
|
||||
解析代理字符串为四元组 `(host, port, user, pwd)`
|
||||
|
||||
参数:
|
||||
proxy: 形如 `host:port:user:pwd`
|
||||
|
||||
返回值:
|
||||
list[str] | None: 代理参数列表 `[host, port, user, pwd]`;无可用代理返回 None
|
||||
(host, port, user, pwd) 或 None(格式错误)
|
||||
"""
|
||||
proxy_list = [
|
||||
"us.novproxy.io:1000:zhiyu111-region-CA:zhiyu111",
|
||||
"us.novproxy.io:1000:zhiyu222-region-US:zhiyu222",
|
||||
"us.novproxy.io:1000:zhiyu333-region-CA:zhiyu333",
|
||||
"us.novproxy.io:1000:zhiyu444-region-US:zhiyu444",
|
||||
]
|
||||
try:
|
||||
return random.choice(proxy_list).split(':')
|
||||
host, port, user, pwd = proxy.split(":", 3)
|
||||
return host, int(port), user, pwd
|
||||
except Exception:
|
||||
logger.error(f"代理格式错误: {proxy}")
|
||||
return None
|
||||
|
||||
|
||||
def get_all_proxies() -> list[list[str]]:
|
||||
def create_fingerprint_browser(proxy: str) -> tuple[str, str] | None:
|
||||
"""
|
||||
返回固定代理列表(与提供的代理一一对应)
|
||||
创建指纹浏览器并打开窗口,返回 `(browser_id, debugger_http)`
|
||||
|
||||
参数:
|
||||
proxy: 代理字符串
|
||||
|
||||
返回值:
|
||||
list[list[str]]: 每个元素为 `[host, port, user, pwd]`
|
||||
(browser_id, http) 或 None(失败)
|
||||
"""
|
||||
proxy_list = [
|
||||
"us.novproxy.io:1000:zhiyu111-region-CA:zhiyu111",
|
||||
"us.novproxy.io:1000:zhiyu222-region-US:zhiyu222",
|
||||
"us.novproxy.io:1000:zhiyu333-region-CA:zhiyu333",
|
||||
"us.novproxy.io:1000:zhiyu444-region-US:zhiyu444",
|
||||
]
|
||||
return [p.split(":") for p in proxy_list]
|
||||
info = parse_proxy(proxy)
|
||||
if info is None:
|
||||
return None
|
||||
host, port, user, pwd = info
|
||||
try:
|
||||
browser_id = bit_browser.bit_browser_create(
|
||||
remark=f"{user}",
|
||||
proxy_type="socks5",
|
||||
host=host,
|
||||
port=str(port),
|
||||
proxy_user=user,
|
||||
proxy_pwd=pwd,
|
||||
)
|
||||
if not browser_id:
|
||||
return None
|
||||
logger.info(f"创建指纹浏览器成功: {browser_id}")
|
||||
time.sleep(0.1)
|
||||
http = bit_browser.bit_browser_open(browser_id)
|
||||
if not http:
|
||||
return None
|
||||
logger.info(f"打开指纹浏览器成功: {browser_id}")
|
||||
return browser_id, http
|
||||
except Exception as e:
|
||||
logger.error(f"创建指纹浏览器失败: {e}")
|
||||
return None
|
||||
|
||||
|
||||
def is_quiet_time() -> bool:
|
||||
def close_and_delete_browser(browser_id: str) -> None:
|
||||
"""
|
||||
判断当前是否处于禁跑时段(18:30~20:00)
|
||||
关闭并删除指定指纹浏览器
|
||||
|
||||
参数:
|
||||
browser_id: 指纹浏览器ID
|
||||
"""
|
||||
try:
|
||||
bit_browser.bit_browser_close(browser_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"关闭浏览器失败或已关闭: {browser_id} - {e}")
|
||||
time.sleep(0.1)
|
||||
try:
|
||||
bit_browser.bit_browser_delete(browser_id)
|
||||
except Exception as e:
|
||||
logger.warning(f"删除浏览器失败或已删除: {browser_id} - {e}")
|
||||
|
||||
|
||||
def run_task_with_proxy(proxy: str, stop_event: threading.Event) -> None:
    """Run one automation pass behind ``proxy`` and always clean up afterwards.

    Creates a fingerprint browser via ``create_fingerprint_browser``, drives
    the ``Auto`` flow (open page, wait for home, click Continue, fill the
    questionnaire) and finally closes and deletes the browser that was created.

    Args:
        proxy: Proxy string handed to ``create_fingerprint_browser``.
        stop_event: Checked between steps; once set, the remaining steps are
            skipped (cleanup still runs).
    """
    browser_id: str | None = None
    try:
        created = create_fingerprint_browser(proxy)
        if not created:
            return
        browser_id, http = created
        if stop_event.is_set():
            return

        auto = Auto(http=http)
        auto.open_url('https://veritaconnect.ca/canadianbreadsettlement/en-us')

        # Gate steps: each one runs only if no stop was requested, and a
        # falsy result aborts the pass.
        for step in (auto.wait_home, auto.click_continue):
            if stop_event.is_set() or not step():
                return

        if stop_event.is_set():
            return
        auto.fill_questionnaire()
    except Exception as e:
        logger.error(f"执行任务异常: {e}")
    finally:
        if browser_id:
            try:
                close_and_delete_browser(browser_id)
            except Exception as e:
                # Cleanup is best-effort, but a failure here leaks a browser
                # profile, so record it instead of discarding it silently.
                logger.warning(f"清理浏览器失败: {browser_id} - {e}")
|
||||
|
||||
|
||||
def proxy_loop(proxy: str, stop_event: threading.Event) -> None:
    """Keep running tasks for one proxy until ``stop_event`` is set.

    During the forbidden time window the loop cleans up all browsers and
    waits (interruptibly) for the number of seconds returned by
    ``seconds_until(20, 0)``; otherwise it runs one task and pauses 0.1 s
    before the next one.

    Args:
        proxy: Proxy string passed to ``run_task_with_proxy``.
        stop_event: Event that ends the loop when set.
    """
    while not stop_event.is_set():
        paused_for_window = False
        try:
            if is_forbidden_time():
                cleanup_all_browsers()
                # wait() returns True as soon as a stop is requested.
                if stop_event.wait(timeout=seconds_until(20, 0)):
                    return
                paused_for_window = True
            else:
                run_task_with_proxy(proxy, stop_event)
        except Exception as e:
            logger.error(f"代理循环异常: {proxy} - {e}")

        if paused_for_window:
            # The window wait already elapsed; re-check the loop condition.
            continue
        # Short interruptible pause between passes.
        if stop_event.wait(timeout=0.1):
            return
|
||||
|
||||
|
||||
def is_forbidden_time() -> bool:
|
||||
"""
|
||||
判断当前是否处于禁跑时段(每日 18:30 ~ 20:00,本地时间)
|
||||
|
||||
返回值:
|
||||
bool: True 表示处于禁跑时段
|
||||
"""
|
||||
now = datetime.now()
|
||||
start = now.replace(hour=18, minute=30, second=0, microsecond=0)
|
||||
@@ -584,260 +672,132 @@ def is_quiet_time() -> bool:
|
||||
return start <= now < end
|
||||
|
||||
|
||||
def sleep_until_quiet_end():
|
||||
def seconds_until(hour: int, minute: int) -> float:
|
||||
"""
|
||||
在禁跑时段内休眠至 20:00
|
||||
计算到今天指定时间点的剩余秒数
|
||||
|
||||
参数:
|
||||
hour: 目标小时(24小时制)
|
||||
minute: 目标分钟
|
||||
|
||||
返回值:
|
||||
float: 剩余秒数,若目标时间已过则为 0
|
||||
"""
|
||||
now = datetime.now()
|
||||
end = now.replace(hour=20, minute=0, second=0, microsecond=0)
|
||||
if now < end:
|
||||
seconds = (end - now).total_seconds()
|
||||
logger.info(f"当前处于禁跑时段,休眠至 20:00({int(seconds)} 秒)")
|
||||
time.sleep(seconds)
|
||||
target = now.replace(hour=hour, minute=minute, second=0, microsecond=0)
|
||||
if target <= now:
|
||||
return 0.0
|
||||
return (target - now).total_seconds()
|
||||
|
||||
|
||||
"""指纹浏览器操作"""
|
||||
# 创建指纹浏览器
|
||||
|
||||
|
||||
def create_fingerprint_browser(city: str | None = None):
|
||||
def count_fingerprint_browsers() -> int:
|
||||
"""
|
||||
创建指纹浏览器并执行一次流程(支持随机 IP 与指定城市)
|
||||
统计当前指纹浏览器数量
|
||||
|
||||
参数:
|
||||
city (str | None): 指定城市使用其对应代理;None 则使用随机代理并随机选择城市
|
||||
返回值:
|
||||
int: 当前总数量
|
||||
"""
|
||||
browser_id = None
|
||||
try:
|
||||
if is_quiet_time():
|
||||
logger.info("处于禁跑时段(18:30~20:00),跳过本次运行")
|
||||
return
|
||||
if city is not None:
|
||||
proxy = get_proxy(city)
|
||||
if proxy is None:
|
||||
logger.error(f"{city} 未配置对应代理,结束该线程")
|
||||
return
|
||||
remark = city
|
||||
else:
|
||||
proxy = get_random_proxy()
|
||||
if proxy is None:
|
||||
logger.error("未获取到随机代理,结束该线程")
|
||||
return
|
||||
remark = "random-ip"
|
||||
logger.info("准备创建指纹浏览器")
|
||||
browser_id = bit_browser.bit_browser_create(
|
||||
remark=remark,
|
||||
host=proxy[0],
|
||||
port=proxy[1],
|
||||
proxy_user=proxy[2],
|
||||
proxy_pwd=proxy[3],
|
||||
proxy_type='socks5'
|
||||
)
|
||||
logger.debug(browser_id)
|
||||
# 打开指纹浏览器
|
||||
http = bit_browser.bit_browser_open(browser_id)
|
||||
logger.debug(http)
|
||||
auto = Auto(http)
|
||||
auto.open_url(
|
||||
"https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
|
||||
bol = auto.wait_home()
|
||||
if not bol:
|
||||
logger.error("进入首页失败,结束该线程")
|
||||
return
|
||||
|
||||
bol = auto.click_continue()
|
||||
if not bol:
|
||||
logger.error("点击 Continue 失败,结束该线程")
|
||||
return
|
||||
auto.fill_questionnaire()
|
||||
# fill_city = city if city is not None else random.choice(['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto'])
|
||||
# auto.fill_questionnaire(fill_city)
|
||||
# time.sleep(5)
|
||||
finally:
|
||||
if browser_id:
|
||||
# 关闭指纹浏览器
|
||||
try:
|
||||
bit_browser.bit_browser_close(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"关闭浏览器异常: {e}")
|
||||
# 删除指纹浏览器
|
||||
try:
|
||||
bit_browser.bit_browser_delete(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"删除浏览器异常: {e}")
|
||||
res = bit_browser.bit_browser_get(0, 100)
|
||||
data = res.get("data", {}) if isinstance(res, dict) else {}
|
||||
total = data.get("totalNum")
|
||||
lst = data.get("list", [])
|
||||
if isinstance(total, int) and total >= 0:
|
||||
return total
|
||||
return len(lst)
|
||||
except Exception as e:
|
||||
logger.warning(f"统计指纹浏览器数量失败: {e}")
|
||||
return 0
|
||||
|
||||
|
||||
def run_city_forever(city: str):
|
||||
def cleanup_all_browsers() -> None:
|
||||
"""
|
||||
持续循环运行指定城市流程:完成一次即关闭并删除浏览器,然后重新创建继续运行
|
||||
|
||||
参数:
|
||||
city (str): 城市名称
|
||||
关闭并删除所有指纹浏览器
|
||||
"""
|
||||
while True:
|
||||
if is_quiet_time():
|
||||
sleep_until_quiet_end()
|
||||
continue
|
||||
try:
|
||||
create_fingerprint_browser(city)
|
||||
except Exception as e:
|
||||
logger.error(f"{city} 流程异常: {e}")
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def run_all_cities_concurrently(num: int):
|
||||
"""
|
||||
多线程并发运行城市流程(支持随机选择)
|
||||
|
||||
参数:
|
||||
num (int | None): 随机选择并启动的城市数量;None 表示全部
|
||||
"""
|
||||
import threading
|
||||
threads = []
|
||||
for i in range(num):
|
||||
t = threading.Thread(target=run_random_ips_forever,
|
||||
name=f"random-ip-thread-{i}")
|
||||
t.start()
|
||||
threads.append(t)
|
||||
logger.info(f"随机 IP 线程 {i} 已启动")
|
||||
for t in threads:
|
||||
t.join()
|
||||
logger.info("所有随机 IP 流程执行完成")
|
||||
|
||||
|
||||
def run_random_ips_forever():
|
||||
"""
|
||||
持续使用随机 IP 执行流程:每次完成后关闭并删除浏览器再重建
|
||||
"""
|
||||
while True:
|
||||
if is_quiet_time():
|
||||
sleep_until_quiet_end()
|
||||
continue
|
||||
try:
|
||||
create_fingerprint_browser(None)
|
||||
except Exception as e:
|
||||
logger.error(f"随机 IP 流程异常: {e}")
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def run_random_ips_concurrently(num: int):
|
||||
"""
|
||||
根据指纹浏览器数量并发运行流程(随机取 IP)
|
||||
|
||||
参数:
|
||||
num (int): 并发指纹浏览器数量(每个使用随机代理)
|
||||
"""
|
||||
import threading
|
||||
if num <= 0:
|
||||
logger.warning("num 不合法(<=0),不启动任何线程")
|
||||
return
|
||||
threads = []
|
||||
for i in range(num):
|
||||
t = threading.Thread(target=run_random_ips_forever,
|
||||
name=f"random-ip-thread-{i}")
|
||||
t.start()
|
||||
threads.append(t)
|
||||
logger.info(f"随机 IP 线程 {i} 已启动")
|
||||
for t in threads:
|
||||
t.join()
|
||||
logger.info("随机 IP 并发流程执行完成")
|
||||
|
||||
|
||||
def create_fingerprint_browser_with_proxy(proxy: list[str]):
|
||||
"""
|
||||
使用指定代理创建指纹浏览器并执行一次流程(一一对应)
|
||||
|
||||
参数:
|
||||
proxy (list[str]): `[host, port, user, pwd]`
|
||||
"""
|
||||
browser_id = None
|
||||
try:
|
||||
if is_quiet_time():
|
||||
logger.info("处于禁跑时段(18:30~20:00),跳过本次运行")
|
||||
return
|
||||
if not proxy or len(proxy) < 4:
|
||||
logger.error("代理参数不完整,结束该线程")
|
||||
return
|
||||
# 随机等待0.1秒
|
||||
time.sleep(random.uniform(0.1, 1.0))
|
||||
logger.info(f"使用代理 {proxy[2]} 创建浏览器")
|
||||
browser_id = bit_browser.bit_browser_create(
|
||||
remark=f"{proxy[2]}",
|
||||
host=proxy[0],
|
||||
port=proxy[1],
|
||||
proxy_user=proxy[2],
|
||||
proxy_pwd=proxy[3],
|
||||
proxy_type='socks5'
|
||||
)
|
||||
logger.debug(f"创建浏览器 {browser_id}")
|
||||
time.sleep(random.uniform(0.1, 1.0))
|
||||
http = bit_browser.bit_browser_open(browser_id)
|
||||
logger.debug(f"打开浏览器 {browser_id}")
|
||||
auto = Auto(http)
|
||||
auto.open_url(
|
||||
"https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
|
||||
bol = auto.wait_home()
|
||||
if not bol:
|
||||
logger.error("进入首页失败,结束该线程")
|
||||
return
|
||||
bol = auto.click_continue()
|
||||
if not bol:
|
||||
logger.error("点击 Continue 失败,结束该线程")
|
||||
return
|
||||
auto.fill_questionnaire()
|
||||
finally:
|
||||
if browser_id:
|
||||
try:
|
||||
bit_browser.bit_browser_close(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"关闭浏览器异常: {e}")
|
||||
try:
|
||||
bit_browser.bit_browser_delete(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"删除浏览器异常: {e}")
|
||||
res = bit_browser.bit_browser_get(0, 100)
|
||||
data = res.get("data", {}) if isinstance(res, dict) else {}
|
||||
lst = data.get("list", [])
|
||||
ids = [i.get("id") for i in lst if i.get("id")]
|
||||
for bid in ids:
|
||||
close_and_delete_browser(bid)
|
||||
except Exception as e:
|
||||
logger.warning(f"清理所有指纹浏览器失败: {e}")
|
||||
|
||||
|
||||
def run_proxies_forever(proxy: list[str]):
|
||||
def monitor_browsers_and_restart(limit: int, stop_event: threading.Event, restart_event: threading.Event) -> None:
|
||||
"""
|
||||
持续使用指定代理执行流程:完成后关闭并删除浏览器再重建
|
||||
每 30 秒检测指纹浏览器数量,超过 `limit` 则触发重启事件并清理所有浏览器
|
||||
|
||||
参数:
|
||||
proxy (list[str]): `[host, port, user, pwd]`
|
||||
limit: 允许的最大浏览器数量(通常为代理数量)
|
||||
restart_event: 触发重启的事件
|
||||
"""
|
||||
while not stop_event.is_set():
|
||||
time.sleep(30)
|
||||
count = count_fingerprint_browsers()
|
||||
if count > limit:
|
||||
logger.warning(f"指纹浏览器数量 {count} 超过限制 {limit},执行重启")
|
||||
restart_event.set()
|
||||
stop_event.set()
|
||||
cleanup_all_browsers()
|
||||
break
|
||||
|
||||
|
||||
def main():
|
||||
"""
|
||||
多线程并发管理:按代理数量并发创建指纹浏览器并执行任务;每 30 秒监控数量,超限则重启。
|
||||
"""
|
||||
proxies = list(proxy_list)
|
||||
while True:
|
||||
if is_quiet_time():
|
||||
sleep_until_quiet_end()
|
||||
stop_event = threading.Event()
|
||||
restart_event = threading.Event()
|
||||
|
||||
if is_forbidden_time():
|
||||
cleanup_all_browsers()
|
||||
secs = seconds_until(20, 0)
|
||||
logger.info(f"处于禁跑时段,休眠至20:00,剩余 {int(secs)} 秒")
|
||||
time.sleep(secs)
|
||||
continue
|
||||
try:
|
||||
create_fingerprint_browser_with_proxy(proxy)
|
||||
except Exception as e:
|
||||
logger.error(f"固定代理流程异常: {e}")
|
||||
time.sleep(2)
|
||||
|
||||
with ThreadPoolExecutor(max_workers=len(proxies)) as executor:
|
||||
futures_map = {executor.submit(proxy_loop, p, stop_event): p for p in proxies}
|
||||
|
||||
monitor_thread = threading.Thread(
|
||||
target=monitor_browsers_and_restart,
|
||||
args=(len(proxies), stop_event, restart_event),
|
||||
daemon=True,
|
||||
)
|
||||
monitor_thread.start()
|
||||
|
||||
while True:
|
||||
if restart_event.is_set():
|
||||
stop_event.set()
|
||||
try:
|
||||
executor.shutdown(wait=False)
|
||||
except Exception:
|
||||
pass
|
||||
break
|
||||
# 进入禁跑时段时,立即停止并清理浏览器
|
||||
if is_forbidden_time():
|
||||
logger.info("进入禁跑时段,停止当前批次并清理指纹浏览器")
|
||||
stop_event.set()
|
||||
try:
|
||||
executor.shutdown(wait=False)
|
||||
except Exception:
|
||||
pass
|
||||
cleanup_all_browsers()
|
||||
break
|
||||
time.sleep(0.2)
|
||||
|
||||
try:
|
||||
monitor_thread.join(timeout=5)
|
||||
except Exception:
|
||||
pass
|
||||
continue
|
||||
|
||||
|
||||
def run_all_proxies_concurrently():
|
||||
"""
|
||||
按固定代理列表一一创建并发浏览器
|
||||
"""
|
||||
import threading
|
||||
# proxies = get_all_proxies()
|
||||
proxies = [p.split(":") for p in proxy_list]
|
||||
if not proxies:
|
||||
logger.warning("未找到可用代理,结束执行")
|
||||
return
|
||||
threads = []
|
||||
for i, proxy in enumerate(proxies):
|
||||
t = threading.Thread(target=run_proxies_forever,
|
||||
args=(proxy,), name=f"proxy-thread-{i}")
|
||||
t.start()
|
||||
threads.append(t)
|
||||
logger.info(f"固定代理线程 {i} 已启动: {proxy[0]}:{proxy[1]} @ {proxy[2]}")
|
||||
for t in threads:
|
||||
t.join()
|
||||
logger.info("固定代理并发流程执行完成")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# auto = Auto()
|
||||
# auto.get_random_food('a')
|
||||
run_all_proxies_concurrently()
|
||||
main()
|
||||
|
||||
888
spider/main2.py
Normal file
888
spider/main2.py
Normal file
@@ -0,0 +1,888 @@
|
||||
from math import log
|
||||
import random
|
||||
from re import S
|
||||
import time
|
||||
from datetime import datetime
|
||||
from tkinter import N
|
||||
from DrissionPage import Chromium
|
||||
from loguru import logger
|
||||
from work import get_random_canada_info
|
||||
from mail_ import mail_
|
||||
from bit_browser import bit_browser
|
||||
from api import api
|
||||
from proxys import proxy_list
|
||||
import threading
|
||||
|
||||
|
||||
def _install_thread_excepthook():
    """Install a ``threading.excepthook`` that logs uncaught thread exceptions.

    ``threading.excepthook`` exists on Python 3.8+; on older interpreters
    this function does nothing. The installed hook logs the thread name, the
    exception type and the exception message through ``logger.error``.
    """
    if not hasattr(threading, "excepthook"):
        return

    def _hook(args):
        # The default hook ignores SystemExit silently; keep that behaviour.
        if args.exc_type is SystemExit:
            return
        # Per the threading docs, args.thread and args.exc_value can be None,
        # so neither may be dereferenced unconditionally.
        thread_name = getattr(args.thread, "name", None)
        logger.error(f"线程异常: name={thread_name} type={args.exc_type.__name__} msg={args.exc_value}")

    try:
        threading.excepthook = _hook
    except Exception as e:
        logger.warning(f"安装线程异常钩子失败: {e}")
|
||||
|
||||
|
||||
_install_thread_excepthook()
|
||||
|
||||
|
||||
|
||||
class Auto:
|
||||
def __init__(self, http: str | None = None):
    """Attach to a browser and remember its most recent tab.

    Args:
        http: Value forwarded unchanged to ``Chromium``.
            NOTE(review): presumably the debugger address returned by the
            fingerprint-browser API; confirm against the caller.
    """
    self.browser = Chromium(http)
    self.tab = self.browser.latest_tab
|
||||
|
||||
# cf打码
|
||||
def solve_cloudflare(self, is_ok: bool = False):
    """Try to get the Cloudflare Turnstile widget into its "Success!" state.

    Polls up to five times. Each round looks for the block page, then digs
    through the widget's shadow roots / iframe, waits while it is verifying,
    and clicks the checkbox if one is shown.

    Args:
        is_ok: When True, an exception while locating the widget is treated
            as "passed". The method calls itself once with ``is_ok=True``
            after the first such exception.

    Returns:
        True when "Success!" is seen (or on a second lookup exception),
        False when the block page is shown or five rounds pass without
        success.
    """
    # NOTE(review): indentation was lost in the reviewed view; the nesting
    # below is reconstructed from the statement order - confirm.
    tab = self.browser.latest_tab
    for _ in range(5):
        tab.wait(1)
        if tab.ele('t:h1@text()=Sorry, you have been blocked', timeout=1):
            logger.error("Cloudflare验证失败")
            return False

        try:
            outer_root = tab.ele(
                'x://*[@name="cf-turnstile-response"]').parent().shadow_root
            frame = outer_root.get_frame(1)
            widget = None
            if frame:
                logger.debug("找到Cloudflare iframe")
                widget = frame.ele('x:/html/body').shadow_root
            if widget:
                logger.debug("找到Cloudflare iframe body shadow root")
                # Give an in-progress verification time to finish.
                if widget.ele('x://span[text()="Verifying..."]', timeout=1.5):
                    tab.wait(3)
                if widget.ele('x://span[text()="Success!"]', timeout=1.5):
                    logger.debug("Cloudflare验证成功")
                    return True
                box = widget.ele('x://input[@type="checkbox"]', timeout=1.5)
                if box:
                    box.click()
                    logger.debug("点击Cloudflare复选框")
                    tab.wait(3)
                    # The next round re-reads the widget state.
                    logger.debug("重新获取状态")
        except Exception as e:
            if is_ok:
                logger.debug(f"cloudflare处理通过: {e}")
                return True
            # First lookup failure: retry once, tolerating a second failure.
            return self.solve_cloudflare(is_ok=True)
        tab.wait(1)
    return False
|
||||
|
||||
# 打开URL
|
||||
def open_url(self, url: str) -> None:
    """Navigate the tab held in ``self.tab`` to ``url``."""
    self.tab.get(url)
|
||||
|
||||
# 等待进入首页
|
||||
def wait_home(self):
    """Wait until the tab shows the settlement home page.

    Each round passes the Cloudflare check, bails out on the block page,
    refreshes on known network error pages and then compares the tab URL
    with the home URL.

    Returns:
        True once the tab URL equals the home URL; False when the block page
        is shown or all rounds are used up.
    """
    max_rounds = 4  # the original counter check (`jc > 3`) allowed 4 rounds
    home_url = 'https://veritaconnect.ca/canadianbreadsettlement/en-us'
    network_errors = (
        'ERR_TIMED_OUT',
        'ERR_SSL_PROTOCOL_ERROR',
        'ERR_SOCKS_CONNECTION_FAILED',
    )

    logger.debug("等待进入首页")
    for _ in range(max_rounds):
        self.tab.wait(1)
        # Cloudflare must pass before the page content is meaningful.
        if not self.solve_cloudflare():
            logger.debug("Cloudflare验证失败.")
            self.tab.refresh()
            self.tab.wait(1.5)
            continue
        logger.debug("Cloudflare验证成功.")
        self.tab.wait(1.5)

        if self.tab.ele('t:h1@text()=Sorry, you have been blocked', timeout=1):
            logger.debug("ip被ban秒")
            return False

        # Known network error pages: refresh and carry on.
        for code in network_errors:
            if self.tab.ele(f't:div@text():{code}', timeout=1):
                logger.debug("刷新网页")
                self.tab.refresh()
                self.tab.wait(1.5)

        current_url = self.tab.url
        logger.debug(f"当前URL: {current_url}")
        if current_url == home_url:
            logger.debug("成功进入首页")
            return True

    # The message now reports the real round count (it used to say 5).
    logger.error(f"等待进入首页已尝试{max_rounds}次,未成功")
    return False
|
||||
|
||||
# 点击continue按钮
|
||||
def click_continue(self, bl: bool = False):
    """Click the Continue button until the claim form page is reached.

    Args:
        bl: True when this call is already the retry after a "There was a
            problem" page; a second such page then ends the attempt.

    Returns:
        True when the tab URL contains the claim-form URL; False on rate
        limiting, on a repeated problem page, or when the retry counter is
        exhausted.
    """
    # NOTE(review): indentation was lost in the reviewed view; the extent of
    # the `if button:` body is reconstructed - confirm.
    counter_limit = 3
    form_url = 'https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm'
    network_errors = (
        'ERR_TIMED_OUT',
        'ERR_SSL_PROTOCOL_ERROR',
        'ERR_SOCKS_CONNECTION_FAILED',
    )

    logger.debug("点击Continue按钮")
    jc = 0
    while jc <= counter_limit:
        try:
            button = self.tab.ele('t:button@text():Continue', timeout=1)
            if button:
                jc += 1
                # Scroll to the bottom of the page.
                self.tab.scroll.to_bottom()
                self.tab.wait(1)
                # Cloudflare must pass before the click is attempted.
                if not self.solve_cloudflare():
                    logger.debug("Cloudflare验证失败..")
                    self.tab.refresh()
                    self.tab.wait(1.5)
                    continue
                logger.debug("Cloudflare验证成功..")
                self.tab.wait(3)
                button.click()
                logger.debug("点击Continue按钮成功")
                self.tab.wait(1.5)

                # Known network error pages: refresh and carry on.
                for code in network_errors:
                    if self.tab.ele(f't:div@text():{code}', timeout=1):
                        logger.debug("刷新网页")
                        self.tab.refresh()
                        self.tab.wait(1.5)

                if self.tab.ele('t:h1@text()=Sorry, you have been blocked', timeout=1):
                    logger.debug("ip被ban秒")
                    self.tab.refresh()
                    self.tab.wait(1.5)

                if self.tab.ele('t:h2@text()=You are being rate limited', timeout=1):
                    logger.debug("被限流, 退出")
                    return False

                if self.tab.ele('t:li@text():There was a problem, please try again.', timeout=1):
                    if bl:
                        logger.debug("多次异常界面, 结束继续点击")
                        return False
                    logger.debug("异常界面")
                    self.tab.wait(1)
                    # One nested retry; `bl=True` stops further recursion.
                    return self.click_continue(bl=True)

            current_url = self.tab.url
            logger.debug(f"当前URL: {current_url}")
            if form_url in current_url:
                logger.debug("成功进入问卷界面")
                return True
            jc += 1
        except Exception as e:
            logger.error(f"点击Continue按钮异常: {e}")
            # Count failed rounds too; otherwise a persistent exception
            # would keep this loop running forever.
            jc += 1
            self.tab.wait(1)

    # The message now reports the real limit (it used to say 5).
    logger.error(f"点击Continue按钮重试计数超过{counter_limit},未成功")
    return False
|
||||
|
||||
# 随机取城市
|
||||
def get_random_city(self, province: str | None = None):
|
||||
cities = {
|
||||
"Alberta": ["Calgary", "Edmonton"],
|
||||
"British Columbia": ["Vancouver"],
|
||||
# "Manitoba": ["Winnipeg", "Rochester"],
|
||||
# "New Brunswick": ["Fredericton", "Moncton"],
|
||||
# "Newfoundland and Labrador": ["St. John's", "Halifax"],
|
||||
"Nova Scotia": ["Halifax"],
|
||||
"Ontario": ["Toronto"],
|
||||
# "Prince Edward Island": ["Charlottetown", "St. John's"],
|
||||
# "Quebec": ["Quebec City", "Montreal"],
|
||||
# "Saskatchewan": ["Saskatoon", "Regina"],
|
||||
}
|
||||
if province is None:
|
||||
province = random.choice(list(cities.keys()))
|
||||
return province, random.choice(cities.get(province, []))
|
||||
|
||||
def get_province_by_city(self) -> tuple[str, str]:
    """Return a random ``(city, province)`` pair from the built-in table.

    Despite its name the method takes no city argument: it draws one entry
    uniformly at random from the city-to-province table below.

    Returns:
        A ``(city, province)`` tuple.
    """
    cities_by_province = {
        "Alberta": ("Calgary", "Edmonton"),
        "British Columbia": ("Vancouver",),
        "Nova Scotia": ("Halifax",),
        "Ontario": (
            "Toronto", "Ottawa", "Mississauga", "Brampton", "Hamilton",
            "Kitchener", "London", "Markham", "Vaughan", "Windsor",
            "Oshawa", "Brantford", "Barrie", "Sudbury", "Kingston",
            "Guelph", "Cambridge", "Sarnia", "Peterborough", "Waterloo",
            "Belleville", "Brockville", "Burlington", "Cornwall",
            "Kawartha Lakes", "North Bay", "Orillia", "Pickering",
            "Sault Ste. Marie", "Stratford", "Durham", "Norfolk County",
            "Prince Edward County", "Quinte West", "St. Catharines",
            "Welland", "Thorold", "Niagara Falls", "Pelham", "Port Colborne",
        ),
    }
    # Flatten to (city, province) pairs, in the same order as the original
    # flat mapping.
    pairs = [
        (city, province)
        for province, names in cities_by_province.items()
        for city in names
    ]
    return random.choice(pairs)
|
||||
|
||||
# 随机实物
|
||||
|
||||
def get_random_food(self, city: str, shop: str) -> str:
    """Compose a free-text purchase description.

    Picks one or two product categories at random, then one to three
    products from each, and gives every product a random quantity of 1-3.

    Args:
        city: Text placed after the shop at the start of the sentence.
        shop: Text placed first in the sentence.

    Returns:
        A string of the form
        ``"<shop>, <city> buy: <product> * <qty>, <product> * <qty>."``.
    """
    categories = [
        [
            'Wonder Bread White',
            'Villaggio White Bread',
            'No Name Sliced White Bread',
            "President's Choice White Sliced Bread",
        ],
        [
            "Ben's Original Whole Wheat Bread",
            "POM Whole Wheat Bread",
            "Silver Hills Bakery Whole Wheat Sliced Bread",
            "Country Harvest Whole Wheat Bread",
        ],
        [
            "Wonder Bread Hot Dog Buns",
            "Villaggio Hamburger Buns",
            "Dempster's Dinner Rolls",
            "No Frills Hot Dog Buns",
        ],
        [
            "Stonemill Bakehouse Bagels",
            "Wonder Bagels",
            "Montreal Bagels (pre-packaged, e.g., St. Lawrence brand)",
            "President's Choice Bagels",
        ],
        [
            "Silver Hills Multi-Grain Sliced Bread",
            "POM Multi-Grain Bread",
            "Country Harvest Multi-Grain Loaf",
        ],
        [
            "President's Choice French Stick",
            "Dempster's Italian Style Bread",
            "Wonder Italian Bread",
            "Villaggio Country Style Loaf",
        ],
    ]

    # One or two distinct categories.
    chosen_categories = random.sample(categories, k=random.randint(1, 2))

    # At least one and at most three products per chosen category.
    selected_products: list[str] = []
    for cat in chosen_categories:
        pick_count = random.randint(1, min(3, len(cat)))
        selected_products.extend(random.sample(cat, k=pick_count))
    logger.debug(f"随机选择的产品: {selected_products}")

    purchases = ', '.join(
        f'{p} * {random.randint(1, 3)}' for p in selected_products
    )
    text = f'{shop}, {city} buy: {purchases}.'
    logger.debug(f'随机选择的产品文本: {text}')
    return text
|
||||
|
||||
# 填写问卷
|
||||
def fill_questionnaire(self):
    """Fill in the claim form with generated data and submit it.

    A random ``(city, province)`` pair comes from ``get_province_by_city``;
    personal data comes from ``get_random_canada_info`` (retried until the
    postcode is longer than five characters, at most four times), plus a
    random mailbox, a random shop and a generated purchase text.

    Returns:
        The result of ``submit_file`` on the normal path; False when no
        usable postcode was generated; None when the province, shop or
        street is missing, or when any exception is caught and logged.
    """
    try:
        city, province = self.get_province_by_city()
        if province is None:
            logger.error("未找到城市对应省份")
            return

        # Up to four attempts to get a record with a plausible postcode.
        for _ in range(4):
            info = get_random_canada_info(province, city)
            if len(info.get('postcode')) > 5:
                break
        else:
            return False

        first_name = info["firstname"]
        last_name = info["lastname"]
        birthday = info["birthday"]
        current_address = info["address_str"]
        # City and province stay as drawn above; only the postcode is taken
        # from the generated record.
        postal_code = info["postcode"]
        email = mail_.email_create_random()
        phone = info["phone"]

        shop = api.get_random_shop()
        if shop is None:
            return None
        street = shop.get('street')
        if street is None:
            return None
        text = self.get_random_food(shop.get('city'), street)
        # Household size.
        person_count = str(random.randint(3, 5))

        def type_into(selector, label, value):
            # Log, set the input's value, then pause briefly.
            logger.debug(f"填写{label}: {value}")
            self.tab.ele(selector).set.value(value)
            self.tab.wait(0.1)

        def choose_option(select_id, label, value):
            # Log, click the <option> whose text equals `value`, then pause.
            logger.debug(f"填写{label}: {value}")
            self.tab.ele(
                f't:select@id={select_id}').ele(f't:option@text()={value}').click()
            self.tab.wait(0.1)

        def click_after_log(message, pick_element):
            # Log, click the element returned by `pick_element`, then pause.
            logger.debug(message)
            pick_element().click()
            self.tab.wait(0.1)

        logger.debug("填写问卷")
        self.tab.wait(0.1)

        type_into('t:input@id=FirstName', 'first_name', first_name)
        type_into('t:input@id=LastName', 'last_name', last_name)
        type_into('t:input@id=DateOfBirth', 'birthday', birthday)
        type_into('t:input@id=AddressLine1', 'current_address', current_address)
        type_into('t:input@id=City', 'city', city)
        choose_option('CanProv', 'province', province)
        type_into('t:input@id=CanPostal', 'postal_code', postal_code)
        choose_option('NumberOfAdults', 'NumberOfAdults', person_count)
        # Second radio of the "different address" pair.
        click_after_log(
            "选择地址没变",
            lambda: self.tab.eles('t:input@id=IsDifferentAddress')[1])
        type_into('t:input@id=EmailAddress', 'email', email)
        type_into('t:input@id=ConfirmEmailAddress', 'ConfirmEmailAddress', email)
        type_into('t:input@id=PhoneNumber', 'phone', phone)
        click_after_log(
            "选择同意条款",
            lambda: self.tab.ele('t:input@id=IVerify'))
        # Second radio of the "already compensated" pair.
        click_after_log(
            "选择没有申请过",
            lambda: self.tab.eles('t:input@id=IsCompensated')[1])
        type_into('t:textarea@id=MetaAnswerA', 'text', text)
        click_after_log(
            "勾选同意我的名字",
            lambda: self.tab.ele('t:input@id=IDeclare'))
        type_into('t:input@id=PrintName', 'PrintName', last_name + ' ' + first_name)

        return self.submit_file(
            first_name, last_name, birthday, current_address, city,
            phone, postal_code, province, email, text)
    except Exception as e:
        logger.error(f"填写问卷失败: {e}")
|
||||
|
||||
# 提交问卷
|
||||
def submit_file(self, first_name: str, last_name: str, birthday: str, current_address: str, city: str, phone: str, postal_code: str, province: str, email: str, text: str):
|
||||
jc = 0
|
||||
while True:
|
||||
if jc >= 3:
|
||||
logger.error("提交问卷失败")
|
||||
return False
|
||||
res = self.tab.ele(
|
||||
't:h2@text()=CLAIM SUBMISSION CONFIRMATION', timeout=3)
|
||||
if res:
|
||||
logger.info("提交问卷成功")
|
||||
res = self.tab.ele('@text():Your claim number: ')
|
||||
if res:
|
||||
logger.info(f"反馈地址: {res.text}")
|
||||
text =f"{text}----{res.text}"
|
||||
status = True
|
||||
|
||||
else:
|
||||
status=False
|
||||
|
||||
api.create_info(
|
||||
first_name=first_name,
|
||||
last_name=last_name,
|
||||
birthday=birthday,
|
||||
current_address=current_address,
|
||||
city=city,
|
||||
phone=phone,
|
||||
postal_code=postal_code,
|
||||
province=province,
|
||||
email=email,
|
||||
text=text,
|
||||
status=status
|
||||
)
|
||||
return True
|
||||
|
||||
bol = self.tab.ele(
|
||||
't:div@text():ERR_TIMED_OUT', timeout=1)
|
||||
if bol:
|
||||
logger.debug("刷新网页")
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
bol = self.tab.ele(
|
||||
't:div@text():ERR_SSL_PROTOCOL_ERROR', timeout=1)
|
||||
if bol:
|
||||
logger.debug("刷新网页")
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
bol = self.tab.ele(
|
||||
't:div@text():ERR_SOCKS_CONNECTION_FAILED', timeout=1)
|
||||
if bol:
|
||||
logger.debug("刷新网页")
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
bol = self.solve_cloudflare()
|
||||
if not bol:
|
||||
logger.debug("Cloudflare验证失败.")
|
||||
self.tab.wait(1)
|
||||
else:
|
||||
logger.debug("Cloudflare验证成功.")
|
||||
logger.debug(f"点击Submit按钮")
|
||||
self.tab.ele('t:button@text():Submit').click()
|
||||
self.tab.wait(3)
|
||||
jc += 1
|
||||
|
||||
|
||||
# 取对应城市的代理
|
||||
def get_proxy(city: str):
|
||||
if city == "Calgary":
|
||||
return "us.novproxy.io:1000:ozua8623-region-CA-st-Alberta-city-Calgary:6wdcv4gq".split(':')
|
||||
elif city == 'Edmonton':
|
||||
return 'us.novproxy.io:1000:ozua8623-region-CA-st-Alberta-city-Edmonton:6wdcv4gq'.split(':')
|
||||
elif city == 'Vancouver':
|
||||
return 'us.novproxy.io:1000:ozua8623-region-CA-st-British Columbia-city-Vancouver:6wdcv4gq'.split(':')
|
||||
elif city == 'Halifax':
|
||||
return 'us.novproxy.io:1000:ozua8623-region-CA-st-Nova Scotia-city-Halifax:6wdcv4gq'.split(':')
|
||||
elif city == 'Toronto':
|
||||
return 'us.novproxy.io:1000:ozua8623-region-CA-st-Ontario-city-Toronto:6wdcv4gq'.split(':')
|
||||
else:
|
||||
return None
|
||||
|
||||
|
||||
def get_random_proxy() -> list[str] | None:
|
||||
"""
|
||||
随机选择一个代理配置(按指纹浏览器数量随机取 IP)
|
||||
|
||||
返回值:
|
||||
list[str] | None: 代理参数列表 `[host, port, user, pwd]`;无可用代理返回 None
|
||||
"""
|
||||
proxy_list = [
|
||||
"us.novproxy.io:1000:zhiyu111-region-CA:zhiyu111",
|
||||
"us.novproxy.io:1000:zhiyu222-region-US:zhiyu222",
|
||||
"us.novproxy.io:1000:zhiyu333-region-CA:zhiyu333",
|
||||
"us.novproxy.io:1000:zhiyu444-region-US:zhiyu444",
|
||||
]
|
||||
try:
|
||||
return random.choice(proxy_list).split(':')
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
|
||||
def get_all_proxies() -> list[list[str]]:
|
||||
"""
|
||||
返回固定代理列表(与提供的代理一一对应)
|
||||
|
||||
返回值:
|
||||
list[list[str]]: 每个元素为 `[host, port, user, pwd]`
|
||||
"""
|
||||
proxy_list = [
|
||||
"us.novproxy.io:1000:zhiyu111-region-CA:zhiyu111",
|
||||
"us.novproxy.io:1000:zhiyu222-region-US:zhiyu222",
|
||||
"us.novproxy.io:1000:zhiyu333-region-CA:zhiyu333",
|
||||
"us.novproxy.io:1000:zhiyu444-region-US:zhiyu444",
|
||||
]
|
||||
return [p.split(":") for p in proxy_list]
|
||||
|
||||
|
||||
def is_quiet_time() -> bool:
|
||||
"""
|
||||
判断当前是否处于禁跑时段(18:30~20:00)
|
||||
"""
|
||||
now = datetime.now()
|
||||
start = now.replace(hour=18, minute=30, second=0, microsecond=0)
|
||||
end = now.replace(hour=20, minute=0, second=0, microsecond=0)
|
||||
return start <= now < end
|
||||
|
||||
|
||||
def sleep_until_quiet_end():
|
||||
"""
|
||||
在禁跑时段内休眠至 20:00
|
||||
"""
|
||||
now = datetime.now()
|
||||
end = now.replace(hour=20, minute=0, second=0, microsecond=0)
|
||||
if now < end:
|
||||
seconds = (end - now).total_seconds()
|
||||
logger.info(f"当前处于禁跑时段,休眠至 20:00({int(seconds)} 秒)")
|
||||
time.sleep(seconds)
|
||||
|
||||
|
||||
"""指纹浏览器操作"""
|
||||
# 创建指纹浏览器
|
||||
|
||||
|
||||
def create_fingerprint_browser(city: str | None = None):
|
||||
"""
|
||||
创建指纹浏览器并执行一次流程(支持随机 IP 与指定城市)
|
||||
|
||||
参数:
|
||||
city (str | None): 指定城市使用其对应代理;None 则使用随机代理并随机选择城市
|
||||
"""
|
||||
browser_id = None
|
||||
try:
|
||||
if is_quiet_time():
|
||||
logger.info("处于禁跑时段(18:30~20:00),跳过本次运行")
|
||||
return
|
||||
if city is not None:
|
||||
proxy = get_proxy(city)
|
||||
if proxy is None:
|
||||
logger.error(f"{city} 未配置对应代理,结束该线程")
|
||||
return
|
||||
remark = city
|
||||
else:
|
||||
proxy = get_random_proxy()
|
||||
if proxy is None:
|
||||
logger.error("未获取到随机代理,结束该线程")
|
||||
return
|
||||
remark = "random-ip"
|
||||
logger.info("准备创建指纹浏览器")
|
||||
browser_id = bit_browser.bit_browser_create(
|
||||
remark=remark,
|
||||
host=proxy[0],
|
||||
port=proxy[1],
|
||||
proxy_user=proxy[2],
|
||||
proxy_pwd=proxy[3],
|
||||
proxy_type='socks5'
|
||||
)
|
||||
logger.debug(browser_id)
|
||||
# 打开指纹浏览器
|
||||
http = bit_browser.bit_browser_open(browser_id)
|
||||
logger.debug(http)
|
||||
auto = Auto(http)
|
||||
auto.open_url(
|
||||
"https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
|
||||
bol = auto.wait_home()
|
||||
if not bol:
|
||||
logger.error("进入首页失败,结束该线程")
|
||||
return
|
||||
|
||||
bol = auto.click_continue()
|
||||
if not bol:
|
||||
logger.error("点击 Continue 失败,结束该线程")
|
||||
return
|
||||
auto.fill_questionnaire()
|
||||
# fill_city = city if city is not None else random.choice(['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto'])
|
||||
# auto.fill_questionnaire(fill_city)
|
||||
# time.sleep(5)
|
||||
finally:
|
||||
if browser_id:
|
||||
# 关闭指纹浏览器
|
||||
try:
|
||||
bit_browser.bit_browser_close(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"关闭浏览器异常: {e}")
|
||||
# 删除指纹浏览器
|
||||
try:
|
||||
bit_browser.bit_browser_delete(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"删除浏览器异常: {e}")
|
||||
|
||||
|
||||
def run_city_forever(city: str):
|
||||
"""
|
||||
持续循环运行指定城市流程:完成一次即关闭并删除浏览器,然后重新创建继续运行
|
||||
|
||||
参数:
|
||||
city (str): 城市名称
|
||||
"""
|
||||
while True:
|
||||
if is_quiet_time():
|
||||
sleep_until_quiet_end()
|
||||
continue
|
||||
try:
|
||||
create_fingerprint_browser(city)
|
||||
except Exception as e:
|
||||
logger.error(f"{city} 流程异常: {e}")
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def run_all_cities_concurrently(num: int):
|
||||
"""
|
||||
多线程并发运行城市流程(支持随机选择),并在线程异常退出后自动重启
|
||||
|
||||
参数:
|
||||
num (int | None): 随机选择并启动的城市数量;None 表示全部
|
||||
"""
|
||||
import threading
|
||||
if num <= 0:
|
||||
logger.warning("num 不合法(<=0),不启动任何线程")
|
||||
return
|
||||
threads = []
|
||||
for i in range(num):
|
||||
t = threading.Thread(target=run_random_ips_forever,
|
||||
name=f"random-ip-thread-{i}")
|
||||
t.start()
|
||||
threads.append(t)
|
||||
logger.info(f"随机 IP 线程 {i} 已启动")
|
||||
# 看门狗:检测线程是否存活,异常退出则重启
|
||||
while True:
|
||||
for i, t in enumerate(threads):
|
||||
if not t.is_alive():
|
||||
logger.warning(f"随机 IP 线程 {i} 已退出,正在重启")
|
||||
nt = threading.Thread(target=run_random_ips_forever,
|
||||
name=f"random-ip-thread-{i}")
|
||||
nt.start()
|
||||
threads[i] = nt
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
def run_random_ips_forever():
|
||||
"""
|
||||
持续使用随机 IP 执行流程:每次完成后关闭并删除浏览器再重建
|
||||
"""
|
||||
while True:
|
||||
if is_quiet_time():
|
||||
sleep_until_quiet_end()
|
||||
continue
|
||||
try:
|
||||
create_fingerprint_browser(None)
|
||||
except Exception as e:
|
||||
logger.error(f"随机 IP 流程异常: {e}")
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def run_random_ips_concurrently(num: int):
|
||||
"""
|
||||
根据指纹浏览器数量并发运行流程(随机取 IP),并在线程异常退出后自动重启
|
||||
|
||||
参数:
|
||||
num (int): 并发指纹浏览器数量(每个使用随机代理)
|
||||
"""
|
||||
import threading
|
||||
if num <= 0:
|
||||
logger.warning("num 不合法(<=0),不启动任何线程")
|
||||
return
|
||||
threads = []
|
||||
for i in range(num):
|
||||
t = threading.Thread(target=run_random_ips_forever,
|
||||
name=f"random-ip-thread-{i}")
|
||||
t.start()
|
||||
threads.append(t)
|
||||
logger.info(f"随机 IP 线程 {i} 已启动")
|
||||
# 看门狗:检测线程是否存活,异常退出则重启
|
||||
while True:
|
||||
for i, t in enumerate(threads):
|
||||
if not t.is_alive():
|
||||
logger.warning(f"随机 IP 线程 {i} 已退出,正在重启")
|
||||
nt = threading.Thread(target=run_random_ips_forever,
|
||||
name=f"random-ip-thread-{i}")
|
||||
nt.start()
|
||||
threads[i] = nt
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
def create_fingerprint_browser_with_proxy(proxy: list[str]):
|
||||
"""
|
||||
使用指定代理创建指纹浏览器并执行一次流程(一一对应)
|
||||
|
||||
参数:
|
||||
proxy (list[str]): `[host, port, user, pwd]`
|
||||
"""
|
||||
browser_id = None
|
||||
try:
|
||||
if is_quiet_time():
|
||||
logger.info("处于禁跑时段(18:30~20:00),跳过本次运行")
|
||||
return
|
||||
if not proxy or len(proxy) < 4:
|
||||
logger.error("代理参数不完整,结束该线程")
|
||||
return
|
||||
# 随机等待0.1秒
|
||||
time.sleep(random.uniform(0.1, 1.0))
|
||||
logger.info(f"使用代理 {proxy[2]} 创建浏览器")
|
||||
browser_id = bit_browser.bit_browser_create(
|
||||
remark=f"{proxy[2]}",
|
||||
host=proxy[0],
|
||||
port=proxy[1],
|
||||
proxy_user=proxy[2],
|
||||
proxy_pwd=proxy[3],
|
||||
proxy_type='socks5'
|
||||
)
|
||||
logger.debug(f"创建浏览器 {browser_id}")
|
||||
time.sleep(random.uniform(0.1, 1.0))
|
||||
http = bit_browser.bit_browser_open(browser_id)
|
||||
logger.debug(f"打开浏览器 {browser_id}")
|
||||
auto = Auto(http)
|
||||
auto.open_url(
|
||||
"https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
|
||||
bol = auto.wait_home()
|
||||
if not bol:
|
||||
logger.error("进入首页失败,结束该线程")
|
||||
return
|
||||
bol = auto.click_continue()
|
||||
if not bol:
|
||||
logger.error("点击 Continue 失败,结束该线程")
|
||||
return
|
||||
auto.fill_questionnaire()
|
||||
finally:
|
||||
if browser_id:
|
||||
try:
|
||||
bit_browser.bit_browser_close(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"关闭浏览器异常: {e}")
|
||||
try:
|
||||
bit_browser.bit_browser_delete(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"删除浏览器异常: {e}")
|
||||
|
||||
|
||||
def run_proxies_forever(proxy: list[str]):
|
||||
"""
|
||||
持续使用指定代理执行流程:完成后关闭并删除浏览器再重建
|
||||
|
||||
参数:
|
||||
proxy (list[str]): `[host, port, user, pwd]`
|
||||
"""
|
||||
while True:
|
||||
if is_quiet_time():
|
||||
sleep_until_quiet_end()
|
||||
continue
|
||||
try:
|
||||
create_fingerprint_browser_with_proxy(proxy)
|
||||
except Exception as e:
|
||||
logger.error(f"固定代理流程异常: {e}")
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def run_all_proxies_concurrently():
|
||||
"""
|
||||
按固定代理列表一一创建并发浏览器,并在线程异常退出后自动重启
|
||||
"""
|
||||
import threading
|
||||
proxies = [p.split(":") for p in proxy_list]
|
||||
if not proxies:
|
||||
logger.warning("未找到可用代理,结束执行")
|
||||
return
|
||||
threads = []
|
||||
for i, proxy in enumerate(proxies):
|
||||
t = threading.Thread(target=run_proxies_forever,
|
||||
args=(proxy,), name=f"proxy-thread-{i}")
|
||||
t.start()
|
||||
threads.append(t)
|
||||
logger.info(f"固定代理线程 {i} 已启动: {proxy[0]}:{proxy[1]} @ {proxy[2]}")
|
||||
# 看门狗:检测线程是否存活,异常退出则重启
|
||||
while True:
|
||||
for i, t in enumerate(threads):
|
||||
if not t.is_alive():
|
||||
proxy = proxies[i]
|
||||
logger.warning(f"固定代理线程 {i} 已退出,正在重启: {proxy[0]}:{proxy[1]} @ {proxy[2]}")
|
||||
nt = threading.Thread(target=run_proxies_forever,
|
||||
args=(proxy,), name=f"proxy-thread-{i}")
|
||||
nt.start()
|
||||
threads[i] = nt
|
||||
time.sleep(3)
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# auto = Auto()
|
||||
# auto.get_random_food('a')
|
||||
run_all_proxies_concurrently()
|
||||
@@ -93,4 +93,4 @@ hz = [
|
||||
]
|
||||
|
||||
|
||||
proxy_list = work
|
||||
proxy_list = xy
|
||||
Reference in New Issue
Block a user