0.1.2
This commit is contained in:
191
spider/main.py
191
spider/main.py
@@ -20,8 +20,13 @@ class Auto:
|
||||
# cf打码
|
||||
def solve_cloudflare(self, is_ok: bool = False):
|
||||
tab = self.browser.latest_tab
|
||||
for _ in range(3):
|
||||
self.tab.wait(1)
|
||||
for _ in range(5):
|
||||
tab.wait(1)
|
||||
res = tab.ele('t:h1@text()=Sorry, you have been blocked',timeout=1)
|
||||
if res:
|
||||
logger.error("Cloudflare验证失败")
|
||||
return False
|
||||
|
||||
try:
|
||||
shadow1 = tab.ele(
|
||||
'x://*[@name="cf-turnstile-response"]').parent().shadow_root
|
||||
@@ -74,6 +79,10 @@ class Auto:
|
||||
bol = self.solve_cloudflare()
|
||||
if not bol:
|
||||
logger.debug("Cloudflare验证失败.")
|
||||
# 刷新网页
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
jc += 1
|
||||
continue
|
||||
else:
|
||||
logger.debug("Cloudflare验证成功.")
|
||||
@@ -115,6 +124,9 @@ class Auto:
|
||||
bol = self.solve_cloudflare()
|
||||
if not bol:
|
||||
logger.debug("Cloudflare验证失败..")
|
||||
self.tab.refresh()
|
||||
self.tab.wait(1.5)
|
||||
jc += 1
|
||||
continue
|
||||
else:
|
||||
logger.debug("Cloudflare验证成功..")
|
||||
@@ -122,15 +134,15 @@ class Auto:
|
||||
continue_button.click()
|
||||
logger.debug("点击Continue按钮成功")
|
||||
self.tab.wait(1.5)
|
||||
bol = self.tab.ele('@text():Loading', timeout=1)
|
||||
if bol:
|
||||
logger.debug("Loading...")
|
||||
if bl:
|
||||
logger.debug("多次异常界面, 结束继续点击")
|
||||
return False
|
||||
logger.debug("异常界面")
|
||||
self.tab.wait(1)
|
||||
return self.click_continue(bl=True)
|
||||
# bol = self.tab.ele('@text():Loading', timeout=1)
|
||||
# if bol:
|
||||
# logger.debug("Loading...")
|
||||
# if bl:
|
||||
# logger.debug("多次异常界面, 结束继续点击")
|
||||
# return False
|
||||
# logger.debug("异常界面")
|
||||
# self.tab.wait(1)
|
||||
# return self.click_continue(bl=True)
|
||||
bol = self.tab.ele(
|
||||
't:h2@text()=You are being rate limited', timeout=1)
|
||||
if bol:
|
||||
@@ -173,7 +185,7 @@ class Auto:
|
||||
province = random.choice(list(cities.keys()))
|
||||
return province, random.choice(cities.get(province, []))
|
||||
|
||||
def get_province_by_city(self, city: str) -> str | None:
|
||||
def get_province_by_city(self) -> str | None:
|
||||
"""
|
||||
根据城市名称解析对应省份
|
||||
|
||||
@@ -189,8 +201,49 @@ class Auto:
|
||||
"Vancouver": "British Columbia",
|
||||
"Halifax": "Nova Scotia",
|
||||
"Toronto": "Ontario",
|
||||
"Ottawa": "Ontario",
|
||||
"Mississauga": "Ontario",
|
||||
"Brampton": "Ontario",
|
||||
"Hamilton": "Ontario",
|
||||
"Kitchener": "Ontario",
|
||||
"London": "Ontario",
|
||||
"Markham": "Ontario",
|
||||
"Vaughan": "Ontario",
|
||||
"Windsor": "Ontario",
|
||||
"Oshawa": "Ontario",
|
||||
"Brantford": "Ontario",
|
||||
"Barrie": "Ontario",
|
||||
"Sudbury": "Ontario",
|
||||
"Kingston": "Ontario",
|
||||
"Guelph": "Ontario",
|
||||
"Cambridge": "Ontario",
|
||||
"Sarnia": "Ontario",
|
||||
"Peterborough": "Ontario",
|
||||
"Waterloo": "Ontario",
|
||||
"Belleville": "Ontario",
|
||||
"Brockville": "Ontario",
|
||||
"Burlington": "Ontario",
|
||||
"Cornwall": "Ontario",
|
||||
"Kawartha Lakes": "Ontario",
|
||||
"North Bay": "Ontario",
|
||||
"Orillia": "Ontario",
|
||||
"Pickering": "Ontario",
|
||||
"Sault Ste. Marie": "Ontario",
|
||||
"Stratford": "Ontario",
|
||||
"Durham": "Ontario",
|
||||
"Norfolk County": "Ontario",
|
||||
"Prince Edward County": "Ontario",
|
||||
"Quinte West": "Ontario",
|
||||
"St. Catharines": "Ontario",
|
||||
"Welland": "Ontario",
|
||||
"Thorold": "Ontario",
|
||||
"Niagara Falls": "Ontario",
|
||||
"Pelham": "Ontario",
|
||||
"Port Colborne": "Ontario",
|
||||
}
|
||||
return mapping.get(city)
|
||||
# 随机返回一条 key 和 value
|
||||
return random.choice(list(mapping.items()))
|
||||
|
||||
|
||||
# 随机实物
|
||||
def get_random_food(self, city: str, shop: str) -> list[str]:
|
||||
@@ -261,17 +314,17 @@ class Auto:
|
||||
return text
|
||||
|
||||
# 填写问卷
|
||||
def fill_questionnaire(self, city: str):
|
||||
def fill_questionnaire(self):
|
||||
"""
|
||||
根据传入的城市解析省份并完成问卷填写
|
||||
完成问卷填写
|
||||
|
||||
参数:
|
||||
city (str): 线程启动时传入的城市名称,用于匹配省份并填写数据
|
||||
"""
|
||||
try:
|
||||
province = self.get_province_by_city(city)
|
||||
city, province = self.get_province_by_city()
|
||||
if province is None:
|
||||
logger.error(f"未找到城市对应省份: {city}")
|
||||
logger.error(f"未找到城市对应省份")
|
||||
return
|
||||
j = 0
|
||||
while True:
|
||||
@@ -378,7 +431,7 @@ class Auto:
|
||||
email=email,
|
||||
text=text
|
||||
)
|
||||
self.tab.wait(2)
|
||||
self.tab.wait(3)
|
||||
except Exception as e:
|
||||
logger.error(f"填写问卷失败: {e}")
|
||||
|
||||
@@ -398,27 +451,50 @@ def get_proxy(city: str):
|
||||
else:
|
||||
return None
|
||||
|
||||
def get_random_proxy() -> list[str] | None:
|
||||
"""
|
||||
随机选择一个代理配置(按指纹浏览器数量随机取 IP)
|
||||
|
||||
返回值:
|
||||
list[str] | None: 代理参数列表 `[host, port, user, pwd]`;无可用代理返回 None
|
||||
"""
|
||||
proxy_list = [
|
||||
"us.novproxy.io:1000:ozua8623-region-CA:6wdcv4gq",
|
||||
"us.novproxy.io:1000:ozua8623-region-US:6wdcv4gq",
|
||||
]
|
||||
try:
|
||||
return random.choice(proxy_list).split(':')
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
"""指纹浏览器操作"""
|
||||
# 创建指纹浏览器
|
||||
|
||||
|
||||
def create_fingerprint_browser(city: str):
|
||||
def create_fingerprint_browser(city: str | None = None):
|
||||
"""
|
||||
根据城市创建指纹浏览器并执行问卷流程
|
||||
创建指纹浏览器并执行一次流程(支持随机 IP 与指定城市)
|
||||
|
||||
参数:
|
||||
city (str): 城市名称,例如 `Calgary`、`Edmonton` 等
|
||||
city (str | None): 指定城市使用其对应代理;None 则使用随机代理并随机选择城市
|
||||
"""
|
||||
browser_id = None
|
||||
try:
|
||||
if city is not None:
|
||||
proxy = get_proxy(city)
|
||||
if proxy is None:
|
||||
logger.error(f"{city} 未配置对应代理,结束该线程")
|
||||
return
|
||||
logger.info(f"{city} 准备创建指纹浏览器")
|
||||
remark = city
|
||||
else:
|
||||
proxy = get_random_proxy()
|
||||
if proxy is None:
|
||||
logger.error("未获取到随机代理,结束该线程")
|
||||
return
|
||||
remark = "random-ip"
|
||||
logger.info("准备创建指纹浏览器")
|
||||
browser_id = bit_browser.bit_browser_create(
|
||||
remark=city,
|
||||
remark=remark,
|
||||
host=proxy[0],
|
||||
port=proxy[1],
|
||||
proxy_user=proxy[2],
|
||||
@@ -434,27 +510,29 @@ def create_fingerprint_browser(city: str):
|
||||
"https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
|
||||
bol = auto.wait_home()
|
||||
if not bol:
|
||||
logger.error(f"{city} 进入首页失败,结束该线程")
|
||||
logger.error("进入首页失败,结束该线程")
|
||||
return
|
||||
|
||||
bol = auto.click_continue()
|
||||
if not bol:
|
||||
logger.error(f"{city} 点击 Continue 失败,结束该线程")
|
||||
logger.error("点击 Continue 失败,结束该线程")
|
||||
return
|
||||
auto.fill_questionnaire(city)
|
||||
time.sleep(5)
|
||||
auto.fill_questionnaire()
|
||||
# fill_city = city if city is not None else random.choice(['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto'])
|
||||
# auto.fill_questionnaire(fill_city)
|
||||
# time.sleep(5)
|
||||
finally:
|
||||
if browser_id:
|
||||
# 关闭指纹浏览器
|
||||
try:
|
||||
bit_browser.bit_browser_close(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"{city} 关闭浏览器异常: {e}")
|
||||
logger.error(f"关闭浏览器异常: {e}")
|
||||
# 删除指纹浏览器
|
||||
try:
|
||||
bit_browser.bit_browser_delete(browser_id)
|
||||
except Exception as e:
|
||||
logger.error(f"{city} 删除浏览器异常: {e}")
|
||||
logger.error(f"删除浏览器异常: {e}")
|
||||
|
||||
|
||||
def run_city_forever(city: str):
|
||||
@@ -472,27 +550,60 @@ def run_city_forever(city: str):
|
||||
time.sleep(2)
|
||||
|
||||
|
||||
def run_all_cities_concurrently():
|
||||
def run_all_cities_concurrently(num: int):
|
||||
"""
|
||||
多线程并发运行所有城市流程
|
||||
多线程并发运行城市流程(支持随机选择)
|
||||
|
||||
参数:
|
||||
num (int | None): 随机选择并启动的城市数量;None 表示全部
|
||||
"""
|
||||
import threading
|
||||
cities = ['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto']
|
||||
# cities = ['Calgary']
|
||||
threads = []
|
||||
for city in cities:
|
||||
t = threading.Thread(target=run_city_forever,
|
||||
args=(city,), name=f"{city}-thread")
|
||||
for i in range(num):
|
||||
t = threading.Thread(target=run_random_ips_forever, name=f"random-ip-thread-{i}")
|
||||
t.start()
|
||||
threads.append(t)
|
||||
logger.info(f"{city} 线程已启动")
|
||||
# time.sleep(2)
|
||||
logger.info(f"随机 IP 线程 {i} 已启动")
|
||||
for t in threads:
|
||||
t.join()
|
||||
logger.info("所有城市流程执行完成")
|
||||
logger.info("所有随机 IP 流程执行完成")
|
||||
|
||||
|
||||
def run_random_ips_forever():
    """
    Run the random-IP flow in an endless loop.

    Each iteration calls `create_fingerprint_browser` with no city; any
    exception it raises is logged and the loop continues.
    """
    while True:
        try:
            create_fingerprint_browser(city=None)
        except Exception as exc:
            logger.error(f"随机 IP 流程异常: {exc}")
        # NOTE(review): the pause is assumed to run after every iteration
        # (success or failure) — confirm against the original file.
        time.sleep(2)
|
||||
|
||||
|
||||
def run_random_ips_concurrently(num: int):
    """
    Start `num` worker threads that each run the random-IP flow.

    Args:
        num: Number of threads to start. A value <= 0 logs a warning and
            starts nothing.

    The call blocks until every started thread has been joined.
    """
    import threading

    if num <= 0:
        logger.warning("num 不合法(<=0),不启动任何线程")
        return

    workers = []
    for index in range(num):
        worker = threading.Thread(
            target=run_random_ips_forever,
            name=f"random-ip-thread-{index}",
        )
        worker.start()
        workers.append(worker)
        logger.info(f"随机 IP 线程 {index} 已启动")

    # Wait for all workers before reporting completion.
    for worker in workers:
        worker.join()
    logger.info("随机 IP 并发流程执行完成")
|
||||
|
||||
|
||||
if __name__ == "__main__":
|
||||
# auto = Auto()
|
||||
# auto.get_random_food('a')
|
||||
run_all_cities_concurrently()
|
||||
run_all_cities_concurrently(1)
|
||||
|
||||
Reference in New Issue
Block a user