0.1.2
This commit is contained in:
@@ -1,3 +1,5 @@
|
|||||||
|
# 0.1.2
|
||||||
|
- 优化自动化脚本
|
||||||
# 0.1.1
|
# 0.1.1
|
||||||
- 修正店铺信息
|
- 修正店铺信息
|
||||||
# 0.1.0
|
# 0.1.0
|
||||||
|
|||||||
191
spider/main.py
191
spider/main.py
@@ -20,8 +20,13 @@ class Auto:
|
|||||||
# cf打码
|
# cf打码
|
||||||
def solve_cloudflare(self, is_ok: bool = False):
|
def solve_cloudflare(self, is_ok: bool = False):
|
||||||
tab = self.browser.latest_tab
|
tab = self.browser.latest_tab
|
||||||
for _ in range(3):
|
for _ in range(5):
|
||||||
self.tab.wait(1)
|
tab.wait(1)
|
||||||
|
res = tab.ele('t:h1@text()=Sorry, you have been blocked',timeout=1)
|
||||||
|
if res:
|
||||||
|
logger.error("Cloudflare验证失败")
|
||||||
|
return False
|
||||||
|
|
||||||
try:
|
try:
|
||||||
shadow1 = tab.ele(
|
shadow1 = tab.ele(
|
||||||
'x://*[@name="cf-turnstile-response"]').parent().shadow_root
|
'x://*[@name="cf-turnstile-response"]').parent().shadow_root
|
||||||
@@ -74,6 +79,10 @@ class Auto:
|
|||||||
bol = self.solve_cloudflare()
|
bol = self.solve_cloudflare()
|
||||||
if not bol:
|
if not bol:
|
||||||
logger.debug("Cloudflare验证失败.")
|
logger.debug("Cloudflare验证失败.")
|
||||||
|
# 刷新网页
|
||||||
|
self.tab.refresh()
|
||||||
|
self.tab.wait(1.5)
|
||||||
|
jc += 1
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
logger.debug("Cloudflare验证成功.")
|
logger.debug("Cloudflare验证成功.")
|
||||||
@@ -115,6 +124,9 @@ class Auto:
|
|||||||
bol = self.solve_cloudflare()
|
bol = self.solve_cloudflare()
|
||||||
if not bol:
|
if not bol:
|
||||||
logger.debug("Cloudflare验证失败..")
|
logger.debug("Cloudflare验证失败..")
|
||||||
|
self.tab.refresh()
|
||||||
|
self.tab.wait(1.5)
|
||||||
|
jc += 1
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
logger.debug("Cloudflare验证成功..")
|
logger.debug("Cloudflare验证成功..")
|
||||||
@@ -122,15 +134,15 @@ class Auto:
|
|||||||
continue_button.click()
|
continue_button.click()
|
||||||
logger.debug("点击Continue按钮成功")
|
logger.debug("点击Continue按钮成功")
|
||||||
self.tab.wait(1.5)
|
self.tab.wait(1.5)
|
||||||
bol = self.tab.ele('@text():Loading', timeout=1)
|
# bol = self.tab.ele('@text():Loading', timeout=1)
|
||||||
if bol:
|
# if bol:
|
||||||
logger.debug("Loading...")
|
# logger.debug("Loading...")
|
||||||
if bl:
|
# if bl:
|
||||||
logger.debug("多次异常界面, 结束继续点击")
|
# logger.debug("多次异常界面, 结束继续点击")
|
||||||
return False
|
# return False
|
||||||
logger.debug("异常界面")
|
# logger.debug("异常界面")
|
||||||
self.tab.wait(1)
|
# self.tab.wait(1)
|
||||||
return self.click_continue(bl=True)
|
# return self.click_continue(bl=True)
|
||||||
bol = self.tab.ele(
|
bol = self.tab.ele(
|
||||||
't:h2@text()=You are being rate limited', timeout=1)
|
't:h2@text()=You are being rate limited', timeout=1)
|
||||||
if bol:
|
if bol:
|
||||||
@@ -173,7 +185,7 @@ class Auto:
|
|||||||
province = random.choice(list(cities.keys()))
|
province = random.choice(list(cities.keys()))
|
||||||
return province, random.choice(cities.get(province, []))
|
return province, random.choice(cities.get(province, []))
|
||||||
|
|
||||||
def get_province_by_city(self, city: str) -> str | None:
|
def get_province_by_city(self) -> str | None:
|
||||||
"""
|
"""
|
||||||
根据城市名称解析对应省份
|
根据城市名称解析对应省份
|
||||||
|
|
||||||
@@ -189,8 +201,49 @@ class Auto:
|
|||||||
"Vancouver": "British Columbia",
|
"Vancouver": "British Columbia",
|
||||||
"Halifax": "Nova Scotia",
|
"Halifax": "Nova Scotia",
|
||||||
"Toronto": "Ontario",
|
"Toronto": "Ontario",
|
||||||
|
"Ottawa": "Ontario",
|
||||||
|
"Mississauga": "Ontario",
|
||||||
|
"Brampton": "Ontario",
|
||||||
|
"Hamilton": "Ontario",
|
||||||
|
"Kitchener": "Ontario",
|
||||||
|
"London": "Ontario",
|
||||||
|
"Markham": "Ontario",
|
||||||
|
"Vaughan": "Ontario",
|
||||||
|
"Windsor": "Ontario",
|
||||||
|
"Oshawa": "Ontario",
|
||||||
|
"Brantford": "Ontario",
|
||||||
|
"Barrie": "Ontario",
|
||||||
|
"Sudbury": "Ontario",
|
||||||
|
"Kingston": "Ontario",
|
||||||
|
"Guelph": "Ontario",
|
||||||
|
"Cambridge": "Ontario",
|
||||||
|
"Sarnia": "Ontario",
|
||||||
|
"Peterborough": "Ontario",
|
||||||
|
"Waterloo": "Ontario",
|
||||||
|
"Belleville": "Ontario",
|
||||||
|
"Brockville": "Ontario",
|
||||||
|
"Burlington": "Ontario",
|
||||||
|
"Cornwall": "Ontario",
|
||||||
|
"Kawartha Lakes": "Ontario",
|
||||||
|
"North Bay": "Ontario",
|
||||||
|
"Orillia": "Ontario",
|
||||||
|
"Pickering": "Ontario",
|
||||||
|
"Sault Ste. Marie": "Ontario",
|
||||||
|
"Stratford": "Ontario",
|
||||||
|
"Durham": "Ontario",
|
||||||
|
"Norfolk County": "Ontario",
|
||||||
|
"Prince Edward County": "Ontario",
|
||||||
|
"Quinte West": "Ontario",
|
||||||
|
"St. Catharines": "Ontario",
|
||||||
|
"Welland": "Ontario",
|
||||||
|
"Thorold": "Ontario",
|
||||||
|
"Niagara Falls": "Ontario",
|
||||||
|
"Pelham": "Ontario",
|
||||||
|
"Port Colborne": "Ontario",
|
||||||
}
|
}
|
||||||
return mapping.get(city)
|
# 随机返回一条 key 和 value
|
||||||
|
return random.choice(list(mapping.items()))
|
||||||
|
|
||||||
|
|
||||||
# 随机实物
|
# 随机实物
|
||||||
def get_random_food(self, city: str, shop: str) -> list[str]:
|
def get_random_food(self, city: str, shop: str) -> list[str]:
|
||||||
@@ -261,17 +314,17 @@ class Auto:
|
|||||||
return text
|
return text
|
||||||
|
|
||||||
# 填写问卷
|
# 填写问卷
|
||||||
def fill_questionnaire(self, city: str):
|
def fill_questionnaire(self):
|
||||||
"""
|
"""
|
||||||
根据传入的城市解析省份并完成问卷填写
|
完成问卷填写
|
||||||
|
|
||||||
参数:
|
参数:
|
||||||
city (str): 线程启动时传入的城市名称,用于匹配省份并填写数据
|
city (str): 线程启动时传入的城市名称,用于匹配省份并填写数据
|
||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
province = self.get_province_by_city(city)
|
city, province = self.get_province_by_city()
|
||||||
if province is None:
|
if province is None:
|
||||||
logger.error(f"未找到城市对应省份: {city}")
|
logger.error(f"未找到城市对应省份")
|
||||||
return
|
return
|
||||||
j = 0
|
j = 0
|
||||||
while True:
|
while True:
|
||||||
@@ -378,7 +431,7 @@ class Auto:
|
|||||||
email=email,
|
email=email,
|
||||||
text=text
|
text=text
|
||||||
)
|
)
|
||||||
self.tab.wait(2)
|
self.tab.wait(3)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"填写问卷失败: {e}")
|
logger.error(f"填写问卷失败: {e}")
|
||||||
|
|
||||||
@@ -398,27 +451,50 @@ def get_proxy(city: str):
|
|||||||
else:
|
else:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_random_proxy() -> list[str] | None:
|
||||||
|
"""
|
||||||
|
随机选择一个代理配置(按指纹浏览器数量随机取 IP)
|
||||||
|
|
||||||
|
返回值:
|
||||||
|
list[str] | None: 代理参数列表 `[host, port, user, pwd]`;无可用代理返回 None
|
||||||
|
"""
|
||||||
|
proxy_list = [
|
||||||
|
"us.novproxy.io:1000:ozua8623-region-CA:6wdcv4gq",
|
||||||
|
"us.novproxy.io:1000:ozua8623-region-US:6wdcv4gq",
|
||||||
|
]
|
||||||
|
try:
|
||||||
|
return random.choice(proxy_list).split(':')
|
||||||
|
except Exception:
|
||||||
|
return None
|
||||||
|
|
||||||
"""指纹浏览器操作"""
|
"""指纹浏览器操作"""
|
||||||
# 创建指纹浏览器
|
# 创建指纹浏览器
|
||||||
|
|
||||||
|
|
||||||
def create_fingerprint_browser(city: str):
|
def create_fingerprint_browser(city: str | None = None):
|
||||||
"""
|
"""
|
||||||
根据城市创建指纹浏览器并执行问卷流程
|
创建指纹浏览器并执行一次流程(支持随机 IP 与指定城市)
|
||||||
|
|
||||||
参数:
|
参数:
|
||||||
city (str): 城市名称,例如 `Calgary`、`Edmonton` 等
|
city (str | None): 指定城市使用其对应代理;None 则使用随机代理并随机选择城市
|
||||||
"""
|
"""
|
||||||
browser_id = None
|
browser_id = None
|
||||||
try:
|
try:
|
||||||
|
if city is not None:
|
||||||
proxy = get_proxy(city)
|
proxy = get_proxy(city)
|
||||||
if proxy is None:
|
if proxy is None:
|
||||||
logger.error(f"{city} 未配置对应代理,结束该线程")
|
logger.error(f"{city} 未配置对应代理,结束该线程")
|
||||||
return
|
return
|
||||||
logger.info(f"{city} 准备创建指纹浏览器")
|
remark = city
|
||||||
|
else:
|
||||||
|
proxy = get_random_proxy()
|
||||||
|
if proxy is None:
|
||||||
|
logger.error("未获取到随机代理,结束该线程")
|
||||||
|
return
|
||||||
|
remark = "random-ip"
|
||||||
|
logger.info("准备创建指纹浏览器")
|
||||||
browser_id = bit_browser.bit_browser_create(
|
browser_id = bit_browser.bit_browser_create(
|
||||||
remark=city,
|
remark=remark,
|
||||||
host=proxy[0],
|
host=proxy[0],
|
||||||
port=proxy[1],
|
port=proxy[1],
|
||||||
proxy_user=proxy[2],
|
proxy_user=proxy[2],
|
||||||
@@ -434,27 +510,29 @@ def create_fingerprint_browser(city: str):
|
|||||||
"https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
|
"https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
|
||||||
bol = auto.wait_home()
|
bol = auto.wait_home()
|
||||||
if not bol:
|
if not bol:
|
||||||
logger.error(f"{city} 进入首页失败,结束该线程")
|
logger.error("进入首页失败,结束该线程")
|
||||||
return
|
return
|
||||||
|
|
||||||
bol = auto.click_continue()
|
bol = auto.click_continue()
|
||||||
if not bol:
|
if not bol:
|
||||||
logger.error(f"{city} 点击 Continue 失败,结束该线程")
|
logger.error("点击 Continue 失败,结束该线程")
|
||||||
return
|
return
|
||||||
auto.fill_questionnaire(city)
|
auto.fill_questionnaire()
|
||||||
time.sleep(5)
|
# fill_city = city if city is not None else random.choice(['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto'])
|
||||||
|
# auto.fill_questionnaire(fill_city)
|
||||||
|
# time.sleep(5)
|
||||||
finally:
|
finally:
|
||||||
if browser_id:
|
if browser_id:
|
||||||
# 关闭指纹浏览器
|
# 关闭指纹浏览器
|
||||||
try:
|
try:
|
||||||
bit_browser.bit_browser_close(browser_id)
|
bit_browser.bit_browser_close(browser_id)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"{city} 关闭浏览器异常: {e}")
|
logger.error(f"关闭浏览器异常: {e}")
|
||||||
# 删除指纹浏览器
|
# 删除指纹浏览器
|
||||||
try:
|
try:
|
||||||
bit_browser.bit_browser_delete(browser_id)
|
bit_browser.bit_browser_delete(browser_id)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"{city} 删除浏览器异常: {e}")
|
logger.error(f"删除浏览器异常: {e}")
|
||||||
|
|
||||||
|
|
||||||
def run_city_forever(city: str):
|
def run_city_forever(city: str):
|
||||||
@@ -472,27 +550,60 @@ def run_city_forever(city: str):
|
|||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
def run_all_cities_concurrently():
|
def run_all_cities_concurrently(num: int):
|
||||||
"""
|
"""
|
||||||
多线程并发运行所有城市流程
|
多线程并发运行城市流程(支持随机选择)
|
||||||
|
|
||||||
|
参数:
|
||||||
|
num (int | None): 随机选择并启动的城市数量;None 表示全部
|
||||||
"""
|
"""
|
||||||
import threading
|
import threading
|
||||||
cities = ['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto']
|
|
||||||
# cities = ['Calgary']
|
|
||||||
threads = []
|
threads = []
|
||||||
for city in cities:
|
for i in range(num):
|
||||||
t = threading.Thread(target=run_city_forever,
|
t = threading.Thread(target=run_random_ips_forever, name=f"random-ip-thread-{i}")
|
||||||
args=(city,), name=f"{city}-thread")
|
|
||||||
t.start()
|
t.start()
|
||||||
threads.append(t)
|
threads.append(t)
|
||||||
logger.info(f"{city} 线程已启动")
|
logger.info(f"随机 IP 线程 {i} 已启动")
|
||||||
# time.sleep(2)
|
|
||||||
for t in threads:
|
for t in threads:
|
||||||
t.join()
|
t.join()
|
||||||
logger.info("所有城市流程执行完成")
|
logger.info("所有随机 IP 流程执行完成")
|
||||||
|
|
||||||
|
|
||||||
|
def run_random_ips_forever():
|
||||||
|
"""
|
||||||
|
持续使用随机 IP 执行流程:每次完成后关闭并删除浏览器再重建
|
||||||
|
"""
|
||||||
|
while True:
|
||||||
|
try:
|
||||||
|
create_fingerprint_browser(None)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"随机 IP 流程异常: {e}")
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
|
|
||||||
|
def run_random_ips_concurrently(num: int):
|
||||||
|
"""
|
||||||
|
根据指纹浏览器数量并发运行流程(随机取 IP)
|
||||||
|
|
||||||
|
参数:
|
||||||
|
num (int): 并发指纹浏览器数量(每个使用随机代理)
|
||||||
|
"""
|
||||||
|
import threading
|
||||||
|
if num <= 0:
|
||||||
|
logger.warning("num 不合法(<=0),不启动任何线程")
|
||||||
|
return
|
||||||
|
threads = []
|
||||||
|
for i in range(num):
|
||||||
|
t = threading.Thread(target=run_random_ips_forever, name=f"random-ip-thread-{i}")
|
||||||
|
t.start()
|
||||||
|
threads.append(t)
|
||||||
|
logger.info(f"随机 IP 线程 {i} 已启动")
|
||||||
|
for t in threads:
|
||||||
|
t.join()
|
||||||
|
logger.info("随机 IP 并发流程执行完成")
|
||||||
|
|
||||||
|
|
||||||
if __name__ == "__main__":
|
if __name__ == "__main__":
|
||||||
# auto = Auto()
|
# auto = Auto()
|
||||||
# auto.get_random_food('a')
|
# auto.get_random_food('a')
|
||||||
run_all_cities_concurrently()
|
run_all_cities_concurrently(1)
|
||||||
|
|||||||
Reference in New Issue
Block a user