diff --git a/README.md b/README.md index 206faf6..0552582 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,5 @@ +# 0.1.2 +- 优化自动化脚本 # 0.1.1 - 修正店铺信息 # 0.1.0 diff --git a/spider/main.py b/spider/main.py index 9cbb79a..2a31ff1 100644 --- a/spider/main.py +++ b/spider/main.py @@ -20,8 +20,13 @@ class Auto: # cf打码 def solve_cloudflare(self, is_ok: bool = False): tab = self.browser.latest_tab - for _ in range(3): - self.tab.wait(1) + for _ in range(5): + tab.wait(1) + res = tab.ele('t:h1@text()=Sorry, you have been blocked',timeout=1) + if res: + logger.error("Cloudflare验证失败") + return False + try: shadow1 = tab.ele( 'x://*[@name="cf-turnstile-response"]').parent().shadow_root @@ -74,6 +79,10 @@ class Auto: bol = self.solve_cloudflare() if not bol: logger.debug("Cloudflare验证失败.") + # 刷新网页 + self.tab.refresh() + self.tab.wait(1.5) + jc += 1 continue else: logger.debug("Cloudflare验证成功.") @@ -115,6 +124,9 @@ class Auto: bol = self.solve_cloudflare() if not bol: logger.debug("Cloudflare验证失败..") + self.tab.refresh() + self.tab.wait(1.5) + jc += 1 continue else: logger.debug("Cloudflare验证成功..") @@ -122,15 +134,15 @@ class Auto: continue_button.click() logger.debug("点击Continue按钮成功") self.tab.wait(1.5) - bol = self.tab.ele('@text():Loading', timeout=1) - if bol: - logger.debug("Loading...") - if bl: - logger.debug("多次异常界面, 结束继续点击") - return False - logger.debug("异常界面") - self.tab.wait(1) - return self.click_continue(bl=True) + # bol = self.tab.ele('@text():Loading', timeout=1) + # if bol: + # logger.debug("Loading...") + # if bl: + # logger.debug("多次异常界面, 结束继续点击") + # return False + # logger.debug("异常界面") + # self.tab.wait(1) + # return self.click_continue(bl=True) bol = self.tab.ele( 't:h2@text()=You are being rate limited', timeout=1) if bol: @@ -173,7 +185,7 @@ class Auto: province = random.choice(list(cities.keys())) return province, random.choice(cities.get(province, [])) - def get_province_by_city(self, city: str) -> str | None: + def get_province_by_city(self) -> str | None: """ 根据城市名称解析对应省份 @@ -189,8 +201,49 @@ class Auto: "Vancouver": "British Columbia", "Halifax": "Nova Scotia", "Toronto": "Ontario", + "Ottawa": "Ontario", + "Mississauga": "Ontario", + "Brampton": "Ontario", + "Hamilton": "Ontario", + "Kitchener": "Ontario", + "London": "Ontario", + "Markham": "Ontario", + "Vaughan": "Ontario", + "Windsor": "Ontario", + "Oshawa": "Ontario", + "Brantford": "Ontario", + "Barrie": "Ontario", + "Sudbury": "Ontario", + "Kingston": "Ontario", + "Guelph": "Ontario", + "Cambridge": "Ontario", + "Sarnia": "Ontario", + "Peterborough": "Ontario", + "Waterloo": "Ontario", + "Belleville": "Ontario", + "Brockville": "Ontario", + "Burlington": "Ontario", + "Cornwall": "Ontario", + "Kawartha Lakes": "Ontario", + "North Bay": "Ontario", + "Orillia": "Ontario", + "Pickering": "Ontario", + "Sault Ste. Marie": "Ontario", + "Stratford": "Ontario", + "Durham": "Ontario", + "Norfolk County": "Ontario", + "Prince Edward County": "Ontario", + "Quinte West": "Ontario", + "St. Catharines": "Ontario", + "Welland": "Ontario", + "Thorold": "Ontario", + "Niagara Falls": "Ontario", + "Pelham": "Ontario", + "Port Colborne": "Ontario", } - return mapping.get(city) + # 随机返回一条 key 和 value + return random.choice(list(mapping.items())) + # 随机实物 def get_random_food(self, city: str, shop: str) -> list[str]: @@ -261,17 +314,17 @@ class Auto: return text # 填写问卷 - def fill_questionnaire(self, city: str): + def fill_questionnaire(self): """ - 根据传入的城市解析省份并完成问卷填写 + 完成问卷填写 参数: city (str): 线程启动时传入的城市名称,用于匹配省份并填写数据 """ try: - province = self.get_province_by_city(city) + city, province = self.get_province_by_city() if province is None: - logger.error(f"未找到城市对应省份: {city}") + logger.error(f"未找到城市对应省份") return j = 0 while True: @@ -378,7 +431,7 @@ class Auto: email=email, text=text ) - self.tab.wait(2) + self.tab.wait(3) except Exception as e: logger.error(f"填写问卷失败: {e}") @@ -398,27 +451,50 @@ def get_proxy(city: str): else: return None +def get_random_proxy() -> list[str] | None: + """ + 随机选择一个代理配置(按指纹浏览器数量随机取 IP) + + 返回值: + list[str] | None: 代理参数列表 `[host, port, user, pwd]`;无可用代理返回 None + """ + proxy_list = [ + "us.novproxy.io:1000:ozua8623-region-CA:6wdcv4gq", + "us.novproxy.io:1000:ozua8623-region-US:6wdcv4gq", + ] + try: + return random.choice(proxy_list).split(':') + except Exception: + return None """指纹浏览器操作""" # 创建指纹浏览器 -def create_fingerprint_browser(city: str): +def create_fingerprint_browser(city: str | None = None): """ - 根据城市创建指纹浏览器并执行问卷流程 + 创建指纹浏览器并执行一次流程(支持随机 IP 与指定城市) 参数: - city (str): 城市名称,例如 `Calgary`、`Edmonton` 等 + city (str | None): 指定城市使用其对应代理;None 则使用随机代理并随机选择城市 """ browser_id = None try: - proxy = get_proxy(city) - if proxy is None: - logger.error(f"{city} 未配置对应代理,结束该线程") - return - logger.info(f"{city} 准备创建指纹浏览器") + if city is not None: + proxy = get_proxy(city) + if proxy is None: + logger.error(f"{city} 未配置对应代理,结束该线程") + return + remark = city + else: + proxy = get_random_proxy() + if proxy is None: + logger.error("未获取到随机代理,结束该线程") + return + remark = "random-ip" + logger.info("准备创建指纹浏览器") browser_id = bit_browser.bit_browser_create( - remark=city, + remark=remark, host=proxy[0], port=proxy[1], proxy_user=proxy[2], @@ -434,27 +510,29 @@ def create_fingerprint_browser(city: str): "https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm") bol = auto.wait_home() if not bol: - logger.error(f"{city} 进入首页失败,结束该线程") + logger.error("进入首页失败,结束该线程") return bol = auto.click_continue() if not bol: - logger.error(f"{city} 点击 Continue 失败,结束该线程") + logger.error("点击 Continue 失败,结束该线程") return - auto.fill_questionnaire(city) - time.sleep(5) + auto.fill_questionnaire() + # fill_city = city if city is not None else random.choice(['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto']) + # auto.fill_questionnaire(fill_city) + # time.sleep(5) finally: if browser_id: # 关闭指纹浏览器 try: bit_browser.bit_browser_close(browser_id) except Exception as e: - logger.error(f"{city} 关闭浏览器异常: {e}") + logger.error(f"关闭浏览器异常: {e}") # 删除指纹浏览器 try: bit_browser.bit_browser_delete(browser_id) except Exception as e: - logger.error(f"{city} 删除浏览器异常: {e}") + logger.error(f"删除浏览器异常: {e}") def run_city_forever(city: str): @@ -472,27 +550,60 @@ def run_city_forever(city: str): time.sleep(2) -def run_all_cities_concurrently(): +def run_all_cities_concurrently(num: int): """ - 多线程并发运行所有城市流程 + 多线程并发运行城市流程(支持随机选择) + + 参数: + num (int | None): 随机选择并启动的城市数量;None 表示全部 """ import threading - cities = ['Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto'] - # cities = ['Calgary'] threads = [] - for city in cities: - t = threading.Thread(target=run_city_forever, - args=(city,), name=f"{city}-thread") + for i in range(num): + t = threading.Thread(target=run_random_ips_forever, name=f"random-ip-thread-{i}") t.start() threads.append(t) - logger.info(f"{city} 线程已启动") - # time.sleep(2) + logger.info(f"随机 IP 线程 {i} 已启动") for t in threads: t.join() - logger.info("所有城市流程执行完成") + logger.info("所有随机 IP 流程执行完成") + + +def run_random_ips_forever(): + """ + 持续使用随机 IP 执行流程:每次完成后关闭并删除浏览器再重建 + """ + while True: + try: + create_fingerprint_browser(None) + except Exception as e: + logger.error(f"随机 IP 流程异常: {e}") + time.sleep(2) + + +def run_random_ips_concurrently(num: int): + """ + 根据指纹浏览器数量并发运行流程(随机取 IP) + + 参数: + num (int): 并发指纹浏览器数量(每个使用随机代理) + """ + import threading + if num <= 0: + logger.warning("num 不合法(<=0),不启动任何线程") + return + threads = [] + for i in range(num): + t = threading.Thread(target=run_random_ips_forever, name=f"random-ip-thread-{i}") + t.start() + threads.append(t) + logger.info(f"随机 IP 线程 {i} 已启动") + for t in threads: + t.join() + logger.info("随机 IP 并发流程执行完成") if __name__ == "__main__": # auto = Auto() # auto.get_random_food('a') - run_all_cities_concurrently() + run_all_cities_concurrently(1)