This commit is contained in:
2025-11-20 11:42:18 +08:00
parent 1bd91df9a1
commit e2d2b0b75b
29 changed files with 2100 additions and 48 deletions

522
spider/bit_browser.py Normal file
View File

@@ -0,0 +1,522 @@
import os
import time
import aiohttp
import asyncio
import requests
from loguru import logger
from functools import wraps
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """
    Generic retry decorator for synchronous callables.

    The wrapped callable is invoked at most ``max_retries`` times.  Any
    ``Exception`` it raises is logged and swallowed; when every attempt has
    failed the wrapper returns ``None`` instead of raising.

    :param max_retries: total number of attempts (values <= 0 mean the
        callable is never invoked and ``None`` is returned)
    :param delay: seconds to sleep before the second attempt
    :param backoff: factor the sleep time is multiplied by after each failure
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            attempt = 0
            while attempt < max_retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    attempt += 1
                    if attempt < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")
                        time.sleep(wait)
                        wait *= backoff
                        continue
                    # Out of attempts: report the last error and give up.
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None
            # Only reached when max_retries <= 0, i.e. no attempt was made.
            return None
        return wrapper
    return decorator
def async_retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """
    Generic retry decorator for coroutine functions.

    The wrapped coroutine is awaited at most ``max_retries`` times.  Any
    ``Exception`` it raises is logged and swallowed; when every attempt has
    failed the wrapper returns ``None`` instead of raising.

    :param max_retries: total number of attempts (values <= 0 mean the
        coroutine is never awaited and ``None`` is returned)
    :param delay: seconds to sleep before the second attempt
    :param backoff: factor the sleep time is multiplied by after each failure
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            wait = delay
            attempt = 0
            while attempt < max_retries:
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    attempt += 1
                    if attempt < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")
                        # Non-blocking pause so other tasks keep running.
                        await asyncio.sleep(wait)
                        wait *= backoff
                        continue
                    # Out of attempts: report the last error and give up.
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None
            # Only reached when max_retries <= 0, i.e. no attempt was made.
            return None
        return wrapper
    return decorator
# 比特浏览器模块
class BitBrowser:
    """Client for the BitBrowser local HTTP API.

    Every operation exists twice: a blocking method built on ``requests``
    (``bit_browser_*``) and a coroutine built on ``aiohttp``
    (``_bit_browser_*``).  Methods decorated with ``retry`` /
    ``async_retry`` return ``None`` instead of raising once their attempts
    are used up; undecorated methods raise directly.
    """

    # Upper bound, in seconds, for one blocking API call.  Without a timeout
    # ``requests`` waits forever when the local service stalls.
    request_timeout = 60

    def __init__(self):
        self.bit_host = "http://127.0.0.1"

    # ------------------------------------------------------------------
    # Shared helpers
    # ------------------------------------------------------------------
    def _url(self, path: str, bit_port: str | None) -> str:
        """Build the API URL for *path* on the given local port."""
        return f"{self.bit_host}:{bit_port}{path}"

    def _post(self, path: str, payload: dict, bit_port: str | None) -> dict:
        """POST *payload* as JSON (blocking) and return the decoded JSON reply."""
        response = requests.post(
            self._url(path, bit_port),
            json=payload,
            headers={'Content-Type': 'application/json'},
            timeout=self.request_timeout,
        )
        return response.json()

    async def _apost(self, path: str, payload: dict, bit_port: str | None) -> dict:
        """POST *payload* as JSON (async) and return the decoded JSON reply."""
        return await self.__request(
            'POST',
            self._url(path, bit_port),
            json=payload,
            headers={'Content-Type': 'application/json'},
        )

    @staticmethod
    def _checked(res: dict) -> dict:
        """Return *res* unchanged, raising ``Exception(res)`` unless it reports success."""
        if not res.get('success'):
            raise Exception(res)
        return res

    @staticmethod
    def _create_payload(remark, ua, host, port, proxy_user, proxy_pwd, proxy_type, urls) -> dict:
        """Build the request body shared by both "create browser" variants."""
        data = {
            'name': f'{remark[:40]}',  # window name, capped at 40 characters
            'remark': f'{remark}',
            'proxyMethod': 2,  # 2 = custom proxy, 3 = extracted IP
            # one of 'noproxy', 'http', 'https', 'socks5', 'ssh'
            'proxyType': f'{proxy_type}',
            # a null user agent is sent when ua is None
            "browserFingerPrint": {"userAgent": ua},
        }
        optional = {
            'host': host,
            'port': port,
            'proxyUserName': proxy_user,
            'proxyPassword': proxy_pwd,
            'url': urls,  # extra URLs to open, comma separated
        }
        data.update({key: value for key, value in optional.items() if value is not None})
        return data

    @staticmethod
    def _update_payload(pk, remark, proxyType, host, port, proxy_user, proxy_pwd, urls) -> dict:
        """Build the request body shared by both "partial update" variants.

        Only the arguments that were actually supplied are included.
        """
        data = {'ids': [pk]}
        if remark is not None:
            # The remark doubles as the window name.
            data['remark'] = remark
            data['name'] = remark
        if urls is not None:
            data['url'] = urls
        if proxyType != 'noproxy':
            # Proxy settings are only sent when a real proxy type is requested.
            data['proxyType'] = proxyType
            if host is not None:
                data['host'] = host
            if port is not None:
                data['port'] = int(port)
            if proxy_user is not None:
                data['proxyUserName'] = proxy_user
            if proxy_pwd is not None:
                data['proxyPassword'] = proxy_pwd
        return data

    # ------------------------------------------------------------------
    # Blocking API
    # ------------------------------------------------------------------
    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_create(self, remark: str = '指纹浏览器', ua: str = None, host: str = None, port: str = None,
                           proxy_user: str = None,
                           proxy_pwd: str = None, proxy_type: str = 'noproxy', urls: str = None,
                           bit_port: str = "54345") -> str:
        """
        Create a browser profile.

        :param remark: remark; its first 40 characters become the window name
        :param ua: user agent (optional)
        :param host: proxy host (optional)
        :param port: proxy port (optional)
        :param proxy_user: proxy account (optional)
        :param proxy_pwd: proxy password (optional)
        :param proxy_type: one of 'noproxy', 'http', 'https', 'socks5', 'ssh'
        :param urls: extra URLs to open, comma separated (optional)
        :param bit_port: port of the local API, default 54345
        :return: id of the new browser, or None when all attempts failed
        """
        payload = self._create_payload(remark, ua, host, port, proxy_user, proxy_pwd, proxy_type, urls)
        res = self._checked(self._post('/browser/update', payload, bit_port))
        return res['data']['id']

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_update(self, pk: str, remark: str = None, proxyType: str = 'noproxy', host: str = None,
                           port: str = None, proxy_user: str = None, proxy_pwd: str = None, urls: str = None,
                           bit_port: str = "54345") -> bool:
        """
        Partially update a browser profile; only supplied arguments are sent.

        :param pk: browser id
        :param remark: remark, also used as window name
        :param proxyType: noproxy|http|https|socks5 (default noproxy, which
            leaves the proxy settings untouched)
        :param host: proxy host
        :param port: proxy port (converted to int)
        :param proxy_user: proxy account
        :param proxy_pwd: proxy password
        :param urls: extra URLs to open, comma separated
        :param bit_port: port of the local API, default 54345
        :return: True on success, None when all attempts failed
        """
        payload = self._update_payload(pk, remark, proxyType, host, port, proxy_user, proxy_pwd, urls)
        self._checked(self._post('/browser/update/partial', payload, bit_port))
        return True

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_open(self, pk: str, bit_port: str = "54345") -> str:
        """
        Open a browser window.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: the ``http`` address reported by the API, or None when all
            attempts failed
        """
        res = self._checked(self._post('/browser/open', {"id": f'{pk}'}, bit_port))
        return res['data']['http']

    def bit_browser_close(self, pk: str, bit_port: str = "54345"):
        """
        Close a browser window, then wait 5 seconds for it to shut down.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: nothing
        """
        self._post('/browser/close', {'id': f'{pk}'}, bit_port)
        time.sleep(5)  # give the browser time to exit

    def bit_browser_delete(self, pk: str, bit_port: str = "54345"):
        """
        Delete a browser profile and print the API reply.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: nothing
        """
        print(self._post('/browser/delete', {'id': f'{pk}'}, bit_port))

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_get(self, page: int = 0, limit: int = 10, group_id: str | None = None,
                        bit_port: str | None = "54345") -> dict:
        """
        List browser profiles.

        :param page: page number
        :param limit: page size
        :param group_id: restrict the listing to this group (optional)
        :param bit_port: port of the local API, default 54345
        :return: the full API reply (profiles are under ``data.list``), or
            None when all attempts failed
        """
        payload = {'page': page, 'pageSize': limit}
        if group_id is not None:
            payload['groupId'] = group_id
        return self._checked(self._post('/browser/list', payload, bit_port))

    @retry(max_retries=3, delay=1.0, backoff=1.0)
    def bit_browser_detail(self, pk: str, bit_port: str = "54345") -> dict:
        """
        Fetch the details of one browser profile.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: the full API reply, or None when all attempts failed
        """
        return self._checked(self._post('/browser/detail', {'id': f'{pk}'}, bit_port))

    def bit_browser_pid(self, pk: str, bit_port: str = "54345") -> str:
        """
        Look up the process id of a running browser.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: the entry for *pk* in the API's ``data`` mapping
        :raises Exception: when the API does not report success
        :raises KeyError: when the reply has no entry for *pk*
        """
        res = self._checked(self._post('/browser/pids/alive', {"ids": [pk]}, bit_port))
        return res['data'][pk]

    # ------------------------------------------------------------------
    # Async API
    # ------------------------------------------------------------------
    @staticmethod
    async def __request(method: str, url: str, params: dict = None, **kwargs) -> dict:
        """
        Send one HTTP request in a fresh aiohttp session.

        :param method: HTTP method (GET, POST, PUT, DELETE)
        :param url: absolute URL of the API endpoint
        :param params: query parameters; booleans are sent as 'true'/'false'
        :param kwargs: forwarded to ``session.request`` (json, headers, ...)
        :return: decoded JSON reply
        """
        if params:
            # Booleans are not accepted as query values; send them as text.
            params = {k: str(v).lower() if isinstance(v, bool) else v for k, v in params.items()}
        async with aiohttp.ClientSession() as session:
            async with session.request(method, url, params=params, **kwargs) as response:
                return await response.json()

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _bit_browser_create(self, remark: str = '指纹浏览器', ua: str = None, host: str = None, port: str = None,
                                  proxy_user: str = None,
                                  proxy_pwd: str = None, proxy_type: str = 'noproxy', urls: str = None,
                                  bit_port: str = "54345") -> str:
        """
        Create a browser profile (async twin of ``bit_browser_create``).

        Sends the same body as the blocking variant, including
        ``proxyMethod``.

        :param remark: remark; its first 40 characters become the window name
        :param urls: extra URLs to open, comma separated (optional)
        :param bit_port: port of the local API, default 54345
        :return: id of the new browser, or None when all attempts failed
        """
        payload = self._create_payload(remark, ua, host, port, proxy_user, proxy_pwd, proxy_type, urls)
        res = self._checked(await self._apost('/browser/update', payload, bit_port))
        return res['data']['id']

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _bit_browser_update(self, pk: str, remark: str = None, proxyType: str = 'noproxy', host: str = None,
                                  port: str = None, proxy_user: str = None, proxy_pwd: str = None, urls: str = None,
                                  bit_port: str = "54345") -> bool:
        """
        Partially update a browser profile (async twin of ``bit_browser_update``).

        :param pk: browser id
        :param remark: remark, also used as window name
        :param urls: extra URLs to open, comma separated
        :param bit_port: port of the local API, default 54345
        :return: True on success, None when all attempts failed
        """
        payload = self._update_payload(pk, remark, proxyType, host, port, proxy_user, proxy_pwd, urls)
        self._checked(await self._apost('/browser/update/partial', payload, bit_port))
        return True

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _bit_browser_open(self, pk: str, bit_port: str = "54345") -> str:
        """
        Open a browser window.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: the ``http`` address reported by the API, or None when all
            attempts failed
        """
        res = self._checked(await self._apost('/browser/open', {"id": f'{pk}'}, bit_port))
        return res['data']['http']

    async def _bit_browser_close(self, pk: str, bit_port: str = "54345"):
        """
        Close a browser window, then wait 5 seconds for it to shut down.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: nothing
        """
        await self._apost('/browser/close', {'id': f'{pk}'}, bit_port)
        await asyncio.sleep(5)  # give the browser time to exit

    async def _bit_browser_delete(self, pk: str, bit_port: str = "54345"):
        """
        Delete a browser profile and print the API reply.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: nothing
        """
        print(await self._apost('/browser/delete', {'id': f'{pk}'}, bit_port))

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _bit_browser_get(self, page: int = 0, limit: int = 10, group_id: str | None = None,
                               bit_port: str | None = "54345",
                               ) -> dict:
        """
        List browser profiles.

        :param page: page number
        :param limit: page size
        :param group_id: restrict the listing to this group (optional)
        :param bit_port: port of the local API, default 54345
        :return: the full API reply (profiles are under ``data.list``), or
            None when all attempts failed
        """
        payload = {'page': page, 'pageSize': limit}
        if group_id is not None:
            payload['groupId'] = group_id
        return self._checked(await self._apost('/browser/list', payload, bit_port))

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _bit_browser_detail(self, pk: str, bit_port: str = "54345") -> dict:
        """
        Fetch the details of one browser profile.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: the full API reply, or None when all attempts failed
        """
        return self._checked(await self._apost('/browser/detail', {"id": pk}, bit_port))

    @async_retry(max_retries=3, delay=1.0, backoff=1.0)
    async def _bit_browser_kill_pid(self, pk: str, bit_port: str = "54345") -> str:
        """
        Look up the process id of a running browser and force-kill it.

        :param pk: browser id
        :param bit_port: port of the local API, default 54345
        :return: the process id, or None when all attempts failed
        """
        import subprocess

        res = self._checked(await self._apost('/browser/pids/alive', {"ids": [pk]}, bit_port))
        pid = res['data'][pk]
        if pid is not None:
            # Argument lists instead of a shell string: the pid comes from an
            # HTTP reply and must never be interpreted by a shell.
            if os.name == 'nt':
                command = ['taskkill', '/F', '/PID', str(pid)]
            else:
                command = ['kill', '-9', str(pid)]
            subprocess.run(command, check=False)
        return pid
async def main():
    """Print the fingerprint and proxy settings of every browser in one group.

    Pages through the group 100 profiles at a time and stops at the first
    empty page.  ``_bit_browser_get`` / ``_bit_browser_detail`` return
    ``None`` once their retries are exhausted, so both results are checked
    before being indexed.
    """
    bit = BitBrowser()
    page = 0
    while True:
        listing = await bit._bit_browser_get(
            page=page,
            limit=100,
            group_id='4028808b9a52223a019a581bbea1275c')
        if listing is None:
            # Listing failed even after retries; nothing more can be paged.
            logger.warning(f"could not list browsers on page {page}, stopping")
            break
        profiles = listing["data"]["list"]
        if not profiles:
            break
        for profile in profiles:
            browser_id = profile["id"]
            # Read the full profile to get fingerprint and proxy settings.
            detail = await bit._bit_browser_detail(browser_id)
            if detail is None:
                logger.warning(f"could not read details of browser {browser_id}, skipping")
                continue
            data = detail["data"]
            ua = data["browserFingerPrint"]["userAgent"]
            proxy_type = data.get("proxyType")
            host = data.get("host")
            port = data.get("port")
            proxy_account = data.get("proxyUserName")
            proxy_password = data.get("proxyPassword")
            print(f'id -->{browser_id}')
            print(f'ua -->{ua}')
            print(f'proxy_type -->{proxy_type}')
            print(f'host -->{host}')
            print(f'port -->{port}')
            print(f'proxy_account -->{proxy_account}')
            print(f'proxy_password -->{proxy_password}')
            print('=' * 50)
        # Advance once per page, after every profile on it was handled.
        page += 1
# Module-level BitBrowser instance, created when this module is imported.
bit_browser = BitBrowser()
# Manual entry point for main(), currently disabled:
# if __name__ == '__main__':
# asyncio.run(main())

835
spider/mail_.py Normal file
View File

@@ -0,0 +1,835 @@
import asyncio
import imaplib
import email
import random
import socket
import string
import time
from email.header import decode_header
from datetime import timezone, timedelta
import email.utils
import aiohttp
import socks
import requests
import smtplib
from email.mime.text import MIMEText
from email.header import Header
from functools import wraps
from loguru import logger
def retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """
    Generic retry decorator for synchronous callables.

    The wrapped callable is invoked at most ``max_retries`` times.  Any
    ``Exception`` it raises is logged and swallowed; when every attempt has
    failed the wrapper returns ``None`` instead of raising.

    :param max_retries: total number of attempts (values <= 0 mean the
        callable is never invoked and ``None`` is returned)
    :param delay: seconds to sleep before the second attempt
    :param backoff: factor the sleep time is multiplied by after each failure
    """
    def decorator(func):
        @wraps(func)
        def wrapper(*args, **kwargs):
            wait = delay
            attempt = 0
            while attempt < max_retries:
                try:
                    return func(*args, **kwargs)
                except Exception as e:
                    attempt += 1
                    if attempt < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")
                        time.sleep(wait)
                        wait *= backoff
                        continue
                    # Out of attempts: report the last error and give up.
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None
            # Only reached when max_retries <= 0, i.e. no attempt was made.
            return None
        return wrapper
    return decorator
def async_retry(max_retries: int = 3, delay: float = 1.0, backoff: float = 1.0):
    """
    Generic retry decorator for coroutine functions.

    The wrapped coroutine is awaited at most ``max_retries`` times.  Any
    ``Exception`` it raises is logged and swallowed; when every attempt has
    failed the wrapper returns ``None`` instead of raising.

    :param max_retries: total number of attempts (values <= 0 mean the
        coroutine is never awaited and ``None`` is returned)
    :param delay: seconds to sleep before the second attempt
    :param backoff: factor the sleep time is multiplied by after each failure
    """
    def decorator(func):
        @wraps(func)
        async def wrapper(*args, **kwargs):
            wait = delay
            attempt = 0
            while attempt < max_retries:
                try:
                    return await func(*args, **kwargs)
                except Exception as e:
                    attempt += 1
                    if attempt < max_retries:
                        logger.warning(f"正在重试 {func.__name__} {attempt + 1}/{max_retries} 因错误: {e}")
                        # Non-blocking pause so other tasks keep running.
                        await asyncio.sleep(wait)
                        wait *= backoff
                        continue
                    # Out of attempts: report the last error and give up.
                    logger.warning(f"函数 {func.__name__} 在尝试了 {max_retries} 次后失败,错误信息: {e}")
                    return None
            # Only reached when max_retries <= 0, i.e. no attempt was made.
            return None
        return wrapper
    return decorator
# 域名管理类 - 高内聚低耦合的域名管理方案
class DomainManager:
    """
    Registry of mailbox domains, addressed by a numeric "mail type".

    A mail type is simply the position of a domain in the internal list, so
    the order of that list is part of the contract: entries must not be
    removed or reordered.
    """

    def __init__(self):
        # Position == mail type.  "qianyouduo.com" sits at both index 1 and
        # index 5; lookups by name resolve to the first one.
        self._domains = [
            "gmail.com",
            "qianyouduo.com",
            "rxybb.com",
            "cqrxy.vip",
            "0n.lv",
            "qianyouduo.com",
            "ziyouzuan.com",
            "emaing.online",
            "emaing.fun",
            "emaing.asia",
            "isemaing.site",
            "emaing.cyou",
            "emaing.site",
            "emaing.icu",
            "emaing.store",
            "emaing.pw",
            "emaing.xyz",
            "qydkjgs.asia",
            "qydgs.autos",
            "qydkj.homes",
            "qydkjgs.baby",
            "qydkj.baby",
            "qydkj.cyou",
            "qydkjgs.autos",
            "qydkj.autos",
            "qydkjgs.cyou",
            "qydkjgs.homes",
            "qydgs.asia",
            "qydkj.asia",
            "qydgs.baby",
            "qydgs.cyou",
            "qydgs.homes",
            "lulanjing.asia",
            "lisihan.asia",
            "mmwan.asia",
            "xyttan.asia",
            "zpaily.asia",
            "youxinzhiguo.asia",
            "huijinfenmu.asia",
            "linghao.asia",
            "cqhc.asia",
            "huacun.asia",
            "huachen.asia",
            "yisabeier.asia",
            "xinxinr.cyou",
            "lilisi.asia",
            "xybbwan.cyou",
            "zhongjing.cyou",
            "zprxy.cyou",
            "cqhuacun.cyou",
            "huazong.icu",
            "huacun.cyou"
        ]

    def get_domain_by_type(self, mail_type: int) -> str:
        """
        Map a mail type to its domain.

        :param mail_type: mail type number (index into the domain list)
        :return: the matching domain; out-of-range types yield the entry at
            index 1 (qianyouduo.com)
        """
        in_range = 0 <= mail_type < len(self._domains)
        return self._domains[mail_type if in_range else 1]

    def get_domain_type(self, domain: str) -> int:
        """
        Map a domain to its mail type.

        :param domain: domain name
        :return: index of the first matching entry, or 1 when the domain is
            not registered
        """
        if domain in self._domains:
            return self._domains.index(domain)
        return 1

    def get_imap_server(self, mail_type: int) -> str:
        """
        Return the IMAP host for a mail type.

        :param mail_type: mail type number
        :return: ``imap.<domain>`` for the domain the type resolves to
        """
        resolved = self.get_domain_by_type(mail_type)
        return f"imap.{resolved}"

    def get_imap_server_by_domain(self, domain: str) -> str:
        """
        Return the IMAP host for a domain.

        :param domain: domain name
        :return: ``imap.<domain>``
        """
        return f"imap.{domain}"

    def is_valid_domain(self, domain: str) -> bool:
        """
        Tell whether a domain is registered.

        :param domain: domain name
        :return: True when the domain is in the list
        """
        return domain in self._domains

    def get_all_domains(self) -> list:
        """
        Return every registered domain.

        :return: a new list, so callers cannot modify the registry
        """
        return list(self._domains)

    def get_domain_count(self) -> int:
        """
        Return how many entries the registry holds.

        :return: length of the domain list
        """
        return len(self._domains)

    def get_creatable_domains(self) -> list:
        """
        Return the domains mailboxes can be created on.

        :return: every registered domain except gmail.com
        """
        creatable = []
        for candidate in self._domains:
            if candidate != "gmail.com":
                creatable.append(candidate)
        return creatable

    def get_creatable_domain_by_type(self, mail_type: int) -> str:
        """
        Map a mail type to a domain mailboxes can be created on.

        :param mail_type: mail type number
        :return: the matching domain, or the entry at index 1
            (qianyouduo.com) when the type resolves to gmail.com
        """
        resolved = self.get_domain_by_type(mail_type)
        return self._domains[1] if resolved == "gmail.com" else resolved
# 邮箱模块
class Mail:
def __init__(self):
    """Set the account API base URL and create the domain registry."""
    # Base URL used to build the account API endpoints.
    self.api_host = 'http://111.10.175.206:5020'
    # Resolves mail types to domains and IMAP hosts.
    self.domain_manager = DomainManager()
def email_account_read(self, pk: int = None, account: str = None, status: bool = None, host: str = None,
                       proxy_account: str = None,
                       parent_account: str = None, order_by: str = None, level: int = None,
                       update_time_start: str = None, update_time_end: str = None, res_count: bool = False,
                       create_time_start: str = None, create_time_end: str = None, page: int = None,
                       limit: int = None) -> dict:
    """
    Read mail accounts from the account API.

    When ``pk`` is given, that single account is fetched and every other
    argument is ignored; the reply is returned without checking its code.
    Otherwise the supplied arguments are sent as query filters.

    :param pk: primary key (optional)
    :param account: account (optional)
    :param status: status (optional)
    :param host: proxy host (optional)
    :param proxy_account: proxy account (optional)
    :param parent_account: parent mailbox account (optional)
    :param order_by: id|create_time|update_time, prefix with - for
        descending order (optional)
    :param level: mailbox level (optional)
    :param update_time_start: start of the update-time range (optional)
    :param update_time_end: end of the update-time range (optional)
    :param res_count: ask for the total count; only sent when truthy
    :param create_time_start: start of the creation-time range (optional)
    :param create_time_end: end of the creation-time range (optional)
    :param page: page number (optional)
    :param limit: page size (optional)
    :return: decoded JSON reply; success is indicated by code == 200
    :raises Exception: when a filtered query answers with a code other than
        200, 400 or 404
    """
    # Bound every call: without a timeout requests can block forever when
    # the API host stops answering.
    timeout = 30
    if pk is not None:
        return requests.get(f'{self.api_host}/mail/account/{pk}', timeout=timeout).json()

    # Insertion order is the order the filters appear in the query string.
    filters = {
        'account': account,
        'status': status,
        'host': host,
        'proxy_account': proxy_account,
        'parent_account': parent_account,
        'order_by': order_by,
        'level': level,
        'create_time_start': create_time_start,
        'create_time_end': create_time_end,
        'update_time_start': update_time_start,
        'update_time_end': update_time_end,
        # Unlike the other filters this one is dropped when merely falsy.
        'res_count': res_count or None,
        'page': page,
        'limit': limit,
    }
    params = {key: value for key, value in filters.items() if value is not None}
    res = requests.get(f'{self.api_host}/mail/account', params=params, timeout=timeout).json()
    if res.get('code') not in [200, 400, 404]:
        raise Exception(res)
    return res
# Mailbox administration against the mail server's admin API.
def _admin_headers(self) -> dict:
    """
    Return the headers sent with every mail-admin API request.

    NOTE(review): the Authorization value is a Basic credential hard-coded
    in source; it should be moved to configuration.
    """
    return {
        "Accept-Language": "zh-CN,zh;q=0.9",
        "Authorization": "Basic YWRtaW5AcWlhbnlvdWR1by5jb206WnBhaWx5ODgh",
        "Cache-Control": "no-cache",
        "Connection": "keep-alive",
        "Content-Type": "application/json",
        "Origin": "https://mail.qianyouduo.com",
        "Pragma": "no-cache",
        "Referer": "https://mail.qianyouduo.com/admin/api/doc",
        "Sec-Fetch-Dest": "empty",
        "Sec-Fetch-Mode": "cors",
        "Sec-Fetch-Site": "same-origin",
        "User-Agent": "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36",
        "accept": "*/*",
        "sec-ch-ua": "\"Google Chrome\";v=\"131\", \"Chromium\";v=\"131\", \"Not_A Brand\";v=\"24\"",
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": "\"macOS\""
    }

def _random_address(self, count: int, mail_type: int) -> tuple[str, str]:
    """Pick a random lower-case local part and a creatable domain for *mail_type*."""
    name = ''.join(random.choices(string.ascii_letters + string.digits, k=count)).lower()
    # The domain manager never hands out gmail.com for creation.
    domain = self.domain_manager.get_creatable_domain_by_type(mail_type)
    return name, domain

# Create a mailbox with a random name
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create_random(self, count: int = 8, pwd: str = 'Zpaily88', mail_type: int = 1) -> str:
    """
    Create a mailbox with a random local part.

    :param count: length of the local part (default 8)
    :param pwd: mailbox password (default Zpaily88)
    :param mail_type: mail type (1 qianyouduo.com, 2 rxybb.com,
        3 cqrxy.vip, 4 0n.lv; default 1)
    :return: the mailbox address; also returned when the server says the
        address already exists.  None when all attempts failed.
    """
    name, domain = self._random_address(count, mail_type)
    address = f"{name}@{domain}"
    response = requests.post(
        "https://mail.qianyouduo.com/admin/api/v1/boxes",
        headers=self._admin_headers(),
        json={"name": name, "email": address, "passwordPlaintext": pwd},
        timeout=30,  # never wait forever on the admin API
    )
    if 'Validation errors: [user] This combination of username and domain is already in database' in response.text:
        return address
    if response.status_code != 201:
        raise Exception(response.status_code)
    return address

# Create a mailbox with a random name (async)
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_create_random(self, count: int = 8, pwd: str = 'Zpaily88', mail_type: int = 1) -> str:
    """
    Create a mailbox with a random local part (async twin of
    ``email_create_random``).

    :param count: length of the local part (default 8)
    :param pwd: mailbox password (default Zpaily88)
    :param mail_type: mail type (1 qianyouduo.com, 2 rxybb.com,
        3 cqrxy.vip, 4 0n.lv; default 1)
    :return: the mailbox address; also returned when the server says the
        address already exists.  None when all attempts failed.
    """
    name, domain = self._random_address(count, mail_type)
    address = f"{name}@{domain}"
    payload = {"name": name, "email": address, "passwordPlaintext": pwd}
    async with aiohttp.ClientSession() as session:
        async with session.post("https://mail.qianyouduo.com/admin/api/v1/boxes",
                                headers=self._admin_headers(), json=payload) as response:
            status = response.status
            text = await response.text()
            if 'Validation errors: [user] This combination of username and domain is already in database' in text:
                return address
            if status != 201:
                raise Exception(status)
            return address

# Create a specific mailbox
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_create(self, account: str, pwd: str = 'Zpaily88') -> str | None:
    """
    Create the mailbox *account*.

    Because of the retry wrapper, errors raised here (including the
    ValueError for an unsupported domain) are retried and finally reach the
    caller as None.

    :param account: mailbox address, ``name@domain``
    :param pwd: mailbox password (default Zpaily88)
    :return: the mailbox address (status 201 or 400), or None for gmail.com
        addresses and when all attempts failed
    """
    parts = account.split('@')
    name, domain = parts[0], parts[1]
    # gmail.com mailboxes cannot be created through the admin API.
    if domain == "gmail.com":
        return None
    if not self.domain_manager.is_valid_domain(domain):
        raise ValueError(f"不支持的域名: {domain},支持的域名列表: {self.domain_manager.get_all_domains()}")
    address = f"{name}@{domain}"
    response = requests.post(
        "https://mail.qianyouduo.com/admin/api/v1/boxes",
        headers=self._admin_headers(),
        json={"name": name, "email": address, "passwordPlaintext": pwd},
        timeout=30,  # never wait forever on the admin API
    )
    print(f'创建邮箱响应: {response.status_code}')
    # 400 is accepted alongside 201.
    if response.status_code not in [201, 400]:
        raise Exception(response.status_code)
    return address

# Create a specific mailbox (async)
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_create(self, account: str, pwd: str = 'Zpaily88') -> str | None:
    """
    Create the mailbox *account* (async twin of ``email_create``).

    Because of the retry wrapper, errors raised here (including the
    ValueError for an unsupported domain) are retried and finally reach the
    caller as None.

    :param account: mailbox address, ``name@domain``
    :param pwd: mailbox password (default Zpaily88)
    :return: the mailbox address (status 201 or 400), or None for gmail.com
        addresses and when all attempts failed
    """
    parts = account.split('@')
    name, domain = parts[0], parts[1]
    # gmail.com mailboxes cannot be created through the admin API.
    if domain == "gmail.com":
        return None
    if not self.domain_manager.is_valid_domain(domain):
        raise ValueError(f"不支持的域名: {domain},支持的域名列表: {self.domain_manager.get_all_domains()}")
    address = f"{name}@{domain}"
    payload = {"name": name, "email": address, "passwordPlaintext": pwd}
    async with aiohttp.ClientSession() as session:
        async with session.post("https://mail.qianyouduo.com/admin/api/v1/boxes",
                                headers=self._admin_headers(), json=payload) as response:
            status = response.status
            # 400 is accepted alongside 201.
            if status not in [201, 400]:
                raise Exception(f'status code: {status}')
            return address

# Delete a mailbox
@retry(max_retries=3, delay=1.0, backoff=1.0)
def email_delete(self, account: str) -> bool:
    """
    Delete the mailbox *account*.

    :param account: mailbox address
    :return: True when the server answered 204 or 404, False for
        gmail.com addresses (never deleted), None when all attempts failed
    """
    if '@gmail.com' in account:
        return False
    response = requests.delete(
        f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}",
        headers=self._admin_headers(),
        timeout=30,  # never wait forever on the admin API
    )
    print(f'删除邮箱响应: --> {response.status_code}')
    # 404 is accepted alongside 204.
    if response.status_code not in [204, 404]:
        raise Exception(response.status_code)
    return True

# Delete a mailbox (async)
@async_retry(max_retries=3, delay=1.0, backoff=1.0)
async def _email_delete(self, account: str) -> bool:
    """
    Delete the mailbox *account* (async twin of ``email_delete``).

    :param account: mailbox address
    :return: True when the server answered 204 or 404, False for
        gmail.com addresses (never deleted), None when all attempts failed
    """
    if '@gmail.com' in account:
        return False
    async with aiohttp.ClientSession() as session:
        async with session.delete(f"https://mail.qianyouduo.com/admin/api/v1/boxes/{account}",
                                  headers=self._admin_headers()) as response:
            status = response.status
            # 404 is accepted alongside 204.
            if status not in [204, 404]:
                raise Exception(f'status code: {status}')
            return True
# 处理邮件正文
@staticmethod
def extract_body(msg):
"""
提取邮件正文,优先返回 HTML 文本
- 更健壮的字符集解析:优先使用 part 的 charset 信息,失败回退到 utf-8 / latin-1
- 仅处理 inline 的 text/html 与 text/plain 内容
"""
html_text = None
plain_text = None
def _decode_part(part):
payload = part.get_payload(decode=True)
if payload is None:
return None
# 优先从内容中解析 charset
charset = (part.get_content_charset() or part.get_param('charset') or 'utf-8')
try:
return payload.decode(charset, errors='replace')
except LookupError:
# 未知编码时回退
try:
return payload.decode('utf-8', errors='replace')
except Exception:
return payload.decode('latin-1', errors='replace')
if msg.is_multipart():
for part in msg.walk():
content_type = part.get_content_type()
content_disposition = part.get_content_disposition()
if content_type == "text/html" and (not content_disposition or content_disposition == "inline"):
html_text = _decode_part(part) or html_text
elif content_type == "text/plain" and (not content_disposition or content_disposition == "inline"):
plain_text = _decode_part(part) or plain_text
else:
content_type = msg.get_content_type()
if content_type == "text/html":
html_text = _decode_part(msg)
elif content_type == "text/plain":
plain_text = _decode_part(msg)
# 优先返回 HTML 文本,如果没有 HTML 文本,则返回纯文本
return html_text or plain_text or ""
# 转换邮件日期
@staticmethod
def convert_to_china_time(date_str):
"""
将邮件日期转换为10位时间戳中国时区
- 保留原始邮件的时区信息;若无时区,则按 UTC 处理
- 异常时返回当前时间戳,避免解析失败导致崩溃
"""
try:
email_date = email.utils.parsedate_to_datetime(date_str)
if email_date is None:
return int(time.time())
if email_date.tzinfo is None:
email_date = email_date.replace(tzinfo=timezone.utc)
china_time = email_date.astimezone(timezone(timedelta(hours=8)))
return int(china_time.timestamp())
except Exception:
return int(time.time())
# 获取邮件
def email_read(self, user: str, from_: str, limit: int = 1, is_del: bool = False) -> list | None:
"""
获取最新邮件
:param user: 母账号
:param from_: 发件人匹配关键字(可为邮箱或显示名,大小写不敏感)
:param limit: 获取邮件数量(默认1封)
:param is_del: 是否删除整个邮箱账号(非 Gmail 才会执行账号删除)
:return: 返回邮件列表,每个元素格式为:
{
"title": "邮件标题",
"from": "发件人",
"date": "邮件日期(中国时区时间戳)",
"content": "邮件正文",
"code": 200
}
"""
user_li = user.split('@')
domain = user_li[1]
# 使用域名管理器获取邮箱类型
if not self.domain_manager.is_valid_domain(domain):
return None
mail_type = self.domain_manager.get_domain_type(domain)
# 仅对 Gmail 进行点号归一化,其它域名按原样处理
local_part = user_li[0]
if domain == "gmail.com":
local_part = local_part.replace('.', '')
user = local_part + '@' + user_li[1]
proxy_host = None
proxy_port = None
proxy_user = None
proxy_pwd = None
if mail_type == 0:
res = self.email_account_read(parent_account=user, status=True, level=0)
if res['code'] != 200:
return None
pwd = res['items'][0]['parent_pwd']
proxy_host = res['items'][0]['host']
proxy_port = res['items'][0]['port']
proxy_user = res['items'][0]['proxy_account']
proxy_pwd = res['items'][0]['proxy_pwd']
else:
pwd = 'Zpaily88'
items = [] # 存储邮件列表
# 保存原始socket
original_socket = None
if proxy_host is not None and proxy_port is not None:
original_socket = socket.socket
if proxy_user is not None and proxy_pwd is not None:
socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True, proxy_user, proxy_pwd)
else:
socks.setdefaultproxy(socks.SOCKS5, proxy_host, int(proxy_port), True)
socket.socket = socks.socksocket
imap_server = None
had_error = False
try:
# 在设置代理后创建IMAP连接
imap_server = imaplib.IMAP4_SSL(self.domain_manager.get_imap_server(mail_type))
if not imap_server:
had_error = True
else:
# pwd去除空格
pwd = pwd.replace(' ', '')
# print(f'pwd: {pwd}')
imap_server.login(user, pwd)
status, _ = imap_server.select("INBOX")
if status != 'OK':
had_error = True
else:
status, email_ids = imap_server.search(None, "ALL")
if status != 'OK':
had_error = True
else:
email_id_list = email_ids[0].split()
# 获取最近limit条邮件ID
recent_ids = email_id_list[-20:] # 仍然获取最近20封以确保有足够的邮件可以筛选
found_count = 0 # 记录找到的符合条件的邮件数量
for email_id in recent_ids[::-1]: # 从最新的邮件开始处理
if found_count >= limit: # 如果已经找到足够数量的邮件,就退出循环
break
status, msg_data = imap_server.fetch(email_id, "(RFC822)")
for response in msg_data:
if isinstance(response, tuple):
msg = email.message_from_bytes(response[1])
# 兼容性发件人匹配:解析地址与显示名,大小写不敏感,支持子串匹配
from_field = msg.get("From", "")
addresses = email.utils.getaddresses([from_field])
needle = (from_ or "").lower()
candidates = []
for name, addr in addresses:
if name:
candidates.append(name.lower())
if addr:
candidates.append(addr.lower())
if any(needle in c for c in candidates):
# 标题解码,处理无标题或编码缺失的情况
raw_subject = msg.get("Subject")
subject = ""
if raw_subject is not None:
dh = decode_header(raw_subject)
if dh:
s, enc = dh[0]
if isinstance(s, bytes):
try:
subject = s.decode(enc or 'utf-8', errors='replace')
except LookupError:
subject = s.decode('utf-8', errors='replace')
else:
subject = s
item = {
"title": subject,
"from": msg["From"],
"content": self.extract_body(msg),
"code": 200
}
# 获取并转换邮件时间
date_str = msg["Date"]
if date_str:
item["date"] = self.convert_to_china_time(date_str)
items.append(item)
found_count += 1
if found_count >= limit: # 如果已经找到足够数量的邮件,就跳出内层循环
break
# 读取完成不再对单封邮件做删除标记与 expunge
except imaplib.IMAP4.error as e:
# items.append({'title': 'error', 'from': 'error', 'content': f'连接邮箱失败: {e}', 'code': 500})
had_error = True
except Exception as e:
# items.append({'title': 'error', 'from': 'error', 'content': f'获取邮件异常: {e}', 'code': 500})
had_error = True
finally:
try:
# 检查连接是否建立
if 'imap_server' in locals() and imap_server is not None:
try:
# 先检查是否处于已选择状态
if hasattr(imap_server, 'state') and imap_server.state == 'SELECTED':
imap_server.close()
except Exception as e:
logger.error(f"关闭IMAP文件夹时发生错误: {e}")
try:
# 无论如何尝试登出
imap_server.logout()
except Exception as e:
logger.error(f"登出IMAP服务器时发生错误: {e}")
# 在Windows上可能需要强制关闭socket
try:
if hasattr(imap_server, 'sock') and imap_server.sock is not None:
imap_server.sock.close()
except Exception as sock_err:
logger.error(f"强制关闭socket时发生错误: {sock_err}")
except Exception as outer_e:
logger.error(f"处理IMAP连接关闭时发生错误: {outer_e}")
finally:
# 重置socket设置如果使用了代理
if proxy_host is not None and original_socket is not None:
socket.socket = original_socket
# 若成功获取到至少一封匹配邮件且请求删除,则删除整个邮箱账号
if is_del and len(items) > 0:
try:
self.email_delete(user)
except Exception as del_err:
logger.error(f"删除邮箱账号失败: {del_err}")
if had_error:
return None
if len(items) == 0:
return None
return items # 返回邮件列表
async def main():
    """
    Usage example: create a mailbox on a managed domain, then delete it.
    """
    client = Mail()
    address = '0gz3vvd4@' + 'qydgs.asia'
    created = client.email_create(address)
    print(f"创建的邮箱: {created}")
    # Random mailbox creation example:
    # random_email = mail.email_create_random(count=8, mail_type=1)
    # print(f"创建的随机邮箱: {random_email}")
    # Reading mail example:
    # res = mail.email_read('0gz3vvd4@qydgs.asia', '@', 1, is_del=True)
    # print(f'读取的邮件: {res}')
    # Remove the mailbox again.
    deleted = client.email_delete(address)
    print(f"删除的邮箱: {deleted}")
# Shared module-level Mail instance, created at import time.
mail_ = Mail()
# Script entry point for the usage example, currently disabled:
# if __name__ == '__main__':
#     asyncio.run(main())

323
spider/main.py Normal file
View File

@@ -0,0 +1,323 @@
from math import log
import random
from re import S
import time
from tkinter import N
from DrissionPage import Chromium
from loguru import logger
from work import get_random_canada_info
from mail_ import mail_
from bit_browser import bit_browser
class Auto:
def __init__(self,http:str):
self.browser = Chromium(http)
self.tab = self.browser.latest_tab
pass
# cf打码
def solve_cloudflare(self):
tab = self.browser.latest_tab
for _ in range(8):
self.tab.wait(1)
try:
shadow1 = tab.ele(
'x://*[@name="cf-turnstile-response"]').parent().shadow_root
iframe = shadow1.get_frame(1)
if iframe:
logger.debug("找到Cloudflare iframe")
shadow2 = iframe.ele('x:/html/body').shadow_root
if shadow2:
logger.debug("找到Cloudflare iframe body shadow root")
status = shadow2.ele(
'x://span[text()="Success!"]', timeout=1)
if status:
logger.debug("Cloudflare验证成功")
return True
checkbox = shadow2.ele(
'x://input[@type="checkbox"]', timeout=1)
if checkbox:
checkbox.click()
logger.debug("点击Cloudflare复选框")
tab.wait(2)
logger.debug("重新获取状态")
# return False
except Exception as e:
# logger.error(f"处理Cloudflare异常: {e}")
logger.debug(f"cloudflare处理通过: {e}")
return True
tab.wait(1)
return False
# 打开URL
def open_url(self, url: str):
self.tab.get(url)
# 等待进入首页
def wait_home(self):
logger.debug("等待进入首页")
jc = 0
while True:
if jc > 5:
logger.error("等待进入首页超过5次未成功")
return False
self.tab.wait(1)
# 判断cf是否通过
bol = self.solve_cloudflare()
if not bol:
logger.debug("Cloudflare验证失败.")
continue
else:
logger.debug("Cloudflare验证成功.")
self.tab.wait(1.5)
html = self.tab.url
logger.debug(f"当前URL: {html}")
if 'https://veritaconnect.ca/canadianbreadsettlement/en-us' == html:
logger.debug("成功进入首页")
return True
jc += 1
# 点击continue按钮
def click_continue(self, bl: bool = False):
logger.debug("点击Continue按钮")
jc = 0
while True:
if jc > 5:
logger.error("点击Continue按钮超过5次未成功")
return False
try:
continue_button = self.tab.ele(
't:button@text():Continue', timeout=1)
if continue_button:
# 判断cf是否通过
bol = self.solve_cloudflare()
if not bol:
logger.debug("Cloudflare验证失败..")
continue
else:
logger.debug("Cloudflare验证成功..")
continue_button.click()
logger.debug("点击Continue按钮成功")
self.tab.wait(1.5)
bol = self.tab.ele(
't:li@text():There was a problem, please try again.', timeout=1)
if bol:
if bl:
logger.debug("多次异常界面, 结束继续点击")
return False
logger.debug("异常界面")
self.tab.wait(1)
return self.click_continue(bl=True)
# bol = self.tab.ele('t:h2@text()=Claim Form', timeout=1)
# if bol:
# logger.debug("成功进入问卷界面")
# return True
html = self.tab.url
logger.debug(f"当前URL: {html}")
if 'https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm' in html:
logger.debug("成功进入问卷界面")
return True
except Exception as e:
logger.error(f"点击Continue按钮异常: {e}")
self.tab.wait(1)
return False
# 随机取城市
def get_random_city(self, province: str|None=None):
cities = {
"Alberta": ["Calgary", "Edmonton"],
"British Columbia": ["Vancouver"],
# "Manitoba": ["Winnipeg", "Rochester"],
# "New Brunswick": ["Fredericton", "Moncton"],
# "Newfoundland and Labrador": ["St. John's", "Halifax"],
"Nova Scotia": ["Halifax"],
"Ontario": ["Toronto"],
# "Prince Edward Island": ["Charlottetown", "St. John's"],
# "Quebec": ["Quebec City", "Montreal"],
# "Saskatchewan": ["Saskatoon", "Regina"],
}
if province is None:
province = random.choice(list(cities.keys()))
return province,random.choice(cities.get(province, []))
# 填写问卷
def fill_questionnaire(self):
province, city = self.get_random_city()
info = get_random_canada_info(province, city)
first_name = info["firstname"]
last_name = info["lastname"]
# 将生日格式从 '8/28/1995' 转为 'yyyy-mm-dd'日月不足两位补0
birthday = info["birthday"]
current_address = info["address_str"]
city = info["city_name"]
province = info["province"]
postal_code = info["postcode"]
email = 'sfsf@qq.com'
phone = info["phone"]
text = '3333'
# 人数
person_count = str(random.randint(3, 5))
logger.debug("填写问卷")
self.tab.wait(0.1)
logger.debug(f"填写first_name: {first_name}")
self.tab.ele('t:input@id=FirstName').set.value(first_name)
self.tab.wait(0.1)
logger.debug(f"填写last_name: {last_name}")
self.tab.ele('t:input@id=LastName').set.value(last_name)
self.tab.wait(0.1)
logger.debug(f"填写birthday: {birthday}")
self.tab.ele('t:input@id=DateOfBirth').set.value(birthday)
self.tab.wait(0.1)
logger.debug(f"填写current_address: {current_address}")
self.tab.ele('t:input@id=AddressLine1').set.value(current_address)
self.tab.wait(0.1)
logger.debug(f"填写city: {city}")
self.tab.ele('t:input@id=City').set.value(city)
self.tab.wait(0.1)
logger.debug(f"填写province: {province}")
self.tab.ele(
't:select@id=CanProv').ele(f't:option@text()={province}').click()
self.tab.wait(0.1)
logger.debug(f"填写postal_code: {postal_code}")
self.tab.ele('t:input@id=CanPostal').set.value(postal_code)
self.tab.wait(0.1)
logger.debug(f"填写NumberOfAdults: {person_count}")
self.tab.ele(
't:select@id=NumberOfAdults').ele(f't:option@text()={person_count}').click()
self.tab.wait(0.1)
logger.debug(f"选择地址没变")
self.tab.eles('t:input@id=IsDifferentAddress')[1].click()
self.tab.wait(0.1)
logger.debug(f"填写email: {email}")
self.tab.ele('t:input@id=EmailAddress').set.value(email)
self.tab.wait(0.1)
logger.debug(f"填写ConfirmEmailAddress: {email}")
self.tab.ele('t:input@id=ConfirmEmailAddress').set.value(email)
self.tab.wait(0.1)
logger.debug(f"填写phone: {phone}")
self.tab.ele('t:input@id=PhoneNumber').set.value(phone)
self.tab.wait(0.1)
logger.debug(f"选择同意条款")
self.tab.ele('t:input@id=IVerify').click()
self.tab.wait(0.1)
logger.debug(f"选择没有申请过")
self.tab.eles('t:input@id=IsCompensated')[1].click()
self.tab.wait(0.1)
logger.debug(f"填写text: {text}")
self.tab.ele('t:textarea@id=MetaAnswerA').set.value(text)
self.tab.wait(0.1)
logger.debug(f"勾选同意我的名字")
self.tab.ele('t:input@id=IDeclare').click()
self.tab.wait(0.1)
logger.debug(f"填写PrintName: {last_name+' '+first_name}")
self.tab.ele(
't:input@id=PrintName').set.value(last_name+' '+first_name)
self.tab.wait(0.1)
# logger.debug(f"点击Submit按钮")
# self.tab.ele('t:button@text():Submit').click()
# 取对应城市的代理
def get_proxy(city: str):
    """
    Return the proxy settings configured for a city.

    NOTE(review): the proxy credentials are hard-coded in this table.

    :param city: city name, matched exactly (case-sensitive)
    :return: ``[host, port, user, password]`` as strings, or None when the
        city has no proxy configured
    """
    endpoints = {
        "Calgary": "us.novproxy.io:1000:uwqr8065-region-CA-st-Alberta-city-Calgary:d6vqwerx",
        "Edmonton": 'us.novproxy.io:1000:uwqr8065-region-CA-st-Alberta-city-Edmonton:d6vqwerx',
        "Vancouver": 'us.novproxy.io:1000:uwqr8065-region-CA-st-British Columbia-city-Vancouver:d6vqwerx',
        "Halifax": 'us.novproxy.io:1000:uwqr8065-region-CA-st-Nova Scotia-city-Halifax:d6vqwerx',
        "Toronto": 'us.novproxy.io:1000:uwqr8065-region-CA-st-Ontario-city-Toronto:d6vqwerx',
    }
    entry = endpoints.get(city)
    if entry is None:
        return None
    # Split on every call so each caller gets its own list.
    return entry.split(':')
"""指纹浏览器操作"""
# 创建指纹浏览器
def create_fingerprint_browser(city: str):
    """
    Create a fingerprint browser for a city and run the claim-form flow once.

    The browser profile is always closed and deleted afterwards, whether the
    flow succeeded or not.

    :param city: city name such as ``Calgary`` or ``Edmonton``; it selects
        the proxy and is used as the profile remark
    """
    proxy = get_proxy(city)
    if proxy is None:
        # Without this guard proxy[0] below raises TypeError for unknown cities.
        logger.error(f"{city} 没有可用的代理配置,结束该线程")
        return
    browser_id = None
    try:
        logger.info(f"{city} 准备创建指纹浏览器")
        browser_id = bit_browser.bit_browser_create(
            remark=city,
            host=proxy[0],
            port=proxy[1],
            proxy_user=proxy[2],
            proxy_pwd=proxy[3],
            proxy_type='socks5'
        )
        logger.debug(browser_id)
        if not browser_id:
            # bit_browser_create returns None once its retries are exhausted.
            logger.error(f"{city} 创建指纹浏览器失败,结束该线程")
            return
        # Open the profile and attach to it.
        http = bit_browser.bit_browser_open(browser_id)
        logger.debug(http)
        if not http:
            # Presumably the open call can also come back empty; do not attach then.
            logger.error(f"{city} 打开指纹浏览器失败,结束该线程")
            return
        auto = Auto(http)
        auto.open_url(
            "https://veritaconnect.ca/canadianbreadsettlement/en-us/Claimant/UnknownClaimForm")
        if not auto.wait_home():
            logger.error(f"{city} 进入首页失败,结束该线程")
            return
        if not auto.click_continue():
            logger.error(f"{city} 点击 Continue 失败,结束该线程")
            return
        auto.fill_questionnaire()
        time.sleep(5)
    finally:
        if browser_id:
            # Close, then delete the profile; failures are logged only.
            try:
                bit_browser.bit_browser_close(browser_id)
            except Exception as e:
                logger.error(f"{city} 关闭浏览器异常: {e}")
            try:
                bit_browser.bit_browser_delete(browser_id)
            except Exception as e:
                logger.error(f"{city} 删除浏览器异常: {e}")
def run_city_forever(city: str):
    """
    Run the flow for one city in an endless loop.

    Each round calls create_fingerprint_browser, which closes and deletes
    its browser when done; the next round then creates a new one. The
    function never returns.

    :param city: city name
    """
    while True:
        try:
            create_fingerprint_browser(city)
        except Exception as e:
            # Keep the loop alive whatever a single round raises.
            logger.error(f"{city} 流程异常: {e}")
        # Short pause between rounds.
        time.sleep(2)
def run_all_cities_concurrently():
    """
    Start one worker thread per city, 2 seconds apart, and wait for them all.

    Each worker runs run_city_forever, so the final join only returns if the
    workers end.
    """
    import threading

    workers = []
    for city_name in ('Calgary', 'Edmonton', 'Vancouver', 'Halifax', 'Toronto'):
        worker = threading.Thread(
            target=run_city_forever,
            args=(city_name,),
            name=f"{city_name}-thread",
        )
        worker.start()
        workers.append(worker)
        logger.info(f"{city_name} 线程已启动")
        # Stagger the start-up of the workers.
        time.sleep(2)
    for worker in workers:
        worker.join()
    logger.info("所有城市流程执行完成")
# Script entry point: run every city's flow in its own thread.
if __name__ == "__main__":
    run_all_cities_concurrently()

23
spider/requirements.txt Normal file
View File

@@ -0,0 +1,23 @@
aiohttp
requests
curl_cffi
aiohttp-socks
requests[socks]
fake_useragent
apscheduler
aiofiles
loguru
portalocker
aiomultiprocess
faker
eth_account
eth_utils
solders
toncli
ecdsa
base58
ddddocr
aiohttp_socks
websockets
psutil
socks

333
spider/work.py Normal file
View File

@@ -0,0 +1,333 @@
import random
import time
from datetime import date, timedelta
from typing import Optional, Dict
import requests
# Full province/territory name -> two-letter abbreviation.
CA_PROVINCE_ABBR = {
    "Alberta": "AB",
    "British Columbia": "BC",
    "Manitoba": "MB",
    "New Brunswick": "NB",
    "Newfoundland and Labrador": "NL",
    "Nova Scotia": "NS",
    "Ontario": "ON",
    "Prince Edward Island": "PE",
    "Quebec": "QC",
    "Saskatchewan": "SK",
    "Northwest Territories": "NT",
    "Nunavut": "NU",
    "Yukon": "YT",
}
# Province abbreviation -> list of (latitude, longitude, city name) base
# points; random addresses are generated near one of these points.
# NOTE(review): the "East St Paul" latitude (50.445211) is identical to
# Regina's, which looks like a copy-paste slip — confirm the coordinates.
# NOTE(review): NT lists "Yellowknife" twice with different coordinates, and
# the second NU / YT entries are labelled with the territory name rather
# than a city — confirm these are intended.
CA_COORDS = {
    "AB": [(51.044733, -114.071883, "Calgary"), (53.546124, -113.493823, "Edmonton")],
    "BC": [(49.282729, -123.120738, "Vancouver"), (48.428421, -123.365644, "Victoria")],
    "MB": [(49.895137, -97.138374, "Winnipeg"), (50.445211, -96.823611, "East St Paul")],
    "NB": [(45.963589, -66.643115, "Fredericton"), (46.510712, -67.255044, "Woodstock")],
    "NL": [(53.135509, -57.660435, "Labrador City"), (47.561510, -52.712585, "St. John's")],
    "NS": [(44.648862, -63.575320, "Halifax"), (45.010474, -63.416817, "Truro")],
    "ON": [(43.653225, -79.383186, "Toronto"), (45.421532, -75.697189, "Ottawa")],
    "PE": [(46.238240, -63.131074, "Charlottetown"), (46.392410, -63.787629, "Summerside")],
    "QC": [(45.501689, -73.567256, "Montreal"), (46.813878, -71.207980, "Quebec City")],
    "SK": [(52.133214, -106.670046, "Saskatoon"), (50.445211, -104.618896, "Regina")],
    "NT": [(62.4540, -114.3725, "Yellowknife"), (61.251955, -114.352482, "Yellowknife")],
    "NU": [(63.7467, -68.5167, "Iqaluit"), (64.282327, -76.614813, "Nunavut")],
    "YT": [(60.7212, -135.0568, "Whitehorse"), (64.000000, -138.000000, "Yukon")],
}
# Province abbreviation -> telephone area codes used for generated numbers.
# "639" is a Saskatchewan code and is therefore listed under SK only; it was
# previously also listed under ON.
CA_AREA_CODES = {
    "AB": ["403", "587", "825"],
    "BC": ["236", "250", "604", "672", "778"],
    "MB": ["204", "431"],
    "NB": ["506"],
    "NL": ["709"],
    "NS": ["782", "902"],
    "ON": ["226", "249", "289", "343", "365", "416", "437", "519", "548", "613", "647", "705", "807", "905"],
    "PE": ["902"],
    "QC": ["418", "438", "450", "514", "579", "581", "819", "873"],
    "SK": ["306", "639"],
    "NT": ["867"],
    "NU": ["867"],
    "YT": ["867"],
}
# Provinces/territories for which a partial address is accepted
# (generate_canada_info stops after the first geocoding result for these).
REMOTE_PROVINCES = {"NL", "NT", "NU", "YT"}
def _normalize_province(province: str) -> str:
"""
省份入参规范化,支持全称或缩写,返回缩写
参数:
province (str): 省份,可为全称或缩写(如 "Alberta""AB"
返回值:
str: 省份缩写(如 "AB"
"""
if not province:
raise ValueError("province 不能为空")
p = province.strip()
if len(p) == 2:
return p.upper()
return CA_PROVINCE_ABBR.get(p, p)
def _pick_coords(province_abbr: str, city: Optional[str]) -> tuple[float, float, str]:
    """
    Choose a base coordinate for a province, optionally for a given city.

    :param province_abbr: province abbreviation
    :param city: city name such as "Calgary" (compared case-insensitively
        after stripping); may be None or empty
    :return: ``(lat, lon, city_name)``. A province missing from CA_COORDS
        yields the Calgary point; a city not listed for the province yields
        a random point of that province.
    """
    candidates = CA_COORDS.get(province_abbr)
    if not candidates:
        # Default fallback: Calgary.
        return 51.044733, -114.071883, "Calgary"
    if city:
        wanted = city.strip().lower()
        match = next((entry for entry in candidates if entry[2].lower() == wanted), None)
        if match is not None:
            return match
    return random.choice(candidates)
def _random_near(lat: float, lon: float) -> tuple[float, float]:
"""
在给定坐标附近生成一个随机偏移坐标
参数:
lat (float): 基准纬度
lon (float): 基准经度
返回值:
(new_lat, new_lon): 随机偏移后的坐标
"""
return lat + (random.random() - 0.5) * 0.1, lon + (random.random() - 0.5) * 0.1
def _reverse_geocode(lat: float, lon: float) -> Dict:
    """
    Reverse-geocode a coordinate through the Nominatim HTTP API.

    :param lat: latitude
    :param lon: longitude
    :return: the decoded JSON response
    :raises requests.HTTPError: when the server answers with an error status
    """
    endpoint = f"https://nominatim.openstreetmap.org/reverse?format=json&lat={lat}&lon={lon}&zoom=18&addressdetails=1"
    response = requests.get(endpoint, headers={"User-Agent": "ca_auto_table/1.0"}, timeout=15)
    response.raise_for_status()
    return response.json()
def _format_address(address: Dict, province_abbr: str) -> str:
"""
将 Nominatim 的 address 格式化为完整地址字符串
参数:
address (dict): Nominatim 返回的 address 字段
province_abbr (str): 省份缩写(如 "AB"
返回值:
str: 格式化后的地址字符串
"""
house = address.get("house_number")
road = address.get("road") or address.get("residential") or address.get("footway")
city = address.get("city") or address.get("town") or address.get("village")
postcode = address.get("postcode") or ""
if house and road and city:
return f"{house} {road}, {city}, {province_abbr} {postcode}, Canada"
# 远端省份允许部分地址
return f"{city or ''}, {province_abbr} {postcode}, Canada".strip(", ")
def _random_name() -> tuple[str, str]:
"""
生成随机英文名Firstname, Lastname组合空间可达数百万以上
实现策略:
- 60% 概率使用常见英文名与姓氏列表(更自然)
- 40% 概率使用音节组合算法动态生成(数量级远超百万)
返回值:
(firstname, lastname)
"""
common_first = [
"James", "Mary", "Robert", "Patricia", "John", "Jennifer", "Michael", "Linda", "William", "Elizabeth",
"David", "Barbara", "Richard", "Susan", "Joseph", "Jessica", "Thomas", "Sarah", "Charles", "Karen",
"Christopher", "Nancy", "Daniel", "Lisa", "Matthew", "Betty", "Anthony", "Margaret", "Mark", "Sandra",
"Donald", "Ashley", "Steven", "Kimberly", "Paul", "Emily", "Andrew", "Donna", "Joshua", "Michelle",
"Kenneth", "Dorothy", "Kevin", "Carol", "Brian", "Amanda", "George", "Melissa", "Edward", "Deborah",
"Ronald", "Stephanie", "Timothy", "Rebecca", "Jason", "Laura", "Jeffrey", "Sharon", "Ryan", "Cynthia",
"Jacob", "Kathleen", "Gary", "Amy", "Nicholas", "Shirley", "Eric", "Angela", "Stephen", "Helen",
"Jonathan", "Anna", "Larry", "Brenda", "Justin", "Pamela", "Scott", "Nicole", "Brandon", "Samantha",
"Frank", "Katherine", "Benjamin", "Christine", "Gregory", "Emma", "Raymond", "Ruth", "Samuel", "Julie",
"Patrick", "Olivia", "Alexander", "Victoria"
]
common_last = [
"Smith", "Johnson", "Williams", "Brown", "Jones", "Garcia", "Miller", "Davis", "Rodriguez", "Martinez",
"Hernandez", "Lopez", "Gonzalez", "Wilson", "Anderson", "Thomas", "Taylor", "Moore", "Jackson", "Martin",
"Lee", "Perez", "Thompson", "White", "Harris", "Sanchez", "Clark", "Ramirez", "Lewis", "Robinson",
"Walker", "Young", "Allen", "King", "Wright", "Scott", "Torres", "Nguyen", "Hill", "Flores",
"Green", "Adams", "Nelson", "Baker", "Hall", "Rivera", "Campbell", "Mitchell", "Carter", "Roberts",
"Turner", "Phillips", "Parker", "Evans", "Edwards", "Collins", "Stewart", "Sanchez", "Morris", "Rogers",
"Reed", "Cook", "Morgan", "Bell", "Murphy", "Bailey", "Cooper", "Richardson", "Cox", "Howard",
"Ward", "Torres", "Peterson", "Gray", "Ramirez", "James", "Watson", "Brooks", "Kelly", "Sanders",
"Price", "Bennett", "Wood", "Barnes", "Ross", "Henderson", "Coleman", "Jenkins", "Perry", "Powell",
"Long", "Patterson", "Hughes", "Flores"
]
if random.random() < 0.6:
return random.choice(common_first), random.choice(common_last)
# 动态音节组合生成,支持数百万组合
f_beg = [
"al", "ben", "car", "dan", "el", "fran", "ge", "har", "isa", "jo", "ka", "li", "mar", "no",
"ol", "pa", "qui", "ra", "sa", "ta", "ul", "vi", "wil", "xa", "ya", "zo"
]
f_mid = [
"a", "e", "i", "o", "u", "ae", "ai", "ia", "ie", "oa", "ou"
]
f_end = [
"n", "ne", "na", "son", "ton", "la", "ra", "rie", "ry", "ley", "ly", "ah"
]
l_beg = [
"sm", "john", "dav", "wil", "and", "tho", "tay", "mo", "jack", "mar", "lee", "tho", "whi", "har",
"san", "cla", "ram", "lew", "rob", "walk", "young", "all", "king", "wri", "scott", "tor", "nguy",
"hil", "flo", "gre", "ada", "nel", "bak", "hal", "riv", "camp", "mit", "car", "rob"
]
l_mid = [
"a", "e", "i", "o", "u", "ar", "er", "or", "an", "en", "in", "on", "un"
]
l_suf = [
"son", "ton", "man", "ley", "ford", "wood", "well", "er", "ers", "ing", "s", "son", "es"
]
def build_name(beg, mid, end, syllables=(2, 3)) -> str:
parts = [random.choice(beg)]
for _ in range(random.choice(syllables) - 1):
parts.append(random.choice(mid))
parts.append(random.choice(end))
name = "".join(parts)
return name.capitalize()
first = build_name(f_beg, f_mid, f_end)
last = build_name(l_beg, l_mid, l_suf)
return first, last
def _random_birthday() -> str:
"""
生成随机生日,格式为 yyyy-mm-dd
返回值:
str: 生日字符串
"""
start = date(1950, 1, 1)
end = date(2000, 12, 31)
delta_days = (end - start).days
d = start + timedelta(days=random.randint(0, delta_days))
return f"{d.year}-{d.month:02d}-{d.day:02d}"
def _random_phone(province_abbr: str) -> str:
    """
    Generate a random Canadian-style phone number with an area code.

    The area code is drawn from CA_AREA_CODES for the province; "000" is
    used when the province has no entry.

    :param province_abbr: province abbreviation
    :return: phone number such as "(403) 555-1234"
    """
    area = random.choice(CA_AREA_CODES.get(province_abbr, ["000"]))
    exchange = random.randint(200, 899)
    line = random.randint(1000, 9999)
    return f"({area}) {exchange:03d}-{line:04d}"
def generate_canada_info(province: str, city: Optional[str] = None, max_attempts: int = 15, sleep_sec: float = 0.6) -> Dict[str, str]:
    """
    Generate random Canadian personal and address data.

    A random point near the chosen city is reverse-geocoded until a complete
    street address (house number, road, city) is found or ``max_attempts``
    is used up. For provinces in REMOTE_PROVINCES the first result is
    accepted as is. A failed geocoding request no longer aborts the run: it
    consumes one attempt and the loop carries on.

    :param province: province as full name or abbreviation ("Alberta", "AB")
    :param city: city such as "Calgary"; a random base point of the province
        is used when omitted
    :param max_attempts: maximum number of reverse-geocoding attempts
    :param sleep_sec: pause after each unsuccessful attempt, in seconds
    :return: dict with the keys firstname, lastname, full_name, birthday,
        address_str (street part only), city_name, phone, postcode and
        province (full name when known)
    :raises requests.RequestException: when every attempt failed at the
        request level (the last error is re-raised)
    """
    prov_abbr = _normalize_province(province)
    base_lat, base_lon, chosen_city = _pick_coords(prov_abbr, city)
    address_str = ""
    city_name = ""
    postcode = ""
    got_response = False
    last_error = None
    for _ in range(max_attempts):
        lat, lon = _random_near(base_lat, base_lon)
        try:
            data = _reverse_geocode(lat, lon)
        except (requests.RequestException, ValueError) as exc:
            # Transient network/HTTP/JSON failure: use up this attempt only.
            last_error = exc
            time.sleep(sleep_sec)
            continue
        got_response = True
        addr = data.get("address", {})
        city_name = addr.get("city") or addr.get("town") or addr.get("village") or chosen_city
        postcode = addr.get("postcode") or ""
        address_str = _format_address(addr, prov_abbr)
        if prov_abbr in REMOTE_PROVINCES:
            break
        if addr.get("house_number") and (addr.get("road") or addr.get("residential") or addr.get("footway")) and city_name:
            break
        time.sleep(sleep_sec)
    if not got_response and last_error is not None:
        # Nothing usable was ever received; surface the last failure.
        raise last_error
    firstname, lastname = _random_name()
    full_name = f"{firstname} {lastname}"
    birthday = _random_birthday()
    phone = _random_phone(prov_abbr)
    return {
        "firstname": firstname,
        "lastname": lastname,
        "full_name": full_name,
        "birthday": birthday,
        # Keep only the part before the first comma (the street line).
        "address_str": address_str.split(",")[0],
        "city_name": city_name,
        "phone": phone,
        "postcode": postcode,
        "province": next((k for k, v in CA_PROVINCE_ABBR.items() if v == prov_abbr), prov_abbr),
    }
def get_random_canada_info(province: str, city: Optional[str]) -> Dict[str, str]:
    """
    Generate random Canadian personal and address data for a province and city.

    Thin wrapper that forwards both arguments to generate_canada_info and
    leaves its retry settings at their defaults. The province and city are
    NOT chosen here; the caller supplies them.

    :param province: province as full name or abbreviation
    :param city: city name
    :return: the dict produced by generate_canada_info
    """
    return generate_canada_info(province, city)
def main() -> None:
    """
    Demo: generate and print random data for Calgary, Alberta.

    Change the arguments to try another province or city.
    """
    info = generate_canada_info("Alberta", "Calgary")
    print(info)
# Script entry point: run the demo.
if __name__ == "__main__":
    main()