taobao_login.py 7.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190
  1. import time
  2. import json
  3. import random
  4. import signal
  5. import sys
  6. from DrissionPage import ChromiumPage, ChromiumOptions
  7. import re
  8. import socket
  9. from commons.conn_mysql import MySQLPoolOnline
  10. import hashlib
  11. from commons.Logger import logger
  12. MAX_PAGES = 5
  13. WAIT_BETWEEN_PAGES = (8, 15) # 页间等待时间范围(秒)
  14. SCROLL_DELAY = (0.3, 0.8) # 滚动延迟范围
  15. CLICK_DELAY = (0.5, 1.2) # 点击延迟范围
  16. BROWSE_TIME = (5, 10) # 浏览时间范围
  17. chrome_path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
  18. class TaobaoAutoCrawl:
  19. def __init__(self, account_name, ip, key_word):
  20. self.driver = None
  21. self.register_signal_handler()
  22. self.db = MySQLPoolOnline()
  23. self.account_name = account_name
  24. self.ip = ip
  25. self.keyword = key_word
  26. @staticmethod
  27. def _get_free_port():
  28. """获取一个当前可用的本地端口,供 Chrome 调试使用。"""
  29. with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
  30. s.bind(("127.0.0.1", 0))
  31. return s.getsockname()[1]
  32. def init_drissionpage(self):
  33. # 避免 auto_port 在部分环境下生成异常地址(无端口)导致 ChromiumPage 初始化失败
  34. co = ChromiumOptions().set_browser_path(chrome_path)
  35. debug_port = self._get_free_port()
  36. co.set_argument(f"--remote-debugging-port={debug_port}")
  37. co.set_user_data_path(f"./spider/taobao/{self.account_name}")
  38. if self.ip:
  39. proxy = self.ip.strip()
  40. if not proxy.startswith(("http://", "https://")):
  41. proxy = f"http://{proxy}"
  42. co.set_argument(f"--proxy-server={proxy}")
  43. logger.info(f"启动浏览器: account={self.account_name}, debug_port={debug_port}")
  44. self.driver = ChromiumPage(co)
  45. def register_signal_handler(self):
  46. """ 非常必要,注册信号处理,确保状态保存"""
  47. def signal_handler(signum, frame):
  48. print("\n⚠️ 收到退出信号,正在保存状态并退出...")
  49. if self.driver:
  50. self.driver.quit()
  51. sys.exit(0)
  52. signal.signal(signal.SIGINT, signal_handler)
  53. # Windows 上可能不支持 SIGTERM,做兼容处理
  54. if hasattr(signal, "SIGTERM"):
  55. signal.signal(signal.SIGTERM, signal_handler)
  56. # ==================== 人工行为模拟 ====================
  57. def random_wait(self, min_sec, max_sec=None):
  58. """随机等待"""
  59. if max_sec is None:
  60. max_sec = min_sec
  61. time.sleep(random.uniform(min_sec, max_sec))
  62. def move_mouse_to_element(self, element):
  63. """移动鼠标到元素"""
  64. if not element:
  65. return
  66. try:
  67. # 优先使用 DrissionPage 推荐方式
  68. self.driver.actions.move_to(element)
  69. except Exception:
  70. # 兼容旧逻辑:按元素中心点移动(不同版本 move 参数可能不同)
  71. box = element.rect
  72. try:
  73. center_x = int(box.x + box.width / 2)
  74. center_y = int(box.y + box.height / 2)
  75. except Exception:
  76. center_x = int(box["x"] + box["width"] / 2)
  77. center_y = int(box["y"] + box["height"] / 2)
  78. try:
  79. self.driver.actions.move(center_x, center_y)
  80. except TypeError:
  81. # 某些版本仅支持关键字参数
  82. self.driver.actions.move(offset_x=center_x, offset_y=center_y)
  83. self.random_wait(0.2, 0.5)
  84. def human_type(self, element, text):
  85. """模拟人类输入"""
  86. for char in text:
  87. element.send_keys(char)
  88. time.sleep(random.uniform(0.1, 0.3))
  89. def login(self, username, password):
  90. self.driver.get("https://login.taobao.com")
  91. self.random_wait(5, 8)
  92. # 输入账号
  93. login_name = self.driver.ele("xpath=//input[@name='fm-login-id']", timeout=30)
  94. if login_name:
  95. self.move_mouse_to_element(login_name)
  96. self.human_type(login_name, username)
  97. self.random_wait(1, 3)
  98. # 输入密码
  99. login_pass = self.driver.ele("xpath=//input[@name='fm-login-password']", timeout=30)
  100. if login_pass:
  101. self.move_mouse_to_element(login_pass)
  102. self.human_type(login_pass, password)
  103. self.random_wait(1, 3)
  104. # 点击登录
  105. login_button = self.driver.ele("xpath=//button[text()='登录']", timeout=30)
  106. if login_button:
  107. self.move_mouse_to_element(login_button)
  108. login_button.click()
  109. self.random_wait(1, 3)
  110. # 处理同意按钮
  111. login_agree = self.driver.ele("xpath=//button[text()='同意']", timeout=5)
  112. if login_agree:
  113. self.move_mouse_to_element(login_agree)
  114. login_agree.click()
  115. self.random_wait(1, 3)
  116. # 等待登录结果
  117. self.random_wait(10, 20)
  118. # 检查是否登录成功
  119. user_info = self.driver.ele("xpath=//a[@class='site-nav-login-info-nick']", timeout=10)
  120. if user_info:
  121. print("登录成功!")
  122. else:
  123. print("登录失败,请检查账号密码或验证码")
  124. def get_search(self):
  125. url = "https://www.taobao.com"
  126. self.driver.get(url, timeout=30)
  127. time.sleep(30)
  128. time.sleep(random.uniform(3, 8))
  129. # 刷新一次,否则可能未找到登录状态
  130. self.driver.refresh()
  131. self.random_wait(5, 10)
  132. # login_name = self.driver.ele("xpath=//input[@name='fm-login-id']")
  133. # if login_name:
  134. # self.login("aqwwer","wewetrv")
  135. # self.driver.refresh()
  136. # self.random_wait(5, 10)
  137. ele_iframe = self.driver.ele("xpath=//iframe[@id='baxia-dialog-content']")
  138. if ele_iframe:
  139. update_sql = f""" UPDATE `retrieve_collect_equipment_account` SET `status`= %s WHERE `nickname` = %s; """
  140. self.db.execute(update_sql, (1, self.account_name))
  141. return False
  142. ele = self.driver.ele('xpath=//*[contains(@class,"site-nav-login-info-nick")]', timeout=30)
  143. if ele:
  144. cookies_list = self.driver.cookies()
  145. cookies_dict = {c['name']: c['value'] for c in cookies_list}
  146. timestamp = int(time.time())
  147. # 保存 cookie 到文件
  148. update_sql = f""" UPDATE `retrieve_collect_equipment_account` SET `update_time` = %s, `cookie_str`= %s,`status`= %s WHERE `username` = %s; """
  149. self.db.execute(update_sql, (timestamp, json.dumps(cookies_dict), 0, self.account_name))
  150. print(f"{self.account_name},获取 cookie 成功!")
  151. logger.info(f"{self.account_name},获取 cookie 成功!")
  152. self.random_wait(3, 5)
  153. return True
  154. else:
  155. return False
  156. def run(self):
  157. bool_login = False
  158. try:
  159. self.init_drissionpage()
  160. bool_login = self.get_search()
  161. except Exception as e:
  162. logger.exception(f"{self.account_name} 获取 cookie 异常: {e}")
  163. finally:
  164. if self.driver:
  165. self.driver.quit()
  166. return bool(bool_login)