# monitor.py (3.3 KB)

  1. # mt_spider/monitor.py
  2. import threading, time, logging
  3. from collections import deque
  4. class SpiderMonitor(threading.Thread):
  5. def __init__(self, spider_instance):
  6. super().__init__(daemon=True)
  7. self.spider = spider_instance
  8. self.running = True
  9. self.pausing = threading.Event()
  10. self.last_verification_time = 0
  11. self.verification_count = 0
  12. self.MAX_VERIFICATION_RETRY = 3
  13. self.recent_clicks = deque(maxlen=10)
  14. self.logger = logging.getLogger("SpiderMonitor")
  15. self.popup_rules = {
  16. "simple": [
  17. ('//*[@text="确定"]', "点击确定"),
  18. ('//*[@text="允许"]', "点击允许"),
  19. ('//*[@text="关闭"]', "点击关闭"),
  20. ('//*[@resource-id="com.sankuai.meituan:id/close"]', "关闭按钮"),
  21. ],
  22. "verification": [
  23. '//*[contains(@text, "验证")]',
  24. '//*[contains(@text, "滑块")]',
  25. '//*[contains(@text, "点击")]',
  26. '//*[contains(@text, "请输入图片中的内容")]',
  27. '//*[contains(@text, "用最短线连接")]',
  28. '//*[contains(@text, "请拖动下方滑块完成拼图")]',
  29. '//*[contains(@resource-id, "captcha")]'
  30. ]
  31. }
  32. def run(self):
  33. while self.running:
  34. try:
  35. handled = self.check_and_handle_popup()
  36. time.sleep(2 if handled else 1)
  37. except Exception as e:
  38. self.logger.exception("监控线程异常: %s", e)
  39. time.sleep(3)
  40. def _is_recent_click(self, xpath):
  41. key = f"{xpath}_{int(time.time())}"
  42. return key in self.recent_clicks or (self.recent_clicks.append(key), False)[0]
  43. def check_and_handle_popup(self):
  44. d = self.spider.d
  45. for xpath, desc in self.popup_rules["simple"]:
  46. if d.xpath(xpath).exists and not self._is_recent_click(xpath):
  47. self.logger.info("检测到弹窗: %s", desc)
  48. d.xpath(xpath).click()
  49. return True
  50. for xpath in self.popup_rules["verification"]:
  51. if d.xpath(xpath).exists:
  52. now = time.time()
  53. if now - self.last_verification_time < 30:
  54. return False
  55. self.last_verification_time = now
  56. self.verification_count += 1
  57. self.logger.warning("验证码弹窗触发,等待人工处理...")
  58. if self.verification_count > self.MAX_VERIFICATION_RETRY:
  59. self.logger.error("验证码重试超限,终止任务")
  60. self.spider.stop_all()
  61. return True
  62. self.pausing.set()
  63. d.toast.show("需要人工处理验证码", 120)
  64. while d.xpath(xpath).exists:
  65. time.sleep(5)
  66. self.logger.info("验证码已处理")
  67. d.toast.show("验证完成", 2)
  68. self.pausing.clear()
  69. return True
  70. if d.xpath('//*[contains(@text, "广告")]').exists:
  71. w, h = d.info['displayWidth'], d.info['displayHeight']
  72. d.click(w - 50, 50)
  73. self.logger.info("关闭广告弹窗")
  74. return True
  75. return False
  76. def stop(self):
  77. self.running = False