aaa_mt2.py 182 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
7377837793780378137823783378437853786378737883789379037913792379337943795379637973798379938003801380238033804380538063807380838093810381138123813381438153816381738183819382038213822382338243825382638273828382938303831383238333834383538363837383838393840384138423843384438453846384738483849385038513852385338543855385638573858385938603861386238633864386538663867386838693870387138723873387438753876387738783879388038813882388338843885388638873888388938903891389238933894389538963897389838993900390139023903390439053906390739083909391039113912391339143915391639173918391939203921392239233924392539263927392839293930393139323933393439353936393739383939394039413942394339443945394639473948394939503951395239533954395539563957395839593960396139623963396439653966396739683969397039713972397339743975397639773978397939803981398239833984398539863987398839893990399139923993399439953996399739983999400040014002400340044005400640074008400940104011401240134014401540164017401840194020402140224023402440254026402740284029403040314032403340344035403640374038403940404041404240434044
  1. import requests
  2. import base64
  3. import cv2
  4. import uiautomator2 as u2
  5. import time
  6. import subprocess
  7. import re
  8. import random
  9. import datetime
  10. import json
  11. from aip import AipOcr
  12. from apscheduler.schedulers.blocking import BlockingScheduler
  13. # from db_mysql import mysqlClient
  14. import threading
  15. from collections import deque
  16. import numpy as np
  17. import secrets
  18. import os
  19. import oss2
  20. import math
  21. import schedule
  22. import urllib.parse
  23. from pathlib import Path
  24. import city_name_to_id
  25. # 直接导出一个全局映射,供其他脚本直接 import
  26. _DEFAULT_PATH = Path(__file__).with_name("city.json")
  27. # import pyperclip
  28. from config import Config
  29. from logger import setup_logger
  30. import logging
  31. from contextlib import contextmanager
  32. from typing import Dict, Any
  33. from PIL import Image, ImageDraw, ImageFont
  34. # from database import MySQLClient
  35. # 配置日志
  36. # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  37. setup_logger("mt_spider") # 初始化日志
  38. class SpiderMonitor(threading.Thread):
  39. """全局弹窗监控线程(增强版)"""
  40. def __init__(self, spider_instance):
  41. super().__init__(daemon=True)
  42. self.spider = spider_instance
  43. self.running = True
  44. self.pausing = threading.Event() # 主线程同步事件
  45. self.last_verification_time = 0
  46. self.verification_count = 0
  47. self.MAX_VERIFICATION_RETRY = 10
  48. self.recent_clicks = deque(maxlen=10) # 防重复点击
  49. self.logger = logging.getLogger("SpiderMonitor")
  50. self.TOKEN = "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk"
  51. self.API_URL = "http://api.jfbym.com/api/YmServer/customApi"
  52. self.d = self.spider.d
  53. self.verification_in_progress = threading.Event()
  54. self.loggerMT = logging.getLogger()
  55. self.verification_retry_count = 0 # 当前验证码重试次数
  56. self.last_verification_type = None
  57. # 可配置化弹窗规则
  58. self.popup_rules = {
  59. "simple": [
  60. ('//*[@text="确定"]', "点击确定"),
  61. ('//*[@text="允许"]', "点击允许"),
  62. ('//*[@text="关闭"]', "点击关闭"),
  63. ('//*[@resource-id="com.sankuai.meituan:id/close"]', "关闭按钮"),
  64. ('//*[@resource-id="com.sankuai.meituan:id/address_center_location_close"]', "关闭按钮"),
  65. ('//*[@resource-id="com.sankuai.meituan:id/location_close"]', "关闭按钮"),
  66. ('//*[@resource-id="com.sankuai.meituan:id/btn_close"]', "关闭按钮"),
  67. ],
  68. # "verification": [
  69. # '//*[contains(@text, "验证")]',
  70. # '//*[contains(@text, "滑块")]',
  71. # '//*[contains(@text, "依次点击")]',
  72. # '//*[contains(@text, "请点击")]',
  73. # '//*[contains(@text, "拖动滑块刚")]', #这个需要拖动滑块至最右边,然后再截图
  74. # '//*[contains(@text, "请输入图片中的内容")]',
  75. # '//*[contains(@text, "用最短线连接")]',
  76. # '//*[contains(@text, "请按语序依次点击")]',
  77. # '//*[contains(@text, "请向右滑动滑块")]',
  78. # '//*[contains(@text, "请拖动下方滑块完成拼图")]',
  79. # '//*[contains(@resource-id, "captcha")]'
  80. # ]
  81. "verification": [
  82. ('//*[contains(@text, "请点击")]', "click_side"),
  83. ('//*[contains(@text, "请输入图片中的内容")]', "Numbers_English"),
  84. ('//*[contains(@text, "请向右滑动滑块")]', "Swipe_right"),
  85. ('//*[contains(@text, "请依次点击下图图标")]', "Click_images"),
  86. ('//*[contains(@text, "请拖动下方滑块完成拼图")]', "slider"),
  87. ('//*[contains(@text, "拖动滑块刚")]', "complexs"), # 这个需要拖动滑块至最右边,然后再截图
  88. ('//*[contains(@text, "请按语序依次点击")]', "Click_images"),
  89. ('//*[contains(@text, "用最短线连接")]', "Shortest_connection"),
  90. ]
  91. }
  92. def run(self):
  93. while self.running:
  94. try:
  95. handled = self.check_and_handle_popup()
  96. time.sleep(2 if handled else 1)
  97. except Exception as e:
  98. self.logger.exception("监控线程异常: %s", e)
  99. time.sleep(3)
  100. def _is_recent_click(self, xpath):
  101. """防止重复点击同一个弹窗"""
  102. key = f"{xpath}_{int(time.time())}"
  103. if key in self.recent_clicks:
  104. return True
  105. self.recent_clicks.append(key)
  106. return False
  107. @staticmethod
  108. def get_sleep_time():
  109. # return random.randint(5, 8)
  110. return random.randint(1, 3)
  111. def human_slide(self, start_x, start_y, end_x, end_y, hold_time=0):
  112. """模拟真实人类滑动轨迹 - 连续变化的速度曲线,微小偏差"""
  113. points = []
  114. # 随机参数
  115. total_steps = random.randint(60, 85) # 更多步数使曲线更平滑
  116. # 计算滑动距离
  117. distance_x = end_x - start_x
  118. distance_y = end_y - start_y
  119. total_distance = math.sqrt(distance_x ** 2 + distance_y ** 2)
  120. self.logger.info(f"滑块验证移动0")
  121. # 微小偏差设置 - 人类不完美的对齐
  122. # X方向偏差:1-6像素,70%概率过冲,30%欠冲
  123. if random.random() < 0.7:
  124. offset_x = random.randint(1, min(5, int(total_distance * 0.01)))
  125. else:
  126. offset_x = -random.randint(1, min(3, int(total_distance * 0.02)))
  127. # # Y方向微小偏差:±0-2像素
  128. # offset_y = random.randint(-2, 2)
  129. # 实际停止位置
  130. stop_x = end_x + offset_x
  131. stop_y = end_y
  132. # 物理参数:模拟手指滑动的物理过程
  133. # 使用加速度、最大速度、减速度模型
  134. accel_time_ratio = random.uniform(0.25, 0.35) # 加速阶段占总时间的比例
  135. decel_time_ratio = random.uniform(0.25, 0.35) # 减速阶段占总时间的比例
  136. max_speed = random.uniform(1.5, 2.2) # 最大速度倍数
  137. # 生成轨迹
  138. for i in range(total_steps):
  139. t = i / (total_steps - 1) # 时间进度 0-1
  140. # 物理速度曲线:连续变化的加速度过程
  141. if t < accel_time_ratio:
  142. # 加速阶段:从0加速到最大速度
  143. phase_t = t / accel_time_ratio
  144. # 使用平滑的加速曲线(二次函数)
  145. speed_factor = max_speed * phase_t * phase_t
  146. elif t < 1 - decel_time_ratio:
  147. # 匀速阶段:保持最大速度
  148. speed_factor = max_speed
  149. # 加入轻微的随机波动,模拟人类手部自然抖动
  150. speed_factor += random.uniform(-0.05, 0.05)
  151. else:
  152. # 减速阶段:从最大速度减速到0
  153. phase_t = (t - (1 - decel_time_ratio)) / decel_time_ratio
  154. # 使用平滑的减速曲线(二次函数,末尾更平缓)
  155. speed_factor = max_speed * (1 - phase_t * phase_t)
  156. self.logger.info(f"滑块验证移动1")
  157. # 计算位移(积分速度得到位置)
  158. # 使用贝塞尔曲线计算位置,让运动更自然
  159. if t < accel_time_ratio:
  160. # 加速阶段的位置
  161. phase_t = t / accel_time_ratio
  162. progress = (max_speed / 3) * phase_t * phase_t * phase_t
  163. elif t < 1 - decel_time_ratio:
  164. # 匀速阶段的位置
  165. phase_t = (t - accel_time_ratio) / (1 - accel_time_ratio - decel_time_ratio)
  166. # 匀速阶段的位移加上加速阶段完成的位移
  167. accel_distance = (max_speed / 3) # 加速阶段完成的位移
  168. progress = accel_distance + (1 - 2 * accel_distance) * phase_t
  169. else:
  170. # 减速阶段的位置
  171. phase_t = (t - (1 - decel_time_ratio)) / decel_time_ratio
  172. # 从减速起点平滑过渡到终点
  173. progress = 1 - (max_speed / 3) * (1 - phase_t) * (1 - phase_t) * (1 - phase_t)
  174. # 限制进度在0-1之间
  175. progress = max(0, min(1, progress))
  176. # 添加自然的手部抖动
  177. if t < 0.1 or t > 0.9:
  178. # 开始和结束:非常小的抖动
  179. jitter_x = random.randint(-1, 1)
  180. jitter_y = random.randint(-1, 1)
  181. elif t < 0.3 or t > 0.7:
  182. # 过渡阶段:小抖动
  183. jitter_x = random.randint(-2, 2)
  184. jitter_y = random.randint(-2, 2)
  185. else:
  186. # 中间快速阶段:稍大抖动
  187. jitter_x = random.randint(-2, 2) if random.random() < 0.3 else 0
  188. jitter_y = random.randint(-2, 2) if random.random() < 0.3 else 0
  189. # 计算当前位置
  190. current_x = start_x + (stop_x - start_x) * progress + jitter_x
  191. current_y = start_y + (stop_y - start_y) * progress + jitter_y
  192. self.logger.info(f"滑块验证移动2")
  193. # 确保轨迹单调性(不会回退)
  194. if points:
  195. if distance_x > 0: # 向右滑动
  196. current_x = max(points[-1][0], current_x)
  197. elif distance_x < 0: # 向左滑动
  198. current_x = min(points[-1][0], current_x)
  199. # 时间延迟 - 基于当前速度计算
  200. # 速度越快,延迟越短
  201. if t < 0.1: # 开始阶段
  202. delay = random.uniform(0.002, 0.008)
  203. elif t < 0.9: # 中间阶段
  204. # 延迟与速度成反比
  205. base_delay = 0.008
  206. speed_delay_factor = 1.0 / (speed_factor + 0.5)
  207. delay = base_delay * speed_delay_factor + random.uniform(-0.002, 0.002)
  208. delay = max(0.005, min(delay, 0.015))
  209. else: # 结束阶段
  210. # 逐渐增加延迟
  211. slow_factor = 1.0 + (t - 0.9) * 10
  212. delay = random.uniform(0.015, 0.025) * slow_factor
  213. points.append((current_x, current_y, delay))
  214. self.logger.info(f"滑块验证移动3")
  215. # 确保最后一点是实际停止位置
  216. if points:
  217. points[-1] = (stop_x, stop_y, 0)
  218. # 执行滑动
  219. if points:
  220. # 按下起点
  221. self.d.touch.down(points[0][0], points[0][1])
  222. time.sleep(random.uniform(0.002, 0.006))
  223. # 移动轨迹
  224. for i, point in enumerate(points[1:]):
  225. self.d.touch.move(point[0], point[1])
  226. self.logger.info(f"滑块验证移动{point[0]},{point[1]}")
  227. # 最后阶段可能的微小停顿(人类犹豫)
  228. # progress = (i + 1) / len(points[1:])
  229. # if progress > 0.98:
  230. # time.sleep(random.uniform(0.001, 0.003))
  231. time.sleep(point[2])
  232. # 抬起手指
  233. self.d.touch.up(points[-1][0], points[-1][1])
  234. # 滑动后的随机延迟
  235. hold_time = random.uniform(1, 2)
  236. time.sleep(hold_time)
  237. return points
  238. # 数英
  239. def Numbers_English_verify(self):
  240. time.sleep(5)
  241. rotate_image_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.Image[1]'
  242. if not self.d.xpath(rotate_image_xpath).exists:
  243. print("数英图片元素不存在")
  244. rotate_img_path = "Numbers_English.png"
  245. try:
  246. rotate_image = self.d.xpath(rotate_image_xpath)
  247. rotate_image.screenshot().save(rotate_img_path)
  248. print(f"数英图片截图保存成功: {rotate_img_path}")
  249. except Exception as e:
  250. print(f"数英图片截图失败: {e}")
  251. try:
  252. with open(rotate_img_path, 'rb') as f:
  253. image_data = base64.b64encode(f.read()).decode()
  254. url = "http://api.jfbym.com/api/YmServer/customApi"
  255. data = {
  256. "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
  257. "type": 10103,
  258. "image": image_data
  259. }
  260. headers = {
  261. "Content-Type": "application/json"
  262. }
  263. response = requests.post(url, headers=headers, json=data, timeout=30)
  264. response.raise_for_status()
  265. result = response.json()
  266. if result.get("code") == 10000 and result.get("data", {}).get("code") == 0:
  267. Numbers_English_verify_data = result["data"]
  268. print(f"API返回: {Numbers_English_verify_data}")
  269. captcha_text = Numbers_English_verify_data.get("data")
  270. print(f"验证码: {captcha_text}")
  271. if self.d.xpath(
  272. '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.EditText[1]| //*[@resource-id="com.sankuai.meituan:id/titans_webview_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.EditText[1]').exists:
  273. self.d.xpath(
  274. '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.EditText[1]| //*[@resource-id="com.sankuai.meituan:id/titans_webview_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.EditText[1]').click()
  275. time.sleep(1)
  276. self.d.send_keys(captcha_text)
  277. time.sleep(5)
  278. self.d.xpath(
  279. '//*[@text="验证"] | //*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.widget.Button[1]').click()
  280. time.sleep(3)
  281. return True
  282. else:
  283. print("API返回错误")
  284. return False
  285. except Exception as e:
  286. print(f"数英验证码处理异常: {e}")
  287. return False
  288. # 滑块
  289. def slider_verify(self):
  290. time.sleep(5)
  291. try:
  292. slider_slot_xpath = '//*[@resource-id="puzzleSliderDrag"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[1]'
  293. slider_main_xpath = '//*[@resource-id="puzzleImageMain"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]'
  294. slider_slot_img_path = "slider_slot.png"
  295. slider_main_img_path = "slider_main.png"
  296. if self.d.xpath(slider_slot_xpath).exists:
  297. self.d.xpath(slider_slot_xpath).screenshot().save("slider_slot.png")
  298. else:
  299. print("slider_slot_xpath not exist")
  300. self.logger.info(f"slider_slot_xpath not exist")
  301. if self.d.xpath(slider_main_xpath).exists:
  302. self.d.xpath(slider_main_xpath).screenshot().save("slider_main.png")
  303. else:
  304. print("slider_main_xpath not exist")
  305. self.logger.info(f"slider_main_xpath not exist")
  306. slider_slide_distance = 0
  307. try:
  308. with open('slider_main.png', 'rb') as f:
  309. b = base64.b64encode(f.read()).decode()
  310. # API请求配置
  311. url = "http://api.jfbym.com/api/YmServer/customApi"
  312. data = {
  313. "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
  314. "type": 22222,
  315. "image": b
  316. }
  317. headers = {
  318. "Content-Type": "application/json"
  319. }
  320. response = requests.post(url, headers=headers, json=data, timeout=30)
  321. response.raise_for_status()
  322. result = response.json()
  323. if result.get("code") == 10000 and result.get("data", {}).get("code") == 0:
  324. slider_verify_data = result["data"]
  325. print(f"slider_verify_data={slider_verify_data}")
  326. slider_slide_distance = slider_verify_data["data"]
  327. print(slider_slide_distance)
  328. else:
  329. print("api 返回错误 此时滑块验证可能呈图片形式存在")
  330. except Exception as e:
  331. return {
  332. "success": False,
  333. "error_msg": f"处理异常: {str(e)}"
  334. }
  335. slider_slide_distance = float(slider_verify_data["data"])
  336. # 获取滑块元素
  337. try:
  338. slider_xpath = '//*[@resource-id="puzzleSliderBox"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.view.View[1]'
  339. slider = self.d.xpath(slider_xpath)
  340. slider_info = slider.info
  341. bounds = slider_info['bounds']
  342. start_x = ((bounds['left'] + bounds['right']) / 2) + random.uniform(-4, 4)
  343. start_y = ((bounds['top'] + bounds['bottom']) / 2) + random.uniform(-3, 3) # ●
  344. end_x = start_x + slider_slide_distance + random.uniform(-3, 3) # ●
  345. end_y = start_y + random.uniform(-1, 1)
  346. # self.swipe(start_x, start_y, end_x, end_y,
  347. # duration=random.uniform(1.2, 2.0),
  348. # deviation=random.randint(20, 40))
  349. self.human_slide(start_x, start_y, end_x, end_y)
  350. time.sleep(2) #
  351. return True
  352. except Exception as e:
  353. print(f"滑动操作时出错: {e}")
  354. return False
  355. time.sleep(2)
  356. # 检查验证码是否消失
  357. slider_xpaths = [
  358. '//*[@text="请拖动下方滑块完成拼图"]',
  359. ]
  360. for xpath in slider_xpaths:
  361. if self.d.xpath(xpath).exists:
  362. return False
  363. return True
  364. except Exception as e:
  365. self.logger.error(f"滑块验证失败: {e}")
  366. return False
  367. # 点击
  def Click_images(self):
      """Solve the "click the icons / words in order" captcha.

      Screenshots the captcha container, sends the image to the recognition
      API, and clicks each returned point. The API answers with coordinates
      relative to the uploaded image (e.g. ``"188,165|99,128"``), so each
      point is shifted by the element's top-left corner before clicking.

      Returns:
          bool: True when at least one returned coordinate was clicked;
          False when the element is missing, the API call fails, no
          coordinates come back, or none of them could be clicked.
      """
      time.sleep(5)  # let the captcha page finish rendering
      try:
          # 1. Locate the captcha container.
          captcha_xpath = '//*[@resource-id="com.sankuai.meituan:id/titans_main_layout"] | //*[@resource-id="com.sankuai.meituan:id/h5_container"] | //*[@resource-id="root"]'
          captcha = self.d.xpath(captcha_xpath)
          if not captcha.exists:
              print("图标元素不存在")
              self.logger.info("图标元素不存在")
              return False

          # Top-left corner of the element in screen coordinates.
          bounds = captcha.info['bounds']
          image_left = bounds['left']
          image_top = bounds['top']
          image_width = bounds['right'] - bounds['left']
          image_height = bounds['bottom'] - bounds['top']
          print(f"图片位置: left={image_left}, top={image_top}, width={image_width}, height={image_height}")

          # 2. Screenshot the element (it may have vanished since the lookup).
          image_path = "Click_images.png"
          if not captcha.exists:
              print("图标元素不存在,无法截图")
              self.logger.info("图标元素不存在,无法截图")
              return False
          captcha.screenshot().save(image_path)

          try:
              with open(image_path, 'rb') as f:
                  image_b64 = base64.b64encode(f.read()).decode()

              # 3. Ask the recognition service for the click points.
              # NOTE(review): the API token is hard-coded here; consider
              # moving it to configuration.
              url = "http://api.jfbym.com/api/YmServer/customApi"
              data = {
                  "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",  # from the service's user centre
                  "type": 88888,  # captcha type id
                  "image": image_b64  # base64-encoded screenshot
              }
              headers = {
                  "Content-Type": "application/json"
              }
              response = requests.post(url, headers=headers, json=data, timeout=30)
              response.raise_for_status()
              result = response.json()
              print(f"API返回结果: {result}")

              verify_data = result.get("data")
              recognised = (
                  result.get("code") == 10000
                  and isinstance(verify_data, dict)
                  and verify_data.get("code") == 0
              )
              if not recognised:
                  error_msg = result.get("msg", "未知错误")
                  print(f"识别失败: {error_msg}")
                  return False

              print(f"verify_data={verify_data}")
              # Coordinate string looks like "188,165|99,128|91,209|235,116".
              coords_str = verify_data.get("data", "")
              if not coords_str:
                  print("未返回坐标数据")
                  return False
              print(f"坐标字符串: {coords_str}")
              coords_list = coords_str.split('|')
              print(coords_list)

              # 4. Click every point in the order returned.
              clicked = 0
              for coord in coords_list:
                  try:
                      x_img_str, y_img_str = coord.split(',')
                      x_img = int(x_img_str.strip())
                      y_img = int(y_img_str.strip())
                      print(f"图片相对坐标: x={x_img}, y={y_img}")
                      # Image-relative -> absolute screen coordinates.
                      x_screen = image_left + x_img
                      y_screen = image_top + y_img
                      print(f"屏幕绝对坐标: x={x_screen}, y={y_screen}")
                      self.d.click(x_screen, y_screen)
                      clicked += 1
                      time.sleep(self.get_sleep_time())
                  except Exception as e:
                      print(f"处理坐标 {coord} 失败: {e}")
                      continue

              # Do not report success when nothing was actually clicked.
              if clicked == 0:
                  print("所有坐标均处理失败")
                  return False

              time.sleep(self.get_sleep_time() * 2)  # give the page time to react
              return True
          except requests.exceptions.RequestException as e:
              print(f"API请求失败: {e}")
              return False
          except Exception as e:
              print(f"识别过程出错: {e}")
              return False
      except Exception as e:
          self.logger.error(f"点击图标失败: {e}")
          return False
  459. # # 检查验证是否成功
  460. # if not self.d.xpath('//*[@text="请依次点击下图图标"] | //*[@text="请按语序依次点击"]').exists:
  461. # print("所有坐标点击完成,验证成功")
  462. # return True
  463. # else:
  464. # print("所有坐标点击完成,但验证文本仍然存在,可能验证失败")
  465. # return False
  466. # 向右滑动
  def Swipe_right(self):
      """Drag the slider knob to the right end of its track.

      Reads the bounds of the track and of the knob, starts near the knob's
      centre (with a small random offset) and drags by the free length of
      the track (track width minus knob width) plus a few extra pixels so
      the knob reliably reaches the end.

      Returns:
          bool: True after the drag was performed, False when the track or
          the knob could not be found.
      """
      time.sleep(5)

      track_query = '//*[@resource-id="yodaBoxWrapper"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]'
      if not self.d.xpath(track_query).exists:
          print("未找到轨道元素")
          return False
      track_box = self.d.xpath(track_query).info['bounds']
      track_span = track_box['right'] - track_box['left']

      # The existence probe and the info lookup use slightly different
      # selector strings (the probe has a trailing blank), as in the original.
      knob_probe = '//*[@resource-id="yodaBox"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1] '
      knob_query = '//*[@resource-id="yodaBox"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]'
      if not self.d.xpath(knob_probe).exists:
          print("未找到滑块元素")
          return False
      knob_box = self.d.xpath(knob_query).info['bounds']

      # Start close to the knob centre, jittered so repeated runs differ.
      from_x = ((knob_box['left'] + knob_box['right']) / 2) + random.uniform(-4, 4)
      from_y = ((knob_box['top'] + knob_box['bottom']) / 2) + random.uniform(-3, 3)
      travel = track_span - (knob_box['right'] - knob_box['left'])
      to_x = from_x + travel + random.uniform(0, 2)
      to_y = from_y + random.uniform(-1, 1)

      print(f"滑动距离: {travel}像素")
      print(f"起点: ({from_x}, {from_y}), 终点: ({to_x}, {to_y})")

      # Overshoot slightly so the knob really reaches the right end.
      to_x = to_x + random.uniform(2, 5)
      self.human_slide_right(from_x, from_y, to_x, to_y)
      time.sleep(2)
      return True
  def Swipe_right_human_slide(self, start_x, start_y, end_x, end_y):
      """Replay a jittered 50-point drag from (start_x, start_y) towards (end_x, end_y).

      The points follow a three-segment easing curve capped at 99% of the
      distance, with uniform jitter in [-1, 1] on both axes. The x coordinate
      is kept from moving backwards and is clamped to ``end_x``. The gesture
      is then sent as touch down / move / up, releasing 2 px to the right of
      the last point.
      """
      step_count = 50
      span_x = end_x - start_x
      span_y = end_y - start_y
      track = []
      last_x = start_x  # previous x, used to keep the path from going backwards

      for step in range(step_count):
          frac = step / step_count
          # Piecewise easing curve.
          if frac >= 0.7:
              eased = 0.9 + 0.5 * ((frac - 0.7) / 0.3) ** 0.5
          elif frac >= 0.3:
              eased = 0.5 + (frac - 0.3) * 1.25
          else:
              eased = 0.5 * (frac / 0.3) ** 2
          capped = min(eased, 0.99)

          # Small random jitter on both axes.
          jitter_x = np.random.uniform(-1, 1)
          jitter_y = np.random.uniform(-1, 1)
          px = start_x + span_x * capped + jitter_x
          py = start_y + span_y * capped + jitter_y

          # Keep x from moving backwards and never past the target.
          if px < min(last_x, end_x):
              px = last_x + 1
          if px > end_x:
              px = end_x
          last_x = px
          track.append((px, py))

          # NOTE(review): this pause happens while the points are generated,
          # not while the touch moves are sent below.
          time.sleep(0.002 + 0.01 * (1 - abs(0.5 - frac)))

      print(f"points: {track}")
      self.loggerMT.info(f"points: {track}")

      # Send the gesture.
      first_x, first_y = track[0]
      self.d.touch.down(first_x, first_y)
      for px, py in track[1:]:
          self.d.touch.move(px, py)
          self.logger.info(f"滑块验证移动{px},{py}")
      final_x, final_y = track[-1]
      self.d.touch.up(final_x + 2, final_y)
  551. # print(f"points: {points}")
  552. # self.loggerPdd.info(f"points: {points}")
  553. # self.d.swipe_points(points, duration=0.05)
  554. # 拖动滑块刚
  def complexs(self):
      """Solve the "drag the slider as instructed" captcha.

      Steps, all performed within one continuous touch gesture:
        1. Press the slider and drag it to the right end of the track.
        2. Crop the hint label and the puzzle picture from a full screenshot.
        3. Send both crops to the recognition API, which returns a pixel distance.
        4. Drag the slider back to the computed target position and release.

      Once the finger is down it is always released: on the success path at
      the target position, and on every failure path at the right end of the
      track (handled by the ``finally`` clause).

      Returns:
          bool: True when the slider was dragged to the target and released,
          False on any failure.
      """
      time.sleep(5)
      touch_held = False  # True between touch.down and the matching touch.up
      release_x = release_y = None  # where to lift the finger if a step fails
      try:
          slider_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.view.View[1]'
          track_xpath = '//*[@text="请按照说明拖动滑块"]'
          if not self.d.xpath(slider_xpath).exists:
              print("滑块元素不存在")
              self.logger.info("滑块元素不存在")
              return False
          if not self.d.xpath(track_xpath).exists:
              print("滑轨元素不存在")
              self.logger.info("滑轨元素不存在")
              return False

          # Slider geometry.
          slider_bounds = self.d.xpath(slider_xpath).info['bounds']
          slider_width = slider_bounds['right'] - slider_bounds['left']
          slider_height = slider_bounds['bottom'] - slider_bounds['top']
          slider_center_x = slider_bounds['left'] + slider_width / 2
          slider_center_y = slider_bounds['top'] + slider_height / 2
          print(f"滑块中心: ({slider_center_x}, {slider_center_y})")

          # Track geometry.
          track_bounds = self.d.xpath(track_xpath).info['bounds']
          track_left = track_bounds['left']
          track_right = track_bounds['right']

          # Slider centre when the slider sits at the right end of the track.
          right_end_center_x = track_right - slider_width / 2
          right_end_center_y = slider_center_y
          print(f"最右端滑块中心坐标: ({right_end_center_x}, {right_end_center_y})")
          release_x, release_y = right_end_center_x, right_end_center_y

          # 1. Press and drag to the right end (finger stays down afterwards).
          try:
              self.d.touch.down(slider_center_x, slider_center_y)
              touch_held = True
              time.sleep(0.1)
              points = self.Swipe_trajectory(slider_center_x, slider_center_y, right_end_center_x,
                                             right_end_center_y)
              for point in points[1:]:
                  self.d.touch.move(point[0], point[1])
                  time.sleep(0.002)
              print("滑块已到达最右端")
          except Exception as e:
              print(f"滑动到最右端失败: {e}")
              return False

          # 2. Locate the hint label and the puzzle picture.
          capture_xpath1 = '// *[ @ text = "身份核实"] / android.view.View[1] / android.view.View[1] / android.widget.TextView[1]'
          capture_xpath2 = '// * [ @ text = "身份核实"] / android.view.View[1] / android.view.View[1] / android.view.View[1]'
          capture_info1_bounds = self.d.xpath(capture_xpath1).info['bounds']
          capture_info2_bounds = self.d.xpath(capture_xpath2).info['bounds']
          capture_label_left = capture_info1_bounds['left']
          capture_label_top = capture_info1_bounds['top']
          capture_label_right = capture_info1_bounds['right']
          capture_label_bottom = capture_info1_bounds['bottom']
          capture_left = capture_info2_bounds['left']
          capture_top = capture_info2_bounds['top']
          capture_right = capture_info2_bounds['right']
          capture_bottom = capture_info2_bounds['bottom']
          capture_label_width = capture_label_right - capture_label_left
          capture_label_height = capture_label_bottom - capture_label_top
          capture_width = capture_right - capture_left
          capture_height = capture_bottom - capture_top
          print(
              f"截图区域1(提示文本): left={capture_label_left}, top={capture_label_top}, width={capture_label_width}, height={capture_label_height}")
          print(
              f"截图区域2(图片): left={capture_left}, top={capture_top}, width={capture_width}, height={capture_height}")

          # Crop both regions out of one full-screen screenshot.
          screenshot_label_path = "capture_label_area.png"
          screenshot_image_path = "capture_area.png"
          try:
              full_screenshot = self.d.screenshot()
              from PIL import Image
              import io
              if isinstance(full_screenshot, bytes):
                  img = Image.open(io.BytesIO(full_screenshot))
              else:
                  img = full_screenshot
              cropped_img_1 = img.crop(
                  (capture_label_left, capture_label_top, capture_label_right, capture_label_bottom))
              cropped_img_1.save(screenshot_label_path)
              print(f"截图1已保存: {screenshot_label_path}")
              cropped_img_2 = img.crop((capture_left, capture_top, capture_right, capture_bottom))
              cropped_img_2.save(screenshot_image_path)
              print(f"截图2已保存: {screenshot_image_path}")
          except Exception as e:
              print(f"截图失败: {e}")
              return False

          # 3. Ask the recognition service for the target distance.
          try:
              with open(screenshot_label_path, 'rb') as f:
                  label_image_data = base64.b64encode(f.read()).decode()
              with open(screenshot_image_path, 'rb') as f:
                  image_data = base64.b64encode(f.read()).decode()
              # NOTE(review): the API token is hard-coded here; consider
              # moving it to configuration.
              url = "http://api.jfbym.com/api/YmServer/customApi"
              data = {
                  "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
                  "type": 29013,
                  "image": image_data,
                  "label_image": label_image_data
              }
              headers = {
                  "Content-Type": "application/json"
              }
              response = requests.post(url, headers=headers, json=data, timeout=30)
              response.raise_for_status()
              result = response.json()
              print(f"API返回结果: {result}")

              if not (result.get("code") == 10000 and result.get("data", {}).get("code") == 0):
                  error_msg = result.get("msg", "未知错误")
                  print(f"识别失败: {error_msg}")
                  return False

              verify_data = result.get("data", {})
              print(f"verify_data={verify_data}")
              data_str = verify_data.get("data", "")
              if not data_str:
                  print("云码未返回有效的data值")
                  return False

              try:
                  data_value = int(data_str)
                  print(f"云码返回的像素距离: {data_value}")
                  # NOTE(review): 108, 44 and 504 are hard-coded pixel
                  # offsets, presumably tuned for one screen size - confirm.
                  slider_target_center_x = 108 + data_value - 44
                  print(f"滑块中心目标X坐标: {slider_target_center_x}")
                  back_distance = 504 - data_value - 44
                  print(f"需要往回拖动的距离: {back_distance}")

                  # Re-read the slider position now that it has been dragged.
                  slider_bounds = self.d.xpath(slider_xpath).info['bounds']
                  current_slider_center_x = (slider_bounds['left'] + slider_bounds['right']) / 2

                  # Keep the target inside the track.
                  min_x = track_left + slider_width / 2
                  max_x = track_right - slider_width / 2
                  slider_target_center_x = max(min_x, min(slider_target_center_x, max_x))
                  actual_distance = slider_target_center_x - current_slider_center_x
                  print(f"实际需要滑动的距离: {actual_distance}")

                  # 4. Drag back to the target and release there.
                  try:
                      back_points = self.Swipe_trajectory(
                          right_end_center_x, right_end_center_y,
                          slider_target_center_x, right_end_center_y
                      )
                      for point in back_points[1:]:
                          self.d.touch.move(point[0], point[1])
                          time.sleep(0.002)
                      time.sleep(3)
                      self.d.touch.up(slider_target_center_x, right_end_center_y)
                      touch_held = False
                      return True
                  except Exception as e:
                      print(f"往回拖动失败: {e}")
                      return False
              except ValueError as e:
                  print(f"解析云码返回的data值失败: {e}")
                  return False
              except Exception as e:
                  print(f"计算滑块位置失败: {e}")
                  return False
          except requests.exceptions.RequestException as e:
              print(f"API请求失败: {e}")
              return False
          except Exception as e:
              print(f"识别过程出错: {e}")
              return False
      except Exception as e:
          self.logger.error(f"滑块验证失败: {e}")
          return False
      finally:
          # Never leave the finger pressed after a failed attempt.
          if touch_held:
              try:
                  self.d.touch.up(release_x, release_y)
              except Exception as e:
                  self.logger.error(f"释放触摸失败: {e}")
  def Swipe_trajectory(self, start_x, start_y, end_x, end_y):
      """Generate 50 jittered points for a drag from start to end.

      The progress follows a three-segment easing curve capped at 99% of the
      distance, and each point gets an integer jitter of -1, 0 or +1 on both
      axes. The x coordinate is forced to advance monotonically towards
      ``end_x`` and is clamped so it never overshoots it. This holds for
      both rightward (``end_x >= start_x``) and leftward drags.

      Args:
          start_x, start_y: Starting point of the drag.
          end_x, end_y: Target point of the drag.

      Returns:
          list[tuple]: 50 ``(x, y)`` points.
      """
      points = []
      total_steps = 50
      distance_x = end_x - start_x
      distance_y = end_y - start_y
      # +1 for a rightward drag, -1 for a leftward one; the monotonic and
      # clamp checks below are expressed relative to this direction.
      direction = 1 if distance_x >= 0 else -1
      previous_x = start_x
      for i in range(total_steps):
          ratio = i / total_steps
          # Piecewise easing curve.
          if ratio < 0.3:
              progress = 0.5 * (ratio / 0.3) ** 2
          elif ratio < 0.7:
              progress = 0.5 + (ratio - 0.3) * 1.25
          else:
              progress = 0.9 + 0.5 * ((ratio - 0.7) / 0.3) ** 0.5
          # randint's upper bound is exclusive, so (-1, 2) yields -1, 0 or 1.
          offset_x = np.random.randint(-1, 2)
          offset_y = np.random.randint(-1, 2)
          x = start_x + distance_x * min(progress, 0.99) + offset_x
          y = start_y + distance_y * min(progress, 0.99) + offset_y
          # Never step back relative to the drag direction ...
          if (x - previous_x) * direction < 0:
              x = previous_x + direction
          # ... and never go past the target.
          if (x - end_x) * direction > 0:
              x = end_x
          previous_x = x
          points.append((x, y))
      return points
  771. # 最短线连接
  def Shortest_connection(self):
      """Solve the "connect the dots of the named colour" captcha.

      Steps:
        1. Screenshot the art-text image (which names a colour) and the
           picture containing the coloured dots.
        2. Send the art-text image to the recognition API to read the colour name.
        3. Find the dots of that colour with ``find_color_coordinates``.
        4. Map the image coordinates to screen coordinates, order the dots
           with a nearest-neighbour walk, and draw a curved path through them.

      Returns:
          bool: True when the path was drawn, False on any failure.
      """
      time.sleep(5)
      try:
          art_text_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.Image[1]'
          color_points_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.view.View[1]/android.widget.Image[1]'
          art_text_img_path = "art_text.png"
          color_points_img_path = "color_points.png"

          if self.d.xpath(art_text_xpath).exists:
              self.d.xpath(art_text_xpath).screenshot().save(art_text_img_path)
              self.logger.info("艺术字截图成功")
          else:
              self.logger.warning("艺术字元素不存在")
              return False
          if self.d.xpath(color_points_xpath).exists:
              self.d.xpath(color_points_xpath).screenshot().save(color_points_img_path)
              self.logger.info("颜色元素截图成功")
          else:
              self.logger.warning("颜色点元素不存在")
              return False

          # Read the colour name from the art-text image.
          color_name = ""
          try:
              with open(art_text_img_path, 'rb') as f:
                  c = base64.b64encode(f.read()).decode()
              # NOTE(review): the API token is hard-coded here; consider
              # moving it to configuration.
              url = "http://api.jfbym.com/api/YmServer/customApi"
              data = {
                  "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
                  "type": 10118,
                  "image": c
              }
              headers = {
                  "Content-Type": "application/json"
              }
              response = requests.post(url, headers=headers, json=data, timeout=30)
              response.raise_for_status()
              result = response.json()
              self.logger.info(f"云码API返回结果: {result}")

              # The service answers either flat ({"code": 0, "data": "..."})
              # or nested ({"code": 10000, "data": {"code": 0, "data": "..."}}).
              code = result.get("code")
              if code == 0:
                  color_name = result.get("data", "")
              elif code == 10000 and "data" in result:
                  inner_data = result.get("data", {})
                  if isinstance(inner_data, dict) and inner_data.get("code") == 0:
                      color_name = inner_data.get("data", "")
                  elif isinstance(inner_data, str):
                      color_name = inner_data
              else:
                  self.logger.error(f"云码API返回异常: {result}")
                  return False

              if not color_name or not isinstance(color_name, str):
                  self.logger.error("未识别到颜色名称")
                  return False
              # The recogniser sometimes returns the character "鼗" for
              # purple; map such answers to the supported colour name.
              if "鼗" in color_name:
                  color_name = "紫色"
              self.logger.info(f"识别到的颜色名称: {color_name}")
          except Exception as e:
              self.logger.error(f"云码API调用异常: {e}")
              return False

          # Dots of that colour, in image coordinates.
          relative_coordinates = self.find_color_coordinates(color_points_img_path, color_name)
          if not relative_coordinates:
              self.logger.warning(f"未在图片中找到 {color_name} 颜色的坐标")
              return False

          element_bounds = self.d.xpath(color_points_xpath).info['bounds']
          element_left = element_bounds['left']
          element_top = element_bounds['top']
          element_width = element_bounds['right'] - element_bounds['left']
          element_height = element_bounds['bottom'] - element_bounds['top']

          # Size of the saved screenshot, needed to scale image -> element.
          try:
              color_points_img = cv2.imread(color_points_img_path)
              if color_points_img is None:
                  self.logger.error("无法读取截图")
                  return False
              screenshot_height, screenshot_width = color_points_img.shape[:2]
          except Exception as e:
              self.logger.error(f"读取截图尺寸失败: {e}")
              # Fall back to the element size (scale factor 1).
              screenshot_width = element_width
              screenshot_height = element_height

          # The scale does not depend on the point, so compute it once.
          can_scale = screenshot_width > 0 and screenshot_height > 0
          if can_scale:
              scale_x = element_width / screenshot_width
              scale_y = element_height / screenshot_height

          screen_coordinates = []
          for (rx, ry) in relative_coordinates:
              if can_scale:
                  sx = element_left + int(rx * scale_x)
                  sy = element_top + int(ry * scale_y)
              else:
                  # No usable screenshot size: offset by the element origin only.
                  sx = element_left + rx
                  sy = element_top + ry
              screen_coordinates.append((sx, sy))
              self.logger.info(f"相对坐标({rx}, {ry}) -> 屏幕坐标({sx}, {sy})")

          if len(screen_coordinates) < 2:
              self.logger.warning("需要至少2个点才能连接")
              return False

          # Greedy nearest-neighbour ordering, starting from the first point.
          unvisited = screen_coordinates.copy()
          path = [unvisited.pop(0)]
          while unvisited:
              last_point = path[-1]
              nearest_idx = min(range(len(unvisited)),
                                key=lambda i: ((last_point[0] - unvisited[i][0]) ** 2 +
                                               (last_point[1] - unvisited[i][1]) ** 2) ** 0.5)
              path.append(unvisited.pop(nearest_idx))
          self.logger.info(f"最短路径点顺序: {path}")

          curved_path = self.human_like_path(path)
          result = self.simulate_human_drawing(curved_path)
          if result:
              self.logger.info("最短线连接成功")
              time.sleep(3)
              return True
          else:
              self.logger.warning("最短线连接失败")
              return False
      except Exception as e:
          self.logger.error(f"最短线连接验证失败: {e}")
          return False
  def find_color_coordinates(self, image_path, color_name):
      """Find the centres of all blobs of a named colour in an image.

      The image is converted to HSV, thresholded with every HSV range
      registered for ``color_name`` (the ranges are OR-ed together),
      cleaned with a 3x3 morphological open/close, and the centroid of
      every external contour whose area exceeds 30 pixels is returned.

      Args:
          image_path: Path of the image file to analyse.
          color_name: Colour name; must be a key of the table below.

      Returns:
          list[tuple[int, int]]: ``(x, y)`` centroids in image pixels;
          empty when the colour is unsupported or the image cannot be read.
      """
      self.logger.info(f"开始查找颜色: {color_name}, 图片路径: {image_path}")
      # Colour name -> tuple of (lower, upper) HSV bounds. Every value is a
      # tuple of pairs, even when there is only one range.
      red_ranges = (([0, 120, 70], [10, 255, 255]), ([170, 120, 70], [180, 255, 255]))
      color_ranges = {
          "红色": red_ranges,
          "红的": red_ranges,
          "绿色": (([35, 50, 50], [85, 255, 255]),),
          "蓝色": (([90, 50, 50], [130, 255, 255]),),
          "黄色": (([20, 100, 100], [30, 255, 255]),),
          "橙色": (([5, 100, 100], [15, 255, 255]),),
          "紫色": (([130, 50, 50], [160, 255, 255]),),
          "黑色": (([0, 0, 0], [180, 255, 50]),),
          "白色": (([0, 0, 200], [180, 30, 255]),),
          "橘色": (([5, 150, 150], [15, 255, 255]),),
          # The original table listed this key twice; the later entry won.
          "褐色": (([10, 50, 20], [20, 255, 150]),),
      }
      if color_name not in color_ranges:
          self.logger.warning(f"不支持的颜色: {color_name}")
          return []

      image = cv2.imread(image_path)
      if image is None:
          self.logger.error(f"无法读取图像: {image_path}")
          return []
      hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)

      # Union of the masks of every range registered for this colour.
      mask = None
      for lower, upper in color_ranges[color_name]:
          partial = cv2.inRange(hsv, np.array(lower), np.array(upper))
          mask = partial if mask is None else cv2.bitwise_or(mask, partial)

      # Morphological open/close to remove specks and fill small holes.
      kernel = np.ones((3, 3), np.uint8)
      mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
      mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)

      contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

      coordinates = []
      min_area = 30  # ignore blobs smaller than this many pixels
      for i, contour in enumerate(contours):
          area = cv2.contourArea(contour)
          if area > min_area:
              # Centroid from the image moments.
              M = cv2.moments(contour)
              if M["m00"] != 0:
                  cx = int(M["m10"] / M["m00"])
                  cy = int(M["m01"] / M["m00"])
                  coordinates.append((cx, cy))
                  self.logger.info(f"轮廓{i}: 面积={area}, 中心点=({cx}, {cy})")
              else:
                  self.logger.info(f"轮廓{i}: 面积={area}, 无法计算中心点")
      self.logger.info(f"找到 {len(coordinates)} 个 {color_name} 坐标点")
      return coordinates
  def human_like_path(self, points):
      """Bend a polyline into a hand-drawn looking path.

      Each pair of consecutive points is joined by a quadratic Bezier curve
      whose control point is the segment midpoint pushed up to 15 px
      sideways (vertically for mostly-horizontal segments, horizontally
      otherwise). Every segment contributes its start point followed by
      nine integer samples (t = 0.1 ... 0.9); the final input point is
      appended at the end.

      Args:
          points: Sequence of ``(x, y)`` points.

      Returns:
          list: The curved path, or ``points`` itself when it has fewer than two entries.
      """
      if len(points) < 2:
          return points

      bent = []
      for seg_start, seg_end in zip(points, points[1:]):
          (x0, y0), (x1, y1) = seg_start, seg_end
          centre_x = (x0 + x1) / 2
          centre_y = (y0 + y1) / 2

          # Push the control point off the chord, perpendicular to the
          # dominant direction of the segment.
          mostly_horizontal = abs(x1 - x0) > abs(y1 - y0)
          if mostly_horizontal:
              ctrl_x = centre_x + 0
              ctrl_y = centre_y + random.uniform(-15, 15)
          else:
              ctrl_x = centre_x + random.uniform(-15, 15)
              ctrl_y = centre_y + 0

          bent.append(seg_start)
          # Sample the quadratic Bezier curve between the two end points.
          for t in np.arange(0.1, 1.0, 0.1):
              inv = 1 - t
              bx = inv ** 2 * x0 + 2 * inv * t * ctrl_x + t ** 2 * x1
              by = inv ** 2 * y0 + 2 * inv * t * ctrl_y + t ** 2 * y1
              bent.append((int(bx), int(by)))

      # Close the path with the very last input point.
      bent.append(points[-1])
      return bent
  def simulate_human_drawing(self, path):
      """Trace ``path`` on the device as one continuous touch gesture.

      Presses at the first point, moves through the remaining points with
      a +/-2 px jitter and a short random pause after each move, then
      releases at the exact last point.

      Args:
          path: Sequence of ``(x, y)`` points.

      Returns:
          bool: True when the gesture was sent, False when the path has
          fewer than two points or a device call raised.
      """
      if len(path) < 2:
          return False
      try:
          first_x, first_y = path[0]
          self.d.touch.down(first_x, first_y)
          time.sleep(random.uniform(0.05, 0.1))

          for px, py in path[1:]:
              # Small wobble so the stroke is not perfectly smooth.
              wobble_x = random.randint(-2, 2)
              wobble_y = random.randint(-2, 2)
              self.d.touch.move(px + wobble_x, py + wobble_y)
              time.sleep(random.uniform(0.01, 0.03))

          time.sleep(random.uniform(0.1, 0.2))
          last_x, last_y = path[-1][0], path[-1][1]
          self.d.touch.up(last_x, last_y)
          print("模拟人类绘制完成")
          return True
      except Exception as e:
          print(f"模拟绘制失败: {e}")
          return False
  def click_side(self):
      """Solve the "spatial reasoning" captcha (click one object in a picture).

      Reads the on-screen instruction text, screenshots the captcha picture,
      sends both to the recognition API and clicks the single point it
      returns. The point is relative to the picture, so it is shifted by
      the picture's top-left corner.

      Returns:
          bool: True when a point was clicked, False on any failure.
      """
      try:
          image_xpath = '//*[@resource-id="com.sankuai.meituan:id/titans_main_layout"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.widget.Image[1]'
          text = self.d.xpath('//*[contains(@text, "请点击")]').text
          print(f"text: {text}")

          image_element = self.d.xpath(image_xpath)
          bounds = image_element.info['bounds']
          image_left = bounds['left']
          image_top = bounds['top']
          image_width = bounds['right'] - bounds['left']
          image_height = bounds['bottom'] - bounds['top']
          print(f"图片位置: left={image_left}, top={image_top}, width={image_width}, height={image_height}")

          # Without a fresh screenshot a stale image.png from an earlier run
          # would be uploaded, so stop here.
          image_img_path = "image.png"
          if not image_element.exists:
              print("image_xpath not exist")
              return False
          image_element.screenshot().save(image_img_path)

          with open(image_img_path, 'rb') as f:
              c = base64.b64encode(f.read()).decode()
          # NOTE(review): the API token is hard-coded here; consider moving
          # it to configuration.
          url = "http://api.jfbym.com/api/YmServer/customApi"
          data = {
              "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
              "type": 30101,
              "extra": text,
              "image": c
          }
          headers = {
              "Content-Type": "application/json"
          }
          # Request errors propagate to the outer handler, which logs them
          # and returns False like every other failure.
          response = requests.post(url, headers=headers, json=data, timeout=30)
          response.raise_for_status()
          result = response.json()
          print(f"云码接口结果={result}")

          verify_data = result.get("data")
          recognised = (
              result.get("code") == 10000
              and isinstance(verify_data, dict)
              and verify_data.get("code") == 0
          )
          if not recognised:
              print(f"api 返回错误{result}")
              return False
          print(f"verify_data={verify_data}")

          # Single point, formatted as "x,y" relative to the picture.
          coords_str = verify_data.get("data")
          if not coords_str:
              print("未返回坐标数据")
              return False
          x_img_str, y_img_str = coords_str.split(',')
          x_img = int(x_img_str.strip())
          y_img = int(y_img_str.strip())
          x_screen = image_left + x_img
          y_screen = image_top + y_img
          self.d.click(x_screen, y_screen)
          time.sleep(self.get_sleep_time())
          return True
      except Exception as e:
          self.logger.error(f"空间推理验证失败: {e}")
          return False
  1084. # 人工处理
  def _handle_generic_captcha(self, xpath):
      """Wait for a human to clear a captcha that has no automatic solver.

      Polls every 2 seconds, for at most one hour, until the element
      matched by ``xpath`` is gone.

      Args:
          xpath: XPath of the captcha element to watch.

      Returns:
          bool: True once the element disappeared, False on timeout.
      """
      time.sleep(5)
      self.logger.warning("通用验证码弹窗触发,等待人工处理...")
      wait_limit = 60 * 60  # one hour, in seconds
      began = time.time()
      while time.time() - began < wait_limit:
          if self.d.xpath(xpath).exists:
              # Still on screen: check again shortly.
              time.sleep(2)
              continue
          self.logger.info("验证码已处理完成")
          return True
      self.logger.warning("验证码处理超时")
      return False
  def check_and_handle_popup(self):
      """Detect and handle whatever popup is currently on screen.

      Without a captcha: dismiss the first matching "simple" popup that was
      not clicked recently, or close an ad popup by tapping near the
      top-right corner of the screen.

      With a captcha: update the retry bookkeeping, set the pause events,
      run the solver registered for the captcha type (or wait for manual
      handling), then check whether the captcha disappeared. On failure
      the method calls itself again until ``MAX_VERIFICATION_RETRY`` is hit.

      Returns:
          bool: True when a popup was dismissed or a captcha was cleared,
          False otherwise.
      """
      d = self.spider.d
      exists, captcha_type, xpath = self.is_any_verification_popup_exists(d)

      if not exists:
          # No captcha: look for ordinary popups first.
          for simple_xpath, desc in self.popup_rules["simple"]:
              if not d.xpath(simple_xpath).exists:
                  continue
              if self._is_recent_click(simple_xpath):
                  continue
              self.logger.info(f"检测到简单弹窗: {desc}")
              d.xpath(simple_xpath).click()
              return True
          # Then ad popups, closed by tapping near the top-right corner.
          if not d.xpath('//*[contains(@text, "广告")]').exists:
              return False
          w, h = d.info['displayWidth'], d.info['displayHeight']
          d.click(w - 50, 50)
          self.logger.info("关闭广告弹窗")
          return True

      # A captcha is present.
      now = time.time()
      # Throttle: skip if one was triggered under 30 s ago and is still in progress.
      too_soon = now - self.last_verification_time < 30
      if too_soon and self.verification_in_progress.is_set():
          return False
      self.last_verification_time = now
      self.verification_count += 1

      # A different captcha type restarts the retry counter.
      if captcha_type != self.last_verification_type:
          self.logger.info(f"验证码类型变化: {self.last_verification_type} -> {captcha_type}")
          self.verification_retry_count = 0
      self.last_verification_type = captcha_type
      self.verification_retry_count += 1
      self.logger.warning(
          f"检测到验证码弹窗,类型: {captcha_type},重试次数: {self.verification_retry_count}/{self.MAX_VERIFICATION_RETRY}")
      if self.verification_retry_count > self.MAX_VERIFICATION_RETRY:
          self.logger.error("重试次数超限,重启应用")
          self._handle_verification_failure()
          return False

      self.verification_in_progress.set()
      self.pausing.set()
      self.logger.info("已设置主线程暂停事件")

      # Captcha type -> (log message, name of the solver method). The
      # method is looked up only when its type is actually hit.
      solvers = {
          "Numbers_English": ("开始处理通用数验证", "Numbers_English_verify"),
          "Swipe_right": ("开始处理向右滑动", "Swipe_right"),
          "Click_images": ("开始处理依次点击图片或语序", "Click_images"),
          "slider": ("开始处理滑块验证", "slider_verify"),
          "complexs": ("开始处理拖动滑块刚", "complexs"),
          "Shortest_connection": ("开始处理最短距离连接", "Shortest_connection"),
          "click_side": ("开始处理空间推理", "click_side"),
      }
      try:
          entry = solvers.get(captcha_type)
          if entry is None:
              self.logger.info("等待人工处理")
              result = self._handle_generic_captcha(xpath)
          else:
              banner, solver_name = entry
              self.logger.info(banner)
              result = getattr(self, solver_name)()
      except Exception as e:
          self.logger.error(f"验证码处理异常: {e}")
          result = False

      # The outcome is judged by whether the captcha is gone, not by the
      # solver's return value.
      time.sleep(5)
      verification_cleared, remaining_type = self.wait_for_verification_clear(d, timeout=7)
      if verification_cleared:
          self.logger.info(f"第{self.verification_retry_count}次验证成功")
          time.sleep(3)
          self._handle_verification_success()
          return True

      self.logger.warning(f"第{self.verification_retry_count}次验证失败,仍有验证码: {remaining_type}")
      if self.verification_retry_count >= self.MAX_VERIFICATION_RETRY:
          self._handle_verification_failure()
          return False

      # Release the pause flags and try again.
      self.verification_in_progress.clear()
      self.pausing.clear()
      time.sleep(2)
      return self.check_and_handle_popup()
  1181. '''
  1182. # 1. 处理简单弹窗
  1183. for xpath, desc in self.popup_rules["simple"]:
  1184. if d.xpath(xpath).exists and not self._is_recent_click(xpath):
  1185. self.logger.info("检测到弹窗: %s", desc)
  1186. d.xpath(xpath).click()
  1187. return True
  1188. # 2. 处理验证码弹窗
  1189. for xpath in self.popup_rules["verification"]:
  1190. if d.xpath(xpath).exists:
  1191. now = time.time()
  1192. if now - self.last_verification_time < 30:
  1193. return False # 30秒内不重复触发
  1194. self.last_verification_time = now
  1195. self.verification_count += 1
  1196. self.logger.warning("验证码弹窗触发,等待人工处理...")
  1197. if self.verification_count > self.MAX_VERIFICATION_RETRY:
  1198. self.logger.error("验证码重试超限,终止任务")
  1199. self.spider.stop_all()
  1200. return True
  1201. self.pausing.set() # 通知主线程暂停
  1202. d.toast.show("需要人工处理验证码", 120)
  1203. # 等待人工处理
  1204. start = time.time()
  1205. # while time.time() - start < 120*60:
  1206. # if not d.xpath(xpath).exists:
  1207. # self.logger.info("验证码已处理")
  1208. # d.toast.show("验证完成", 2)
  1209. # self.pausing.clear() # 放行主线程
  1210. # return True
  1211. # time.sleep(5)
  1212. while True:
  1213. if not d.xpath(xpath).exists:
  1214. self.logger.info("验证码已处理")
  1215. d.toast.show("验证完成", 2)
  1216. self.pausing.clear() # 放行主线程
  1217. return True
  1218. time.sleep(5)
  1219. self.logger.warning("验证码超时,重启APP")
  1220. self.spider.restart_app()
  1221. return True
  1222. # 3. 处理广告弹窗(点击右上角)
  1223. if d.xpath('//*[contains(@text, "广告")]').exists:
  1224. w, h = d.info['displayWidth'], d.info['displayHeight']
  1225. d.click(w - 50, 50)
  1226. self.logger.info("关闭广告弹窗")
  1227. return True
  1228. return False
  1229. '''
  def is_any_verification_popup_exists(self, d=None):
      """Report whether any known captcha popup is on screen.

      Checks the configured ``popup_rules["verification"]`` entries first,
      then two extra indicators that both map to the "complexs" type.

      Args:
          d: Device to query; defaults to ``self.d``.

      Returns:
          tuple: ``(True, captcha_type, xpath)`` for the first match,
          otherwise ``(False, None, None)``.
      """
      device = self.d if d is None else d

      # Extra indicators checked after the configured rules.
      fallback_rules = [
          ('//*[contains(@resource-id, "com.sankuai.meituan:id/yoda_activity_rootView")]', "complexs"),
          ('//*[contains(@text, "拖动滑块刚")]', "complexs"),
      ]

      for rule_group in (self.popup_rules["verification"], fallback_rules):
          for rule_xpath, rule_type in rule_group:
              if device.xpath(rule_xpath).exists:
                  return True, rule_type, rule_xpath
      return False, None, None
  1248. def wait_for_verification_clear(self, d=None, timeout=10):
  1249. """
  1250. 等待验证码完全消失
  1251. """
  1252. if d is None:
  1253. d = self.d
  1254. start_time = time.time()
  1255. while time.time() - start_time < timeout:
  1256. exists, captcha_type, _ = self.is_any_verification_popup_exists(d)
  1257. if not exists:
  1258. return True, None
  1259. time.sleep(1)
  1260. exists, captcha_type, _ = self.is_any_verification_popup_exists(d)
  1261. if exists:
  1262. self.logger.info(f"超时,类型: {captcha_type}")
  1263. return False, captcha_type
  1264. else:
  1265. return True
  1266. def _handle_verification_success(self):
  1267. """验证成功后的处理"""
  1268. time.sleep(5)
  1269. self.verification_retry_count = 0
  1270. self.last_verification_type = None
  1271. self.verification_in_progress.clear()
  1272. self.pausing.clear()
  1273. self.verification_count = 0
  1274. self.logger.info("验证成功,清除暂停状态")
  1275. def _handle_verification_failure(self):
  1276. """验证失败 - 等待人工处理"""
  1277. self.logger.error("验证码处理失败,等待人工处理...")
  1278. # 重置验证状态,但不重启应用
  1279. self.verification_retry_count = 0
  1280. self.last_verification_type = None
  1281. self.verification_in_progress.clear()
  1282. # 设置等待人工处理的超时时间(30分钟)
  1283. timeout = 30 * 60
  1284. start_time = time.time()
  1285. # 持续监控验证码状态,等待人工处理
  1286. while time.time() - start_time < timeout:
  1287. # 检查是否还有验证码存在
  1288. exists, captcha_type, xpath = self.is_any_verification_popup_exists()
  1289. if not exists:
  1290. # 验证码已消失,可能是人工处理成功
  1291. self.logger.info("验证码已消失,人工处理成功")
  1292. # 额外等待确保页面稳定
  1293. time.sleep(3)
  1294. # 清除暂停状态,放行线程
  1295. self.pausing.clear()
  1296. self.logger.info("人工处理完成,放行线程")
  1297. return
  1298. # 每隔10秒检查一次
  1299. time.sleep(10)
  1300. # 超时处理
  1301. self.logger.warning("等待人工处理超时,尝试继续执行")
  1302. # 强制清除暂停状态,放行线程
  1303. self.pausing.clear()
  1304. self.logger.warning("已超时,强制清除暂停状态,放行线程")
    def stop(self):
        """Set ``self.running`` to ``False``."""
        self.running = False
  1307. def get_access_token():
  1308. AppKey = "tRK2RhyItCSh6BzyT4CNVXQa"
  1309. AppSrcret = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
  1310. token_url = 'https://aip.baidubce.com/oauth/2.0/token'
  1311. url = f"{token_url}?grant_type=client_credentials&client_id={AppKey}&client_secret={AppSrcret}"
  1312. payload = ""
  1313. headers = {
  1314. 'Content-Type': 'application/json',
  1315. 'Accept': 'application/json'
  1316. }
  1317. response = requests.request("POST", url, headers=headers, data=payload)
  1318. try:
  1319. return response.json()['access_token']
  1320. except:
  1321. return None
class TaskReporter:
    """Task report manager (described as thread-safe by the original author).

    NOTE: the implementation of every method below is currently commented
    out, so apart from ``__init__`` the methods do nothing and return ``None``.
    """

    def __init__(self):
        self.tasks_data = {}  # data stored per task
        self.lock = threading.Lock()

    def start_task(self, task_id: int, start_page: int, end_page: int):
        """Record the start of a task (implementation currently disabled)."""
        # with self.lock:
        #     self.tasks_data[task_id] = {
        #         'task_id': task_id,
        #         'start_time': int(time.time()),
        #         'end_time': None,
        #         'start_page': start_page,
        #         'end_page': end_page,
        #         'actual_end_page': start_page,  # page actually reached
        #         'real_count': 0,  # number of items actually collected
        #         'status': 'running',  # running, completed, failed
        #         'finish_status': 0,  # 0: unfinished, 1: finished
        #     }

    def update_task_progress(self, task_id: int,
                             actual_end_page: int = None,
                             real_count: int = None):
        """Update the progress of a task (implementation currently disabled)."""
        # with self.lock:
        #     if task_id in self.tasks_data:
        #         if actual_end_page is not None:
        #             self.tasks_data[task_id]['actual_end_page'] = actual_end_page
        #         if real_count is not None:
        #             self.tasks_data[task_id]['real_count'] = real_count

    def end_task(self, task_id: int, status: str = 'completed',
                 finish_status: int = 0, force_end_page: int = None):
        """Record the end of a task and report it (implementation currently disabled)."""
        # with self.lock:
        #     if task_id in self.tasks_data:
        #         data = self.tasks_data[task_id]
        #         data['end_time'] = int(time.time())
        #         data['status'] = status
        #         data['finish_status'] = finish_status
        #         if force_end_page is not None:
        #             data['actual_end_page'] = force_end_page
        #
        #         # Build the report payload
        #         report_data = {
        #             "collect_task_allocate_id": data['task_id'],
        #             "status": 3 if data['status'] == 'completed' else 4,
        #             "finish_status": data['finish_status'],
        #             'real_count': data['real_count'],
        #             'start_time': data['start_time'],
        #             'end_time': data['end_time'],
        #             'start_page': data['start_page'],
        #             'end_page': data['actual_end_page']
        #         }
        #
        #         # Call the report endpoint
        #         self._call_report_api(report_data)

    def _call_report_api(self, data: Dict[str, Any]):
        """Call the report endpoint (implementation currently disabled)."""
        # try:
        #     url = 'https://scheduleapi.findit.ltd/api/collect_equipment_execute/result_report'
        #     resp = requests.post(url, json=data, timeout=10)
        #
        #     if resp.status_code == 200:
        #         print(f"任务 {data['collect_task_allocate_id']} 上报成功")
        #         # self.loggerMT.info(f"任务 {data['collect_task_allocate_id']} 上报成功")
        #     else:
        #         print(f"任务 {data['collect_task_allocate_id']} 上报失败: {resp.status_code}")
        #         # self.loggerMT.info(f"任务 {data['collect_task_allocate_id']} 上报失败: {resp.status_code}")
        # except Exception as e:
        #     print(f"上报接口调用异常: {e}")
# Module-level report manager instance
reporter = TaskReporter()
  1393. class MT:
    def __init__(self):
        """
        Initialise the scraper state.

        Besides plain attribute defaults this calls ``get_access_token()``,
        loads the city-to-province mapping via ``get_city_info()``, creates
        the ``AipOcr`` client and builds the city-name-to-id lookup.
        """
        # self.package_name = 'com.sankuai.meituan'
        self.package_name = Config.PACKAGE_NAME
        self.access_token = get_access_token()
        self.city2province = self.get_city_info()
        # NOTE(review): OCR credentials are hard-coded here — consider moving them to Config.
        self.APP_ID = '116857964'
        self.API_KEY = '1gAzACJOAr7BeILKqkqPOETh'
        self.SECRET_KEY = 'ZNArANb9GwJYgLKg4EfYhukKBfPdl1n3'
        self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
        # Legacy direct MySQL client configuration (disabled):
        # host = Config.DB_HOST
        # user = Config.DB_USER
        # password = Config.DB_PASSWORD
        # database = Config.DB_NAME
        # port = Config.DB_PORT
        # print(f'数据库配置:host:{host},user:{user},password:{password},database:{database},port:{port}')
        self.table_name = Config.DB_AUTO_DRUG_TABLE  # "mt_drug"
        self.shop_table_name = Config.DB_SHOP_TABLE
        # print(f'数据库表名:table_name:{self.table_name},shop_table_name:{self.shop_table_name}')
        # self.mysql_client = mysqlClient(host, user, password, database, port)
        self.loggerMT = logging.getLogger()
        self.search_key = None  # search keyword (a product name)
        self.unrelated_data = 0  # number of unrelated records
        self.shop_data_num = 0  # number of shop records
        # === Added: collection statistics ===
        self.collected_count = 0  # number of products actually collected
        self.task_id = None  # task id
        self.start_time = None  # task start time
        self.product_brand = None
        self.product_specs = None
        self.product_like = ''
        self.company_id = None
        self.title_key = None
        self.current_page = 0  # current page number
        self.task_start_page = 0  # first page of the task
        self.task_end_page = 0  # last page of the task
        self.city_to_name = city_name_to_id.build_city_name_to_id(_DEFAULT_PATH)
        # ====================
  1431. def update_task_status(self, status):
  1432. """更新任务状态到数据库"""
  1433. if not self.task_id:
  1434. return
  1435. try:
  1436. retrieve_conn = get_retrieve_mysql()
  1437. cursor = retrieve_conn.cursor()
  1438. update_time = time.time()
  1439. update_sql = """
  1440. UPDATE retrieve_collect_task_allocate
  1441. SET status = %s, update_time = %s
  1442. WHERE id = %s
  1443. """
  1444. cursor.execute(update_sql, (status, update_time, self.task_id))
  1445. retrieve_conn.commit()
  1446. self.loggerMT.info(f"任务 {self.task_id} 状态更新为 {status}")
  1447. except Exception as e:
  1448. self.loggerMT.error(f"更新任务状态失败: {e}")
  1449. finally:
  1450. if 'cursor' in locals():
  1451. cursor.close()
  1452. if 'retrieve_conn' in locals():
  1453. retrieve_conn.close()
  1454. def stop_app(self):
  1455. self.d.app_stop(self.package_name)
  1456. time.sleep(5)
  1457. def start_app(self):
  1458. self.d.app_start(self.package_name)
  1459. time.sleep(5)
  1460. def restart_app(self):
  1461. """
  1462. 重启app
  1463. :return:
  1464. """
  1465. self.stop_app()
  1466. self.start_app()
  1467. time.sleep(5) # 增加稳定等待时间
  1468. # 强制检查弹窗
  1469. self.monitor.check_and_handle_popup()
  1470. @staticmethod
  1471. def get_sleep_time():
  1472. # return random.randint(5, 8)
  1473. return random.uniform(0.5, 1.5)
  1474. @staticmethod
  1475. def get_current_date():
  1476. return datetime.datetime.now().strftime('%Y/%m/%d')
  1477. @staticmethod
  1478. def get_city_info():
  1479. """
  1480. 获取所有的省市数据
  1481. :return:
  1482. """
  1483. file_path = '../kailin_city.json'
  1484. with open(file_path, 'r', encoding='utf-8') as f:
  1485. data = json.load(f)
  1486. province = {province_one["id"]: province_one for province_one in data['province']}
  1487. city2province = dict()
  1488. city = data['city']
  1489. for city_one in city:
  1490. name = city_one['name']
  1491. pid = city_one['pid']
  1492. if len(str(pid)) > 2:
  1493. pid = int(re.match('^\d{2}', str(pid)).group())
  1494. city2province[name] = province[pid]['name']
  1495. return city2province
  1496. # 将30分钟后采集的数据上报到服务端
  1497. def up_data_to_service(self, collect_task_allocate_id, status, finish_status, real_count, start_time, end_time,
  1498. start_page, end_page):
  1499. # 1. 请求地址
  1500. url = 'https://scheduleapi.findit.ltd/api/collect_equipment_execute/result_report'
  1501. # 2. 请求参数(字典形式)
  1502. json_data = {
  1503. "collect_task_allocate_id": collect_task_allocate_id,
  1504. "status": status,
  1505. "finish_status": finish_status,
  1506. 'real_count': real_count,
  1507. 'start_time': start_time,
  1508. 'end_time': end_time,
  1509. 'start_page': start_page,
  1510. 'end_page': end_page
  1511. }
  1512. # 3. 发送 POST 并拿到结果
  1513. # resp = requests.post(url, json=json_data, headers=headers)
  1514. resp = requests.post(url, json=json_data)
  1515. # 4. 打印结果
  1516. print("状态码:", resp.status_code)
  1517. # print("响应体(文本):\n", resp.text) # 纯文本
  1518. print("响应体(JSON):\n", resp.json()) # 如果返回的是 JSON,可改用这行
  1519. def get_shop_name(self):
  1520. """
  1521. 获取店铺名
  1522. :return:
  1523. """
  1524. try:
  1525. shop_name = self.d.xpath(
  1526. '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView').text
  1527. print(f'获取到店铺名:{shop_name}')
  1528. return shop_name
  1529. except:
  1530. try:
  1531. shop_name = self.d.xpath(
  1532. '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView').text
  1533. print(f'获取到店铺名2:{shop_name}')
  1534. return shop_name
  1535. except Exception as e:
  1536. # 点击店铺曲获取店铺名称
  1537. print("点击店铺进入后获取店铺名称")
  1538. self.enter_shop()
  1539. shop_xpath = '//*[@resource-id="com.sankuai.meituan:id/layout_header_view"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]//android.widget.FrameLayout[2]/android.widget.FrameLayout[1]/android.widget.TextView'
  1540. if self.d.xpath(shop_xpath).exists:
  1541. shop_name = self.d.xpath(shop_xpath).text
  1542. self.swipe_back(1)
  1543. return shop_name
  1544. else:
  1545. print(f'获取店铺名出错:{e}')
  1546. return None
  1547. def get_qualification_number(self):
  1548. """
  1549. 获取资质编号
  1550. :return:
  1551. """
  1552. try:
  1553. qualification_number_str = self.d.xpath(
  1554. '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[2]').text
  1555. qualification_number = qualification_number_str.strip('资质编号:').strip()
  1556. return qualification_number
  1557. except:
  1558. return None
  1559. def get_shop_address(self):
  1560. try:
  1561. xpath = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView'
  1562. if self.d.xpath(xpath).exists:
  1563. shop_address = self.d.xpath(xpath).text
  1564. print(f'111-获取到店铺地址:{shop_address}')
  1565. if '发货时间' in shop_address:
  1566. print(f'店铺地址包含发货时间,再次获取店铺地址')
  1567. xpath2 = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.TextView'
  1568. if self.d.xpath(xpath2).exists:
  1569. shop_address = self.d.xpath(xpath2).text
  1570. print(f'222-获取到店铺地址:{shop_address}')
  1571. else:
  1572. print(f'222-xpath2获取店铺地址失败')
  1573. else:
  1574. shop_address = ''
  1575. print(f'333-获取到店铺地址:{shop_address}')
  1576. return shop_address
  1577. except:
  1578. print(f'获取店铺地址出错-get_shop_address')
  1579. return None
  1580. def enter_detail(self):
  1581. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/recycler"]/android.widget.FrameLayout[1]').click()
  1582. time.sleep(self.get_sleep_time())
  1583. def save_to_database(self, data):
  1584. print(f'保存数据到数据库:{data}')
  1585. # 连接数据库
  1586. conn = get_retrieve_mysql_real()
  1587. # 创建游标对象
  1588. cur = conn.cursor()
  1589. # add_sql = "insert into delete_friend_table(delete_user_name,delete_user_id,delete_content,delete_time) value(%s,%s,%s,%s)"
  1590. add_sql = f"""
  1591. INSERT INTO retrieve_scrape_data
  1592. (enterprise_id,product_name, min_price, manufacture_date, expiry_date, store_name, province_name, city_name, province_id, city_id, manufacturer, product_specs, approval_number, link_url,
  1593. scrape_date, is_sold_out, qualification_number, platform_id, sales,inventory,snapshot_url)
  1594. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  1595. """
  1596. cur.execute(add_sql, (
  1597. data['enterprise_id'], data['product_name'], data['min_price'], data['manufacture_date'], data['expiry_date'],
  1598. data['store_name'], data['province_name'], data['city_name'], data['province_id'], data['city_id'],
  1599. data['manufacturer'], data['product_specs'], data['approval_number'], data['link_url'],
  1600. data['scrape_date'], data['is_sold_out'], data['qualification_number'], data['platform_id'], data['sales'],
  1601. data['inventory'], data['snapshot_url']))
  1602. conn.commit() # 提交数据
  1603. # self.mysql_client.insert(self.table_name, data)
  1604. print(f"存入数据库成功")
  1605. # === 新增:更新采集计数 ===
  1606. self.collected_count += 1
  1607. if self.task_id:
  1608. # 更新上报进度
  1609. reporter.update_task_progress(
  1610. task_id=self.task_id,
  1611. real_count=self.collected_count
  1612. )
  1613. def save_shop_info_to_database(self, data):
  1614. print(f'保存店铺数据到数据库:{data}')
  1615. # 连接数据库
  1616. conn = get_retrieve_mysql_real()
  1617. # 创建游标对象
  1618. cur = conn.cursor()
  1619. add_sql = f"""
  1620. INSERT INTO {self.shop_table_name}
  1621. (shop, contact_address, qualification_number, business_license_company, business_license_address, scrape_date, platform)
  1622. VALUES (%s, %s, %s, %s, %s, %s, %s)
  1623. """
  1624. cur.execute(add_sql, (
  1625. data['shop'], data['contact_address'], data['qualification_number'], data['business_license_company'],
  1626. data['business_license_address'], data['scrape_date'], data['platform']))
  1627. conn.commit() # 提交数据
  1628. # self.mysql_client.insert(self.shop_table_name, data)
  1629. print(f'存入店铺信息到数据库成功')
  1630. def swipe_up(self):
  1631. """
  1632. 上滑
  1633. :return:
  1634. """
  1635. screen_width = self.d.info['displayWidth']
  1636. screen_height = self.d.info['displayHeight']
  1637. duration_rate = random.uniform(0, 0.3)
  1638. self.d.swipe(screen_width // 2, screen_height - 100, screen_width // 2, 100, duration=duration_rate)
  1639. no = random.uniform(0, 1)
  1640. if no > 0.85:
  1641. # 有的时候卡着 再稍微往上滑一点点
  1642. self.d.swipe_ext("up", 0.1)
  1643. time.sleep(self.get_sleep_time())
  1644. def swipe_back(self, no):
  1645. """
  1646. 返回
  1647. :param no: 回退次数
  1648. :return:
  1649. """
  1650. for idx in range(no):
  1651. self.d.press('back')
  1652. time.sleep(self.get_sleep_time())
  1653. def drug_price(self):
  1654. """
  1655. 获取药品价格
  1656. :return:
  1657. """
  1658. try:
  1659. price_str = self.d.xpath('//*[starts-with(@text,"¥")]').text
  1660. price = float(re.search('[\d\.]+', price_str).group())
  1661. print(f'获取到价格:{price}')
  1662. return price
  1663. except Exception as e:
  1664. print(f'提取价格出错-->{e}')
  1665. return None
  1666. def drug_sale_num(self):
  1667. """
  1668. 获取药品销量
  1669. :return:
  1670. """
  1671. try:
  1672. sales_element = self.d.xpath('//*[starts-with(@text,"已售")]')
  1673. if sales_element.exists:
  1674. sales_num_str = self.d.xpath('//*[starts-with(@text,"已售")]').text
  1675. sales_num_str = sales_num_str.replace("已售", "").strip()
  1676. # price = float(re.search(r'[\d\.]+', price_str).group())
  1677. print(f'获取到已售数量:{sales_num_str}')
  1678. return sales_num_str
  1679. return None
  1680. except Exception as e:
  1681. print(f'提取已售数量出错-->{e}')
  1682. return None
  1683. def restart_uiautomator_services(self, device_id):
  1684. """
  1685. 重启atx的uiautomator 服务
  1686. :param device_id:
  1687. :return:
  1688. """
  1689. stop_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d --stop'
  1690. start_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d'
  1691. # result = subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
  1692. # print(result.stdout)
  1693. subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
  1694. time.sleep(self.get_sleep_time())
  1695. subprocess.run(start_uiautomator_services, capture_output=True, text=True, shell=True)
  1696. time.sleep(self.get_sleep_time())
  1697. def connect_devices(self, device_id):
  1698. """
  1699. 连接设备
  1700. :return:
  1701. """
  1702. try:
  1703. self.d = u2.connect_usb(device_id)
  1704. # 设置隐形等待时间
  1705. # self.d.implicitly_wait(5)
  1706. self.restart_uiautomator_services(device_id)
  1707. self.oss_config = {
  1708. "access_key_id": 'LTAI5tDwjfteBvivYN41r8sJ',
  1709. "access_key_secret": 'yowuOGi2nYYnrqGpO3qcz94C4brcPp',
  1710. "endpoint": "oss-cn-shenzhen.aliyuncs.com", # 例:oss-cn-beijing.aliyuncs.com
  1711. "bucket_name": "zhijiayun-jiansuo",
  1712. "oss_prefix": "scrape_data/" # OSS中存放截图的前缀(虚拟文件夹)
  1713. }
  1714. print(f'连接到设备:{device_id}')
  1715. except Exception as e:
  1716. print(f'{device_id} 连接错误: {e}')
  1717. raise Exception(e)
  1718. def get_ocr_res(self, img):
  1719. try:
  1720. # img地址
  1721. print(f'开始识别图片:{img}')
  1722. request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
  1723. # 二进制方式打开图片文件
  1724. f = open(img, 'rb')
  1725. img = base64.b64encode(f.read())
  1726. params = {"image": img}
  1727. # access_token = get_access_token()
  1728. request_url = request_url + "?access_token=" + self.access_token
  1729. headers = {'content-type': 'application/x-www-form-urlencoded'}
  1730. response = requests.post(request_url, data=params, headers=headers)
  1731. if response:
  1732. res = response.json()
  1733. new_dic = dict()
  1734. for ite in res['words_result'].keys():
  1735. new_dic[ite] = res['words_result'][ite]['words']
  1736. print('资质数据信息', new_dic)
  1737. return new_dic
  1738. else:
  1739. return None
  1740. except:
  1741. return None
  1742. def remove_watermark(self, img_path):
  1743. """
  1744. 图片去水印(将水印部分变成白色背景)并将数据转化为二进制数据
  1745. :param img_path: 图片路径
  1746. :return: 二进制图片数据
  1747. """
  1748. img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
  1749. endswith = os.path.splitext(img_path)[1]
  1750. new = np.clip(1.4057577998008846 * img - 38.33089999653017, 0, 255).astype(np.uint8)
  1751. _, img_binary = cv2.imencode(endswith, new)
  1752. return img_binary
  1753. def get_ocr_res_image(self, img):
  1754. try:
  1755. image = self.remove_watermark(img)
  1756. # image_file = open(img,'wb')
  1757. # image_file.write(image)
  1758. # res_image = self.client.basicAccurate(image) # 高精度
  1759. res_image = self.client.basicGeneral(image)
  1760. # print(f'百度api返回结果:{res_image}')
  1761. # print(res_image.get('words_result', ''))
  1762. # new_dic = dict()
  1763. data = res_image.get('words_result', '')
  1764. print(f'百度api返回结果:{data}')
  1765. # full_text = ';'.join(item['words'] for item in data)
  1766. # address = ''
  1767. # for item in data:
  1768. # if '企业注册号' in item['words']:
  1769. # print('come in 111')
  1770. # reg_number = item['words'].split(':', 1)[1].strip()
  1771. # elif '企业名称' in item['words']:
  1772. # print('come in 222')
  1773. # company_name = item['words'].split(':', 1)[1].strip()
  1774. # elif '所:' in item['words']:
  1775. # print('come in 333')
  1776. # address = item['words'].split(':', 1)[1].strip()
  1777. # # 输出结果
  1778. # print("企业注册号:", reg_number)
  1779. # print("企业名称:", company_name)
  1780. # print("住所:", address)
  1781. return data
  1782. except:
  1783. return None
  1784. def screenshot_the_business_license(self, qualification_number):
  1785. screenshot_path = 'screenshot1.png'
  1786. self.d.screenshot(screenshot_path)
  1787. img = cv2.imread(screenshot_path)
  1788. # 指定裁剪区域 (left, top, right, bottom)
  1789. left = 0
  1790. top = 480
  1791. right = 720
  1792. bottom = 1420
  1793. cropped_img = img[top:bottom, left:right]
  1794. if qualification_number:
  1795. cropped_screenshot_path = 'D:\\work\\dfwy_spider\\drug_data\\mt\\screenshot\\' + qualification_number + '.png'
  1796. else:
  1797. cropped_screenshot_path = 'cropped_screenshot.png'
  1798. cv2.imwrite(cropped_screenshot_path, cropped_img)
  1799. return cropped_screenshot_path
  1800. def screenshot_instruction(self):
  1801. # 获取当前时间
  1802. current_time = datetime.datetime.now()
  1803. # 格式化为时分秒
  1804. time_str = current_time.strftime("%H-%M-%S")
  1805. # 生成随机的 8 位字符串
  1806. random_str = secrets.token_hex(4) # 生成 4 个字节的随机字符串,转换为 8 位十六进制字符串
  1807. print(time_str)
  1808. screenshot_path = 'instructionscreenshot1-' + time_str + '-' + random_str + '.png'
  1809. self.d.screenshot(screenshot_path)
  1810. return screenshot_path
  1811. def extract_specification(self, text):
  1812. """提取药品规格信息"""
  1813. # 方法1:简单去除到期信息
  1814. pattern = r'^[^【]+'
  1815. match = re.search(pattern, text)
  1816. if match:
  1817. return match.group(0).strip()
  1818. return text
  1819. # 获取商品title
  1820. def get_title(self):
  1821. # try:
  1822. # title = self.d.xpath(
  1823. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  1824. # except:
  1825. # title = self.d.xpath(
  1826. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView').text
  1827. # title = self.d.xpath('//*[contains(@text, "舒肝颗粒")]').text
  1828. def _inner():
  1829. print(f'获取商品title时的搜索关键字:{self.title_key}')
  1830. # title = self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text
  1831. # 初始化
  1832. drugs_name = ''
  1833. specifications = ''
  1834. title = ''
  1835. # 循环的获取title为了有时间来处理人机验证
  1836. for m in range(1, 6000):
  1837. if self.d.xpath(f'//*[contains(@text, "{self.title_key}")]').exists:
  1838. title = self.safe_exec(
  1839. lambda: self.d.xpath(f'//*[contains(@text, "{self.title_key}")]').text
  1840. )
  1841. print(f"第{m}次获取title成功")
  1842. break
  1843. else:
  1844. time.sleep(3)
  1845. # return drugs_name, specifications
  1846. title = title[1:] if title.startswith('0') else title
  1847. print(f'获取到药品标题:{title}')
  1848. # 从里面匹配出药品名和规格
  1849. # drugs_name
  1850. # specifications
  1851. # match = re.search(r'([^\d]+)([\d\D]+)', title)
  1852. if self.search_key == '999赐多康大豆':
  1853. return title, '1罐'
  1854. if self.search_key == "999感冒清热颗粒":
  1855. match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
  1856. else:
  1857. match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
  1858. if match:
  1859. # drugs_name = match.group(1).strip() + match.group(2).strip()
  1860. drugs_name = title
  1861. specifications = match.group(3).strip()
  1862. print("药品名:", drugs_name)
  1863. print("规格:", specifications)
  1864. # 如果品规中包含到期则需要再次的正则处理
  1865. if '到期' in specifications:
  1866. specifications = self.extract_specification(specifications)
  1867. # print('完整药名:', drugs_name + specifications)
  1868. return drugs_name, specifications
  1869. else:
  1870. if title == '999抗病毒口服液10ml*12' or title == '999抗病毒口服液':
  1871. drugs_name = title
  1872. specifications = '10ml*12支/盒'
  1873. return drugs_name, specifications
  1874. elif title == '999抗病毒口服液10ml*10':
  1875. drugs_name = title
  1876. specifications = '10ml*10支/盒'
  1877. return drugs_name, specifications
  1878. elif title == '999小柴胡颗粒':
  1879. drugs_name = title
  1880. specifications = '10g*9袋/盒'
  1881. return drugs_name, specifications
  1882. elif title == '999养胃舒颗粒':
  1883. drugs_name = title
  1884. specifications = '10g*10袋/盒'
  1885. return drugs_name, specifications
  1886. elif title == '三九胃泰胶囊':
  1887. drugs_name = title
  1888. specifications = '0.5g*24粒/盒'
  1889. return drugs_name, specifications
  1890. elif title == '999补脾益肠丸':
  1891. drugs_name = title
  1892. specifications = '6g*9袋/盒'
  1893. return drugs_name, specifications
  1894. elif title == '999复方感冒灵颗粒':
  1895. drugs_name = title
  1896. specifications = '12.5g*15袋/盒'
  1897. return drugs_name, specifications
  1898. else:
  1899. print("没有匹配到预期格式")
  1900. drugs_name = title
  1901. specifications = ''
  1902. return drugs_name, specifications
  1903. # 用 safe_exec 包装内部逻辑,确保验证码阻塞
  1904. return self.safe_exec(_inner)
  1905. def enter_shop(self):
  1906. """
  1907. 进店,方便提取资质环境
  1908. :return:
  1909. """
  1910. # self.d.xpath('//*[@text="进店"]').click()
  1911. self.d.xpath('//*[@text="店铺"]').click()
  1912. time.sleep(self.get_sleep_time())
  1913. def enter_shoper(self):
  1914. """
  1915. 进入商家
  1916. :return:
  1917. """
  1918. is_shoper_exists = 0
  1919. for i in range(10):
  1920. if self.d.xpath('//*[@text="商家"]').exists:
  1921. print(f'第{i}次商家存在')
  1922. is_shoper_exists = 1
  1923. break
  1924. else:
  1925. print(f'第{i}次商家不存在')
  1926. time.sleep(self.get_sleep_time())
  1927. if is_shoper_exists == 1:
  1928. self.d.xpath('//*[@text="商家"]').click()
  1929. time.sleep(self.get_sleep_time())
  1930. return True
  1931. else:
  1932. return False
  1933. # 点击查看商家资质
  1934. def scan_shoper_license(self):
  1935. exist_shoper = 0
  1936. for i in range(10):
  1937. if self.d.xpath('//*[@text="查看商家资质"]').exists:
  1938. print(f'第{i}次查看商家资质存在')
  1939. exist_shoper = 1
  1940. break
  1941. else:
  1942. print(f'第{i}次查看商家资质不存在')
  1943. time.sleep(self.get_sleep_time())
  1944. if exist_shoper == 1:
  1945. self.d.xpath('//*[@text="查看商家资质"]').click()
  1946. time.sleep(self.get_sleep_time())
  1947. else:
  1948. self.swipe_back(1)
  1949. # 验证商品的信息是否在数据库中已存在
  1950. def data_is_exists(self, data):
  1951. """
  1952. 检查指定数据是否已存在于数据库表中(仅检查存在性)
  1953. 参数:
  1954. data: 包含查询条件的字典,键为列名,值为条件值
  1955. 返回:
  1956. True: 数据存在
  1957. False: 数据不存在
  1958. None: 检查过程中出错
  1959. """
  1960. # dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  1961. # 'platform': '美团'}
  1962. # 1. 验证必要字段
  1963. required_keys = ['product', 'min_price', 'shop', 'scrape_date', 'platform']
  1964. if not all(key in data for key in required_keys):
  1965. missing = [key for key in required_keys if key not in data]
  1966. logging.error(f"缺少必要字段: {', '.join(missing)}")
  1967. return None
  1968. try:
  1969. # 连接数据库
  1970. conn = get_retrieve_mysql_real()
  1971. # 创建游标对象
  1972. cur = conn.cursor()
  1973. # query_sql = f"SELECT * FROM {self.table_name} WHERE product = '{data['product']}' AND min_price = '{data['min_price']}' AND shop = '{data['shop']}' AND scrape_date = '{data['scrape_date']}' AND platform = '{data['platform']}'"
  1974. # cur.execute(query_sql)
  1975. query_sql = """
  1976. SELECT * FROM {}
  1977. WHERE product_name = %s
  1978. AND min_price = %s
  1979. AND store_name = %s
  1980. AND scrape_date = %s
  1981. AND platform_id = %s
  1982. """.format('retrieve_scrape_data')
  1983. cur.execute(query_sql, (
  1984. data['product'],
  1985. data['min_price'],
  1986. data['shop'],
  1987. data['scrape_date'],
  1988. '4'
  1989. ))
  1990. result = cur.fetchone()
  1991. return bool(result) # 如果存在返回True,否则False
  1992. except Exception as e:
  1993. print(f"MySQL 错误: {str(e)}")
  1994. # 验证店铺信息是否在数据库中已存在
  1995. def shop_is_exists_database(self, shop):
  1996. try:
  1997. # 连接数据库
  1998. conn = get_retrieve_mysql_real()
  1999. # 创建游标对象
  2000. cur = conn.cursor()
  2001. query_sql = """
  2002. SELECT * FROM {}
  2003. WHERE shop = %s
  2004. """.format(self.shop_table_name)
  2005. cur.execute(query_sql, (
  2006. shop
  2007. ))
  2008. result = cur.fetchone()
  2009. return bool(result) # 如果存在返回True,否则False
  2010. except Exception as e:
  2011. print(f"MySQL 错误: {str(e)}")
  2012. def wait_if_verifying(self, monitor, timeout=120):
  2013. """验证码处理期间阻塞主线程"""
  2014. start = time.time()
  2015. while monitor.pausing.is_set() and time.time() - start < timeout:
  2016. time.sleep(1)
  2017. def wait_for_ready(self, monitor, timeout=86400):
  2018. """进入每一页前都先等验证码"""
  2019. start = time.time()
  2020. while monitor.pausing.is_set() and time.time() - start < timeout:
  2021. time.sleep(1)
  2022. # 额外保险:如果验证码突然在这一秒才弹,再主动扫一次
  2023. monitor.check_and_handle_popup()
  2024. def safe_list(self, xpath, monitor):
  2025. """线程安全地拿商品列表"""
  2026. self.wait_for_ready(monitor)
  2027. return self.d.xpath(xpath).all()
  2028. def safe_exec(self, func, *args, **kwargs):
  2029. """
  2030. 万能安全壳:执行 func 前检查验证码,
  2031. 若监控线程已置位 pausing,则一直阻塞直到放行。
  2032. """
  2033. # 强制等待一小段时间,让监控线程有机会检测
  2034. time.sleep(0.1)
  2035. while self.monitor.pausing.is_set():
  2036. time.sleep(1)
  2037. # 执行真正逻辑
  2038. return func(*args, **kwargs)
  2039. def get_next_data(self, data, target):
  2040. for i, item in enumerate(data):
  2041. if item['words'] == target:
  2042. if i + 1 < len(data):
  2043. return data[i + 1]['words']
  2044. return None
  2045. def delete_instruction_screenshot(self, screenshot_path):
  2046. # 删除截图文件
  2047. try:
  2048. os.remove(screenshot_path)
  2049. print(f"截图文件已删除:{screenshot_path}")
  2050. except FileNotFoundError:
  2051. print(f"文件未找到,无法删除:{screenshot_path}")
  2052. except Exception as e:
  2053. print(f"删除文件时出错:{e}")
    def get_instructions_data(self):
        """Open the product's instruction sheet and OCR three fields from it.

        Taps the "说明" tab, expands the sheet through "查看详细说明" or
        "查看全部", scrolls until the "生产单位" and "批准文号" labels are on
        screen (or the scroll budget runs out), screenshots the page, runs OCR
        on the image and deletes the screenshot again.

        NOTE(review): the source lost its indentation; the nesting below is a
        best-effort reconstruction - confirm against the original file.

        :return: dict with the keys "有效期", "生产单位" and "批准文号". All three
            are '' when the sheet cannot be expanded or OCR returns nothing;
            a value is None when get_next_data finds nothing after its label.
        """
        self.d.xpath('//*[@text="说明"]').click()
        time.sleep(0.5)
        if self.d.xpath('//*[@text="查看详细说明"]').exists:
            self.d.xpath('//*[@text="查看详细说明"]').click()
        else:
            # No direct link: scroll (at most 8 rounds) until "查看全部" shows up.
            for i in range(8):
                if self.d.xpath('//*[@text="查看全部"]').exists:
                    print('开始点击查看全部')
                    break
                self.d.swipe_ext('down', 0.3)
                time.sleep(1)
                if self.d.xpath('//*[@text="查看全部"]').exists:
                    print('开始点击查看全部2')
                    break
            if self.d.xpath('//*[@text="查看全部"]').exists:
                self.d.xpath('//*[@text="查看全部"]').click()
            else:
                # Neither entry point exists: report an empty sheet.
                res_data = {
                    "有效期": '',
                    "生产单位": '',
                    "批准文号": ''
                }
                self.loggerMT.info('获取到的说明书信息为空。')
                return res_data
        time.sleep(0.5)
        # Tap "加载更多" if it appears, swiping up between checks (8 rounds at most).
        for ii in range(8):
            if self.d.xpath('//*[@text="加载更多"]').exists:
                self.d.xpath('//*[@text="加载更多"]').click()
                time.sleep(0.2)
                break
            else:
                self.d.swipe(200, 1000, 200, 300, 0.3)
        # Keep swiping until both labels are visible together, so a single
        # screenshot can contain them.
        for iii in range(10):
            if self.d.xpath('//*[@text="生产单位"]').exists and self.d.xpath('//*[@text="批准文号"]').exists:
                break
            else:
                self.d.swipe(200, 1300, 200, 300, 0.3)
        instruction_path = self.screenshot_instruction()
        print(f"instruction_path= {instruction_path}")
        time.sleep(2)
        ocr_res = self.get_ocr_res_image(instruction_path)
        if ocr_res:
            # Each value is taken from the OCR entry right after its label.
            # Entry after the "有效期" (validity period) label
            validity = self.get_next_data(ocr_res, '有效期')
            # Entry after the "批准文号" (approval number) label
            approval_number = self.get_next_data(ocr_res, '批准文号')
            # Entry after the "生产单位" (manufacturer) label
            manufacturer = self.get_next_data(ocr_res, '生产单位')
        else:
            validity = ''
            approval_number = ''
            manufacturer = ''
        res_data = {
            "有效期": validity,
            "生产单位": manufacturer,
            "批准文号": approval_number
        }
        print(f"res_data={res_data}")
        time.sleep(1)
        self.delete_instruction_screenshot(instruction_path)
        return res_data
  2130. def has_instructions(self):
  2131. """
  2132. 是否有说明书
  2133. :return:
  2134. """
  2135. # 没有说明书的无法采集具体数据
  2136. time.sleep(self.get_sleep_time())
  2137. is_has_instructions = False
  2138. for i in range(8):
  2139. if self.d.xpath('//*[@text="说明"]').exists:
  2140. print(f"第{i}次有说明书1")
  2141. is_has_instructions = True
  2142. break
  2143. self.d.swipe_ext('down', 0.3)
  2144. time.sleep(1)
  2145. # detail_info = self.d.xpath(
  2146. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[6]').info
  2147. # bounds = detail_info['bounds']
  2148. # height = bounds['bottom'] - bounds['top']
  2149. # if self.d.xpath('//*[@text="进店"]').exists and height > 100:
  2150. if self.d.xpath('//*[@text="说明"]').exists:
  2151. is_has_instructions = True
  2152. print(f"第{i}次有说明书2")
  2153. break
  2154. # is_has_instructions = self.d.xpath('//*[@text="说明"]').exists
  2155. return is_has_instructions
  2156. def has_shop(self):
  2157. """
  2158. 是否有进店按钮
  2159. :return:
  2160. """
  2161. # self.d.swipe_ext('up', 0.1)
  2162. time.sleep(self.get_sleep_time())
  2163. is_has_enter_shop = self.d.xpath('//*[@text="进店"]').exists
  2164. return is_has_enter_shop
  2165. # 获取商品对应的店铺信息
  2166. def get_license_info_ex(self):
  2167. # self.enter_shop()
  2168. self.safe_exec(self.enter_shop)
  2169. # self.enter_shoper()
  2170. result = self.safe_exec(self.enter_shoper)
  2171. if result == False:
  2172. license_info_data = {'contact_address': '', 'qualification_number': '', 'business_license_company': '',
  2173. 'business_license_address': ''}
  2174. return license_info_data
  2175. for i in range(10):
  2176. if self.d.xpath('//*[@text="查看商家资质"]').exists:
  2177. print(f"第{i}次有商家资质")
  2178. break
  2179. else:
  2180. print(f"第{i}次没有商家资质")
  2181. time.sleep(self.get_sleep_time())
  2182. # 获取地址
  2183. # contact_address = self.get_shop_address()
  2184. contact_address = self.safe_exec(self.get_shop_address)
  2185. # time.sleep(50000)
  2186. ###
  2187. # self.scan_shoper_license()
  2188. self.safe_exec(self.scan_shoper_license)
  2189. # 获取资质编码
  2190. # qualification_number = self.get_qualification_number()
  2191. qualification_number = self.safe_exec(self.get_qualification_number)
  2192. # qualification_number 不为None继续下一步
  2193. if qualification_number:
  2194. # 营业执照公司名称
  2195. business_license_company = ''
  2196. # 营业执照地址
  2197. business_license_address = ''
  2198. self.d.click(0.603, 0.27)
  2199. time.sleep(self.get_sleep_time())
  2200. cropped_screenshot_path = self.screenshot_the_business_license(qualification_number)
  2201. print(f'cropped_screenshot_path:{cropped_screenshot_path}')
  2202. # if qualification_number:
  2203. # cropped_screenshot_path = 'D:\\work\\dfwy_spider\\drug_data\\mt\\screenshot\\' + qualification_number + '.png'
  2204. # else:
  2205. # cropped_screenshot_path = 'cropped_screenshot.png'
  2206. # ocr_res = self.get_ocr_res('cropped_screenshot.png')
  2207. ocr_res = self.get_ocr_res(cropped_screenshot_path)
  2208. print(f'ocr_res:{ocr_res}')
  2209. # 获取ocr_res 中的地址、单位名称
  2210. if ocr_res:
  2211. if '单位名称' in ocr_res.keys():
  2212. business_license_company = ocr_res['单位名称']
  2213. if '地址' in ocr_res.keys():
  2214. business_license_address = ocr_res['地址']
  2215. license_info_data = {'contact_address': contact_address, 'qualification_number': qualification_number,
  2216. 'business_license_company': business_license_company,
  2217. 'business_license_address': business_license_address}
  2218. else:
  2219. license_info_data = {'contact_address': contact_address, 'qualification_number': '',
  2220. 'business_license_company': '', 'business_license_address': ''}
  2221. return license_info_data
  2222. def distinct_target(self):
  2223. result = False
  2224. position_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]'
  2225. position_xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]'
  2226. is_position = self.d.xpath(position_xpath).exists
  2227. is_position2 = self.d.xpath(position_xpath2).exists
  2228. xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2229. xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2230. xpath3 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2231. xpath4 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2232. is_position5 = self.d.xpath(xpath).exists
  2233. is_position6 = self.d.xpath(xpath2).exists
  2234. is_position7 = self.d.xpath(xpath3).exists
  2235. is_position8 = self.d.xpath(xpath4).exists
  2236. # print(f"is_position = {is_position}")
  2237. # print(f"is_position2 = {is_position2}")
  2238. if is_position or is_position2 or is_position5 or is_position6 or is_position7 or is_position8:
  2239. result = True
  2240. return result
  2241. # return is_position
  2242. def click_element_with_retry(self, xpath, max_retries=5, timeout=5):
  2243. """
  2244. 带重试机制的点击函数
  2245. """
  2246. for attempt in range(max_retries):
  2247. try:
  2248. if self.d.xpath(xpath).exists:
  2249. self.d.xpath(xpath).click()
  2250. print(f"第{attempt + 1}次尝试点击成功")
  2251. return True
  2252. else:
  2253. print(f"第{attempt + 1}次尝试:元素不存在")
  2254. except Exception as e:
  2255. print(f"第{attempt + 1}次尝试失败: {e}")
  2256. if attempt < max_retries - 1:
  2257. time.sleep(1) # 等待1秒后重试
  2258. print(f"经过{max_retries}次尝试后点击失败")
  2259. return False
  2260. def enter_target_page(self):
  2261. self.d.xpath('//*[@content-desc="看病买药"]').click()
  2262. time.sleep(self.get_sleep_time())
  2263. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/vf_search_carousel_text"]').click()
  2264. time.sleep(self.get_sleep_time())
  2265. self.d.xpath(
  2266. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]').click()
  2267. time.sleep(self.get_sleep_time())
  2268. self.d.send_keys(self.search_key, clear=True)
  2269. time.sleep(self.get_sleep_time())
  2270. self.d.xpath('//*[@text="搜索"]').click()
  2271. time.sleep(self.get_sleep_time())
  2272. self.click_express_send()
  2273. time.sleep(self.get_sleep_time())
  2274. def click_express_send(self):
  2275. # xpath= '//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.HorizontalScrollView[last()]'
  2276. slide_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2277. slide_xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2278. slide_xpath3 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2279. slide_xpath4 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2280. for i in range(1, 3):
  2281. if self.d.xpath(slide_xpath).exists:
  2282. bounds = self.d.xpath(slide_xpath).info['bounds']
  2283. top = bounds['top']
  2284. bottom = bounds['bottom']
  2285. print(f'top={top}')
  2286. print(f'bottom={bottom}')
  2287. y = (top + bottom) // 2
  2288. print(f'y={y}')
  2289. self.loggerMT.info('开始滑动1')
  2290. self.d.swipe(500, y, 100, y, 0.5)
  2291. time.sleep(self.get_sleep_time())
  2292. break
  2293. elif self.d.xpath(slide_xpath2).exists:
  2294. bounds = self.d.xpath(slide_xpath2).info['bounds']
  2295. top = bounds['top']
  2296. bottom = bounds['bottom']
  2297. print(f'top={top}')
  2298. print(f'bottom={bottom}')
  2299. y = (top + bottom) // 2
  2300. print(f'y={y}')
  2301. self.loggerMT.info('开始滑动2')
  2302. self.d.swipe(500, y, 100, y, 0.5)
  2303. time.sleep(self.get_sleep_time())
  2304. break
  2305. elif self.d.xpath(slide_xpath3).exists:
  2306. bounds = self.d.xpath(slide_xpath3).info['bounds']
  2307. top = bounds['top']
  2308. bottom = bounds['bottom']
  2309. print(f'top={top}')
  2310. print(f'bottom={bottom}')
  2311. y = (top + bottom) // 2
  2312. print(f'y={y}')
  2313. self.loggerMT.info('开始滑动3')
  2314. self.d.swipe(500, y, 100, y, 0.5)
  2315. time.sleep(self.get_sleep_time())
  2316. break
  2317. elif self.d.xpath(slide_xpath4).exists:
  2318. bounds = self.d.xpath(slide_xpath4).info['bounds']
  2319. top = bounds['top']
  2320. bottom = bounds['bottom']
  2321. print(f'top={top}')
  2322. print(f'bottom={bottom}')
  2323. y = (top + bottom) // 2
  2324. print(f'y={y}')
  2325. self.loggerMT.info('开始滑动4')
  2326. self.d.swipe(500, y, 100, y, 0.5)
  2327. time.sleep(self.get_sleep_time())
  2328. break
  2329. max_retry = 5 # 最多尝试次数
  2330. for idx in range(1, max_retry + 1):
  2331. # xpath= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]'
  2332. xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2333. xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2334. xpath3 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2335. xpath4 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2336. # print(f"xpath:{xpath}")
  2337. # scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.HorizontalScrollView")
  2338. if self.d.xpath(xpath).exists:
  2339. self.d.xpath(xpath).click()
  2340. # time.sleep(self.get_sleep_time())
  2341. print(f"第{idx}次点击xpath快递送成功")
  2342. time.sleep(self.get_sleep_time())
  2343. break
  2344. elif self.d.xpath(xpath2).exists:
  2345. self.d.xpath(xpath2).click()
  2346. # time.sleep(self.get_sleep_time())
  2347. print(f"第{idx}次点击xpath2快递送成功")
  2348. time.sleep(self.get_sleep_time())
  2349. break
  2350. elif self.d.xpath(xpath3).exists:
  2351. self.d.xpath(xpath3).click()
  2352. # time.sleep(self.get_sleep_time())
  2353. print(f"第{idx}次点击xpath3快递送成功")
  2354. time.sleep(self.get_sleep_time())
  2355. break
  2356. elif self.d.xpath(xpath4).exists:
  2357. self.d.xpath(xpath4).click()
  2358. # time.sleep(self.get_sleep_time())
  2359. print(f"第{idx}次点击xpath4快递送成功")
  2360. time.sleep(self.get_sleep_time())
  2361. break
  2362. else:
  2363. print(f"第{idx}次点击xpath或xpath2或xpath3快递送都失败")
  2364. time.sleep(self.get_sleep_time())
  2365. def get_clipboard(self):
  2366. time.sleep(1)
  2367. self.loggerMT.info(f"Clipboard content:{self.d.clipboard}") # 打印调试信息
  2368. clipboard_content = self.d.clipboard
  2369. if clipboard_content is None:
  2370. return ''
  2371. return clipboard_content.strip()
  2372. # return self.d.clipboard.strip()
  2373. def clear_clipboard(self):
  2374. self.d.set_clipboard("", "text/plain")
    def get_product_link(self):
        """Copy the product's share link through the "···" menu and return it.

        For up to five rounds, each known "···" button location is probed; when
        one exists it is tapped, "分享商品" is tapped if present, and "复制链接"
        is tapped so the link lands in the clipboard, which is then read with
        ``get_clipboard``.

        NOTE(review): the source lost its indentation; the nesting of the
        ``else`` branch and of the retry pause is a best-effort
        reconstruction - confirm against the original file.

        :return: the copied link, or '' when no round produced one.
        """
        product_link = ''
        # Known locations of the "···" button (three layouts are listed).
        dots_xpaths = [
            '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
            '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
            '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]'
        ]
        max_retry = 5  # maximum number of rounds
        for idx in range(1, max_retry + 1):
            if product_link:  # already have the link: stop retrying
                break
            for xp in dots_xpaths:
                if self.d.xpath(xp).exists:
                    print(f'{idx}-进入分享点点点')
                    self.loggerMT.info(f'{idx}-进入分享点点点')
                    self.d.xpath(xp).click()
                    time.sleep(0.2)
                    self.d.xpath('//*[@text="分享商品"]').click_exists()
                    time.sleep(0.2)
                    link_xpath = '//*[@text="复制链接"]'
                    if self.d.xpath(link_xpath).exists:
                        self.d.xpath(link_xpath).click()
                        time.sleep(1)
                        product_link = self.get_clipboard()
                        time.sleep(0.5)
                        print(f'{idx}-商品链接:{product_link}')
                        self.loggerMT.info(f'{idx}-商品链接:{product_link}')
                        break  # handled this button: leave the inner loop
                    else:
                        # "复制链接" did not show up: log and keep the link empty.
                        print(f'{idx}-商品链接:{product_link}')
                        self.loggerMT.info(f'{idx}-商品链接:{product_link}')
                        product_link = ''
            if not product_link and idx < max_retry:
                time.sleep(0.5)  # no pause is needed after the final round
        return product_link
    def integrate_data(self):
        """Collect one product's fields from its detail page and store them.

        Reads title, price and sales, resolves the shop name (collecting and
        saving licence data for shops not stored yet), returns early for
        records ``data_is_exists`` reports as duplicates, reads the
        instruction sheet when one is present and finally passes the
        assembled record to ``save_to_database``.

        NOTE(review): the source lost its indentation; the nesting below is a
        best-effort reconstruction - confirm against the original file.

        :return: None
        """
        product, specifications = self.safe_exec(self.get_title)  # product name, specification
        if not product:
            self.swipe_back(1)
            return
        min_price = self.drug_price()  # lowest price
        sales_num = self.drug_sale_num()  # sales count
        snapshot_url = ''  # page snapshot, always stored empty
        product_link = ''
        if self.d.xpath('//*[@text="自营"]').exists:
            # A "自营" badge is present: use the fixed self-operated shop name.
            shop = "美团自营大药房(快递电商)"
            # scrape date
            scrape_date = self.get_current_date()
            dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
                        'platform': '美团'}
            print(f'当前数据:{dup_data}')
            if self.data_is_exists(dup_data):
                print('存在相同数据不入库')
                self.swipe_back(1)
                return
        else:
            # Scroll (at most 8 rounds) until the "进店" button is visible.
            for i in range(8):
                if self.d.xpath('//*[@text="进店"]').exists:
                    print('开始获取店铺名1')
                    break
                self.d.swipe_ext('up', 0.3)
                time.sleep(1)
                if self.d.xpath('//*[@text="进店"]').exists:
                    print('开始获取店铺名2')
                    break
            shop = self.get_shop_name()
            # scrape date
            scrape_date = self.get_current_date()
            dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
                        'platform': '美团'}
            print(f'当前数据:{dup_data}')
            is_has_enter_shop = self.has_shop()
            # A shop already stored in the database is not entered again.
            shop_is_exists = self.shop_is_exists_database(shop)
            print(f"已采集{self.shop_data_num}家店铺数据")
            # Shop details are collected only for non-official, non-self-operated shops
            # that are not stored yet, and only while fewer than 500 shops were collected.
            if is_has_enter_shop and '美团官方' not in shop and '美团自营' not in shop and not shop_is_exists and self.shop_data_num < 500:
                license_info = self.safe_exec(self.get_license_info_ex)
                contact_address = license_info['contact_address']
                qualification_number = license_info['qualification_number']
                business_license_company = license_info['business_license_company']
                business_license_address = license_info['business_license_address']
                save_shop_data = {
                    'shop': shop,
                    'contact_address': contact_address,
                    'qualification_number': qualification_number,
                    'scrape_date': scrape_date,
                    'business_license_company': business_license_company,
                    'business_license_address': business_license_address,
                    'platform': '美团'
                }
                self.save_shop_info_to_database(save_shop_data)
                self.shop_data_num += 1  # one more shop collected
                self.swipe_back(2)
            else:
                print('不采集店铺信息')
            if self.data_is_exists(dup_data):
                print('存在相同数据不入库')
                self.swipe_back(1)
                return
        # product link
        product_link = self.get_product_link()
        print(f'获取到product_link: {product_link}')
        if not shop:
            print('未获取到店铺名:开始回退')
            self.swipe_back(1)
            return
        # NOTE(review): the fixed self-operated shop name assigned above contains
        # '自营', so if this check sits at function level that branch never reaches
        # save_to_database - confirm this is intended.
        if not shop or '自营' in shop:
            self.swipe_back(1)
            return
        time.sleep(self.get_sleep_time())
        # Fields below start empty; manufacture_date, credit_code, city and
        # province are never reassigned in this method.
        manufacture_date = ''
        credit_code = ''
        city = ''
        province = ''
        expiry_date = ''
        manufacturer = ''
        approval_number = ''
        is_has_instructions = self.safe_exec(self.has_instructions)
        # instruction-sheet fields
        if is_has_instructions:
            print('开始获取说明书信息')
            instructions_info = self.safe_exec(self.get_instructions_data)
            # A value may be None when OCR found nothing after the label; strip a trailing '。'.
            if instructions_info['有效期'] is not None:
                expiry_date = instructions_info['有效期'].strip('。')
            if instructions_info['生产单位'] is not None:
                manufacturer = instructions_info['生产单位'].strip('。')
            if instructions_info['批准文号'] is not None:
                approval_number = instructions_info['批准文号'].strip('。')
        else:
            expiry_date = ''
            manufacturer = ''
            approval_number = ''
        # Map province/city names to ids; the id stays 0 when the name is not in city_to_name.
        province_id = 0
        city_id = 0
        if province in self.city_to_name:
            province_id = self.city_to_name[province]
        if city in self.city_to_name:
            city_id = self.city_to_name[city]
        # Record handed to save_to_database.
        save_data = {
            'enterprise_id': self.company_id,
            'product_name': product,
            'min_price': min_price,
            'manufacture_date': manufacture_date,
            'expiry_date': expiry_date,
            'store_name': shop,
            'province_name': province,
            'city_name': city,
            'province_id': province_id,
            'city_id': city_id,
            'manufacturer': manufacturer,
            'product_specs': specifications,
            'approval_number': approval_number,
            'link_url': product_link,
            'scrape_date': scrape_date,
            'is_sold_out': 0,
            'qualification_number': credit_code,
            'platform_id': '4',
            'sales': sales_num,
            'inventory': '',
            'snapshot_url': snapshot_url,
            'insert_time': time.strftime('%Y-%m-%d %H:%M:%S'),
            'update_time': time.strftime('%Y-%m-%d %H:%M:%S'),
        }
        self.save_to_database(save_data)
  def back_to_list_page(self):
      """Swipe back until the list page is showing, giving up after five tries.

      Returns:
          True as soon as ``self.distinct_target()`` is truthy (the original
          comment says the outermost page has a location button), otherwise
          False once five swipe-back attempts have been made.
      """
      attempt = 0
      while attempt < 5:
          if self.distinct_target():
              return True
          print(f'第{attempt}次尝试退回到列表页')
          self.swipe_back(1)
          pause = self.get_sleep_time()
          time.sleep(pause)
          attempt += 1
      print('页面出错,没有退回到列表页')
      return False
  2573. # TODO 继续优化这里的判断逻辑,可以考虑搭配config的修改
  2574. # 任何一个spec满足都算有效
  def is_link_spec_useful(self, product_title):
      """Return True when the title mentions at least one configured spec.

      ``self.product_specs`` may be a single spec string (``main`` stores the
      value it concatenates into ``search_key``) or an iterable of spec
      strings. An empty or missing value disables the filter.

      A plain string is treated as ONE spec. Iterating it directly would
      compare the title against individual characters, so "100" would match
      any title containing "1" or "0".
      """
      specs = self.product_specs
      if not specs:
          return True
      if isinstance(specs, str):
          specs = (specs,)
      return any(spec in product_title for spec in specs)
  def is_link_useful(self, product_title):
      """Decide whether a listing title belongs to the product being collected.

      The title must contain ``self.title_key`` (when set), must contain
      either ``self.product_brand`` or the alternative ``self.product_like``
      (when a brand is set), and must pass ``is_link_spec_useful``.

      An empty ``product_like`` is ignored. The empty string is a substring of
      every title, so without this guard an empty alias would make the brand
      check accept everything.
      """
      if self.title_key != "" and self.title_key not in product_title:
          print(f"当前商品名称:{product_title} 不包含{self.title_key}关键字")
          return False
      if self.product_brand != "":
          alias = self.product_like
          brand_hit = self.product_brand in product_title
          alias_hit = bool(alias) and alias in product_title
          if not (brand_hit or alias_hit):
              print(f"当前商品名称:{product_title} 不包含{self.product_brand}品牌")
              return False
      if not self.is_link_spec_useful(product_title):
          print(f"当前商品名称:{product_title} 不包含{self.product_specs}品规")
          return False
      return True
  def get_one_drug(self, drug_idx, drug_one):
      """Read one list item, filter it, and open its detail page for collection.

      Args:
          drug_idx: 1-based position of the item inside the RecyclerView; it is
              interpolated into every XPath below.
          drug_one: element object for the item; its ``info['bounds']`` and
              ``click`` are used.

      The item is ignored unless it lies fully inside the vertical window
      304..1475 px. Title, price and shop name are read from the list row.
      Rows whose title fails ``is_link_useful`` (or has no title node) bump
      ``self.unrelated_data``. Rows without a usable price or shop name, or
      already known to ``data_is_exists``, are skipped. Otherwise the item is
      clicked and ``integrate_data`` collects the detail page.

      Price text that contains no number is treated as "no price" instead of
      raising, so the caller's retry loop is not triggered for it.
      """
      bounds = drug_one.info['bounds']
      top = bounds['top']
      bottom = bounds['bottom']
      print(f'当前商品bottom:{bottom}')
      print(f'当前商品top:{top}')
      # Original note: "only the default height 241 works; 1559".
      # NOTE(review): the limits look tuned to one screen size - confirm.
      if not (304 <= top and bottom <= 1475):
          return
      print(f"这页的第几个商品:{drug_idx}")

      # The row layout appears with either 7 or 6 wrapper ViewGroups, so every
      # field is probed under both depths. The strings built here are identical
      # to the literal XPaths the method used before.
      item_root = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{drug_idx}]'
      vg1 = '/android.view.ViewGroup[1]'
      vg2 = '/android.view.ViewGroup[2]'
      text_leaf = '/android.widget.FrameLayout[1]/android.widget.TextView'

      def build(wrappers, tail):
          return item_root + vg1 * wrappers + tail

      def parse_price(text):
          """Return the first number in ``text`` as float, or '' when absent."""
          found = re.search(r'[\d\.]+', text) if text else None
          if found is None:
              return ''
          try:
              return float(found.group())
          except ValueError:
              # e.g. a lone "." matched by the character class
              return ''

      product_title = ''
      price = ''
      shop_name = ''

      # Product title
      title_tail = vg2 + vg1 * 2 + text_leaf
      title_candidates = (
          ('product_tittle_xpath', '1', build(7, title_tail)),
          ('product_tittle_xpath2', '2', build(6, title_tail)),
      )
      for label, tag, xp in title_candidates:
          node = self.d.xpath(xp)
          if not node.exists:
              continue
          product_title = node.text or ''
          # The text sometimes comes back with a spurious leading "0".
          product_title = product_title[1:] if product_title.startswith('0') else product_title
          print(f"{label}列表当前商品名称:{product_title}")
          if not self.is_link_useful(product_title):
              print(f"is_link_useful 没通过{tag}:{product_title}")
              # TODO: confirm the unrelated-data threshold before exiting on it
              self.unrelated_data += 1
              return
          break
      else:
          print("列表当前商品名称不存在")
          # TODO: confirm the unrelated-data threshold before exiting on it
          self.unrelated_data += 1
          return

      # Only consecutive unrelated rows are counted.
      self.unrelated_data = 0

      # Price: the probing order is kept from the original code.
      short_tail = vg2 * 2 + vg1 * 2 + text_leaf
      long_tail = vg2 * 2 + vg1 * 5 + text_leaf
      price_candidates = (
          ('price_xpath', build(7, short_tail)),
          ('price_xpath3', build(6, long_tail)),
          ('price_xpath1', build(6, short_tail)),
          ('price_xpath2', build(7, long_tail)),
      )
      for label, xp in price_candidates:
          node = self.d.xpath(xp)
          if not node.exists:
              continue
          print(f"{label}列表当前")
          price_str = node.text
          print(f"{label}列表当前商品价格:{price_str}")
          price = parse_price(price_str)
          break
      else:
          print("列表当前商品价格不存在")
      print(f'列表获取到价格:{price}')

      # Shop name
      shop_tail = vg2 * 3 + vg1 + '/android.widget.FrameLayout[last()]/android.widget.TextView[1]'
      shop_candidates = (
          ('shop_name_xpath', build(7, shop_tail)),
          ('shop_name_xpath2', build(6, shop_tail)),
      )
      for label, xp in shop_candidates:
          node = self.d.xpath(xp)
          if not node.exists:
              continue
          shop_name = node.text or ''
          print(f"{label}列表当前商品店铺名称:{shop_name}")
          break
      else:
          print("列表当前商品店铺名称不存在")

      # Rows without a price or shop name are not products; skip them.
      if price == '':
          print("列表当前价格不存在")
          return
      if shop_name == '':
          print("列表当前商品店铺名称不存在")
          return

      scrape_date = self.get_current_date()
      if product_title and price and shop_name:
          # Skip rows that were already stored.
          dup_data = {'product': product_title, 'min_price': price, 'shop': shop_name,
                      'scrape_date': scrape_date, 'platform': '美团'}
          if self.data_is_exists(dup_data):
              print('列表存在相同数据不入库')
              return

      self.safe_exec(drug_one.click)
      print('点击目标药品完毕')
      time.sleep(2)
      # Collect the detail page; always pause afterwards, even on failure.
      try:
          self.safe_exec(self.integrate_data)
          print('integrate_data结束')
      finally:
          time.sleep(self.get_sleep_time())
  def get_cur_page(self, page_no):
      """Collect every item on the current list page, then scroll to the next.

      Args:
          page_no: zero-based page index, used only for the progress print.

      Each item is attempted up to three times. After every attempt the app is
      navigated back to the list page.

      Raises:
          RuntimeError: when ``back_to_list_page`` cannot restore the list
              page. (The code previously used ``raise "<text>"``, which Python 3
              turns into a TypeError that hides the intended message.)
      """
      print(f'第{page_no + 1}页')
      # Too many captcha hits: tap a dead spot and reset the counter.
      if self.monitor.verification_count >= self.monitor.MAX_VERIFICATION_RETRY:
          print("频繁遇到验证码,暂停程序")
          # Original note: no-op tap while waiting for the operator.
          self.d.click(0, 0)
          self.monitor.verification_count = 0

      list_xpath = '//android.support.v7.widget.RecyclerView/android.widget.FrameLayout'
      # TODO: this wait has no timeout; it polls until the list is rendered.
      while not self.d.xpath(list_xpath).exists:
          time.sleep(1)
      drug_lis = self.safe_exec(self.d.xpath(list_xpath).all)
      print(f'当前页面共有{len(drug_lis)}个商品')

      for drug_idx, drug_one in enumerate(drug_lis, start=1):
          for _attempt in range(3):
              try:
                  self.get_one_drug(drug_idx, drug_one)
                  break
              except Exception as e:
                  print(f'get_one_drug {drug_idx} 异常 {e}')
              finally:
                  # Runs after success and failure alike.
                  if not self.back_to_list_page():
                      print('back_to_list_page出错,退出采集')
                      raise RuntimeError("back_to_list_page出错,退出采集")

      if self.d.xpath('//*[@text="已经到底啦"]').exists:
          print('已经到达列表页最底部')
          return
      print('开始滑动')
      self.d.drag(300, 1400, 300, 400, 1)
      print('滑动结束')
      time.sleep(self.get_sleep_time())
  2741. # 排序采集
  def li_or_lo(self, key):
      """Tap the "价格" (price) header to switch the list ordering.

      "升序" taps once and "降序" taps twice, sleeping for
      ``self.get_sleep_time()`` after every tap. Any other key does nothing.
      """
      taps_for_key = {"升序": 1, "降序": 2}
      taps = taps_for_key.get(key, 0)
      for _ in range(taps):
          price_header = self.d.xpath('//*[@text="价格"]')
          price_header.click()
          time.sleep(self.get_sleep_time())
  def go_start_page(self, task_id, start_page):
      """Scroll the list forward until ``start_page`` is reached.

      One drag is counted as one page. Nothing is done when ``start_page`` is
      1 or less.

      Args:
          task_id: when truthy, reaching the end of the list early is reported
              through ``reporter.end_task`` with the page that was reached.
          start_page: 1-based page the collection should start from.

      Returns:
          True when the target page was reached (or no skipping was needed),
          False when the "已经到底啦" end-of-list marker appeared first.
          Earlier versions returned None in both cases, so callers that ignore
          the result behave as before.
      """
      if start_page <= 1:
          return True

      self.loggerMT.info(f"跳过前 {start_page - 1} 页,从第 {start_page} 页开始采集")
      current_page = 1
      while current_page < start_page:
          # Block here while the monitor asks for a pause.
          if self.monitor.pausing.is_set():
              self.wait_for_ready(self.monitor)
          # End of list reached before the target page.
          if self.d.xpath('//*[@text="已经到底啦"]').exists:
              self.loggerMT.info(f"在第 {current_page} 页已到达底部,无法继续翻页")
              self.loggerMT.warning(f"未能到达目标页码 {start_page},实际只到达第 {current_page} 页")
              if task_id:
                  reporter.end_task(
                      task_id=task_id,
                      status='completed',
                      finish_status=1,
                      force_end_page=current_page
                  )
              return False
          # Scroll to the next page.
          self.d.drag(300, 1400, 300, 400, 1)
          time.sleep(self.get_sleep_time())
          current_page += 1
          self.loggerMT.debug(f"已翻到第 {current_page} 页")

      # The loop only ends normally once current_page == start_page, so the
      # former "failed to reach page" branch could never run and was removed.
      self.loggerMT.info(f"成功翻到第 {start_page} 页,开始采集")
      return True
  2785. # 主函数
  2786. # start_page:开始页,采集用
  2787. # end_page:结束页,采集用
  2788. # task_id:上报数据用
  2789. # 添加max_duration_minutes参数
  def main(self, device_id, start_page, end_page, task_id, product_name, product_brand, product_specs, company_id,
           product_like, max_duration_minutes=None, retry_count=0):
      """Run one collection task from ``start_page`` to ``end_page``.

      Args:
          device_id: device passed to ``connect_devices``.
          start_page: first page to collect (also passed to ``go_start_page``).
          end_page: last page to collect (inclusive).
          task_id: task identifier for ``reporter``; falsy disables reporting.
          product_name: keyword a title must contain (stored as ``title_key``).
          product_brand: brand a title must contain.
          product_specs: spec filter; concatenated into ``search_key``.
          company_id: stored on the instance for later use.
          product_like: alternative brand spelling accepted by the title filter.
          max_duration_minutes: optional time budget, checked before each page.
          retry_count: accepted for compatibility; not used in this method.

      The task ends early, reported as completed, when the time budget is
      exceeded (finish_status 0), when more than 20 consecutive unrelated
      items were seen, or when the end of the list is reached.

      Raises:
          RuntimeError: when the list page cannot be restored after an item.
          Exception: any other failure is reported as ``failed`` and re-raised.
      """
      # Task state used by the other methods.
      self.task_id = task_id
      self.task_start_page = start_page
      self.task_end_page = end_page
      self.product_brand = product_brand
      self.product_specs = product_specs
      self.title_key = product_name
      self.company_id = company_id
      self.product_like = product_like
      self.search_key = product_brand + product_name + product_specs
      self.start_time = time.time()

      if self.task_id:
          # The status update call (update_task_status(2)) is disabled; only
          # the log line remains.
          self.loggerMT.info(f"任务 {task_id} 线程启动成功,状态已更新为2")

      if task_id:
          reporter.start_task(task_id, start_page, end_page)

      def report_end(finish_status, page, status='completed'):
          """Report the end of the task when a task id was given."""
          if task_id:
              reporter.end_task(
                  task_id=task_id,
                  status=status,
                  finish_status=finish_status,
                  force_end_page=page
              )

      def last_page():
          """Page reached so far; falls back to start_page before the first page."""
          return getattr(self, 'current_page', start_page)

      # NOTE(review): spider_no is never incremented here, so the 60 s
      # throttle below cannot trigger - confirm whether it should count items.
      spider_no = 0
      timeout_seconds = max_duration_minutes * 60 if max_duration_minutes else None

      self.connect_devices(device_id)
      time.sleep(self.get_sleep_time())

      # Start the global popup monitor.
      self.monitor = SpiderMonitor(self)
      self.monitor.start()

      list_xpath = '//android.support.v7.widget.RecyclerView/android.widget.FrameLayout'
      try:
          self.restart_app()
          # Search for the keyword.
          self.safe_exec(self.enter_target_page)
          # Skip the pages before start_page.
          self.go_start_page(task_id, start_page)

          for idx in range(start_page, end_page + 1):
              # Time budget check.
              if timeout_seconds and (time.time() - self.start_time) > timeout_seconds:
                  print(f"任务 {task_id} 达到时间限制 {max_duration_minutes} 分钟,停止采集")
                  self.loggerMT.info(f"任务 {task_id} 达到时间限制 {max_duration_minutes} 分钟,停止采集")
                  report_end(0, last_page())
                  return

              print(f'第{idx}页(指定范围: {start_page}-{end_page})')
              self.current_page = idx
              if task_id:
                  reporter.update_task_progress(
                      task_id=task_id,
                      actual_end_page=self.current_page
                  )

              if spider_no > 30:
                  time.sleep(60)
                  spider_no = 0
              print('目前无关数据量: ', self.unrelated_data)

              # Too many captcha hits: tap a dead spot and reset the counter.
              if self.monitor.verification_count >= self.monitor.MAX_VERIFICATION_RETRY:
                  print("频繁遇到验证码,暂停程序")
                  self.d.click(0, 0)
                  self.monitor.verification_count = 0

              # Stop after more than 20 consecutive unrelated items.
              if self.unrelated_data > 20:
                  self.loggerMT.info(f"连续20个数据不达标,品规:{self.search_key}")
                  report_end(1, end_page)
                  return

              # Wait for the list to render, then snapshot its items.
              while not self.d.xpath(list_xpath).exists:
                  time.sleep(1)
              drug_lis = self.safe_exec(self.d.xpath(list_xpath).all)
              print(f'当前页面共有{len(drug_lis)}个商品')

              for drug_idx, drug_one in enumerate(drug_lis, start=1):
                  for _attempt in range(3):
                      try:
                          self.get_one_drug(drug_idx, drug_one)
                          break
                      except Exception as e:
                          print(f'get_one_drug {drug_idx} 异常 {e}')
                      finally:
                          # Always return to the list page before continuing.
                          if not self.back_to_list_page():
                              print('back_to_list_page出错,退出采集')
                              # Was `raise "<text>"`, a TypeError in Python 3.
                              raise RuntimeError("back_to_list_page出错,退出采集")

              # Scroll to the next page unless this was the last one.
              if idx < end_page:
                  if self.d.xpath('//*[@text="已经到底啦"]').exists:
                      self.loggerMT.info(f'在第 {idx} 页已到达列表最底部')
                      report_end(1, idx)
                      return
                  print('开始滑动')
                  self.d.drag(300, 1400, 300, 400, 1)
                  print('滑动结束')
                  time.sleep(self.get_sleep_time())

          # All requested pages were collected.
          report_end(1, end_page)
      except Exception as e:
          print(f"采集任务异常: {e}")
          report_end(0, last_page(), status='failed')
          raise
      finally:
          # Make sure the monitor thread is stopped.
          print("采集任务异常: 确保监控线程被停止")
          self.monitor.stop()
          self.monitor.join()
  def get_mysql():
      """Open and return a new pymysql connection built from the ``Config.DB_*`` settings."""
      import pymysql

      settings = {
          'host': Config.DB_HOST,
          'port': Config.DB_PORT,
          'user': Config.DB_USER,
          'password': Config.DB_PASSWORD,
          'db': Config.DB_NAME,
          'charset': 'utf8mb4',
      }
      return pymysql.connect(**settings)
  2980. # retrieve database
  def get_retrieve_mysql():
      """Open and return a pymysql connection to the remote ``drug_retrieve`` database.

      Each setting can be overridden with an environment variable
      (``DRUG_RETRIEVE_DB_HOST``, ``_PORT``, ``_USER``, ``_PASSWORD``,
      ``_NAME``). Without them the previous built-in values are used, so
      existing deployments keep working.
      """
      import os

      import pymysql

      env = os.environ
      # SECURITY: the defaults below are credentials committed to source.
      # Rotate them and supply the real values through the environment.
      return pymysql.connect(
          host=env.get('DRUG_RETRIEVE_DB_HOST', '39.108.116.125'),
          port=int(env.get('DRUG_RETRIEVE_DB_PORT', 3306)),
          user=env.get('DRUG_RETRIEVE_DB_USER', 'drug_retrieve'),
          password=env.get('DRUG_RETRIEVE_DB_PASSWORD', 'Pem287cwM58jNpe2'),
          db=env.get('DRUG_RETRIEVE_DB_NAME', 'drug_retrieve'),
          charset='utf8mb4'
      )
  def get_retrieve_mysql_real():
      """Open and return a pymysql connection to the second remote ``drug_retrieve`` host.

      Each setting can be overridden with an environment variable
      (``DRUG_RETRIEVE_REAL_DB_HOST``, ``_PORT``, ``_USER``, ``_PASSWORD``,
      ``_NAME``). Without them the previous built-in values are used, so
      existing deployments keep working.
      """
      import os

      import pymysql

      env = os.environ
      # SECURITY: the defaults below are credentials committed to source.
      # Rotate them and supply the real values through the environment.
      return pymysql.connect(
          host=env.get('DRUG_RETRIEVE_REAL_DB_HOST', '120.24.49.2'),
          port=int(env.get('DRUG_RETRIEVE_REAL_DB_PORT', 3306)),
          user=env.get('DRUG_RETRIEVE_REAL_DB_USER', 'drug_retrieve'),
          password=env.get('DRUG_RETRIEVE_REAL_DB_PASSWORD', 'ksCt3xm6chzdkafj'),
          db=env.get('DRUG_RETRIEVE_REAL_DB_NAME', 'drug_retrieve'),
          charset='utf8mb4'
      )
  class TimeoutException(Exception):
      """Exception type for timeout conditions; it adds nothing to ``Exception``."""
  class MTScreenshot:
      """Scrolling screenshot of the current product page, uploaded to OSS.

      ``get_oss_url`` is the entry point: it reads the product title, takes a
      stitched screenshot while swiping down, saves it as a JPEG under
      ``../scrape_data`` and uploads it, returning the OSS URL or None.

      The constructor starts a ``SpiderMonitor`` thread; nothing in this class
      stops it.
      """

      # Titles without a spec suffix, mapped to the pack size the earlier code
      # associated with them. ``_get_title`` only uses membership (to decide
      # whether to print the "unexpected format" notice); the sizes are kept
      # for reference.
      _DEFAULT_SPECS = {
          '999抗病毒口服液10ml*12': '10ml*12支/盒',
          '999抗病毒口服液': '10ml*12支/盒',
          '999抗病毒口服液10ml*10': '10ml*10支/盒',
          '999小柴胡颗粒': '10g*9袋/盒',
          '999养胃舒颗粒': '10g*10袋/盒',
          '三九胃泰胶囊': '0.5g*24粒/盒',
          '999补脾益肠丸': '6g*9袋/盒',
          '999感冒灵颗粒': '10g*9袋/盒',
          '999感冒灵胶囊': '0.5g*12粒/盒',
      }

      def __init__(self, d, oss_config, search_key, title_key, scroll_times=4, compress_quality=7, resize_ratio=0.8):
          """Store the device and settings, start the monitor, connect to OSS.

          Args:
              d: already-connected device object supplied by the caller.
              oss_config: mapping with ``access_key_id``, ``access_key_secret``,
                  ``endpoint``, ``bucket_name`` and optional ``oss_prefix``.
              search_key: search phrase; selects the title-parsing rule.
              title_key: text used to locate the title node on the page.
              scroll_times: maximum number of swipes while capturing.
              compress_quality: JPEG ``quality`` used when saving.
              resize_ratio: scale factor, applied only when 0.1 < ratio < 1.0.
          """
          self.d = d
          self.search_key = search_key
          self.title_key = title_key
          # Start the global popup monitor.
          self.monitor = SpiderMonitor(self)
          self.monitor.start()
          self.loggerMT = logging.getLogger()
          self.logger = self._init_logger()
          # OSS configuration and bucket handle (None when unavailable).
          self.oss_config = oss_config
          self.oss_bucket = self._init_oss_bucket()
          # Screenshot parameters.
          self.scroll_times = scroll_times
          self.compress_quality = compress_quality
          self.resize_ratio = resize_ratio

      def _init_logger(self):
          """Return the "mt_screenshot" logger reset to a single stream handler."""
          logger = logging.getLogger("mt_screenshot")
          logger.setLevel(logging.INFO)
          # Clearing first avoids duplicate output when several instances exist.
          logger.handlers.clear()
          handler = logging.StreamHandler()
          handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
          logger.addHandler(handler)
          return logger

      def _init_oss_bucket(self):
          """Validate the OSS settings and return a verified bucket, or None."""
          required = ("access_key_id", "access_key_secret", "endpoint", "bucket_name")
          if not all(self.oss_config.get(name) for name in required):
              self.logger.warning("OSS配置不完整,无法上传")
              return None
          try:
              auth = oss2.Auth(self.oss_config["access_key_id"], self.oss_config["access_key_secret"])
              bucket = oss2.Bucket(auth, self.oss_config["endpoint"], self.oss_config["bucket_name"])
              bucket.get_bucket_info()  # verifies the connection
              self.logger.info("OSS Bucket连接成功")
              return bucket
          except Exception as e:
              self.logger.error(f"OSS Bucket连接失败: {e}")
              return None

      def _upload_to_oss(self, local_path):
          """Upload ``local_path`` and return its OSS URL, or None on any failure."""
          if not self.oss_bucket or not os.path.exists(local_path):
              return None
          file_name = os.path.basename(local_path)
          safe_name = re.sub(r'[^\w\.\-]', '_', file_name)
          oss_key = f"{self.oss_config.get('oss_prefix', 'scrape_data/')}{safe_name}"
          try:
              oss2.resumable_upload(self.oss_bucket, oss_key, local_path)
              oss_file_url = f"https://{self.oss_config['bucket_name']}.{self.oss_config['endpoint']}/{urllib.parse.quote(oss_key, safe='/')}"
              self.logger.info(f"OSS上传成功: {oss_file_url}")
              return oss_file_url
          except Exception as e:
              self.logger.error(f"OSS上传失败: {e}")
              return None

      def safe_exec(self, func, *args, **kwargs):
          """Call ``func(*args, **kwargs)`` once the monitor is not pausing.

          Polls ``self.monitor.pausing`` every second while it is set, then
          returns whatever ``func`` returns.
          """
          while self.monitor.pausing.is_set():
              time.sleep(1)
          return func(*args, **kwargs)

      def _get_title(self):
          """Return the product title shown on the current page as a string.

          The node whose text contains ``self.title_key`` is polled (up to 5999
          tries, 3 s apart) so a human has time to clear a captcha. An empty
          string is returned when it never appears.

          Every path returns a plain string. The "999赐多康大豆" branch used
          to return a ``(title, spec)`` tuple, which made ``get_oss_url`` fail
          inside ``re.sub`` and drop the screenshot.
          """
          def _inner():
              print(f'获取商品title时的搜索关键字:{self.title_key}')
              title_xpath = f'//*[contains(@text, "{self.title_key}")]'
              title = ''
              for m in range(1, 6000):
                  if self.d.xpath(title_xpath).exists:
                      title = self.safe_exec(lambda: self.d.xpath(title_xpath).text) or ''
                      self.loggerMT.info(f"第{m}次获取title成功")
                      print(f"第{m}次获取title成功")
                      break
                  time.sleep(3)

              # Oddly, the text sometimes comes back with a spurious leading "0".
              title = title[1:] if title.startswith('0') else title
              print(f'获取到药品标题:{title}')

              if self.search_key == '999赐多康大豆':
                  return title

              # Split "[tag]name spec" only to log the spec; the full title is
              # what callers receive.
              if self.search_key == "999感冒清热颗粒":
                  match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
              else:
                  match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
              if match:
                  print("药品名:", title)
                  print("规格:", match.group(3).strip())
              elif title not in self._DEFAULT_SPECS:
                  print("没有匹配到预期格式")
              return title

          # Run through safe_exec so a pending captcha blocks the lookup.
          return self.safe_exec(_inner)

      def _merge_screenshots(self, screens):
          """Stack the screenshots vertically into one RGB image.

          The canvas is as wide as the first image and as tall as the sum of
          all heights. A single screenshot is only converted to RGB.
          """
          if len(screens) == 1:
              return screens[0].convert('RGB')
          rgb_screens = [s.convert('RGB') for s in screens]
          total_width = rgb_screens[0].width
          total_height = sum(s.height for s in rgb_screens)
          merged_img = Image.new('RGB', (total_width, total_height))
          y_offset = 0
          for img in rgb_screens:
              merged_img.paste(img, (0, y_offset))
              y_offset += img.height
          return merged_img

      def get_oss_url(self):
          """Capture the page, save it locally, upload it and return the OSS URL.

          Returns:
              The uploaded file's URL, or None when capturing or uploading
              failed (the error is logged, not raised).
          """
          local_file_path = None
          try:
              # 1. Title, used in the file name.
              title = self._get_title()
              self.logger.info(f"获取标题: {title[:20]}..." if title else "未获取到标题")

              # 2. Local file path.
              timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
              safe_title = re.sub(r'[\\/*?:"<>|]', '_', title)
              local_dir = "../scrape_data"
              os.makedirs(local_dir, exist_ok=True)
              local_file_path = os.path.join(local_dir, f"{timestamp}_{safe_title}.jpg")

              # 3. Scrolling capture: swipe from 85% to 15% of the height;
              #    a shorter swipe could miss the shop name.
              screen_list = [self.d.screenshot()]
              w, h = self.d.window_size()
              for i in range(self.scroll_times):
                  self.d.swipe(w // 2, h * 0.85, w // 2, h * 0.15,
                               duration=random.uniform(0.8, 1.5))
                  time.sleep(random.uniform(2.0, 4.0))
                  screen_list.append(self.d.screenshot())
                  # Stop once the "商家服务" section is visible.
                  if self.d(textContains='商家服务').exists:
                      break

              # 4. Stitch, optionally shrink, and save as JPEG.
              merged_img = self._merge_screenshots(screen_list)
              if 0.1 < self.resize_ratio < 1.0:
                  new_size = (int(merged_img.width * self.resize_ratio), int(merged_img.height * self.resize_ratio))
                  resample_mode = Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS
                  merged_img = merged_img.resize(new_size, resample_mode)
              merged_img.save(local_file_path, format='JPEG', quality=self.compress_quality)
              merged_img.close()  # release the stitched image
              self.logger.info(f"临时本地保存: {local_file_path}")

              # 5. Upload.
              oss_url = self._upload_to_oss(local_file_path)

              # 6. Local cleanup is intentionally disabled for now (the file is
              #    kept for inspection), so the log says the file is retained
              #    rather than claiming it was deleted.
              if oss_url is not None:
                  self.logger.info(f"✅ OSS上传成功,本地临时文件暂未删除: {local_file_path}")
              return oss_url
          except Exception as e:
              self.logger.error(f"截图/上传失败: {e}")
              return None
  3273. # 如果需要并行处理(提高效率),可以使用线程池:
  def process_tasks_in_parallel(max_workers=12, task_rows=None):
      """Run collection tasks concurrently on a thread pool.

      Each row is turned into a task dict and executed by a fresh ``MT``
      instance in a worker thread. A row is skipped when its device id is 0
      or its product name is empty/blank.

      The per-task time budget (``duration_minutes``, default 30 when the row
      has ``None``) is passed to ``MT.main`` as ``max_duration_minutes``.
      This function does not interrupt a running task itself:
      ``as_completed`` only yields futures that have already finished, so a
      timeout on ``future.result`` could never fire.

      Args:
          max_workers: Maximum number of worker threads.
          task_rows: Optional iterable of rows shaped as
              ``(task_id, device_id, product_name, start_page, end_page,
              duration_minutes, product_specs, product_brand, company_id,
              product_like)``. When None, the built-in debug rows are used.

      Returns:
          None. Progress and the success/failure summary are printed.
      """
      from concurrent.futures import ThreadPoolExecutor, as_completed

      if task_rows is None:
          # Hard-coded debug rows standing in for the (currently disabled)
          # database query:
          #   SELECT id, collect_equipment_id, product_name, start_page,
          #          end_page, duration, product_specs, product_brand,
          #          company_id
          #   FROM retrieve_collect_task_allocate
          #   WHERE status = 12 AND platform = 4
          # In that path the device id was resolved per row with:
          #   SELECT device_id FROM retrieve_collect_equipment
          #   WHERE id = %s AND status = 0
          task_rows = [
              [1, '21885f5', '正露丸', 0, 100, 240, '100', '喇叭牌', 3, ''],
              [1, '97ae80e0', '正露丸', 0, 100, 240, '100', '喇叭牌', 3, ''],
              # [1,'GIOFIBRKZTUGJJAE','盐酸达泊西汀',0,100,240,'30mg*1','白云山',3,''],
              [1, 'IRLZAAQCDMHYWKTS', '正露丸', 0, 100, 240, '', '喇叭牌', 3, ''],
              [1, 'OVFETO8PCY45E6A6', '正露丸', 0, 100, 240, '', '喇叭牌', 3, ''],
              # [1,'T4UCEQGQEEYP65ZL','阿奇霉素胶囊',0,100,240,'250mg*6','白云山',3,'使力康'],
              [1, 'U8ONIJJJS4CELVD6', '正露丸', 0, 100, 240, '', '喇叭牌', 3, ''],
              # [1,1,'阿莫西林胶囊',0,100,240,'250mg*20','白云山',3,'抗之霸'],
              # [1,1,'头孢克肟胶囊',0,100,240,'50mg*20','白云山',3,'抗之霸'],
          ]

      # Build the task list, skipping rows without a device or product name.
      tasks = []
      for row in task_rows:
          (task_id, collect_equipment_id, product_name, start_page, end_page,
           duration, product_specs, product_brand, company_id,
           product_like) = row[:10]
          if collect_equipment_id != 0 and product_name and product_name.strip():
              tasks.append({
                  'task_id': task_id,
                  'device_id': collect_equipment_id,
                  'product_name': product_name.strip(),
                  'start_page': start_page,
                  'end_page': end_page,
                  'product_brand': product_brand,
                  'product_specs': product_specs,
                  'company_id': company_id,
                  'product_like': product_like,
                  # Execution time limit in minutes; 30 when not set.
                  'duration_minutes': duration if duration is not None else 30,
              })

      if not tasks:
          print("没有有效的采集任务")
          return
      print(f"准备并行处理 {len(tasks)} 个任务")

      def process_single_task(task):
          """Run one task and return a result dict; never raises Exception."""
          mt = None
          try:
              mt = MT()
              # Run the collection; the collected count and the last page
              # reached are read from the MT instance afterwards.
              mt.main(
                  device_id=task['device_id'],
                  start_page=task['start_page'],
                  end_page=task['end_page'],
                  task_id=task['task_id'],
                  product_name=task['product_name'],
                  product_brand=task['product_brand'],
                  product_specs=task['product_specs'],
                  company_id=task['company_id'],
                  product_like=task['product_like'],
                  max_duration_minutes=task['duration_minutes'],
              )
              return {
                  'task_id': task['task_id'],
                  'success': True,
                  'collected_count': mt.collected_count,
                  'final_page': mt.current_page,
              }
          except Exception as e:
              print(f"任务 {task['task_id']} 执行异常: {e}")
              return {
                  'task_id': task['task_id'],
                  'success': False,
                  'error': str(e),
              }
          finally:
              # Best-effort cleanup: report a failing close() but never let
              # it replace the task result.
              if mt is not None and hasattr(mt, 'close'):
                  try:
                      mt.close()
                  except Exception as close_err:
                      print(f"任务 {task['task_id']} 关闭资源失败: {close_err}")

      successful_tasks = 0
      failed_tasks = 0
      with ThreadPoolExecutor(max_workers=max_workers) as executor:
          # Submit everything up front, then handle results as they finish.
          future_to_task = {
              executor.submit(process_single_task, task): task
              for task in tasks
          }
          for future in as_completed(future_to_task):
              task = future_to_task[future]
              try:
                  # The future is already done, so this returns immediately.
                  result = future.result()
              except Exception as e:
                  failed_tasks += 1
                  print(f"任务 {task['task_id']}: 执行异常 {e}")
                  continue
              if result['success']:
                  successful_tasks += 1
                  print(f"任务 {result['task_id']}: 完成,采集 {result['collected_count']} 条数据")
              else:
                  failed_tasks += 1
                  print(f"任务 {result['task_id']}: 失败,错误: {result['error']}")

      print("\n并行采集完成:")
      print(f"成功: {successful_tasks} 个")
      print(f"失败: {failed_tasks} 个")
  if __name__ == '__main__':
      # Minutes between scheduled runs. Used for both the schedule and the
      # startup message so the two cannot drift apart (the message used to
      # say 40 while the schedule was 10).
      interval_minutes = 10

      def run_collection():
          """Run one collection round and print start/end timestamps.

          Exceptions are caught and printed so that a failing round does not
          stop the scheduler loop below.
          """
          try:
              print(f"【定时任务开始】时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
              process_tasks_in_parallel(max_workers=12)
              print(f"【定时任务结束】时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
          except Exception as e:
              print(f"【定时任务异常】: {e}")

      # Register the recurring job.
      schedule.every(interval_minutes).minutes.do(run_collection)

      # Run once immediately instead of waiting for the first interval.
      run_collection()
      print(f"定时任务已设置,每{interval_minutes}分钟执行一次采集")

      # Poll the scheduler once a minute.
      while True:
          schedule.run_pending()
          time.sleep(60)