mt_auto_scrape3.py 233 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078407940804081408240834084408540864087408840894090409140924093409440954096409740984099410041014102410341044105410641074108410941104111411241134114411541164117411841194120412141224123412441254126412741284129413041314132413341344135413641374138413941404141414241434144414541464147414841494150415141524153415441554156415741584159416041614162416341644165416641674168416941704171417241734174417541764177417841794180418141824183418441854186418741884189419041914192419341944195419641974198419942004201420242034204420542064207420842094210421142124213421442154216421742184219422042214222422342244225422642274228422942304231423242334234423542364237423842394240424142424243424442454246424742484249425042514252425342544255425642574258425942604261426242634264426542664267426842694270427142724273427442754276427
74278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488448944904491449244934494449544964497449844994500450145024503450445054506450745084509451045114512451345144515451645174518451945204521452245234524452545264527452845294530453145324533453445354536453745384539454045414542454345444545454645474548454945504551455245534554455545564557455845594560456145624563456445654566456745684569457045714572457345744575457645774578457945804581458245834584458545864587458845894590459145924593459445954596459745984599460046014602460346044605460646074608460946104611461246134614461546164617461846194620462146224623462446254626462746284629463046314632463346344635463646374638463946404641464246434644464546464647464846494650465146524653465446554656465746584659466046614662466346644665466646674668466946704671467246734674467546764677467846794680468146824683468446854686468746884689469046914692469346944695469646974698469947004701470247034704470547064707470847094710471147124713471447154716471747184719472047214722472347244725472647274728472947304731473247334734473547364737473847394740474147424743474447454746474747484749475047514752475347544755475647574758475947604761476247634764476547664767476847694770477147724773477447754776477
74778477947804781478247834784478547864787478847894790479147924793479447954796479747984799480048014802480348044805480648074808480948104811481248134814
  1. import requests
  2. import base64
  3. import cv2
  4. import uiautomator2 as u2
  5. import time
  6. import subprocess
  7. import re
  8. import random
  9. import datetime
  10. import json
  11. from aip import AipOcr
  12. from apscheduler.schedulers.blocking import BlockingScheduler
  13. # from db_mysql import mysqlClient
  14. import threading
  15. from collections import deque
  16. import numpy as np
  17. import secrets
  18. import os
  19. import math
  20. import schedule
  21. # import pyperclip
  22. from config import Config
  23. from logger import setup_logger
  24. import logging
  25. from contextlib import contextmanager
  26. from typing import Dict, Any
  27. # from database import MySQLClient
  28. # 配置日志
  29. # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  30. setup_logger("mt_spider") # 初始化日志
  31. class SpiderMonitor(threading.Thread):
  32. """全局弹窗监控线程(增强版)"""
  33. def __init__(self, spider_instance):
  34. super().__init__(daemon=True)
  35. self.spider = spider_instance
  36. self.running = True
  37. self.pausing = threading.Event() # 主线程同步事件
  38. self.last_verification_time = 0
  39. self.verification_count = 0
  40. self.MAX_VERIFICATION_RETRY = 10
  41. self.recent_clicks = deque(maxlen=10) # 防重复点击
  42. self.logger = logging.getLogger("SpiderMonitor")
  43. self.TOKEN = "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk"
  44. self.API_URL = "http://api.jfbym.com/api/YmServer/customApi"
  45. self.d = self.spider.d
  46. self.verification_in_progress = threading.Event()
  47. self.loggerMT = logging.getLogger()
  48. self.verification_retry_count = 0 # 当前验证码重试次数
  49. self.last_verification_type = None
  50. # 可配置化弹窗规则
  51. self.popup_rules = {
  52. "simple": [
  53. ('//*[@text="确定"]', "点击确定"),
  54. ('//*[@text="允许"]', "点击允许"),
  55. ('//*[@text="关闭"]', "点击关闭"),
  56. ('//*[@resource-id="com.sankuai.meituan:id/close"]', "关闭按钮"),
  57. ('//*[@resource-id="com.sankuai.meituan:id/address_center_location_close"]', "关闭按钮"),
  58. ('//*[@resource-id="com.sankuai.meituan:id/location_close"]', "关闭按钮"),
  59. ('//*[@resource-id="com.sankuai.meituan:id/btn_close"]', "关闭按钮"),
  60. ],
  61. # "verification": [
  62. # '//*[contains(@text, "验证")]',
  63. # '//*[contains(@text, "滑块")]',
  64. # '//*[contains(@text, "依次点击")]',
  65. # '//*[contains(@text, "请点击")]',
  66. # '//*[contains(@text, "拖动滑块刚")]', #这个需要拖动滑块至最右边,然后再截图
  67. # '//*[contains(@text, "请输入图片中的内容")]',
  68. # '//*[contains(@text, "用最短线连接")]',
  69. # '//*[contains(@text, "请按语序依次点击")]',
  70. # '//*[contains(@text, "请向右滑动滑块")]',
  71. # '//*[contains(@text, "请拖动下方滑块完成拼图")]',
  72. # '//*[contains(@resource-id, "captcha")]'
  73. # ]
  74. "verification": [
  75. ('//*[contains(@text, "请点击")]', "click_side"),
  76. ('//*[contains(@text, "请输入图片中的内容")]', "Numbers_English"),
  77. ('//*[contains(@text, "请向右滑动滑块")]', "Swipe_right"),
  78. ('//*[contains(@text, "请依次点击下图图标")]', "Click_images"),
  79. ('//*[contains(@text, "请拖动下方滑块完成拼图")]', "slider"),
  80. ('//*[contains(@text, "拖动滑块刚")]', "complexs"), # 这个需要拖动滑块至最右边,然后再截图
  81. ('//*[contains(@text, "请按语序依次点击")]', "Click_images"),
  82. ('//*[contains(@text, "用最短线连接")]', "Shortest_connection"),
  83. ]
  84. }
  def run(self):
      """Thread body: poll for popups until ``self.running`` becomes false.

      Each pass calls ``self.check_and_handle_popup()`` and then sleeps for
      2 seconds when that call returned a truthy value, 1 second otherwise.
      Any exception raised during a pass is logged with its traceback and
      followed by a 3 second pause, so the loop itself keeps going.
      """
      while True:
          if not self.running:
              break
          try:
              popup_was_handled = self.check_and_handle_popup()
              pause_seconds = 2 if popup_was_handled else 1
              time.sleep(pause_seconds)
          except Exception as exc:
              self.logger.exception("监控线程异常: %s", exc)
              time.sleep(3)
  93. def _is_recent_click(self, xpath):
  94. """防止重复点击同一个弹窗"""
  95. key = f"{xpath}_{int(time.time())}"
  96. if key in self.recent_clicks:
  97. return True
  98. self.recent_clicks.append(key)
  99. return False
  100. @staticmethod
  101. def get_sleep_time():
  102. # return random.randint(5, 8)
  103. return random.randint(1, 3)
  def human_slide(self, start_x, start_y, end_x, end_y, hold_time=0):
      """Drag from the start point towards the end point along a jittered path.

      A list of 60-85 ``(x, y, delay)`` samples is generated first, then
      replayed through ``self.d.touch`` (down, a move per sample, up) with a
      sleep of ``delay`` seconds after each move. The final sample is forced
      to the stop position, which is ``end_x`` shifted by a small random
      over/undershoot; ``end_y`` is used unchanged.

      Args:
          start_x: X coordinate where the touch goes down.
          start_y: Y coordinate where the touch goes down.
          end_x: Nominal X coordinate of the target.
          end_y: Y coordinate of the target.
          hold_time: Accepted for backward compatibility only. The value is
              not used; the pause after lifting is always drawn from
              ``random.uniform(1, 2)`` seconds.

      Returns:
          list: The ``(x, y, delay)`` samples that were replayed.
      """
      points = []
      total_steps = random.randint(60, 85)
      distance_x = end_x - start_x
      distance_y = end_y - start_y
      total_distance = math.sqrt(distance_x ** 2 + distance_y ** 2)
      self.logger.info("滑块验证移动0")

      # Stop slightly off target: 70% overshoot by 1-5 px, 30% undershoot by
      # 1-3 px. The caps scale with distance; they are clamped to at least 1
      # because random.randint(1, 0) raises ValueError, which used to make
      # every drag shorter than 100 px (overshoot) / 50 px (undershoot) fail.
      if random.random() < 0.7:
          overshoot_cap = max(1, min(5, int(total_distance * 0.01)))
          offset_x = random.randint(1, overshoot_cap)
      else:
          undershoot_cap = max(1, min(3, int(total_distance * 0.02)))
          offset_x = -random.randint(1, undershoot_cap)
      stop_x = end_x + offset_x
      stop_y = end_y

      # Fractions of the gesture spent accelerating / decelerating, and the
      # peak speed multiplier.
      accel_time_ratio = random.uniform(0.25, 0.35)
      decel_time_ratio = random.uniform(0.25, 0.35)
      max_speed = random.uniform(1.5, 2.2)

      for i in range(total_steps):
          t = i / (total_steps - 1)  # normalised time, 0..1

          # Speed profile; only the mid-gesture delay below reads it.
          if t < accel_time_ratio:
              phase_t = t / accel_time_ratio
              speed_factor = max_speed * phase_t * phase_t
          elif t < 1 - decel_time_ratio:
              speed_factor = max_speed
              speed_factor += random.uniform(-0.05, 0.05)
          else:
              phase_t = (t - (1 - decel_time_ratio)) / decel_time_ratio
              speed_factor = max_speed * (1 - phase_t * phase_t)
          self.logger.info("滑块验证移动1")

          # Position profile (fraction of the way to the stop point).
          # NOTE(review): max_speed is at least 1.5, so the acceleration
          # phase alone reaches >= 50% and the cruise term
          # (1 - 2 * accel_distance) is <= 0. The computed progress therefore
          # does not advance mid-gesture and x is held by the monotonic clamp
          # below until the deceleration curve catches up. Left as-is to keep
          # the existing trajectory shape; confirm whether this is intended.
          if t < accel_time_ratio:
              phase_t = t / accel_time_ratio
              progress = (max_speed / 3) * phase_t * phase_t * phase_t
          elif t < 1 - decel_time_ratio:
              phase_t = (t - accel_time_ratio) / (1 - accel_time_ratio - decel_time_ratio)
              accel_distance = max_speed / 3
              progress = accel_distance + (1 - 2 * accel_distance) * phase_t
          else:
              phase_t = (t - (1 - decel_time_ratio)) / decel_time_ratio
              progress = 1 - (max_speed / 3) * (1 - phase_t) * (1 - phase_t) * (1 - phase_t)
          progress = max(0, min(1, progress))

          # Per-sample jitter: +/-1 px at both ends, +/-2 px in the transition
          # bands, and in the middle +/-2 px on roughly 30% of the samples.
          if t < 0.1 or t > 0.9:
              jitter_x = random.randint(-1, 1)
              jitter_y = random.randint(-1, 1)
          elif t < 0.3 or t > 0.7:
              jitter_x = random.randint(-2, 2)
              jitter_y = random.randint(-2, 2)
          else:
              jitter_x = random.randint(-2, 2) if random.random() < 0.3 else 0
              jitter_y = random.randint(-2, 2) if random.random() < 0.3 else 0

          current_x = start_x + (stop_x - start_x) * progress + jitter_x
          current_y = start_y + (stop_y - start_y) * progress + jitter_y
          self.logger.info("滑块验证移动2")

          # Never let x move back against the drag direction.
          if points:
              if distance_x > 0:
                  current_x = max(points[-1][0], current_x)
              elif distance_x < 0:
                  current_x = min(points[-1][0], current_x)

          # Delay after this sample: short at the start, inversely related to
          # speed in the middle, growing towards the end.
          if t < 0.1:
              delay = random.uniform(0.002, 0.008)
          elif t < 0.9:
              base_delay = 0.008
              speed_delay_factor = 1.0 / (speed_factor + 0.5)
              delay = base_delay * speed_delay_factor + random.uniform(-0.002, 0.002)
              delay = max(0.005, min(delay, 0.015))
          else:
              slow_factor = 1.0 + (t - 0.9) * 10
              delay = random.uniform(0.015, 0.025) * slow_factor
          points.append((current_x, current_y, delay))
      self.logger.info("滑块验证移动3")

      # The last sample is always the exact stop position.
      points[-1] = (stop_x, stop_y, 0)

      first_x, first_y, _ = points[0]
      self.d.touch.down(first_x, first_y)
      last_x, last_y = first_x, first_y
      try:
          time.sleep(random.uniform(0.002, 0.006))
          for x, y, delay in points[1:]:
              self.d.touch.move(x, y)
              last_x, last_y = x, y
              self.logger.info(f"滑块验证移动{x},{y}")
              time.sleep(delay)
      finally:
          # Always lift the finger, even if a move failed part-way, so the
          # device is not left with a touch held down.
          self.d.touch.up(last_x, last_y)

      # Random pause after the gesture (the hold_time argument is not used).
      hold_time = random.uniform(1, 2)
      time.sleep(hold_time)
      return points
  231. # 数英
  def Numbers_English_verify(self):
      """Handle the "enter the characters shown in the image" captcha.

      Steps: screenshot the captcha image element to ``Numbers_English.png``,
      post it base64-encoded to ``self.API_URL`` (request type 10103), type
      the returned text into the input box and click the verify button.

      Returns:
          bool: True after the text was typed and the verify button clicked.
          False when the image or input box is missing, the screenshot
          fails, the API reports an error, or any exception is raised.
      """
      time.sleep(5)
      captcha_image_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.Image[1]'
      input_xpath = (
          '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.EditText[1]| '
          '//*[@resource-id="com.sankuai.meituan:id/titans_webview_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.EditText[1]'
      )
      submit_xpath = '//*[@text="验证"] | //*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.widget.Button[1]'
      screenshot_path = "Numbers_English.png"

      # Stop early when no fresh screenshot can be taken; otherwise a file
      # left over from an earlier run would be uploaded and its (wrong)
      # answer typed into the current captcha.
      captcha_image = self.d.xpath(captcha_image_xpath)
      if not captcha_image.exists:
          print("数英图片元素不存在")
          return False
      try:
          captcha_image.screenshot().save(screenshot_path)
          print(f"数英图片截图保存成功: {screenshot_path}")
      except Exception as e:
          print(f"数英图片截图失败: {e}")
          return False

      try:
          with open(screenshot_path, 'rb') as f:
              image_data = base64.b64encode(f.read()).decode()
          payload = {
              "token": self.TOKEN,
              "type": 10103,
              "image": image_data,
          }
          headers = {"Content-Type": "application/json"}
          response = requests.post(self.API_URL, headers=headers, json=payload, timeout=30)
          response.raise_for_status()
          result = response.json()
          # Success needs both the outer code 10000 and the inner code 0.
          if not (result.get("code") == 10000 and result.get("data", {}).get("code") == 0):
              print("API返回错误")
              return False

          Numbers_English_verify_data = result["data"]
          print(f"API返回: {Numbers_English_verify_data}")
          captcha_text = Numbers_English_verify_data.get("data")
          print(f"验证码: {captcha_text}")

          input_box = self.d.xpath(input_xpath)
          if not input_box.exists:
              # Previously this path fell through and returned None.
              print("验证码输入框不存在")
              return False
          input_box.click()
          time.sleep(1)
          self.d.send_keys(captcha_text)
          time.sleep(5)
          self.d.xpath(submit_xpath).click()
          time.sleep(3)
          return True
      except Exception as e:
          print(f"数英验证码处理异常: {e}")
          return False
  281. # 滑块
  def slider_verify(self):
      """Handle the "drag the slider to complete the puzzle" captcha.

      Steps: screenshot the puzzle image to ``slider_main.png``, post it
      base64-encoded to ``self.API_URL`` (request type 22222) to obtain the
      drag distance, drag the slider handle with ``self.human_slide`` and
      finally check whether the puzzle prompt is still on screen.

      Returns:
          bool: True when the drag was performed and the puzzle prompt is no
          longer present; False in every other case. Earlier versions could
          return a (truthy) dict on an API exception and never ran the final
          check; both now yield a plain bool.
      """
      time.sleep(5)
      try:
          slider_slot_xpath = '//*[@resource-id="puzzleSliderDrag"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[1]'
          slider_main_xpath = '//*[@resource-id="puzzleImageMain"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]'
          slider_slot_img_path = "slider_slot.png"
          slider_main_img_path = "slider_main.png"

          # The slot screenshot is saved but not read again in this method,
          # so a missing slot element is only reported.
          slot_element = self.d.xpath(slider_slot_xpath)
          if slot_element.exists:
              slot_element.screenshot().save(slider_slot_img_path)
          else:
              print("slider_slot_xpath not exist")
              self.logger.info("slider_slot_xpath not exist")

          # Without a fresh puzzle screenshot the upload would use a stale
          # file (or fail), so give up here.
          main_element = self.d.xpath(slider_main_xpath)
          if not main_element.exists:
              print("slider_main_xpath not exist")
              self.logger.info("slider_main_xpath not exist")
              return False
          main_element.screenshot().save(slider_main_img_path)

          try:
              with open(slider_main_img_path, 'rb') as f:
                  encoded_image = base64.b64encode(f.read()).decode()
              payload = {
                  "token": self.TOKEN,
                  "type": 22222,
                  "image": encoded_image,
              }
              headers = {"Content-Type": "application/json"}
              response = requests.post(self.API_URL, headers=headers, json=payload, timeout=30)
              response.raise_for_status()
              result = response.json()
              api_ok = result.get("code") == 10000 and result.get("data", {}).get("code") == 0
          except Exception as e:
              # Must be falsy: the former dict return value was truthy and
              # looked like success to callers testing the result.
              self.logger.error(f"处理异常: {str(e)}")
              return False

          if not api_ok:
              print("api 返回错误 此时滑块验证可能呈图片形式存在")
              return False

          slider_verify_data = result["data"]
          print(f"slider_verify_data={slider_verify_data}")
          print(slider_verify_data["data"])
          slider_slide_distance = float(slider_verify_data["data"])

          try:
              slider_xpath = '//*[@resource-id="puzzleSliderBox"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.view.View[1]'
              bounds = self.d.xpath(slider_xpath).info['bounds']
              # Start near the centre of the slider element and aim at the
              # reported distance, each with a few pixels of random offset.
              start_x = ((bounds['left'] + bounds['right']) / 2) + random.uniform(-4, 4)
              start_y = ((bounds['top'] + bounds['bottom']) / 2) + random.uniform(-3, 3)
              end_x = start_x + slider_slide_distance + random.uniform(-3, 3)
              end_y = start_y + random.uniform(-1, 1)
              self.human_slide(start_x, start_y, end_x, end_y)
          except Exception as e:
              print(f"滑动操作时出错: {e}")
              return False

          time.sleep(2)
          # The attempt only counts as successful once the prompt is gone.
          if self.d.xpath('//*[@text="请拖动下方滑块完成拼图"]').exists:
              return False
          return True
      except Exception as e:
          self.logger.error(f"滑块验证失败: {e}")
          return False
  360. # 点击
  def Click_images(self):
      """Solve a "tap the items in order" captcha through the remote recognition API.

      The captcha container is screenshotted, sent base64-encoded to the
      recognition service, and every point the service returns is converted from
      image-relative to screen coordinates and tapped in the returned order.

      Returns:
          bool: True when at least one returned point was tapped. False when the
          container is missing, the API call or recognition fails, no coordinates
          are returned, or none of the returned coordinates could be tapped.
      """
      time.sleep(5)
      try:
          # 1. Locate the captcha container element.
          Click_images_xpath = '//*[@resource-id="com.sankuai.meituan:id/titans_main_layout"] | //*[@resource-id="com.sankuai.meituan:id/h5_container"] | //*[@resource-id="root"]'
          image_img_path = "Click_images.png"
          if not self.d.xpath(Click_images_xpath).exists:
              print("图标元素不存在")
              self.logger.info("图标元素不存在")
              return False
          # Screen position of the element; its top-left corner is the origin of
          # the coordinates returned by the recognition service.
          bounds = self.d.xpath(Click_images_xpath).info['bounds']
          image_left = bounds['left']
          image_top = bounds['top']
          image_width = bounds['right'] - bounds['left']
          image_height = bounds['bottom'] - bounds['top']
          print(f"图片位置: left={image_left}, top={image_top}, width={image_width}, height={image_height}")
          # 2. Screenshot the element (re-checked because the page may have changed).
          if not self.d.xpath(Click_images_xpath).exists:
              print("图标元素不存在,无法截图")
              self.logger.info("图标元素不存在,无法截图")
              return False
          self.d.xpath(Click_images_xpath).screenshot().save(image_img_path)
          try:
              with open(image_img_path, 'rb') as f:
                  c = base64.b64encode(f.read()).decode()
              # API request configuration.
              # NOTE(review): the token is hard-coded in source and sent over plain
              # HTTP; consider loading it from configuration.
              url = "http://api.jfbym.com/api/YmServer/customApi"
              data = {
                  "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",  # account token from the service's user centre
                  "type": 88888,  # recognition type id
                  "image": c  # base64-encoded image
              }
              headers = {
                  "Content-Type": "application/json"
              }
              response = requests.post(url, headers=headers, json=data, timeout=30)
              response.raise_for_status()  # fail on HTTP-level errors
              result = response.json()
              print(f"API返回结果: {result}")
              if not (result.get("code") == 10000 and result.get("data", {}).get("code") == 0):
                  error_msg = result.get("msg", "未知错误")
                  print(f"识别失败: {error_msg}")
                  return False
              verify_data = result.get("data", {})
              print(f"verify_data={verify_data}")
              # Coordinate string, e.g. "188,165|99,128|91,209|235,116".
              coords_str = verify_data.get("data", "")
              if not coords_str:
                  print("未返回坐标数据")
                  return False
              print(f"坐标字符串: {coords_str}")
              coords_list = coords_str.split('|')
              print(coords_list)
              clicked = 0
              for coord in coords_list:
                  # Skip blank fragments such as the one produced by a trailing "|".
                  if not coord.strip():
                      continue
                  try:
                      # Image-relative integer coordinates.
                      x_img_str, y_img_str = coord.split(',')
                      x_img = int(x_img_str.strip())
                      y_img = int(y_img_str.strip())
                      print(f"图片相对坐标: x={x_img}, y={y_img}")
                      # Convert to absolute screen coordinates.
                      x_screen = image_left + x_img
                      y_screen = image_top + y_img
                      print(f"屏幕绝对坐标: x={x_screen}, y={y_screen}")
                      self.d.click(x_screen, y_screen)
                      clicked += 1
                      time.sleep(self.get_sleep_time())
                  except Exception as e:
                      print(f"处理坐标 {coord} 失败: {e}")
                      continue
              if clicked == 0:
                  # Nothing was tapped, so the captcha cannot have been answered.
                  print("没有可点击的有效坐标")
                  return False
              time.sleep(self.get_sleep_time() * 2)  # give the UI time to react
              return True
          except requests.exceptions.RequestException as e:
              print(f"API请求失败: {e}")
              return False
          except Exception as e:
              print(f"识别过程出错: {e}")
              return False
      except Exception as e:
          self.logger.error(f"点击图标失败: {e}")
          return False
  452. # # 检查验证是否成功
  453. # if not self.d.xpath('//*[@text="请依次点击下图图标"] | //*[@text="请按语序依次点击"]').exists:
  454. # print("所有坐标点击完成,验证成功")
  455. # return True
  456. # else:
  457. # print("所有坐标点击完成,但验证文本仍然存在,可能验证失败")
  458. # return False
  459. # def Click_images(self):
  460. # try:
  461. # image_xpath = '//*[@resource-id="com.sankuai.meituan:id/titans_main_layout"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]'
  462. # image_element = self.d.xpath(image_xpath)
  463. # image_element_info = image_element.info
  464. # bounds = image_element_info['bounds']
  465. # image_left = bounds['left']
  466. # image_top = bounds['top']
  467. # image_width = bounds['right'] - bounds['left']
  468. # image_height = bounds['bottom'] - bounds['top']
  469. # print(f"图片位置: left={image_left}, top={image_top}, width={image_width}, height={image_height}")
  470. # #
  471. # image_img_path = "image.png"
  472. # if self.d.xpath(image_xpath).exists:
  473. # self.d.xpath(image_xpath).screenshot().save("image.png")
  474. # else:
  475. # print("image_xpath not exist")
  476. # try:
  477. # with open('image.png', 'rb') as f:
  478. # c = base64.b64encode(f.read()).decode()
  479. # url = "http://api.jfbym.com/api/YmServer/customApi"
  480. # data = {
  481. # "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
  482. # "type": 50009,
  483. # "image": c
  484. # }
  485. # headers = {
  486. # "Content-Type": "application/json"
  487. # }
  488. # # 发送请求
  489. # response = requests.post(url, headers=headers, json=data, timeout=30)
  490. # response.raise_for_status()
  491. # result = response.json()
  492. # if result.get("code") == 10000 and result.get("data", {}).get("code") == 0:
  493. # side_data = result["data"]
  494. # print(f"side_data={side_data}")
  495. # big_click_xpath = side_data["data"]
  496. # else:
  497. # print("api 返回错误")
  498. # return False
  499. # except Exception as e:
  500. # return {
  501. # "success": False,
  502. # "error_msg": f"处理异常: {str(e)}"
  503. # }
  504. # coords_str = side_data["data"]
  505. # if coords_str:
  506. # x_img_str, y_img_str = coords_str.split(',')
  507. # x_img = int(x_img_str.strip())
  508. # y_img = int(y_img_str.strip())
  509. # x_screen = image_left + x_img
  510. # y_screen = image_top + y_img
  511. # self.d.click(x_screen, y_screen)
  512. # time.sleep(self.get_sleep_time())
  513. # return True
  514. # except Exception as e:
  515. # self.logger.error(f"依次点击验证失败: {e}")
  516. # return False
  517. # 向右滑动
  def Swipe_right(self):
      """Drag the "slide to the right" handle from its current position to the track end.

      The drag distance is the track width minus the handle width; ten extra
      pixels are added so the handle reaches the far right.

      Returns:
          bool: True after the swipe has been performed, False when the track or
          the handle element cannot be found.
      """
      time.sleep(5)
      if not self.d.xpath(
              '//*[@resource-id="yodaBoxWrapper"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]').exists:
          print("未找到轨道元素")
          return False
      rail_box = self.d.xpath(
          '//*[@resource-id="yodaBoxWrapper"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]').info['bounds']
      rail_width = rail_box['right'] - rail_box['left']

      if not self.d.xpath(
              '//*[@resource-id="yodaBox"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1] ').exists:
          print("未找到滑块元素")
          return False
      knob_box = self.d.xpath(
          '//*[@resource-id="yodaBox"] | //*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]/android.view.View[1]').info['bounds']

      # Start from the centre of the handle.
      start_x = (knob_box['left'] + knob_box['right']) // 2
      start_y = (knob_box['top'] + knob_box['bottom']) // 2
      distance = rail_width - (knob_box['right'] - knob_box['left'])
      end_x = start_x + distance
      end_y = start_y
      print(f"滑动距离: {distance}像素")
      print(f"起点: ({start_x}, {start_y}), 终点: ({end_x}, {end_y})")
      # Overshoot slightly so the handle really reaches the right end.
      end_x += 10
      self.Swipe_right_human_slide(start_x, start_y, end_x, end_y)
      time.sleep(2)
      return True
  def Swipe_right_human_slide(self, start_x, start_y, end_x, end_y):
      """Perform a touch drag along a 50-point eased, jittered trajectory.

      The points are generated first (sleeping a short, ratio-dependent delay per
      point), then replayed with touch down / move / up. The finger is lifted two
      pixels to the right of the last point.
      """
      step_count = 50
      span_x = end_x - start_x
      span_y = end_y - start_y
      trail = []
      last_x = start_x  # x of the previously generated point
      for step in range(step_count):
          ratio = (step / step_count)
          # Piecewise easing curve; the result is capped at 0.99 below.
          if ratio < 0.3:
              progress = 0.5 * (ratio / 0.3) ** 2
          elif ratio < 0.7:
              progress = 0.5 + (ratio - 0.3) * 1.25
          else:
              progress = 0.9 + 0.5 * ((ratio - 0.7) / 0.3) ** 0.5
          capped = min(progress, 0.99)
          # Small jitter; randint's upper bound is exclusive, so this yields -1 or 0.
          jitter_x = np.random.randint(-1, 1)
          jitter_y = np.random.randint(-1, 1)
          x = start_x + span_x * capped + jitter_x
          y = start_y + span_y * capped + jitter_y
          # Keep x moving forward and never beyond end_x.
          if x < last_x and x < end_x:
              x = last_x + 1
          if x > end_x:
              x = end_x
          last_x = x
          trail.append((x, y))
          # NOTE(review): this delay is slept while the points are computed, not
          # between touch moves -- confirm whether pacing the moves was intended.
          time.sleep(0.002 + 0.01 * (1 - abs(0.5 - ratio)))
      print(f"points: {trail}")
      self.loggerMT.info(f"points: {trail}")
      # Replay the trajectory on the device.
      first_x, first_y = trail[0]
      self.d.touch.down(first_x, first_y)
      for px, py in trail[1:]:
          self.d.touch.move(px, py)
      final_x, final_y = trail[-1]
      self.d.touch.up(final_x + 2, final_y)
  597. # print(f"points: {points}")
  598. # self.loggerPdd.info(f"points: {points}")
  599. # self.d.swipe_points(points, duration=0.05)
  600. # 拖动滑块刚
  def complexs(self):
      """Solve the "drag the slider as instructed" captcha.

      Steps: press the slider, drag it to the right end of the track, screenshot
      the prompt label and the picture area while the finger is still down, ask
      the recognition API for a pixel value, drag back to the position derived
      from it and lift the finger.

      The injected touch is always released, including on every failure path.

      Returns:
          bool: True when the back-drag was performed and the finger lifted,
          False on any failure.
      """
      time.sleep(5)
      finger_down = False   # True while a touch.down has no matching touch.up yet
      release_point = None  # where to lift the finger when bailing out early
      try:
          slider_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.view.View[1]'
          track_xpath = '//*[@text="请按照说明拖动滑块"]'
          if not self.d.xpath(slider_xpath).exists:
              print("滑块元素不存在")
              self.logger.info("滑块元素不存在")
              return False
          if not self.d.xpath(track_xpath).exists:
              print("滑轨元素不存在")
              self.logger.info("滑轨元素不存在")
              return False
          # Slider geometry.
          slider_bounds = self.d.xpath(slider_xpath).info['bounds']
          slider_width = slider_bounds['right'] - slider_bounds['left']
          slider_height = slider_bounds['bottom'] - slider_bounds['top']
          slider_center_x = slider_bounds['left'] + slider_width / 2
          slider_center_y = slider_bounds['top'] + slider_height / 2
          print(f"滑块中心: ({slider_center_x}, {slider_center_y})")
          # Track geometry.
          track_bounds = self.d.xpath(track_xpath).info['bounds']
          track_left = track_bounds['left']
          track_right = track_bounds['right']
          # Slider centre when it sits at the right end of the track.
          right_end_center_x = track_right - slider_width / 2
          right_end_center_y = slider_center_y
          print(f"最右端滑块中心坐标: ({right_end_center_x}, {right_end_center_y})")
          try:
              self.d.touch.down(slider_center_x, slider_center_y)
              finger_down = True
              release_point = (right_end_center_x, right_end_center_y)
              time.sleep(0.1)
              points = self.Swipe_trajectory(slider_center_x, slider_center_y, right_end_center_x,
                                             right_end_center_y)
              for point in points[1:]:
                  self.d.touch.move(point[0], point[1])
                  time.sleep(0.002)
              print("滑块已到达最右端")
          except Exception as e:
              print(f"滑动到最右端失败: {e}")
              return False
          # Regions to capture: the prompt label and the picture.
          capture_xpath1 = '// *[ @ text = "身份核实"] / android.view.View[1] / android.view.View[1] / android.widget.TextView[1]'
          capture_xpath2 = '// * [ @ text = "身份核实"] / android.view.View[1] / android.view.View[1] / android.view.View[1]'
          capture_element1 = self.d.xpath(capture_xpath1)
          capture_element2 = self.d.xpath(capture_xpath2)
          capture_info1_bounds = capture_element1.info['bounds']
          capture_info2_bounds = capture_element2.info['bounds']
          capture_label_left = capture_info1_bounds['left']
          capture_label_top = capture_info1_bounds['top']
          capture_label_right = capture_info1_bounds['right']
          capture_label_bottom = capture_info1_bounds['bottom']
          capture_left = capture_info2_bounds['left']
          capture_top = capture_info2_bounds['top']
          capture_right = capture_info2_bounds['right']
          capture_bottom = capture_info2_bounds['bottom']
          capture_label_width = capture_label_right - capture_label_left
          capture_label_height = capture_label_bottom - capture_label_top
          capture_width = capture_right - capture_left
          capture_height = capture_bottom - capture_top
          print(
              f"截图区域1(提示文本): left={capture_label_left}, top={capture_label_top}, width={capture_label_width}, height={capture_label_height}")
          print(
              f"截图区域2(图片): left={capture_left}, top={capture_top}, width={capture_width}, height={capture_height}")
          # Take one full screenshot and crop both regions out of it.
          screenshot_label_path = "capture_label_area.png"
          screenshot_image_path = "capture_area.png"
          try:
              full_screenshot = self.d.screenshot()
              from PIL import Image
              import io
              if isinstance(full_screenshot, bytes):
                  img = Image.open(io.BytesIO(full_screenshot))
              else:
                  img = full_screenshot
              cropped_img_1 = img.crop(
                  (capture_label_left, capture_label_top, capture_label_right, capture_label_bottom))
              cropped_img_1.save(screenshot_label_path)
              print(f"截图1已保存: {screenshot_label_path}")
              cropped_img_2 = img.crop((capture_left, capture_top, capture_right, capture_bottom))
              cropped_img_2.save(screenshot_image_path)
              print(f"截图2已保存: {screenshot_image_path}")
          except Exception as e:
              print(f"截图失败: {e}")
              return False
          try:
              with open(screenshot_label_path, 'rb') as f:
                  label_image_data = base64.b64encode(f.read()).decode()
              with open(screenshot_image_path, 'rb') as f:
                  image_data = base64.b64encode(f.read()).decode()
              # API request configuration.
              # NOTE(review): the token is hard-coded in source; consider loading
              # it from configuration.
              url = "http://api.jfbym.com/api/YmServer/customApi"
              data = {
                  "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
                  "type": 29013,
                  "image": image_data,
                  "label_image": label_image_data
              }
              headers = {
                  "Content-Type": "application/json"
              }
              response = requests.post(url, headers=headers, json=data, timeout=30)
              response.raise_for_status()
              result = response.json()
              print(f"API返回结果: {result}")
              if not (result.get("code") == 10000 and result.get("data", {}).get("code") == 0):
                  error_msg = result.get("msg", "未知错误")
                  print(f"识别失败: {error_msg}")
                  return False
              verify_data = result.get("data", {})
              print(f"verify_data={verify_data}")
              data_str = verify_data.get("data", "")
              if not data_str:
                  print("云码未返回有效的data值")
                  return False
              try:
                  data_value = int(data_str)
                  print(f"云码返回的像素距离: {data_value}")
                  # NOTE(review): 108, 44 and 504 look like layout offsets measured
                  # on one particular screen size -- confirm before reuse elsewhere.
                  gray_line_target_x = 108 + data_value - 44
                  slider_target_center_x = gray_line_target_x
                  print(f"滑块中心目标X坐标: {slider_target_center_x}")
                  back_distance = 504 - data_value - 44
                  print(f"需要往回拖动的距离: {back_distance}")
                  # Re-read the slider bounds for the diagnostic distance below.
                  slider_bounds = self.d.xpath(slider_xpath).info['bounds']
                  current_slider_center_x = (slider_bounds['left'] + slider_bounds['right']) / 2
                  # Keep the target inside the track.
                  min_x = track_left + slider_width / 2
                  max_x = track_right - slider_width / 2
                  slider_target_center_x = max(min_x, min(slider_target_center_x, max_x))
                  actual_distance = slider_target_center_x - current_slider_center_x
                  print(f"实际需要滑动的距离: {actual_distance}")
                  try:
                      # Drag back from the right end to the target position.
                      back_points = self.Swipe_trajectory(
                          right_end_center_x, right_end_center_y,
                          slider_target_center_x, right_end_center_y
                      )
                      for point in back_points[1:]:
                          self.d.touch.move(point[0], point[1])
                          time.sleep(0.002)
                      time.sleep(3)
                      self.d.touch.up(slider_target_center_x, right_end_center_y)
                      finger_down = False
                      return True
                  except Exception as e:
                      print(f"往回拖动失败: {e}")
                      return False
              except ValueError as e:
                  print(f"解析云码返回的data值失败: {e}")
                  return False
              except Exception as e:
                  print(f"计算滑块位置失败: {e}")
                  return False
          except requests.exceptions.RequestException as e:
              print(f"API请求失败: {e}")
              return False
          except Exception as e:
              print(f"识别过程出错: {e}")
              return False
      except Exception as e:
          self.logger.error(f"滑块验证失败: {e}")
          return False
      finally:
          # Never leave the injected finger pressed, whatever path was taken.
          if finger_down:
              try:
                  self.d.touch.up(release_point[0], release_point[1])
              except Exception as e:
                  self.logger.error(f"释放触摸失败: {e}")
  def Swipe_trajectory(self, start_x, start_y, end_x, end_y):
      """Generate 50 eased, jittered points from the start towards the end position.

      Progress follows a piecewise easing curve capped at 0.99, with a jitter of
      -1 or 0 pixels added on each axis. The x coordinate is kept monotonic in
      the direction of travel and never passes ``end_x``; this holds for both
      left-to-right and right-to-left requests.

      Args:
          start_x, start_y: position where the drag starts.
          end_x, end_y: position the drag heads towards.

      Returns:
          list[tuple]: 50 ``(x, y)`` points.
      """
      points = []
      total_steps = 50
      distance_x = end_x - start_x
      distance_y = end_y - start_y
      # Direction of travel decides which way the monotonic clamp has to work.
      moving_right = distance_x >= 0
      previous_x = start_x
      for i in range(total_steps):
          ratio = (i / total_steps)
          if ratio < 0.3:
              progress = 0.5 * (ratio / 0.3) ** 2
          elif ratio < 0.7:
              progress = 0.5 + (ratio - 0.3) * 1.25
          else:
              progress = 0.9 + 0.5 * ((ratio - 0.7) / 0.3) ** 0.5
          capped = min(progress, 0.99)
          # randint's upper bound is exclusive: the jitter is -1 or 0.
          offset_x = np.random.randint(-1, 1)
          offset_y = np.random.randint(-1, 1)
          x = start_x + distance_x * capped + offset_x
          y = start_y + distance_y * capped + offset_y
          if moving_right:
              # Never step backwards (left) and never overshoot end_x.
              if x < previous_x and x < end_x:
                  x = previous_x + 1
              if x > end_x:
                  x = end_x
          else:
              # Mirror image for right-to-left drags. Without this branch every
              # point was clamped straight to end_x.
              if x > previous_x and x > end_x:
                  x = previous_x - 1
              if x < end_x:
                  x = end_x
          previous_x = x
          points.append((x, y))
      return points
  817. # 最短线连接
  def Shortest_connection(self):
      """Solve the "connect the dots of the named colour" captcha.

      Screenshots the stylised prompt text and the picture with the coloured
      dots, asks the recognition API which colour the prompt names, finds the
      dots of that colour in the picture, orders them greedily by nearest
      neighbour and draws a curved stroke through them.

      Returns:
          bool: True when the stroke was drawn, False on any failure.
      """
      time.sleep(5)
      try:
          art_text_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[1]'
          color_points_xpath = '//*[@text="身份核实"]/android.view.View[1]/android.view.View[1]/android.view.View[2]/android.view.View[1]/android.widget.Image[1]'
          art_text_img_path = "art_text.png"
          color_points_img_path = "color_points.png"
          if self.d.xpath(art_text_xpath).exists:
              self.d.xpath(art_text_xpath).screenshot().save(art_text_img_path)
              self.logger.info("艺术字截图成功")
          else:
              self.logger.warning("艺术字元素不存在")
              return False
          if self.d.xpath(color_points_xpath).exists:
              self.d.xpath(color_points_xpath).screenshot().save(color_points_img_path)
              self.logger.info("颜色元素截图成功")
          else:
              self.logger.warning("颜色点元素不存在")
              return False
          color_name = ""
          try:
              with open(art_text_img_path, 'rb') as f:
                  c = base64.b64encode(f.read()).decode()
              # NOTE(review): the token is hard-coded in source; consider loading
              # it from configuration.
              url = "http://api.jfbym.com/api/YmServer/customApi"
              data = {
                  "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
                  "type": 10118,
                  "image": c
              }
              headers = {
                  "Content-Type": "application/json"
              }
              response = requests.post(url, headers=headers, json=data, timeout=30)
              response.raise_for_status()
              result = response.json()
              self.logger.info(f"云码API返回结果: {result}")
              # The service answers in more than one envelope shape.
              if result.get("code") == 0:
                  color_name = result.get("data", "")
              elif result.get("code") == 10000 and "data" in result:
                  inner_data = result.get("data", {})
                  if isinstance(inner_data, dict) and inner_data.get("code") == 0:
                      color_name = inner_data.get("data", "")
                  elif isinstance(inner_data, str):
                      color_name = inner_data
              else:
                  self.logger.error(f"云码API返回异常: {result}")
                  return False
              # A result containing "鼗" is mapped to purple (presumably a
              # misrecognition of "紫"). The original used "==" here, which
              # compared instead of assigning and therefore had no effect.
              if isinstance(color_name, str) and "鼗" in color_name:
                  color_name = "紫色"
              if not color_name:
                  self.logger.error("未识别到颜色名称")
                  return False
              self.logger.info(f"识别到的颜色名称: {color_name}")
          except Exception as e:
              self.logger.error(f"云码API调用异常: {e}")
              return False
          # Dot centres, relative to the screenshot of the picture.
          relative_coordinates = self.find_color_coordinates(color_points_img_path, color_name)
          if not relative_coordinates:
              self.logger.warning(f"未在图片中找到 {color_name} 颜色的坐标")
              return False
          element_bounds = self.d.xpath(color_points_xpath).info['bounds']
          element_left = element_bounds['left']
          element_top = element_bounds['top']
          element_width = element_bounds['right'] - element_bounds['left']
          element_height = element_bounds['bottom'] - element_bounds['top']
          # Screenshot size, needed to scale image pixels to screen pixels.
          try:
              color_points_img = cv2.imread(color_points_img_path)
              if color_points_img is None:
                  self.logger.error("无法读取截图")
                  return False
              screenshot_height, screenshot_width = color_points_img.shape[:2]
          except Exception as e:
              self.logger.error(f"读取截图尺寸失败: {e}")
              # Fall back to the element size when the file cannot be inspected.
              screenshot_width = element_width
              screenshot_height = element_height
          # The scale is the same for every point, so compute it once.
          can_scale = screenshot_width > 0 and screenshot_height > 0
          if can_scale:
              scale_x = element_width / screenshot_width
              scale_y = element_height / screenshot_height
          screen_coordinates = []
          for (rx, ry) in relative_coordinates:
              if can_scale:
                  sx = element_left + int(rx * scale_x)
                  sy = element_top + int(ry * scale_y)
              else:
                  # No usable screenshot size: offset the raw image coordinates.
                  sx = element_left + rx
                  sy = element_top + ry
              screen_coordinates.append((sx, sy))
              self.logger.info(f"相对坐标({rx}, {ry}) -> 屏幕坐标({sx}, {sy})")
          if len(screen_coordinates) < 2:
              self.logger.warning("需要至少2个点才能连接")
              return False
          # Greedy nearest-neighbour ordering, starting from the first point.
          unvisited = screen_coordinates.copy()
          path = [unvisited.pop(0)]
          while unvisited:
              last_point = path[-1]
              nearest_idx = min(range(len(unvisited)),
                                key=lambda i: ((last_point[0] - unvisited[i][0]) ** 2 +
                                               (last_point[1] - unvisited[i][1]) ** 2) ** 0.5)
              path.append(unvisited.pop(nearest_idx))
          self.logger.info(f"最短路径点顺序: {path}")
          curved_path = self.human_like_path(path)
          # Draw the stroke.
          result = self.simulate_human_drawing(curved_path)
          if result:
              self.logger.info("最短线连接成功")
              time.sleep(3)
              return True
          self.logger.warning("最短线连接失败")
          return False
      except Exception as e:
          self.logger.error(f"最短线连接验证失败: {e}")
          return False
  def find_color_coordinates(self, image_path, color_name):
      """Find the centres of blobs of a named colour in an image.

      Args:
          image_path: path of the image file to inspect.
          color_name: Chinese colour name; must be a key of the HSV table below.

      Returns:
          list[tuple[int, int]]: centre ``(x, y)`` of every contour whose area
          exceeds 30 pixels, in image coordinates. Empty when the colour is not
          supported or the image cannot be read.
      """
      self.logger.info(f"开始查找颜色: {color_name}, 图片路径: {image_path}")
      # HSV ranges per colour. Every value is a tuple of (lower, upper) pairs, so
      # a colour that wraps around the hue axis (red) can list two bands.
      # The original table had two entries without the trailing comma (so they
      # were not tuples of pairs) and duplicate keys; for the duplicated brown
      # key the values of the later entry, which was the effective one, are kept.
      color_ranges = {
          "红色": (([0, 120, 70], [10, 255, 255]), ([170, 120, 70], [180, 255, 255])),
          "红的": (([0, 120, 70], [10, 255, 255]), ([170, 120, 70], [180, 255, 255])),
          "绿色": (([35, 50, 50], [85, 255, 255]),),
          "蓝色": (([90, 50, 50], [130, 255, 255]),),
          "黄色": (([20, 100, 100], [30, 255, 255]),),
          "橙色": (([5, 100, 100], [15, 255, 255]),),
          "紫色": (([130, 50, 50], [160, 255, 255]),),
          "黑色": (([0, 0, 0], [180, 255, 50]),),
          "白色": (([0, 0, 200], [180, 30, 255]),),
          "橘色": (([5, 150, 150], [15, 255, 255]),),
          "褐色": (([10, 50, 20], [20, 255, 150]),),
      }
      if color_name not in color_ranges:
          self.logger.warning(f"不支持的颜色: {color_name}")
          return []
      image = cv2.imread(image_path)
      if image is None:
          self.logger.error(f"无法读取图像: {image_path}")
          return []
      hsv = cv2.cvtColor(image, cv2.COLOR_BGR2HSV)
      # OR together the masks of every band listed for this colour, so any
      # colour with two bands is handled, not only the "红色" key.
      mask = None
      for lower, upper in color_ranges[color_name]:
          band = cv2.inRange(hsv, np.array(lower), np.array(upper))
          mask = band if mask is None else cv2.bitwise_or(mask, band)
      # Morphological open + close to remove speckles and fill small holes.
      kernel = np.ones((3, 3), np.uint8)
      mask = cv2.morphologyEx(mask, cv2.MORPH_OPEN, kernel)
      mask = cv2.morphologyEx(mask, cv2.MORPH_CLOSE, kernel)
      contours, _ = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
      coordinates = []
      min_area = 30  # contours at or below this area are ignored
      for i, contour in enumerate(contours):
          area = cv2.contourArea(contour)
          if area <= min_area:
              continue
          # Centroid from the image moments.
          M = cv2.moments(contour)
          if M["m00"] != 0:
              cx = int(M["m10"] / M["m00"])
              cy = int(M["m01"] / M["m00"])
              coordinates.append((cx, cy))
              self.logger.info(f"轮廓{i}: 面积={area}, 中心点=({cx}, {cy})")
          else:
              self.logger.info(f"轮廓{i}: 面积={area}, 无法计算中心点")
      self.logger.info(f"找到 {len(coordinates)} 个 {color_name} 坐标点")
      return coordinates
  def human_like_path(self, points):
      """Turn a polyline into a slightly bent path made of quadratic Bezier arcs.

      For every pair of consecutive points the midpoint is pushed by a random
      amount of up to 15 pixels perpendicular to the dominant direction of the
      segment, and is used as the Bezier control point. Each segment contributes
      its start point followed by the integer curve samples; the very last
      input point closes the path.

      Args:
          points: sequence of ``(x, y)`` points.

      Returns:
          The input itself when it has fewer than two points, otherwise the
          list of points of the bent path.
      """
      if len(points) < 2:
          return points
      bent = []
      for seg_from, seg_to in zip(points, points[1:]):
          centre_x = (seg_from[0] + seg_to[0]) / 2
          centre_y = (seg_from[1] + seg_to[1]) / 2
          if abs(seg_to[0] - seg_from[0]) > abs(seg_to[1] - seg_from[1]):
              # Mostly horizontal segment: bend it vertically.
              shift_x, shift_y = 0, random.uniform(-15, 15)
          else:
              # Mostly vertical segment: bend it horizontally.
              shift_x, shift_y = random.uniform(-15, 15), 0
          ctrl_x = centre_x + shift_x
          ctrl_y = centre_y + shift_y
          bent.append(seg_from)
          # Quadratic Bezier: B(t) = (1-t)^2 * P0 + 2(1-t)t * C + t^2 * P1.
          bent.extend(
              (
                  int((1 - t) ** 2 * seg_from[0] + 2 * (1 - t) * t * ctrl_x + t ** 2 * seg_to[0]),
                  int((1 - t) ** 2 * seg_from[1] + 2 * (1 - t) * t * ctrl_y + t ** 2 * seg_to[1]),
              )
              for t in np.arange(0.1, 1.0, 0.1)
          )
      # Close the path with the final input point.
      bent.append(points[-1])
      return bent
  def simulate_human_drawing(self, path):
      """Replay a path on the device as one continuous, slightly shaky touch stroke.

      Args:
          path: sequence of ``(x, y)`` screen points; at least two are required.

      Returns:
          bool: True when the stroke was performed, False when the path is too
          short or a touch call raised.
      """
      if len(path) < 2:
          return False
      try:
          # Press at the first point.
          first_x, first_y = path[0]
          self.d.touch.down(first_x, first_y)
          time.sleep(random.uniform(0.05, 0.1))
          # Visit the remaining points with up to 2 px of jitter and a short,
          # random pause after each move.
          for next_x, next_y in path[1:]:
              shake_x = random.randint(-2, 2)
              shake_y = random.randint(-2, 2)
              self.d.touch.move(next_x + shake_x, next_y + shake_y)
              time.sleep(random.uniform(0.01, 0.03))
          time.sleep(random.uniform(0.1, 0.2))
          # Lift the finger exactly on the last point.
          last_x, last_y = path[-1][0], path[-1][1]
          self.d.touch.up(last_x, last_y)
          print("模拟人类绘制完成")
          return True
      except Exception as e:
          print(f"模拟绘制失败: {e}")
          return False
  1068. # 空间推理
  def click_side(self):
      """Solve the single-tap "spatial reasoning" captcha through the recognition API.

      Screenshots the captcha picture, sends it to the recognition service and
      taps the single ``"x,y"`` point it returns, converted from image-relative
      to screen coordinates.

      Returns:
          bool: True when the point was tapped. False on every failure: element
          missing, API or HTTP error, empty or malformed coordinates, or any
          other exception. Earlier revisions could also return a dict or None
          on failure.
      """
      try:
          image_xpath = '//*[@resource-id="com.sankuai.meituan:id/titans_main_layout"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]'
          image_img_path = "image.png"
          # Without the element there is nothing to screenshot; stop here rather
          # than upload a stale image.png left over from an earlier run.
          if not self.d.xpath(image_xpath).exists:
              print("image_xpath not exist")
              return False
          bounds = self.d.xpath(image_xpath).info['bounds']
          image_left = bounds['left']
          image_top = bounds['top']
          image_width = bounds['right'] - bounds['left']
          image_height = bounds['bottom'] - bounds['top']
          print(f"图片位置: left={image_left}, top={image_top}, width={image_width}, height={image_height}")
          self.d.xpath(image_xpath).screenshot().save(image_img_path)
          with open(image_img_path, 'rb') as f:
              c = base64.b64encode(f.read()).decode()
          # NOTE(review): the token is hard-coded in source; consider loading it
          # from configuration.
          url = "http://api.jfbym.com/api/YmServer/customApi"
          data = {
              "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
              "type": 50009,
              "image": c
          }
          headers = {
              "Content-Type": "application/json"
          }
          response = requests.post(url, headers=headers, json=data, timeout=30)
          response.raise_for_status()
          result = response.json()
          if not (result.get("code") == 10000 and result.get("data", {}).get("code") == 0):
              # Explicit failure instead of running into an undefined variable.
              print("api 返回错误")
              return False
          verify_data = result["data"]
          print(f"verify_data={verify_data}")
          coords_str = verify_data["data"]
          if not coords_str:
              print("未返回坐标数据")
              return False
          # Image-relative "x,y" -> absolute screen coordinates.
          x_img_str, y_img_str = coords_str.split(',')
          x_img = int(x_img_str.strip())
          y_img = int(y_img_str.strip())
          x_screen = image_left + x_img
          y_screen = image_top + y_img
          self.d.click(x_screen, y_screen)
          time.sleep(self.get_sleep_time())
          return True
      except Exception as e:
          self.logger.error(f"空间推理验证失败: {e}")
          return False
  1127. # 人工处理
  def _handle_generic_captcha(self, xpath):
      """Wait for a person to clear the captcha identified by ``xpath``.

      After an initial five-second pause the element is polled every two
      seconds for up to one hour.

      Args:
          xpath: XPath of the captcha element to watch.

      Returns:
          bool: True once the element no longer exists, False on timeout.
      """
      time.sleep(5)
      self.logger.warning("通用验证码弹窗触发,等待人工处理...")
      began = time.time()
      max_wait = 60 * 60  # one hour, in seconds
      while True:
          if time.time() - began >= max_wait:
              break
          if not self.d.xpath(xpath).exists:
              self.logger.info("验证码已处理完成")
              return True
          time.sleep(2)
      self.logger.warning("验证码处理超时")
      return False
  def check_and_handle_popup(self):
      """Detect and deal with whatever popup is currently on screen.

      Without a captcha on screen, the first entry of
      ``self.popup_rules["simple"]`` that is visible and was not clicked
      recently gets clicked; otherwise an ad popup (text containing "广告")
      is closed by tapping near the top-right corner.

      With a captcha on screen, the call is throttled, the retry counter for
      the current captcha type is advanced, the ``verification_in_progress``
      and ``pausing`` events are set, and the solver matching the captcha
      type is run (unknown types wait for a human). Afterwards the method
      checks whether the captcha cleared and, if not, calls itself again
      until ``MAX_VERIFICATION_RETRY`` is reached.

      :return: True when a popup was handled or the captcha cleared,
          False otherwise.
      """
      d = self.spider.d
      exists, captcha_type, xpath = self.is_any_verification_popup_exists(d)

      if not exists:
          # Dismissible popups first.
          for simple_xpath, desc in self.popup_rules["simple"]:
              if not d.xpath(simple_xpath).exists:
                  continue
              if self._is_recent_click(simple_xpath):
                  continue
              self.logger.info(f"检测到简单弹窗: {desc}")
              d.xpath(simple_xpath).click()
              return True
          # Then ad popups: tap 50px in from the top-right corner.
          if d.xpath('//*[contains(@text, "广告")]').exists:
              w, _h = d.info['displayWidth'], d.info['displayHeight']
              d.click(w - 50, 50)
              self.logger.info("关闭广告弹窗")
              return True
          return False

      # A captcha is on screen.
      now = time.time()
      # Do not start again within 30s while a previous attempt is running.
      too_soon = now - self.last_verification_time < 30
      if too_soon and self.verification_in_progress.is_set():
          return False

      self.last_verification_time = now
      self.verification_count += 1

      if captcha_type != self.last_verification_type:
          # A different captcha type starts a fresh retry budget.
          self.logger.info(f"验证码类型变化: {self.last_verification_type} -> {captcha_type}")
          self.verification_retry_count = 0
          self.last_verification_type = captcha_type
      self.verification_retry_count += 1

      self.logger.warning(
          f"检测到验证码弹窗,类型: {captcha_type},重试次数: {self.verification_retry_count}/{self.MAX_VERIFICATION_RETRY}")
      if self.verification_retry_count > self.MAX_VERIFICATION_RETRY:
          self.logger.error("重试次数超限,重启应用")
          self._handle_verification_failure()
          return False

      self.verification_in_progress.set()
      self.pausing.set()
      self.logger.info("已设置主线程暂停事件")

      # captcha type -> (log message, name of the solver method on self)
      solvers = {
          "Numbers_English": ("开始处理通用数验证", "Numbers_English_verify"),
          "Swipe_right": ("开始处理向右滑动", "Swipe_right"),
          "Click_images": ("开始处理依次点击图片或语序", "Click_images"),
          "slider": ("开始处理滑块验证", "slider_verify"),
          "complexs": ("开始处理拖动滑块刚", "complexs"),
          "Shortest_connection": ("开始处理最短距离连接", "Shortest_connection"),
          "click_side": ("开始处理空间推理", "click_side"),
      }
      try:
          if captcha_type in solvers:
              message, solver_name = solvers[captcha_type]
              self.logger.info(message)
              getattr(self, solver_name)()
          else:
              self.logger.info("等待人工处理")
              self._handle_generic_captcha(xpath)
      except Exception as e:
          # A failing solver is not fatal; the check below decides the outcome.
          self.logger.error(f"验证码处理异常: {e}")

      time.sleep(5)
      verification_cleared, remaining_type = self.wait_for_verification_clear(d, timeout=7)
      if verification_cleared:
          self.logger.info(f"第{self.verification_retry_count}次验证成功")
          time.sleep(3)
          self._handle_verification_success()
          return True

      self.logger.warning(f"第{self.verification_retry_count}次验证失败,仍有验证码: {remaining_type}")
      if self.verification_retry_count >= self.MAX_VERIFICATION_RETRY:
          self._handle_verification_failure()
          return False

      # Release the events and try again from the top.
      self.verification_in_progress.clear()
      self.pausing.clear()
      time.sleep(2)
      return self.check_and_handle_popup()
  1224. '''
  1225. # 1. 处理简单弹窗
  1226. for xpath, desc in self.popup_rules["simple"]:
  1227. if d.xpath(xpath).exists and not self._is_recent_click(xpath):
  1228. self.logger.info("检测到弹窗: %s", desc)
  1229. d.xpath(xpath).click()
  1230. return True
  1231. # 2. 处理验证码弹窗
  1232. for xpath in self.popup_rules["verification"]:
  1233. if d.xpath(xpath).exists:
  1234. now = time.time()
  1235. if now - self.last_verification_time < 30:
  1236. return False # 30秒内不重复触发
  1237. self.last_verification_time = now
  1238. self.verification_count += 1
  1239. self.logger.warning("验证码弹窗触发,等待人工处理...")
  1240. if self.verification_count > self.MAX_VERIFICATION_RETRY:
  1241. self.logger.error("验证码重试超限,终止任务")
  1242. self.spider.stop_all()
  1243. return True
  1244. self.pausing.set() # 通知主线程暂停
  1245. d.toast.show("需要人工处理验证码", 120)
  1246. # 等待人工处理
  1247. start = time.time()
  1248. # while time.time() - start < 120*60:
  1249. # if not d.xpath(xpath).exists:
  1250. # self.logger.info("验证码已处理")
  1251. # d.toast.show("验证完成", 2)
  1252. # self.pausing.clear() # 放行主线程
  1253. # return True
  1254. # time.sleep(5)
  1255. while True:
  1256. if not d.xpath(xpath).exists:
  1257. self.logger.info("验证码已处理")
  1258. d.toast.show("验证完成", 2)
  1259. self.pausing.clear() # 放行主线程
  1260. return True
  1261. time.sleep(5)
  1262. self.logger.warning("验证码超时,重启APP")
  1263. self.spider.restart_app()
  1264. return True
  1265. # 3. 处理广告弹窗(点击右上角)
  1266. if d.xpath('//*[contains(@text, "广告")]').exists:
  1267. w, h = d.info['displayWidth'], d.info['displayHeight']
  1268. d.click(w - 50, 50)
  1269. self.logger.info("关闭广告弹窗")
  1270. return True
  1271. return False
  1272. '''
  def is_any_verification_popup_exists(self, d=None):
      """Report whether any known captcha popup is currently visible.

      The configured ``self.popup_rules["verification"]`` pairs are checked
      first, followed by two extra built-in indicators.

      :param d: device to query; ``self.d`` is used when None.
      :return: ``(True, captcha_type, xpath)`` for the first rule whose
          element exists, otherwise ``(False, None, None)``.
      """
      device = self.d if d is None else d
      # Special-case indicators checked after the configured rules.
      extra_rules = [
          ('//*[contains(@resource-id, "com.sankuai.meituan:id/yoda_activity_rootView")]', "complexs"),
          ('//*[contains(@text, "拖动滑块刚")]', "complexs"),
      ]
      for rule_group in (self.popup_rules["verification"], extra_rules):
          for rule_xpath, rule_type in rule_group:
              if device.xpath(rule_xpath).exists:
                  return True, rule_type, rule_xpath
      return False, None, None
  def wait_for_verification_clear(self, d=None, timeout=10):
      """Poll once per second until no captcha popup is left on screen.

      :param d: device to query; ``self.d`` is used when None.
      :param timeout: how many seconds to keep polling.
      :return: ``(True, None)`` once nothing is detected, or
          ``(False, captcha_type)`` if a captcha is still present after
          the timeout.
      """
      device = self.d if d is None else d
      began = time.time()
      while time.time() - began < timeout:
          still_there, _, _ = self.is_any_verification_popup_exists(device)
          if not still_there:
              return True, None
          time.sleep(1)

      # One last look after the deadline.
      still_there, leftover_type, _ = self.is_any_verification_popup_exists(device)
      if not still_there:
          return True, None
      self.logger.info(f"超时,类型: {leftover_type}")
      return False, leftover_type
  def _handle_verification_success(self):
      """Reset captcha bookkeeping after a successful verification.

      Waits five seconds, zeroes the retry state, clears the
      ``verification_in_progress`` and ``pausing`` events, and zeroes the
      verification counter.
      """
      time.sleep(5)

      # Retry state for the captcha that was just solved.
      self.verification_retry_count = 0
      self.last_verification_type = None

      # Clear both events.
      self.verification_in_progress.clear()
      self.pausing.clear()

      self.verification_count = 0
      self.logger.info("验证成功,清除暂停状态")
  def _handle_verification_failure(self):
      """Fall back to waiting for a human after automatic solving failed.

      Resets the retry state without restarting the app, then checks every
      ten seconds, for at most thirty minutes, whether a captcha is still
      detected. The ``pausing`` event is cleared either when the captcha is
      gone or when the wait times out.
      """
      self.logger.error("验证码处理失败,等待人工处理...")

      # Reset verification state; the app itself is left running.
      self.verification_retry_count = 0
      self.last_verification_type = None
      self.verification_in_progress.clear()

      wait_limit = 30 * 60  # thirty minutes
      began = time.time()
      while time.time() - began < wait_limit:
          still_there, _captcha_type, _xpath = self.is_any_verification_popup_exists()
          if still_there:
              time.sleep(10)
              continue
          # Captcha gone, presumably solved by hand.
          self.logger.info("验证码已消失,人工处理成功")
          time.sleep(3)  # let the page settle
          self.pausing.clear()
          self.logger.info("人工处理完成,放行线程")
          return

      # Timed out: clear the pausing event anyway.
      self.logger.warning("等待人工处理超时,尝试继续执行")
      self.pausing.clear()
      self.logger.warning("已超时,强制清除暂停状态,放行线程")
  def stop(self):
      """Set the ``running`` flag to False."""
      self.running = False
  def get_access_token():
      """Request an OAuth access token from the Baidu AI platform.

      Uses the ``client_credentials`` grant against
      ``aip.baidubce.com/oauth/2.0/token``.

      :return: the ``access_token`` string, or None when the response body
          is not JSON or carries no ``access_token`` field.
      :raises requests.RequestException: on connection errors or when the
          server does not answer within ten seconds.
      """
      # NOTE(review): credentials are hard-coded in source; consider loading
      # them from configuration instead.
      AppKey = "tRK2RhyItCSh6BzyT4CNVXQa"
      AppSrcret = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
      token_url = 'https://aip.baidubce.com/oauth/2.0/token'
      url = f"{token_url}?grant_type=client_credentials&client_id={AppKey}&client_secret={AppSrcret}"
      headers = {
          'Content-Type': 'application/json',
          'Accept': 'application/json',
      }
      # A timeout keeps a stalled connection from hanging the caller forever.
      response = requests.request("POST", url, headers=headers, data="", timeout=10)
      try:
          return response.json()['access_token']
      except (ValueError, KeyError, TypeError):
          # Not JSON, no token field, or an unexpected JSON shape.
          return None
  def get_mysql():
      """Open and return a new pymysql connection built from ``Config.DB_*``.

      The charset is fixed to ``utf8mb4``.
      """
      import pymysql

      settings = {
          'host': Config.DB_HOST,
          'port': Config.DB_PORT,
          'user': Config.DB_USER,
          'password': Config.DB_PASSWORD,
          'db': Config.DB_NAME,
          'charset': 'utf8mb4',
      }
      return pymysql.connect(**settings)
  class TaskReporter:
      """Thread-safe bookkeeping of task progress plus a final result report.

      Per-task records live in ``tasks_data`` (keyed by task id) and every
      read or write of that dict happens under ``lock``. The HTTP report
      itself is sent after the lock has been released, so a slow report
      endpoint cannot stall progress updates from other threads.
      """

      def __init__(self):
          self.tasks_data = {}  # task_id -> record dict
          self.lock = threading.Lock()

      def start_task(self, task_id: int, start_page: int, end_page: int):
          """Create (or overwrite) the record for ``task_id`` as running."""
          with self.lock:
              self.tasks_data[task_id] = {
                  'task_id': task_id,
                  'start_time': int(time.time()),
                  'end_time': None,
                  'start_page': start_page,
                  'end_page': end_page,
                  'actual_end_page': start_page,  # page actually reached
                  'real_count': 0,  # items actually collected
                  'status': 'running',  # running, completed, failed
                  'finish_status': 0,  # 0: unfinished, 1: finished
              }

      def update_task_progress(self, task_id: int,
                               actual_end_page: int = None,
                               real_count: int = None):
          """Update page and/or item count of a known task.

          Arguments left as None are not touched; unknown task ids are
          ignored.
          """
          with self.lock:
              record = self.tasks_data.get(task_id)
              if record is None:
                  return
              if actual_end_page is not None:
                  record['actual_end_page'] = actual_end_page
              if real_count is not None:
                  record['real_count'] = real_count

      def end_task(self, task_id: int, status: str = 'completed',
                   finish_status: int = 0, force_end_page: int = None):
          """Mark a task as finished and send its result report.

          :param status: stored as-is; reported as 3 for ``'completed'``
              and 4 for anything else.
          :param finish_status: stored and reported unchanged.
          :param force_end_page: overrides the recorded end page when given.

          Unknown task ids are ignored and nothing is reported.
          """
          with self.lock:
              record = self.tasks_data.get(task_id)
              if record is None:
                  return
              record['end_time'] = int(time.time())
              record['status'] = status
              record['finish_status'] = finish_status
              if force_end_page is not None:
                  record['actual_end_page'] = force_end_page
              # Snapshot the payload while the record cannot change.
              report_data = {
                  "collect_task_allocate_id": record['task_id'],
                  "status": 3 if record['status'] == 'completed' else 4,
                  "finish_status": record['finish_status'],
                  'real_count': record['real_count'],
                  'start_time': record['start_time'],
                  'end_time': record['end_time'],
                  'start_page': record['start_page'],
                  'end_page': record['actual_end_page'],
              }
          # Network I/O happens outside the lock on purpose.
          self._call_report_api(report_data)

      def _call_report_api(self, data: Dict[str, Any]):
          """POST ``data`` to the report endpoint; failures are only printed."""
          try:
              url = 'http://schedule.dfwy.tech/api/collect_equipment_execute/result_report'
              resp = requests.post(url, json=data, timeout=10)
              if resp.status_code == 200:
                  print(f"任务 {data['collect_task_allocate_id']} 上报成功")
              else:
                  print(f"任务 {data['collect_task_allocate_id']} 上报失败: {resp.status_code}")
          except Exception as e:
              print(f"上报接口调用异常: {e}")


  # Module-wide reporter instance.
  reporter = TaskReporter()
  1446. class MT:
  def __init__(self, key):
      """Set up a scraper instance for one search keyword.

      Fetches a Baidu access token, loads the city-to-province table,
      builds the ``AipOcr`` client, reads the table names from ``Config``
      and zeroes all task counters.

      :param key: search keyword, stored as ``self.search_key``.
      """
      self.package_name = Config.PACKAGE_NAME
      self.access_token = get_access_token()
      self.city2province = self.get_city_info()

      # Baidu OCR client.
      # NOTE(review): credentials are hard-coded in source; consider loading
      # them from configuration instead.
      self.APP_ID = '116857964'
      self.API_KEY = '1gAzACJOAr7BeILKqkqPOETh'
      self.SECRET_KEY = 'ZNArANb9GwJYgLKg4EfYhukKBfPdl1n3'
      self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

      # Target tables.
      self.table_name = Config.DB_AUTO_DRUG_TABLE
      self.shop_table_name = Config.DB_SHOP_TABLE

      self.loggerMT = logging.getLogger()
      self.search_key = key
      self.unrelated_data = 0  # number of unrelated records
      self.shop_data_num = 0  # number of shop records

      # Collection statistics for the current task.
      self.collected_count = 0  # items actually collected
      self.task_id = None
      self.start_time = None
      self.current_page = 0
      self.task_start_page = 0
      self.task_end_page = 0
      self.task_product_name = ''
      self.task_product_specs = ''
  def update_task_status(self, status):
      """Write ``status`` and the current time to this task's allocation row.

      Does nothing when ``self.task_id`` is not set. Database errors are
      logged, not raised.

      :param status: value stored in the ``status`` column.
      """
      if not self.task_id:
          return

      retrieve_conn = None
      cursor = None
      try:
          retrieve_conn = get_retrieve_mysql()
          cursor = retrieve_conn.cursor()
          update_sql = (
              "UPDATE retrieve_collect_task_allocate "
              "SET status = %s, update_time = %s "
              "WHERE id = %s"
          )
          cursor.execute(update_sql, (status, time.time(), self.task_id))
          retrieve_conn.commit()
          self.loggerMT.info(f"任务 {self.task_id} 状态更新为 {status}")
      except Exception as e:
          self.loggerMT.error(f"更新任务状态失败: {e}")
      finally:
          # Close each resource on its own so a failing cursor.close()
          # cannot leave the connection open.
          for resource in (cursor, retrieve_conn):
              if resource is None:
                  continue
              try:
                  resource.close()
              except Exception as close_error:
                  self.loggerMT.error(f"关闭数据库资源失败: {close_error}")
  def stop_app(self):
      """Stop the target app on the device, then wait five seconds."""
      package = self.package_name
      self.d.app_stop(package)
      time.sleep(5)
  def start_app(self):
      """Start the target app on the device, then wait five seconds."""
      package = self.package_name
      self.d.app_start(package)
      time.sleep(5)
  def restart_app(self):
      """Restart the app and run one popup check afterwards.

      Calls ``stop_app`` then ``start_app``, waits five more seconds, then
      calls ``self.monitor.check_and_handle_popup()``.
      """
      for step in (self.stop_app, self.start_app):
          step()
      time.sleep(5)  # extra settling time
      # Always run a popup check after the restart.
      self.monitor.check_and_handle_popup()
  1520. @staticmethod
  1521. def get_sleep_time():
  1522. # return random.randint(5, 8)
  1523. return random.randint(1, 3)
  1524. @staticmethod
  1525. def get_current_date():
  1526. return datetime.datetime.now().strftime('%Y/%m/%d')
  1527. @staticmethod
  1528. def get_city_info():
  1529. """
  1530. 获取所有的省市数据
  1531. :return:
  1532. """
  1533. file_path = '../kailin_city.json'
  1534. with open(file_path, 'r', encoding='utf-8') as f:
  1535. data = json.load(f)
  1536. province = {province_one["id"]: province_one for province_one in data['province']}
  1537. city2province = dict()
  1538. city = data['city']
  1539. for city_one in city:
  1540. name = city_one['name']
  1541. pid = city_one['pid']
  1542. if len(str(pid)) > 2:
  1543. pid = int(re.match('^\d{2}', str(pid)).group())
  1544. city2province[name] = province[pid]['name']
  1545. return city2province
  1546. # 将30分钟后采集的数据上报到服务端
  def up_data_to_service(self, collect_task_allocate_id, status, finish_status, real_count, start_time, end_time,
                         start_page, end_page):
      """POST a task result report to the scheduling service and print the reply.

      The arguments are sent unchanged as the JSON body under keys of the
      same names.

      :raises requests.RequestException: on connection errors or when the
          server does not answer within ten seconds.
      """
      url = 'http://schedule.dfwy.tech/api/collect_equipment_execute/result_report'
      json_data = {
          "collect_task_allocate_id": collect_task_allocate_id,
          "status": status,
          "finish_status": finish_status,
          'real_count': real_count,
          'start_time': start_time,
          'end_time': end_time,
          'start_page': start_page,
          'end_page': end_page,
      }
      # Same timeout as TaskReporter._call_report_api, which posts to this URL.
      resp = requests.post(url, json=json_data, timeout=10)
      print("状态码:", resp.status_code)
      try:
          body = resp.json()
      except ValueError:
          # Non-JSON reply (e.g. an HTML error page): show the raw text.
          body = resp.text
      print("响应体(JSON):\n", body)
  def get_shop_name(self):
      """Read the shop name from the current screen.

      Two layout-specific XPaths are tried in order. If both fail, the shop
      page is opened via ``self.enter_shop()`` and the name is read from its
      header; in that case one back step is performed after a successful read.

      :return: the shop name, or None when no lookup succeeded.
      """
      candidates = (
          ('//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView',
           '获取到店铺名'),
          ('//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView',
           '获取到店铺名2'),
      )
      last_error = None
      for xpath, label in candidates:
          try:
              shop_name = self.d.xpath(xpath).text
              print(f'{label}:{shop_name}')
              return shop_name
          except Exception as e:
              # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
              last_error = e

      # Last resort: open the shop page and read the name from its header.
      print("点击店铺进入后获取店铺名称")
      self.enter_shop()
      shop_xpath = '//*[@resource-id="com.sankuai.meituan:id/layout_header_view"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]//android.widget.FrameLayout[2]/android.widget.FrameLayout[1]/android.widget.TextView'
      if self.d.xpath(shop_xpath).exists:
          shop_name = self.d.xpath(shop_xpath).text
          self.swipe_back(1)
          return shop_name
      print(f'获取店铺名出错:{last_error}')
      return None
  def get_qualification_number(self):
      """Read the qualification number shown on the current screen.

      :return: the number with the label characters and surrounding
          whitespace removed, or None when the element cannot be read.
      """
      number_xpath = '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[2]'
      try:
          qualification_number_str = self.d.xpath(number_xpath).text
          # str.strip treats its argument as a set of characters, so this
          # removes any run of these label characters from both ends.
          return qualification_number_str.strip('资质编号:').strip()
      except Exception:
          # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
          return None
  def get_shop_address(self):
      """Read the shop address from the shop-info container.

      If the first text block contains "发货时间" (delivery time), the next
      block is read instead when it exists.

      :return: the address text, ``''`` when the first element is absent,
          or None on any error.
      """
      container = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]'
      xpath = container + '/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView'
      xpath2 = container + '/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.TextView'
      try:
          if not self.d.xpath(xpath).exists:
              shop_address = ''
              print(f'333-获取到店铺地址:{shop_address}')
              return shop_address

          shop_address = self.d.xpath(xpath).text
          print(f'111-获取到店铺地址:{shop_address}')
          if '发货时间' in shop_address:
              # First block is the delivery-time row; try the second block.
              print(f'店铺地址包含发货时间,再次获取店铺地址')
              if self.d.xpath(xpath2).exists:
                  shop_address = self.d.xpath(xpath2).text
                  print(f'222-获取到店铺地址:{shop_address}')
              else:
                  print(f'222-xpath2获取店铺地址失败')
          return shop_address
      except Exception:
          # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
          print(f'获取店铺地址出错-get_shop_address')
          return None
  def enter_detail(self):
      """Click the first item of the ``recycler`` list, then pause briefly."""
      first_item = self.d.xpath(
          '//*[@resource-id="com.sankuai.meituan:id/recycler"]/android.widget.FrameLayout[1]')
      first_item.click()
      time.sleep(self.get_sleep_time())
  def save_to_database(self, data):
      """Insert one product record into ``self.table_name``.

      A new connection is opened per call and is always closed again, even
      when the insert fails. After a successful commit the collected-item
      counter is incremented and, when a task id is set, pushed to the
      module-level ``reporter``.

      :param data: dict holding every column listed in the INSERT statement.
      :raises KeyError: when ``data`` lacks one of the columns.
      """
      print(f'保存数据到数据库:{data}')
      add_sql = f"""
          INSERT INTO {self.table_name}
          (product, min_price, manufacture_date, expiry_date, shop, business_license_company, province, city, manufacturer, specification, approval_number, product_link, scrape_date, scrape_province, availability, credit_code, platform, search_key)
          VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
      """
      columns = (
          'product', 'min_price', 'manufacture_date', 'expiry_date', 'shop',
          'business_license_company', 'province', 'city', 'manufacturer',
          'specification', 'approval_number', 'product_link', 'scrape_date',
          'scrape_province', 'availability', 'credit_code', 'platform',
          'search_key',
      )
      values = tuple(data[column] for column in columns)

      conn = get_mysql()
      try:
          cur = conn.cursor()
          try:
              cur.execute(add_sql, values)
          finally:
              cur.close()
          conn.commit()
      finally:
          # Without this every saved row leaked one open connection.
          conn.close()
      print(f"存入数据库成功")

      # Keep the collected-item statistics in step with the database.
      self.collected_count += 1
      if self.task_id:
          reporter.update_task_progress(
              task_id=self.task_id,
              real_count=self.collected_count
          )
  def save_shop_info_to_database(self, data):
      """Insert one shop record into ``self.shop_table_name``.

      A new connection is opened per call and is always closed again, even
      when the insert fails.

      :param data: dict holding every column listed in the INSERT statement.
      :raises KeyError: when ``data`` lacks one of the columns.
      """
      print(f'保存店铺数据到数据库:{data}')
      add_sql = f"""
          INSERT INTO {self.shop_table_name}
          (shop, contact_address, qualification_number, business_license_company, business_license_address, scrape_date, platform)
          VALUES (%s, %s, %s, %s, %s, %s, %s)
      """
      columns = (
          'shop', 'contact_address', 'qualification_number',
          'business_license_company', 'business_license_address',
          'scrape_date', 'platform',
      )
      values = tuple(data[column] for column in columns)

      conn = get_mysql()
      try:
          cur = conn.cursor()
          try:
              cur.execute(add_sql, values)
          finally:
              cur.close()
          conn.commit()
      finally:
          # Without this every saved shop leaked one open connection.
          conn.close()
      print(f'存入店铺信息到数据库成功')
  def swipe_up(self):
      """Swipe the screen upwards along its vertical centre line.

      The gesture runs from 100px above the bottom edge to 100px below the
      top edge with a random duration in ``[0, 0.3]``. Roughly 15% of the
      time a small extra upward swipe follows. Ends with a short pause.
      """
      width = self.d.info['displayWidth']
      height = self.d.info['displayHeight']
      centre_x = width // 2
      swipe_duration = random.uniform(0, 0.3)
      self.d.swipe(centre_x, height - 100, centre_x, 100, duration=swipe_duration)
      # Sometimes the list sticks; nudge it a little further up.
      if random.uniform(0, 1) > 0.85:
          self.d.swipe_ext("up", 0.1)
      time.sleep(self.get_sleep_time())
  def swipe_back(self, no):
      """Press the device back key ``no`` times, pausing after each press.

      :param no: number of back presses.
      """
      for _ in range(no):
          self.d.press('back')
          pause = self.get_sleep_time()
          time.sleep(pause)
  def drug_price(self):
      """Read the product price from the first text starting with "¥".

      :return: the first run of digits and dots in that text as a float,
          or None when the element is missing or holds no number.
      """
      try:
          price_str = self.d.xpath('//*[starts-with(@text,"¥")]').text
          # Raw string: '\d' in a normal literal is an invalid escape.
          price = float(re.search(r'[\d\.]+', price_str).group())
          print(f'获取到价格:{price}')
          return price
      except Exception as e:
          print(f'提取价格出错-->{e}')
          return None
  def restart_uiautomator_services(self, device_id):
      """Stop and start the atx-agent server on a device through adb.

      Each command is followed by a short random pause. Command output and
      exit status are ignored, as before.

      :param device_id: adb serial of the target device.
      """
      # Argument lists instead of a shell string: the device id is passed
      # verbatim and never interpreted by a shell.
      start_command = ['adb', '-s', str(device_id), 'shell',
                       '/data/local/tmp/atx-agent', 'server', '-d']
      stop_command = start_command + ['--stop']
      for command in (stop_command, start_command):
          try:
              subprocess.run(command, capture_output=True, text=True)
          except OSError as e:
              # With shell=True a missing adb binary was silently ignored;
              # stay best-effort but leave a trace.
              print(f'adb 命令执行失败: {e}')
          time.sleep(self.get_sleep_time())
  def connect_devices(self, device_id):
      """Connect to a USB device and restart its atx-agent server.

      The connection is stored in ``self.d``.

      :param device_id: adb serial of the device.
      :raises Exception: whatever the connection or restart raised; the
          original exception is re-raised unchanged so its type and
          traceback stay available to the caller.
      """
      try:
          self.d = u2.connect_usb(device_id)
          self.restart_uiautomator_services(device_id)
          print(f'连接到设备:{device_id}')
      except Exception as e:
          print(f'{device_id} 连接错误: {e}')
          raise
  def get_ocr_res(self, img):
      """Run Baidu business-license OCR on an image file.

      :param img: path of the image file.
      :return: dict mapping each key of the response's ``words_result`` to
          its ``words`` value, or None when the file cannot be read, the
          request fails, or the response has an unexpected shape.
      """
      try:
          print(f'开始识别图片:{img}')
          request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
          # The context manager closes the file; it used to stay open.
          with open(img, 'rb') as f:
              encoded_image = base64.b64encode(f.read())
          params = {"image": encoded_image}
          request_url = request_url + "?access_token=" + self.access_token
          headers = {'content-type': 'application/x-www-form-urlencoded'}
          response = requests.post(request_url, data=params, headers=headers)
          if not response:
              return None
          res = response.json()
          new_dic = {field: entry['words'] for field, entry in res['words_result'].items()}
          print('资质数据信息', new_dic)
          return new_dic
      except Exception:
          # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
          return None
  def remove_watermark(self, img_path):
      """Apply a fixed linear contrast stretch to an image and re-encode it.

      Every pixel value ``v`` becomes ``1.4057577998008846 * v -
      38.33089999653017`` clipped to ``[0, 255]``. Per the original
      author's note this is meant to whiten the watermark.

      :param img_path: path of the image file.
      :return: the encoded buffer from ``cv2.imencode``, in the format
          given by the extension of ``img_path``.
      """
      raw_bytes = np.fromfile(img_path, dtype=np.uint8)
      decoded = cv2.imdecode(raw_bytes, -1)
      extension = os.path.splitext(img_path)[1]
      stretched = 1.4057577998008846 * decoded - 38.33089999653017
      adjusted = np.clip(stretched, 0, 255).astype(np.uint8)
      _, img_binary = cv2.imencode(extension, adjusted)
      return img_binary
  def get_ocr_res_image(self, img):
      """Run general OCR on an image after the watermark adjustment.

      The image goes through ``self.remove_watermark`` and is then sent to
      ``self.client.basicGeneral``.

      :param img: path of the image file.
      :return: the response's ``words_result`` value (``''`` when that key
          is absent), or None when any step fails.
      """
      try:
          image = self.remove_watermark(img)
          res_image = self.client.basicGeneral(image)
          data = res_image.get('words_result', '')
          print(f'百度api返回结果:{data}')
          return data
      except Exception:
          # Exception (not a bare except) so Ctrl-C / SystemExit still propagate.
          return None
  def screenshot_the_business_license(self, qualification_number):
      """Take a screenshot and save a fixed-region crop of it.

      The full screenshot is written to ``screenshot1.png``; the crop covers
      rows 480-1420 and columns 0-720.

      :param qualification_number: when truthy, used as the crop's file name
          inside the hard-coded screenshot directory; otherwise the crop is
          written to ``cropped_screenshot.png``.
      :return: path of the cropped image.
      """
      screenshot_path = 'screenshot1.png'
      self.d.screenshot(screenshot_path)
      full_image = cv2.imread(screenshot_path)

      # Crop box as (left, top, right, bottom).
      left, top, right, bottom = 0, 480, 720, 1420
      cropped_img = full_image[top:bottom, left:right]

      cropped_screenshot_path = (
          'D:\\work\\dfwy_spider\\drug_data\\mt\\screenshot\\' + qualification_number + '.png'
          if qualification_number
          else 'cropped_screenshot.png'
      )
      cv2.imwrite(cropped_screenshot_path, cropped_img)
      return cropped_screenshot_path
  def screenshot_instruction(self):
      """Take a screenshot into a uniquely named file.

      The name combines the current time (``HH-MM-SS``) with eight random
      hex characters.

      :return: the path the screenshot was written to.
      """
      time_str = datetime.datetime.now().strftime("%H-%M-%S")
      # Four random bytes give eight hex characters.
      random_str = secrets.token_hex(4)
      print(time_str)
      screenshot_path = '-'.join(('instructionscreenshot1', time_str, random_str)) + '.png'
      self.d.screenshot(screenshot_path)
      return screenshot_path
  def extract_specification(self, text):
      """Return the part of ``text`` before the first "【" bracket.

      :param text: raw specification text.
      :return: the leading part with surrounding whitespace stripped, or
          ``text`` unchanged when it is empty or starts with "【".
      """
      leading = re.search(r'^[^【]+', text)
      return leading.group(0).strip() if leading else text
  1845. # 获取商品title
  1846. def get_title(self):
  1847. # try:
  1848. # title = self.d.xpath(
  1849. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  1850. # except:
  1851. # title = self.d.xpath(
  1852. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView').text
  1853. # title = self.d.xpath('//*[contains(@text, "舒肝颗粒")]').text
  1854. def _inner():
  1855. temp_search_key = self.search_key
  1856. if "999" in self.search_key:
  1857. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1858. temp_search_key = self.search_key.replace("999皮炎平", "")
  1859. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  1860. temp_search_key = self.search_key.replace("999必无忧", "")
  1861. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  1862. temp_search_key = self.search_key.replace("999必无忧", "")
  1863. elif self.search_key == '999速复康布洛芬缓释胶囊':
  1864. temp_search_key = self.search_key.replace("999速复康", "")
  1865. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  1866. temp_search_key = self.search_key.replace("999选平", "")
  1867. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  1868. temp_search_key = self.search_key.replace("999皮炎平", "")
  1869. else:
  1870. temp_search_key = self.search_key.replace("999", "")
  1871. else:
  1872. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  1873. temp_search_key = self.search_key.replace("史达功", "")
  1874. temp_search_key = temp_search_key.replace("120", "")
  1875. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  1876. temp_search_key = self.search_key.replace("三九胃泰", "")
  1877. temp_search_key = temp_search_key.replace("8袋", "")
  1878. elif self.search_key == '今维多赐多康牌蛋白粉':
  1879. temp_search_key = self.search_key.replace("今维多", "")
  1880. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  1881. temp_search_key = self.search_key.replace("佳美舒", "")
  1882. temp_search_key = temp_search_key.replace("4", "")
  1883. elif self.search_key == '三九胃泰颗粒20g*10':
  1884. temp_search_key = self.search_key.replace("20g*10", "")
  1885. elif self.search_key == '三九胃泰颗粒20g*6袋':
  1886. temp_search_key = self.search_key.replace("20g*6袋", "")
  1887. elif self.search_key == '顺峰康王酮康他索乳膏':
  1888. temp_search_key = self.search_key.replace("顺峰康王", "")
  1889. if self.search_key == '999糠酸莫米松凝胶15':
  1890. temp_search_key = temp_search_key.replace("15", "")
  1891. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1892. temp_search_key = temp_search_key.replace("30", "")
  1893. elif self.search_key == '999复方金银花颗粒10g':
  1894. temp_search_key = temp_search_key.replace("10g", "")
  1895. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  1896. temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
  1897. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  1898. temp_search_key = temp_search_key.replace("6粒", "")
  1899. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  1900. temp_search_key = temp_search_key.replace("50", "")
  1901. elif self.search_key == '999止泻利颗粒15g*8':
  1902. temp_search_key = temp_search_key.replace("15g*8", "")
  1903. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  1904. temp_search_key = temp_search_key.replace("30", "")
  1905. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  1906. temp_search_key = temp_search_key.replace("15g", "")
  1907. elif self.search_key == '999复方苦参肠炎康片12片':
  1908. temp_search_key = temp_search_key.replace("12片", "")
  1909. elif self.search_key == '999强力枇杷露16袋':
  1910. temp_search_key = temp_search_key.replace("16袋", "")
  1911. elif self.search_key == '999三蛇胆川贝膏138':
  1912. temp_search_key = temp_search_key.replace("138", "")
  1913. elif self.search_key == '999强力枇杷露120ml':
  1914. temp_search_key = temp_search_key.replace("120ml", "")
  1915. elif self.search_key == '999强力枇杷露150ml':
  1916. temp_search_key = temp_search_key.replace("150ml", "")
  1917. elif self.search_key == '999抗病毒口服液10ml*10':
  1918. temp_search_key = temp_search_key.replace("10ml*10", "")
  1919. elif self.search_key == '999抗病毒口服液10ml*12':
  1920. temp_search_key = temp_search_key.replace("10ml*12", "")
  1921. elif self.search_key == '999糠酸莫米松乳膏10g支':
  1922. temp_search_key = temp_search_key.replace("10g支", "")
  1923. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  1924. temp_search_key = temp_search_key.replace("20g", "")
  1925. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  1926. temp_search_key = temp_search_key.replace("(无糖)6g", "")
  1927. elif self.search_key == '999壮骨关节丸6g*20':
  1928. temp_search_key = temp_search_key.replace("6g*20", "")
  1929. elif self.search_key == '999正天丸6g*15':
  1930. temp_search_key = temp_search_key.replace("6g*15", "")
  1931. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  1932. temp_search_key = temp_search_key.replace("20", "")
  1933. elif self.search_key == '999糠酸莫米松凝胶10':
  1934. temp_search_key = temp_search_key.replace("10", "")
  1935. elif self.search_key == '999板蓝根颗粒10g*20':
  1936. temp_search_key = temp_search_key.replace("10g*20", "")
  1937. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  1938. temp_search_key = temp_search_key.replace("10粒", "")
  1939. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  1940. temp_search_key = temp_search_key.replace("12粒", "")
  1941. elif self.search_key == '999咽炎片0.26g*12片*2板':
  1942. temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
  1943. elif self.search_key == '999小儿止咳糖浆120':
  1944. temp_search_key = temp_search_key.replace("120", "")
  1945. elif self.search_key == '999小儿止咳糖浆225':
  1946. temp_search_key = temp_search_key.replace("225", "")
  1947. elif self.search_key == '999小儿感冒颗粒6g*10':
  1948. temp_search_key = temp_search_key.replace("6g*10", "")
  1949. elif self.search_key == '999小儿感冒颗粒6g*24':
  1950. temp_search_key = temp_search_key.replace("6g*24", "")
  1951. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  1952. temp_search_key = temp_search_key.replace("6g*10袋", "")
  1953. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  1954. temp_search_key = temp_search_key.replace("6g*20袋", "")
  1955. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  1956. temp_search_key = temp_search_key.replace("8g*10袋", "")
  1957. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  1958. temp_search_key = temp_search_key.replace("2.5g*10袋", "")
  1959. elif self.search_key == '999感冒清热颗粒12g*18':
  1960. temp_search_key = temp_search_key.replace("12g*18", "")
  1961. # elif self.search_key == '三九胃泰颗粒':
  1962. # self.search_key = '三九胃泰' #兼容三九胃泰 温胃舒颗粒
  1963. print(f'获取商品title时的搜索关键字:{temp_search_key}')
  1964. # title = self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text
  1965. # 初始化
  1966. drugs_name = ''
  1967. specifications = ''
  1968. title = ''
  1969. # 循环的获取title为了有时间来处理人机验证
  1970. for m in range(1, 6000):
  1971. if self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').exists:
  1972. title = self.safe_exec(
  1973. lambda: self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
  1974. )
  1975. print(f"第{m}次获取title成功")
  1976. break
  1977. else:
  1978. time.sleep(3)
  1979. # return drugs_name, specifications
  1980. # drugs_name = ''
  1981. # specifications = ''
  1982. # try:
  1983. # title_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  1984. # title_xpath_2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  1985. # if self.d.xpath(title_xpath).exists:
  1986. # title = self.d.xpath(title_xpath).text
  1987. # print(f"title_xpath获取的title={title}")
  1988. # if temp_search_key not in title:
  1989. # return drugs_name, specifications
  1990. # elif self.d.xpath(title_xpath_2).exists:
  1991. # title = self.d.xpath(title_xpath_2).text
  1992. # print(f"title_xpath_2获取的title={title}")
  1993. # if temp_search_key not in title:
  1994. # return drugs_name, specifications
  1995. # else:
  1996. # print('title_xpath不存在,请确认')
  1997. # return drugs_name, specifications
  1998. # # title = self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
  1999. # except Exception as e:
  2000. # print(f"发生异常: {e}")
  2001. # return drugs_name, specifications
  2002. # 奇怪:有的时候title取出来的记过第一位会多一个0
  2003. # title = self.safe_exec(self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text)
  2004. # title = self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  2005. title = title[1:] if title.startswith('0') else title
  2006. print(f'获取到药品标题:{title}')
  2007. # 从里面匹配出药品名和规格
  2008. # drugs_name
  2009. # specifications
  2010. # match = re.search(r'([^\d]+)([\d\D]+)', title)
  2011. if self.search_key == '999赐多康大豆':
  2012. return title, '1罐'
  2013. if self.search_key == "999感冒清热颗粒":
  2014. match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
  2015. else:
  2016. match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
  2017. if match:
  2018. # drugs_name = match.group(1).strip() + match.group(2).strip()
  2019. drugs_name = title
  2020. specifications = match.group(3).strip()
  2021. print("药品名:", drugs_name)
  2022. print("规格:", specifications)
  2023. # 如果品规中包含到期则需要再次的正则处理
  2024. if '到期' in specifications:
  2025. specifications = self.extract_specification(specifications)
  2026. # print('完整药名:', drugs_name + specifications)
  2027. return drugs_name, specifications
  2028. else:
  2029. if title == '999抗病毒口服液10ml*12' or title == '999抗病毒口服液':
  2030. drugs_name = title
  2031. specifications = '10ml*12支/盒'
  2032. return drugs_name, specifications
  2033. elif title == '999抗病毒口服液10ml*10':
  2034. drugs_name = title
  2035. specifications = '10ml*10支/盒'
  2036. return drugs_name, specifications
  2037. elif title == '999小柴胡颗粒':
  2038. drugs_name = title
  2039. specifications = '10g*9袋/盒'
  2040. return drugs_name, specifications
  2041. elif title == '999养胃舒颗粒':
  2042. drugs_name = title
  2043. specifications = '10g*6袋/盒'
  2044. return drugs_name, specifications
  2045. elif title == '三九胃泰胶囊':
  2046. drugs_name = title
  2047. specifications = '0.5g*24粒/盒'
  2048. return drugs_name, specifications
  2049. elif title == '999补脾益肠丸':
  2050. drugs_name = title
  2051. specifications = '6g*15袋/盒'
  2052. return drugs_name, specifications
  2053. elif title == '999复方感冒灵颗粒':
  2054. drugs_name = title
  2055. specifications = '14g*9袋/盒'
  2056. return drugs_name, specifications
  2057. else:
  2058. print("没有匹配到预期格式")
  2059. drugs_name = title
  2060. specifications = ''
  2061. return drugs_name, specifications
  2062. # 用 safe_exec 包装内部逻辑,确保验证码阻塞
  2063. return self.safe_exec(_inner)
  def enter_shop(self):
      """Open the shop page from the current screen.

      Taps the element labelled "店铺" and then pauses for
      ``self.get_sleep_time()`` seconds. Per the original note, this is done
      so that the shop's qualification data can be extracted afterwards.

      :return: None
      """
      # An earlier revision tapped "进店" here instead of "店铺".
      self.d.xpath('//*[@text="店铺"]').click()
      time.sleep(self.get_sleep_time())
  def enter_shoper(self):
      """Switch to the "商家" (merchant) tab.

      Polls for the tab up to 10 times, pausing ``self.get_sleep_time()``
      seconds after each miss. Once the tab is visible it is tapped and the
      method pauses again.

      :return: True if the tab was found and tapped, False if it never
          appeared within the 10 polls.
      """
      merchant_tab = '//*[@text="商家"]'
      for attempt in range(10):
          if self.d.xpath(merchant_tab).exists:
              print(f'第{attempt}次商家存在')
              break
          print(f'第{attempt}次商家不存在')
          time.sleep(self.get_sleep_time())
      else:
          # Loop finished without a break: the tab never showed up.
          return False

      self.d.xpath(merchant_tab).click()
      time.sleep(self.get_sleep_time())
      return True
  2092. # 点击查看商家资质
  def scan_shoper_license(self):
      """Open the merchant-qualification page via "查看商家资质".

      Polls for the link up to 10 times, pausing ``self.get_sleep_time()``
      seconds after each miss. If the link shows up it is tapped and the
      method pauses once more; if it never shows up, ``self.swipe_back(1)``
      is called instead.

      :return: None
      """
      license_link = '//*[@text="查看商家资质"]'
      for attempt in range(10):
          if self.d.xpath(license_link).exists:
              print(f'第{attempt}次查看商家资质存在')
              break
          print(f'第{attempt}次查看商家资质不存在')
          time.sleep(self.get_sleep_time())
      else:
          # Link never appeared within the 10 polls.
          self.swipe_back(1)
          return

      self.d.xpath(license_link).click()
      time.sleep(self.get_sleep_time())
  2108. # 验证商品的信息是否在数据库中已存在
  def data_is_exists(self, data):
      """Check whether a scraped product record is already stored.

      Only existence is checked; the matching row itself is never returned.

      :param data: dict of column name -> value. Must contain 'product',
          'min_price', 'shop', 'scrape_date' and 'platform'.
      :return: True if a matching row exists, False if none does, None if a
          required key is missing or the lookup raised.
      """
      required_keys = ('product', 'min_price', 'shop', 'scrape_date', 'platform')
      missing = [key for key in required_keys if key not in data]
      if missing:
          logging.error(f"缺少必要字段: {', '.join(missing)}")
          return None

      try:
          # NOTE(review): the connection is not closed here because it is not
          # visible whether get_mysql() hands out a shared connection; close
          # it too if it turns out to be created per call.
          conn = get_mysql()
          cur = conn.cursor()
          try:
              # Values are bound as parameters; only the table name, taken
              # from self.table_name, is formatted into the statement.
              # SELECT 1 ... LIMIT 1 is enough for an existence check.
              query_sql = """
                  SELECT 1 FROM {}
                  WHERE product = %s
                  AND min_price = %s
                  AND shop = %s
                  AND scrape_date = %s
                  AND platform = %s
                  LIMIT 1
              """.format(self.table_name)
              cur.execute(query_sql, tuple(data[key] for key in required_keys))
              return bool(cur.fetchone())
          finally:
              # Release the cursor whether or not the query succeeded.
              cur.close()
      except Exception as e:
          print(f"MySQL 错误: {str(e)}")
          return None
  2153. # 验证店铺信息是否在数据库中已存在
  def shop_is_exists_database(self, shop):
      """Check whether a shop is already stored in the shop table.

      :param shop: value compared against the ``shop`` column.
      :return: True if a matching row exists, False if none does, None if
          the lookup raised.
      """
      try:
          # NOTE(review): the connection is not closed here because it is not
          # visible whether get_mysql() hands out a shared connection; close
          # it too if it turns out to be created per call.
          conn = get_mysql()
          cur = conn.cursor()
          try:
              # Only the table name, taken from self.shop_table_name, is
              # formatted into the statement; the value is bound.
              query_sql = """
                  SELECT 1 FROM {}
                  WHERE shop = %s
                  LIMIT 1
              """.format(self.shop_table_name)
              # The trailing comma matters: "(shop)" is just the string
              # itself, while DB-API drivers expect a sequence of parameters.
              cur.execute(query_sql, (shop,))
              return bool(cur.fetchone())
          finally:
              # Release the cursor whether or not the query succeeded.
              cur.close()
      except Exception as e:
          print(f"MySQL 错误: {str(e)}")
          return None
  def wait_if_verifying(self, monitor, timeout=120):
      """Block the caller while captcha handling is in progress.

      Sleeps in 1-second steps for as long as ``monitor.pausing.is_set()``
      is true, giving up once ``timeout`` seconds have elapsed.

      :param monitor: object exposing ``pausing.is_set()``
          (presumably a ``threading.Event``; confirm against the monitor class).
      :param timeout: maximum number of seconds to wait.
      :return: None
      """
      # Monotonic clock: the wait must not be stretched or cut short when the
      # system clock is adjusted.
      deadline = time.monotonic() + timeout
      while monitor.pausing.is_set() and time.monotonic() < deadline:
          time.sleep(1)
  2176. # def safe_xpath(self, xpath, timeout=10):
  2177. # """线程安全 xpath 查找"""
  2178. # self.wait_if_verifying(self.monitor)
  2179. # return self.d.xpath(xpath).wait(timeout=timeout)
  def wait_for_ready(self, monitor, timeout=86400):
      """Wait out any captcha handling before a page is processed.

      Sleeps in 1-second steps while ``monitor.pausing.is_set()`` is true
      (bounded by ``timeout`` seconds), then asks the monitor to scan for a
      popup once more.

      :param monitor: object exposing ``pausing.is_set()`` and
          ``check_and_handle_popup()``.
      :param timeout: maximum number of seconds to wait for the flag to clear.
      :return: None
      """
      # Monotonic clock: a day-long timeout must not be skewed by system
      # clock adjustments.
      deadline = time.monotonic() + timeout
      while monitor.pausing.is_set() and time.monotonic() < deadline:
          time.sleep(1)
      # Extra safety net: a captcha may pop up right after the flag was
      # checked, so trigger one more explicit scan.
      monitor.check_and_handle_popup()
  def safe_list(self, xpath, monitor):
      """Return every element matching ``xpath`` once captcha handling is over.

      Calls ``self.wait_for_ready(monitor)`` first, then queries the device.

      :param xpath: XPath expression passed to ``self.d.xpath``.
      :param monitor: captcha monitor forwarded to ``wait_for_ready``.
      :return: whatever ``self.d.xpath(xpath).all()`` returns.
      """
      self.wait_for_ready(monitor)
      return self.d.xpath(xpath).all()
  def safe_exec(self, func, *args, **kwargs):
      """Run ``func`` only after any captcha handling has finished.

      Blocks, with no timeout, for as long as ``self.monitor.pausing.is_set()``
      is true, polling once per second, and then calls ``func``.

      :param func: callable to execute.
      :param args: positional arguments forwarded to ``func``.
      :param kwargs: keyword arguments forwarded to ``func``.
      :return: the value returned by ``func(*args, **kwargs)``.
      """
      # Short forced pause so the monitor thread gets a chance to notice a
      # captcha before the flag is checked.
      time.sleep(0.1)
      while self.monitor.pausing.is_set():
          time.sleep(1)
      return func(*args, **kwargs)
  def get_next_data(self, data, target):
      """Return the 'words' value of the entry that follows ``target``.

      :param data: sequence of dicts, each carrying a 'words' key
          (used here for OCR results).
      :param target: exact 'words' value to look for.
      :return: 'words' of the entry right after the first match that has a
          successor, or None when there is no such entry.
      """
      for idx, entry in enumerate(data):
          if entry['words'] == target and idx + 1 < len(data):
              return data[idx + 1]['words']
      return None
  def delete_instruction_screenshot(self, screenshot_path):
      """Delete a screenshot file, reporting the outcome on stdout.

      Never raises: a missing file and any other failure are only printed.

      :param screenshot_path: path of the file to remove.
      :return: None
      """
      try:
          os.remove(screenshot_path)
      except FileNotFoundError:
          print(f"文件未找到,无法删除:{screenshot_path}")
      except Exception as e:
          print(f"删除文件时出错:{e}")
      else:
          print(f"截图文件已删除:{screenshot_path}")
  2224. '''
  2225. def get_instructions_data(self):
  2226. """
  2227. 确定有说明书之后,提取所有的说明书数据
  2228. :return:
  2229. """
  2230. self.d.xpath('//*[@text="说明"]').click()
  2231. # time.sleep(random.randint(3, 5))
  2232. time.sleep(0.5)
  2233. self.d.xpath('//*[@text="查看详细说明"]').click()
  2234. # time.sleep(random.randint(3, 5))
  2235. time.sleep(0.5)
  2236. self.d.xpath('//*[@text="加载更多"]').click_exists()
  2237. loop_page = 5
  2238. # new_list = list()
  2239. new_list = []
  2240. for i in range(loop_page):
  2241. self.d.xpath('//*[@text="加载更多"]').click_exists()
  2242. time.sleep(0.2)
  2243. if i == 0:
  2244. self.d.swipe(200, 1000, 200, 300, 0.4)
  2245. else:
  2246. self.d.swipe(200, 1000, 200, 62)
  2247. time.sleep(0.2)
  2248. if self.d.xpath('//*[@text="加载更多"]').exists:
  2249. self.d.xpath('//*[@text="加载更多"]').click()
  2250. time.sleep(0.2)
  2251. all_tt = self.d.xpath(
  2252. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup').all()
  2253. for idx in range(1, len(all_tt) + 1):
  2254. all_tt1 = self.d.xpath(
  2255. f'//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[{idx}]//android.widget.TextView').all()
  2256. # print(f'当前说明书列表数据:{all_tt1}')
  2257. for tt in all_tt1:
  2258. if tt.text and tt.text != '展开全文':
  2259. new_list.append(tt.text)
  2260. if i == 0:
  2261. height = 938
  2262. else:
  2263. drug_box = self.d.xpath(
  2264. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]').info
  2265. bounds = drug_box['bounds']
  2266. height = bounds['bottom'] - bounds['top']
  2267. if height < 938:
  2268. # print('说明书翻页到底部')
  2269. break
  2270. # 展开全文
  2271. new_list = [item for item in new_list if item != '展开全文']
  2272. print(f'当前说明书列表数据:{new_list}')
  2273. # expiry_date_index = next(idx for idx, i in enumerate(new_list) if i == '有效期')
  2274. # manufacturer_index = next(idx for idx, i in enumerate(new_list) if i == '生产单位')
  2275. # approval_number_index = next(idx for idx, i in enumerate(new_list) if i == '批准文号')
  2276. # res_data = {
  2277. # "有效期": new_list[expiry_date_index + 1],
  2278. # "生产单位": new_list[manufacturer_index + 1],
  2279. # "批准文号": new_list[approval_number_index + 1]
  2280. # }
  2281. res_data = {
  2282. "有效期": (new_list[new_list.index("有效期") + 1]) if "有效期" in new_list and new_list.index("有效期") + 1 < len(new_list) else "",
  2283. "生产单位": (new_list[new_list.index("生产单位") + 1]) if "生产单位" in new_list and new_list.index("生产单位") + 1 < len(new_list) else "",
  2284. "批准文号": (new_list[new_list.index("批准文号") + 1]) if "批准文号" in new_list and new_list.index("批准文号") + 1 < len(new_list) else ""
  2285. }
  2286. print(f'当前说明书字典数据:{res_data}')
  2287. return res_data
  2288. '''
  2289. '''
  2290. def get_instructions_data(self):
  2291. """
  2292. 确定有说明书之后,提取所有的说明书数据
  2293. :return:
  2294. """
  2295. self.d.xpath('//*[@text="说明"]').click()
  2296. # time.sleep(random.randint(3, 5))
  2297. time.sleep(0.5)
  2298. self.d.xpath('//*[@text="查看详细说明"]').click()
  2299. # time.sleep(random.randint(3, 5))
  2300. time.sleep(0.5)
  2301. # 1) 先向上滑动一次,触发“加载更多”出现
  2302. self.d.swipe(200, 1000, 200, 300, 0.4)
  2303. time.sleep(0.3)
  2304. # 2) 再进入“出现就点”的循环
  2305. while self.d.xpath('//*[@text="加载更多"]').click_exists(timeout=1):
  2306. time.sleep(0.2)
  2307. self.d.swipe(200, 1000, 200, 300, 0.4)
  2308. # self.d.swipe(200, 1000, 200, 62)
  2309. time.sleep(0.2)
  2310. # 一次性获取所有文本
  2311. texts = [
  2312. node.text.strip()
  2313. # for node in self.d.xpath('//android.widget.TextView').all()
  2314. for node in self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.TextView').all()
  2315. if node.text and node.text.strip() and node.text != '加载更多'
  2316. ]
  2317. print(f'当前说明书列表数据:{texts}')
  2318. # 提取关键字段
  2319. def safe_get(key):
  2320. # try:
  2321. # idx = texts.index(key)
  2322. # return texts[idx + 1] if idx + 1 < len(texts) else ""
  2323. # except ValueError:
  2324. # return ""
  2325. try:
  2326. idx = next(i for i, text in enumerate(texts) if text == key)
  2327. return texts[idx + 1] if idx + 1 < len(texts) else ""
  2328. except StopIteration:
  2329. return ""
  2330. res_data = {
  2331. "有效期": safe_get("有效期"),
  2332. "生产单位": safe_get("生产单位"),
  2333. "批准文号": safe_get("批准文号")
  2334. }
  2335. print(f'当前说明书字典数据:{res_data}')
  2336. return res_data
  2337. '''
  2338. '''
  2339. def get_instructions_data(self):
  2340. """
  2341. 说明书键值对采集:连续两个 TextView 为一对,精确提取
  2342. """
  2343. # 1. 进入说明书
  2344. self.d(text="说明").click()
  2345. time.sleep(0.5)
  2346. self.d(text="查看详细说明").click()
  2347. time.sleep(0.5)
  2348. # self.d(text="加载更多").click_exists(timeout=0.5)
  2349. # 2. 找到说明书最外层 ScrollView(页面主体)
  2350. scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.ScrollView")
  2351. count = scroll_view.count
  2352. print(f"找到的 ScrollView 数量: {count}")
  2353. if not scroll_view.exists:
  2354. return {"有效期": "", "生产单位": "", "批准文号": ""}
  2355. # 3. 在 ScrollView 内再定位真正包含键值对的容器
  2356. # 绝大多数美团说明书页面对应的是 ScrollView > ViewGroup > 若干 TextView
  2357. kv_container = scroll_view.child(className="android.view.ViewGroup")
  2358. if not kv_container.exists:
  2359. kv_container = scroll_view # 降级:直接对 ScrollView 取子孙 TextView
  2360. # 4. 滑动到底并收集所有 TextView(保留顺序)
  2361. all_texts = []
  2362. max_swipe = 5
  2363. last_length = 0
  2364. for _ in range(max_swipe):
  2365. texts = kv_container.child(className="android.widget.TextView")
  2366. #获取texts中的文本
  2367. print(f'当前说明书列表数据:{texts}')
  2368. current_texts = []
  2369. self.loggerMT.info(f'说明书111')
  2370. for tv in texts:
  2371. try:
  2372. txt = tv.get_text().strip()
  2373. # txt = tv.info['text'].strip()
  2374. except Exception:
  2375. continue
  2376. if txt and txt != "展开全文":
  2377. current_texts.append(txt)
  2378. self.loggerMT.info(f'说明书222')
  2379. print(f'当前说明书列表数据:{current_texts}')
  2380. # 去重
  2381. if current_texts:
  2382. current_texts = [t for t in current_texts if t not in all_texts]
  2383. all_texts.extend(current_texts)
  2384. # 判断是否到底
  2385. # if not scroll_view.info.get("scrollable"):
  2386. # break
  2387. # 判断是否到底
  2388. if len(all_texts) == last_length:
  2389. break
  2390. last_length = len(all_texts)
  2391. # self.d.swipe_ext("up", scale=0.7)
  2392. #向上滑动一次
  2393. self.d.swipe(200, 1000, 200, 300, 0.2)
  2394. time.sleep(0.2)
  2395. if self.d.xpath('//*[@text="加载更多"]').exists:
  2396. self.d.xpath('//*[@text="加载更多"]').click()
  2397. # 5. 成对解析
  2398. res_data = {"有效期": "", "生产单位": "", "批准文号": ""}
  2399. for i in range(len(all_texts) - 1):
  2400. key = all_texts[i]
  2401. val = all_texts[i + 1]
  2402. if key in res_data:
  2403. res_data[key] = val
  2404. print(f'说明书文本共 {len(all_texts)} 条,提取结果: {res_data}')
  2405. # time.sleep(1000000)
  2406. return res_data
  2407. '''
  def get_instructions_data(self):
      """Extract shelf life, manufacturer and approval number from the
      product's instruction sheet.

      Steps, all driven through ``self.d``:

      1. Tap "说明", then open the full sheet through "查看详细说明" or,
         when that is absent, through "查看全部" (looked for up to 8 times
         with ``swipe_ext('down', 0.3)`` between checks).
      2. Tap "加载更多" if it is found within 8 attempts, swiping between
         attempts.
      3. Swipe, at most 10 times, until both "生产单位" and "批准文号" are
         on screen.
      4. Take a screenshot, run OCR on it, and read the entry that follows
         each label.

      :return: dict with keys "有效期", "生产单位" and "批准文号". A value is
          '' when the sheet could not be opened, OCR returned nothing, or the
          label was not found in the OCR result.
      """
      self.d.xpath('//*[@text="说明"]').click()
      time.sleep(0.5)

      if self.d.xpath('//*[@text="查看详细说明"]').exists:
          self.d.xpath('//*[@text="查看详细说明"]').click()
      else:
          for i in range(8):
              if self.d.xpath('//*[@text="查看全部"]').exists:
                  print('开始点击查看全部')
                  break
              self.d.swipe_ext('down', 0.3)
              time.sleep(1)
              if self.d.xpath('//*[@text="查看全部"]').exists:
                  print('开始点击查看全部2')
                  break
          if self.d.xpath('//*[@text="查看全部"]').exists:
              self.d.xpath('//*[@text="查看全部"]').click()
          else:
              # Neither entry point exists: no sheet can be read.
              res_data = {
                  "有效期": '',
                  "生产单位": '',
                  "批准文号": ''
              }
              self.loggerMT.info('获取到的说明书信息为空。')
              return res_data
      time.sleep(0.5)

      # Expand the sheet if a "加载更多" control can be found.
      for ii in range(8):
          if self.d.xpath('//*[@text="加载更多"]').exists:
              self.d.xpath('//*[@text="加载更多"]').click()
              time.sleep(0.2)
              break
          self.d.swipe(200, 1000, 200, 300, 0.3)

      # Bring both labels onto the screen before taking the screenshot.
      for iii in range(10):
          if self.d.xpath('//*[@text="生产单位"]').exists and self.d.xpath('//*[@text="批准文号"]').exists:
              break
          self.d.swipe(200, 1300, 200, 300, 0.3)

      instruction_path = self.screenshot_instruction()
      print(f"instruction_path= {instruction_path}")
      try:
          time.sleep(2)
          ocr_res = self.get_ocr_res_image(instruction_path)
          if ocr_res:
              # get_next_data yields None for a missing label; normalise to
              # '' so every failure path returns the same empty value.
              validity = self.get_next_data(ocr_res, '有效期') or ''
              approval_number = self.get_next_data(ocr_res, '批准文号') or ''
              manufacturer = self.get_next_data(ocr_res, '生产单位') or ''
          else:
              validity = ''
              approval_number = ''
              manufacturer = ''
          res_data = {
              "有效期": validity,
              "生产单位": manufacturer,
              "批准文号": approval_number
          }
          print(f"res_data={res_data}")
          time.sleep(1)
          return res_data
      finally:
          # Remove the screenshot even when OCR raises, so files do not pile up.
          self.delete_instruction_screenshot(instruction_path)
  def has_instructions(self):
      """Report whether the product page offers an instruction sheet.

      After an initial pause of ``self.get_sleep_time()`` seconds, looks for
      the "说明" element up to 8 times. Between checks it calls
      ``swipe_ext('down', 0.3)``, waits one second and checks again.
      Products without a sheet cannot have their details collected.

      :return: True as soon as "说明" is found, False if it never appears.
      """
      time.sleep(self.get_sleep_time())
      label = '//*[@text="说明"]'
      for attempt in range(8):
          if self.d.xpath(label).exists:
              print(f"第{attempt}次有说明书1")
              return True
          self.d.swipe_ext('down', 0.3)
          time.sleep(1)
          if self.d.xpath(label).exists:
              print(f"第{attempt}次有说明书2")
              return True
      return False
  def has_shop(self):
      """Report whether the "进店" (enter shop) button is on screen.

      Pauses for ``self.get_sleep_time()`` seconds before checking.

      :return: the ``exists`` value of the "进店" element lookup.
      """
      time.sleep(self.get_sleep_time())
      return self.d.xpath('//*[@text="进店"]').exists
  2519. # 获取商品对应的店铺信息
  def get_license_info_ex(self):
      """Collect the address and business-licence details of the current shop.

      Every device step runs through ``self.safe_exec`` so that it waits for
      captcha handling. The method enters the shop and its merchant tab,
      reads the shop address, opens the qualification page, reads the
      qualification number and, if one was found, screenshots the business
      licence and runs OCR on it.

      :return: dict with keys 'contact_address', 'qualification_number',
          'business_license_company' and 'business_license_address'. Fields
          that could not be obtained are ''.
      """
      license_info_data = {
          'contact_address': '',
          'qualification_number': '',
          'business_license_company': '',
          'business_license_address': '',
      }

      self.safe_exec(self.enter_shop)
      if not self.safe_exec(self.enter_shoper):
          # Merchant tab never appeared: nothing can be collected.
          return license_info_data

      # Give the qualification link up to 10 polls to render before the
      # address is read.
      for i in range(10):
          if self.d.xpath('//*[@text="查看商家资质"]').exists:
              print(f"第{i}次有商家资质")
              break
          print(f"第{i}次没有商家资质")
          time.sleep(self.get_sleep_time())

      license_info_data['contact_address'] = self.safe_exec(self.get_shop_address)
      self.safe_exec(self.scan_shoper_license)
      qualification_number = self.safe_exec(self.get_qualification_number)
      if not qualification_number:
          # Without a qualification number only the address is reported.
          return license_info_data
      license_info_data['qualification_number'] = qualification_number

      # Tap at relative screen coordinates (presumably the business-licence
      # entry; confirm on the device).
      self.d.click(0.603, 0.27)
      time.sleep(self.get_sleep_time())
      cropped_screenshot_path = self.screenshot_the_business_license(qualification_number)
      print(f'cropped_screenshot_path:{cropped_screenshot_path}')
      ocr_res = self.get_ocr_res(cropped_screenshot_path)
      print(f'ocr_res:{ocr_res}')
      if ocr_res:
          license_info_data['business_license_company'] = ocr_res.get('单位名称', '')
          license_info_data['business_license_address'] = ocr_res.get('地址', '')
      return license_info_data
  2576. """暂不用该功能
  2577. def get_license_info(self):
  2578. self.enter_shop()
  2579. self.enter_shoper()
  2580. self.scan_shoper_license()
  2581. # 获取资质编码
  2582. qualification_number = self.get_qualification_number()
  2583. if qualification_number:
  2584. table_license_info = self.get_table_license_info(qualification_number)
  2585. if table_license_info:
  2586. return {
  2587. '单位名称': table_license_info[0],
  2588. '地址': table_license_info[1],
  2589. '社会信用代码': table_license_info[2]
  2590. }
  2591. else:
  2592. # operate_no = random.randint(0, 1)
  2593. self.d.click(0.603, 0.27)
  2594. # if operate_no == 0:
  2595. # self.d.xpath('//*[@text="营业执照"]').click()
  2596. # else:
  2597. # self.d.click(0.603, 0.27)
  2598. time.sleep(self.get_sleep_time())
  2599. self.screenshot_the_business_license()
  2600. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  2601. return ocr_res
  2602. # operate_no = random.randint(0, 1)
  2603. self.d.click(0.603, 0.27)
  2604. # if operate_no == 0:
  2605. # self.d.xpath('//*[@text="营业执照"]').click()
  2606. # else:
  2607. # self.d.click(0.603, 0.27)
  2608. time.sleep(self.get_sleep_time())
  2609. self.screenshot_the_business_license()
  2610. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  2611. return ocr_res
  2612. """
  2613. def distinct_target(self):
  2614. result = False
  2615. position_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]'
  2616. position_xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]'
  2617. is_position = self.d.xpath(position_xpath).exists
  2618. is_position2 = self.d.xpath(position_xpath2).exists
  2619. xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2620. xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2621. xpath3 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2622. xpath4 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2623. is_position5 = self.d.xpath(xpath).exists
  2624. is_position6 = self.d.xpath(xpath2).exists
  2625. is_position7 = self.d.xpath(xpath3).exists
  2626. is_position8 = self.d.xpath(xpath4).exists
  2627. # print(f"is_position = {is_position}")
  2628. # print(f"is_position2 = {is_position2}")
  2629. if is_position or is_position2 or is_position5 or is_position6 or is_position7 or is_position8:
  2630. result = True
  2631. return result
  2632. # return is_position
  2633. def click_element_with_retry(self, xpath, max_retries=5, timeout=5):
  2634. """
  2635. 带重试机制的点击函数
  2636. """
  2637. for attempt in range(max_retries):
  2638. try:
  2639. if self.d.xpath(xpath).exists:
  2640. self.d.xpath(xpath).click()
  2641. print(f"第{attempt + 1}次尝试点击成功")
  2642. return True
  2643. else:
  2644. print(f"第{attempt + 1}次尝试:元素不存在")
  2645. except Exception as e:
  2646. print(f"第{attempt + 1}次尝试失败: {e}")
  2647. if attempt < max_retries - 1:
  2648. time.sleep(1) # 等待1秒后重试
  2649. print(f"经过{max_retries}次尝试后点击失败")
  2650. return False
  2651. def enter_target_page(self):
  2652. self.d.xpath('//*[@content-desc="看病买药"]').click()
  2653. time.sleep(self.get_sleep_time())
  2654. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/vf_search_carousel_text"]').click()
  2655. time.sleep(self.get_sleep_time())
  2656. self.d.xpath(
  2657. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]').click()
  2658. time.sleep(self.get_sleep_time())
  2659. self.d.send_keys(self.search_key, clear=True)
  2660. time.sleep(self.get_sleep_time())
  2661. self.d.xpath('//*[@text="搜索"]').click()
  2662. time.sleep(self.get_sleep_time())
  2663. # content_frame = self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]').exists
  2664. # print(content_frame)
  2665. # position_xpath1 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]'
  2666. # position_xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]'
  2667. # if self.d.xpath(position_xpath1).exists:
  2668. # print("position_xpath1 exist")
  2669. # elif self.d.xpath(position_xpath2).exists:
  2670. # print("position_xpath2 exist")
  2671. # else:
  2672. # print("position_xpath not exist")
  2673. # time.sleep(10000)
  2674. # 增加点击快递送
  2675. self.click_express_send()
  2676. time.sleep(self.get_sleep_time())
  2677. # 增加点击价格每次进来都需要点击,为了配合从多少页到多少页的采集
  2678. # 使用
  2679. self.click_element_with_retry('//*[@text="价格"]')
  2680. # self.d.xpath('//*[@text="价格"]').click()
  2681. time.sleep(self.get_sleep_time())
  2682. def click_express_send(self):
  2683. # xpath= '//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.HorizontalScrollView[last()]'
  2684. slide_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2685. slide_xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2686. slide_xpath3 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2687. slide_xpath4 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
  2688. for i in range(1, 3):
  2689. if self.d.xpath(slide_xpath).exists:
  2690. bounds = self.d.xpath(slide_xpath).info['bounds']
  2691. top = bounds['top']
  2692. bottom = bounds['bottom']
  2693. print(f'top={top}')
  2694. print(f'bottom={bottom}')
  2695. y = (top + bottom) // 2
  2696. print(f'y={y}')
  2697. self.loggerMT.info('开始滑动1')
  2698. self.d.swipe(500, y, 100, y, 0.5)
  2699. time.sleep(self.get_sleep_time())
  2700. break
  2701. elif self.d.xpath(slide_xpath2).exists:
  2702. bounds = self.d.xpath(slide_xpath2).info['bounds']
  2703. top = bounds['top']
  2704. bottom = bounds['bottom']
  2705. print(f'top={top}')
  2706. print(f'bottom={bottom}')
  2707. y = (top + bottom) // 2
  2708. print(f'y={y}')
  2709. self.loggerMT.info('开始滑动2')
  2710. self.d.swipe(500, y, 100, y, 0.5)
  2711. time.sleep(self.get_sleep_time())
  2712. break
  2713. elif self.d.xpath(slide_xpath3).exists:
  2714. bounds = self.d.xpath(slide_xpath3).info['bounds']
  2715. top = bounds['top']
  2716. bottom = bounds['bottom']
  2717. print(f'top={top}')
  2718. print(f'bottom={bottom}')
  2719. y = (top + bottom) // 2
  2720. print(f'y={y}')
  2721. self.loggerMT.info('开始滑动3')
  2722. self.d.swipe(500, y, 100, y, 0.5)
  2723. time.sleep(self.get_sleep_time())
  2724. break
  2725. elif self.d.xpath(slide_xpath4).exists:
  2726. bounds = self.d.xpath(slide_xpath4).info['bounds']
  2727. top = bounds['top']
  2728. bottom = bounds['bottom']
  2729. print(f'top={top}')
  2730. print(f'bottom={bottom}')
  2731. y = (top + bottom) // 2
  2732. print(f'y={y}')
  2733. self.loggerMT.info('开始滑动4')
  2734. self.d.swipe(500, y, 100, y, 0.5)
  2735. time.sleep(self.get_sleep_time())
  2736. break
  2737. max_retry = 5 # 最多尝试次数
  2738. for idx in range(1, max_retry + 1):
  2739. # xpath= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]'
  2740. xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2741. xpath2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2742. xpath3 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2743. xpath4 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2744. # print(f"xpath:{xpath}")
  2745. # scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.HorizontalScrollView")
  2746. if self.d.xpath(xpath).exists:
  2747. self.d.xpath(xpath).click()
  2748. # time.sleep(self.get_sleep_time())
  2749. print(f"第{idx}次点击xpath快递送成功")
  2750. time.sleep(self.get_sleep_time())
  2751. break
  2752. elif self.d.xpath(xpath2).exists:
  2753. self.d.xpath(xpath2).click()
  2754. # time.sleep(self.get_sleep_time())
  2755. print(f"第{idx}次点击xpath2快递送成功")
  2756. time.sleep(self.get_sleep_time())
  2757. break
  2758. elif self.d.xpath(xpath3).exists:
  2759. self.d.xpath(xpath3).click()
  2760. # time.sleep(self.get_sleep_time())
  2761. print(f"第{idx}次点击xpath3快递送成功")
  2762. time.sleep(self.get_sleep_time())
  2763. break
  2764. elif self.d.xpath(xpath4).exists:
  2765. self.d.xpath(xpath4).click()
  2766. # time.sleep(self.get_sleep_time())
  2767. print(f"第{idx}次点击xpath4快递送成功")
  2768. time.sleep(self.get_sleep_time())
  2769. break
  2770. else:
  2771. print(f"第{idx}次点击xpath或xpath2或xpath3快递送都失败")
  2772. time.sleep(self.get_sleep_time())
  2773. # xpath2= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
  2774. # if self.d.xpath(xpath2).exists:
  2775. # self.d.xpath(xpath2).click()
  2776. # print(f"第{idx}次点击xpath2快递送成功")
  2777. # time.sleep(self.get_sleep_time())
  2778. # break
  2779. """暂不用该功能
  2780. def get_table_license_info(self, qualification_number):
  2781. try:
  2782. sql = f'select business_license_company,city,credit_code from mt_drug where credit_code = "{qualification_number}"'
  2783. self.mysql_client.cur.execute(sql)
  2784. res = self.mysql_client.cur.fetchone()
  2785. return res
  2786. except:
  2787. return None
  2788. """
  2789. # def get_clipboard(self):
  2790. # """通过ADB获取Android手机剪贴板内容"""
  2791. # try:
  2792. # result = subprocess.run(
  2793. # ["adb", "shell", "am", "broadcast", "-a", "clipper.get"],
  2794. # capture_output=True,
  2795. # text=True,
  2796. # timeout=5
  2797. # )
  2798. # print(f"获取剪贴板结果: {result.stdout}")
  2799. # # 解析返回信息中的剪贴板内容
  2800. # for line in result.stdout.splitlines():
  2801. # if "data=" in line:
  2802. # return line.split("data=")[1].strip()
  2803. # return ""
  2804. # except Exception as e:
  2805. # print("获取剪贴板失败:", e)
  2806. # return ""
  2807. # def get_clipboard(self):
  2808. # """读取 Android 剪贴板(系统自带命令)"""
  2809. # try:
  2810. # text = subprocess.check_output(
  2811. # ["adb", "shell", "cmd", "clipboard", "get"],
  2812. # text=True, timeout=5, stderr=subprocess.STDOUT
  2813. # ).strip()
  2814. # print(f"获取剪贴板结果: {text}")
  2815. # return text if text else ""
  2816. # except Exception as e:
  2817. # print("获取剪贴板失败:", e)
  2818. # return ""
  2819. def get_clipboard(self):
  2820. time.sleep(1)
  2821. self.loggerMT.info(f"Clipboard content:{self.d.clipboard}") # 打印调试信息
  2822. clipboard_content = self.d.clipboard
  2823. if clipboard_content is None:
  2824. return ''
  2825. return clipboard_content.strip()
  2826. # return self.d.clipboard.strip()
  2827. def clear_clipboard(self):
  2828. self.d.set_clipboard("", "text/plain")
  2829. # def clear_clipboard(self):
  2830. # """清空手机剪贴板:写入空字符串(subprocess 版)"""
  2831. # try:
  2832. # subprocess.run(
  2833. # ["adb", "shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", " "],
  2834. # check=True,
  2835. # capture_output=True,
  2836. # text=True,
  2837. # timeout=5
  2838. # )
  2839. # except subprocess.CalledProcessError as e:
  2840. # print("ADB 清空失败:", e.stderr)
  2841. # def clear_clipboard():
  2842. # """清空手机剪贴板:写入空字符串"""
  2843. # try:
  2844. # adb_shell(["shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", ""])
  2845. # except subprocess.CalledProcessError as e:
  2846. # print("ADB 清空失败:", e.output)
  2847. # 获取一个商品的数据、商品对应的店铺的数据
  2848. def get_product_link(self):
  2849. product_link = ''
  2850. # 两种可能的“···”按钮
  2851. dots_xpaths = [
  2852. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
  2853. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
  2854. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]'
  2855. ]
  2856. max_retry = 5 # 最多尝试次数
  2857. for idx in range(1, max_retry + 1):
  2858. if product_link: # 已经拿到则退出
  2859. break
  2860. for xp in dots_xpaths:
  2861. if self.d.xpath(xp).exists:
  2862. print(f'{idx}-进入分享点点点')
  2863. self.loggerMT.info(f'{idx}-进入分享点点点')
  2864. # #先清空剪贴板的内容
  2865. # self.clear_clipboard()
  2866. # print("清空剪贴板内容成功。")
  2867. self.d.xpath(xp).click()
  2868. time.sleep(0.2)
  2869. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2870. time.sleep(0.2)
  2871. link_xpath = '//*[@text="复制链接"]'
  2872. if self.d.xpath(link_xpath).exists:
  2873. self.d.xpath(link_xpath).click()
  2874. time.sleep(1)
  2875. product_link = self.get_clipboard()
  2876. time.sleep(0.5)
  2877. print(f'{idx}-商品链接:{product_link}')
  2878. self.loggerMT.info(f'{idx}-商品链接:{product_link}')
  2879. break # 找到并执行后跳出内层循环
  2880. else:
  2881. print(f'{idx}-商品链接:{product_link}')
  2882. self.loggerMT.info(f'{idx}-商品链接:{product_link}')
  2883. product_link = ''
  2884. # self.d.xpath('//*[@text="复制链接"]').click_exists()
  2885. # time.sleep(1)
  2886. # product_link = self.get_clipboard()
  2887. # time.sleep(0.5)
  2888. # print(f'{idx}-商品链接:{product_link}')
  2889. # self.loggerMT.info(f'{idx}-商品链接:{product_link}')
  2890. # break # 找到并执行后跳出内层循环
  2891. if not product_link and idx < max_retry:
  2892. time.sleep(0.5) # 最后一次不需要再等待
  2893. return product_link
  2894. def integrate_data(self):
  2895. # 测试说明书详情:
  2896. # instructions_info = self.safe_exec(self.get_instructions_data)
  2897. # time.sleep(1000000)
  2898. # 测试店铺信息
  2899. # license_info = self.safe_exec(self.get_license_info_ex)
  2900. # time.sleep(1000000)
  2901. # 测试定位地址
  2902. # 获取链接开始
  2903. # self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  2904. # 1、点击页面的... 先判断元素是否存在
  2905. '''
  2906. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2907. print('1-进入分享点点点111')
  2908. self.loggerMT.info('1-进入分享点点点111')
  2909. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2910. #点击分享商品
  2911. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2912. time.sleep(0.2)
  2913. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2914. time.sleep(0.2)
  2915. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2916. time.sleep(1)
  2917. #获取剪切板的数据
  2918. product_link = self.get_clipboard()
  2919. time.sleep(0.5)
  2920. print(f'1-商品链接:{product_link}')
  2921. self.loggerMT.info(f'1-商品链接:{product_link}')
  2922. #清空剪切板
  2923. # self.clear_clipboard()
  2924. # if self.d.xpath('//*[@text="加载更多"]').click_exists():
  2925. # self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2926. # if self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').exists:
  2927. # self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').click()
  2928. # #获取剪切板的数据
  2929. # product_link = self.get_clipboard()
  2930. # time.sleep(0.5)
  2931. # print(f'商品链接:{product_link}')
  2932. # #清空剪切板
  2933. # self.clear_clipboard()
  2934. # else:
  2935. # print('未找到分享按钮111')
  2936. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2937. print('1-进入分享点点点222')
  2938. self.loggerMT.info('1-进入分享点点点222')
  2939. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2940. time.sleep(0.2)
  2941. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2942. time.sleep(0.2)
  2943. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2944. time.sleep(1)
  2945. #获取剪切板的数据
  2946. product_link = self.get_clipboard()
  2947. time.sleep(0.5)
  2948. print(f'1-商品链接:{product_link}')
  2949. self.loggerMT.info(f'1-商品链接:{product_link}')
  2950. #如果为获取到product_link 则等待0.5秒再获取
  2951. if not product_link:
  2952. time.sleep(0.5)
  2953. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2954. print('2-进入分享点点点111')
  2955. self.loggerMT.info('2-进入分享点点点111')
  2956. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2957. #点击分享商品
  2958. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2959. time.sleep(0.2)
  2960. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2961. time.sleep(0.2)
  2962. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2963. time.sleep(1)
  2964. #获取剪切板的数据
  2965. product_link = self.get_clipboard()
  2966. time.sleep(0.5)
  2967. print(f'2-商品链接:{product_link}')
  2968. self.loggerMT.info(f'2-商品链接:{product_link}')
  2969. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2970. print('2-进入分享点点点222')
  2971. self.loggerMT.info('2-进入分享点点点222')
  2972. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2973. time.sleep(0.2)
  2974. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2975. time.sleep(0.2)
  2976. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2977. time.sleep(1)
  2978. #获取剪切板的数据
  2979. product_link = self.get_clipboard()
  2980. time.sleep(0.5)
  2981. print(f'2-商品链接:{product_link}')
  2982. self.loggerMT.info(f'2-商品链接:{product_link}')
  2983. #如果为获取到product_link 则等待0.5秒再获取
  2984. if not product_link:
  2985. time.sleep(0.5)
  2986. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2987. print('3-进入分享点点点111')
  2988. self.loggerMT.info('3-进入分享点点点111')
  2989. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2990. #点击分享商品
  2991. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2992. time.sleep(0.2)
  2993. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2994. time.sleep(0.2)
  2995. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2996. time.sleep(1)
  2997. #获取剪切板的数据
  2998. product_link = self.get_clipboard()
  2999. time.sleep(0.5)
  3000. print(f'3-商品链接:{product_link}')
  3001. self.loggerMT.info(f'3-商品链接:{product_link}')
  3002. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  3003. print('3-进入分享点点点222')
  3004. self.loggerMT.info('3-进入分享点点点222')
  3005. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  3006. time.sleep(0.2)
  3007. self.d.xpath('//*[@text="分享商品"]').click_exists()
  3008. time.sleep(0.2)
  3009. self.d.xpath('//*[@text="复制链接"]').click_exists()
  3010. time.sleep(1)
  3011. #获取剪切板的数据
  3012. product_link = self.get_clipboard()
  3013. time.sleep(0.5)
  3014. print(f'3-商品链接:{product_link}')
  3015. self.loggerMT.info(f'3-商品链接:{product_link}')
  3016. '''
  3017. # 获取链接结束
  3018. """
  3019. 整合数据
  3020. :return:
  3021. """
  3022. # title_info = self.get_title() # 药品,规格
  3023. # title_info = self.safe_exec(self.get_title) # 药品,规格
  3024. product, specifications = self.safe_exec(self.get_title) # 药品,规格
  3025. if product:
  3026. # product, specifications = title_info
  3027. # 如果关键字包含999 则 product必须包含999 和 999后面的那段字符串 ps 999感冒灵颗粒必须包含:"999"和"感冒灵颗粒"
  3028. if '999' in self.search_key:
  3029. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  3030. temp_search_key = self.search_key.replace('999皮炎平', '')
  3031. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  3032. temp_search_key = self.search_key.replace('999必无忧', '')
  3033. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  3034. temp_search_key = self.search_key.replace('999必无忧', '')
  3035. elif self.search_key == '999速复康布洛芬缓释胶囊':
  3036. temp_search_key = self.search_key.replace('999速复康', '')
  3037. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  3038. temp_search_key = self.search_key.replace('999选平', '')
  3039. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  3040. temp_search_key = self.search_key.replace('999皮炎平', '')
  3041. else:
  3042. temp_search_key = self.search_key.replace('999', '')
  3043. if self.search_key == '999糠酸莫米松凝胶15':
  3044. temp_search_key = temp_search_key.replace('15', '')
  3045. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  3046. temp_search_key = temp_search_key.replace('30', '')
  3047. elif self.search_key == '999抗病毒口服液10ml*6支/盒':
  3048. temp_search_key = temp_search_key.replace("10ml*6支/盒", "")
  3049. elif self.search_key == '999复方金银花颗粒10g':
  3050. temp_search_key = temp_search_key.replace("10g", "")
  3051. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  3052. temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
  3053. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  3054. temp_search_key = temp_search_key.replace("6粒", "")
  3055. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  3056. temp_search_key = temp_search_key.replace("50", "")
  3057. elif self.search_key == '999止泻利颗粒15g*8':
  3058. temp_search_key = temp_search_key.replace("15g*8", "")
  3059. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  3060. temp_search_key = temp_search_key.replace("30", "")
  3061. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  3062. temp_search_key = temp_search_key.replace("15g", "")
  3063. elif self.search_key == '999复方苦参肠炎康片12片':
  3064. temp_search_key = temp_search_key.replace("12片", "")
  3065. elif self.search_key == '999强力枇杷露16袋':
  3066. temp_search_key = temp_search_key.replace("16袋", "")
  3067. elif self.search_key == '999三蛇胆川贝膏138':
  3068. temp_search_key = temp_search_key.replace("138", "")
  3069. elif self.search_key == '999抗病毒口服液10ml*12':
  3070. temp_search_key = temp_search_key.replace("10ml*12", "")
  3071. elif self.search_key == '999抗病毒口服液10ml*10':
  3072. temp_search_key = temp_search_key.replace("10ml*10", "")
  3073. elif self.search_key == '999强力枇杷露120ml':
  3074. temp_search_key = temp_search_key.replace("120ml", "")
  3075. elif self.search_key == '999强力枇杷露150ml':
  3076. temp_search_key = temp_search_key.replace("150ml", "")
  3077. elif self.search_key == '999糠酸莫米松乳膏10g支':
  3078. temp_search_key = temp_search_key.replace("10g支", "")
  3079. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  3080. temp_search_key = temp_search_key.replace("20g", "")
  3081. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  3082. temp_search_key = temp_search_key.replace("(无糖)6g", "")
  3083. elif self.search_key == '999壮骨关节丸6g*20':
  3084. temp_search_key = temp_search_key.replace("6g*20", "")
  3085. elif self.search_key == '999正天丸6g*15':
  3086. temp_search_key = temp_search_key.replace("6g*15", "")
  3087. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  3088. temp_search_key = temp_search_key.replace("20", "")
  3089. elif self.search_key == '999糠酸莫米松凝胶10':
  3090. temp_search_key = temp_search_key.replace("10", "")
  3091. elif self.search_key == '999板蓝根颗粒10g*20':
  3092. temp_search_key = temp_search_key.replace("10g*20", "")
  3093. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  3094. temp_search_key = temp_search_key.replace("10粒", "")
  3095. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  3096. temp_search_key = temp_search_key.replace("12粒", "")
  3097. elif self.search_key == '999咽炎片0.26g*12片*2板':
  3098. temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
  3099. elif self.search_key == '999小儿止咳糖浆120':
  3100. temp_search_key = temp_search_key.replace("120", "")
  3101. elif self.search_key == '999小儿止咳糖浆225':
  3102. temp_search_key = temp_search_key.replace("225", "")
  3103. elif self.search_key == '999小儿感冒颗粒6g*10':
  3104. temp_search_key = temp_search_key.replace("6g*10", "")
  3105. elif self.search_key == '999小儿感冒颗粒6g*24':
  3106. temp_search_key = temp_search_key.replace("6g*24", "")
  3107. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  3108. temp_search_key = temp_search_key.replace("6g*10袋", "")
  3109. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  3110. temp_search_key = temp_search_key.replace("6g*20袋", "")
  3111. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  3112. temp_search_key = temp_search_key.replace("8g*10袋", "")
  3113. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  3114. temp_search_key = temp_search_key.replace("2.5g*10袋", "")
  3115. elif self.search_key == '999感冒清热颗粒12g*18':
  3116. temp_search_key = temp_search_key.replace("12g*18", "")
  3117. if '999' not in product or temp_search_key not in product:
  3118. self.swipe_back(1)
  3119. self.unrelated_data += 1
  3120. return
  3121. else:
  3122. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  3123. temp_search_key = self.search_key.replace('史达功', '')
  3124. temp_search_key = temp_search_key.replace('120', '')
  3125. if '史达功' not in product or temp_search_key not in product:
  3126. self.swipe_back(1)
  3127. self.unrelated_data += 1
  3128. return
  3129. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  3130. temp_search_key = self.search_key.replace('三九胃泰', '')
  3131. temp_search_key = temp_search_key.replace('8袋', '')
  3132. if '三九胃泰' not in product or temp_search_key not in product:
  3133. self.swipe_back(1)
  3134. self.unrelated_data += 1
  3135. return
  3136. elif self.search_key == '今维多赐多康牌蛋白粉':
  3137. temp_search_key = self.search_key.replace('今维多', '')
  3138. if '今维多' not in product or temp_search_key not in product:
  3139. self.swipe_back(1)
  3140. self.unrelated_data += 1
  3141. return
  3142. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  3143. temp_search_key = self.search_key.replace('佳美舒', '')
  3144. temp_search_key = temp_search_key.replace('4', '')
  3145. if '佳美舒' not in product or temp_search_key not in product:
  3146. self.swipe_back(1)
  3147. self.unrelated_data += 1
  3148. return
  3149. elif self.search_key == '三九胃泰颗粒20g*10':
  3150. temp_search_key = self.search_key.replace('20g*10', '')
  3151. if temp_search_key not in product:
  3152. self.swipe_back(1)
  3153. self.unrelated_data += 1
  3154. return
  3155. elif self.search_key == '三九胃泰颗粒20g*6袋':
  3156. temp_search_key = self.search_key.replace('20g*6袋', '')
  3157. if temp_search_key not in product:
  3158. self.swipe_back(1)
  3159. self.unrelated_data += 1
  3160. return
  3161. elif self.search_key == '顺峰康王酮康他索乳膏':
  3162. temp_search_key = self.search_key.replace('顺峰康王', '')
  3163. if '顺峰康王' not in product or temp_search_key not in product:
  3164. self.swipe_back(1)
  3165. self.unrelated_data += 1
  3166. return
  3167. else:
  3168. if self.search_key not in product.replace(' ', ''):
  3169. self.swipe_back(1)
  3170. self.unrelated_data += 1
  3171. return
  3172. # if self.search_key not in product.replace(' ', ''):
  3173. # self.swipe_back(1)
  3174. # self.unrelated_data += 1
  3175. # return
  3176. else:
  3177. self.swipe_back(1)
  3178. return
  3179. min_price = self.drug_price() # 最低价格
  3180. # 商品链接
  3181. product_link = self.get_product_link()
  3182. # 判断是否有自营的文本,有的话不需要获取店铺的信息
  3183. if self.d.xpath('//*[@text="自营"]').exists:
  3184. shop = "美团自营大药房(快递电商)"
  3185. # 爬取日期
  3186. scrape_date = self.get_current_date()
  3187. # scrape_date = "2025-07-18"
  3188. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  3189. 'platform': '美团'}
  3190. print(f'当前数据:{dup_data}')
  3191. if self.data_is_exists(dup_data):
  3192. print('存在相同数据不入库')
  3193. self.swipe_back(1)
  3194. return
  3195. else:
  3196. for i in range(8):
  3197. if self.d.xpath('//*[@text="进店"]').exists:
  3198. print('开始获取店铺名1')
  3199. break
  3200. self.d.swipe_ext('up', 0.3)
  3201. time.sleep(1)
  3202. # detail_info = self.d.xpath(
  3203. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[6]').info
  3204. # bounds = detail_info['bounds']
  3205. # height = bounds['bottom'] - bounds['top']
  3206. # if self.d.xpath('//*[@text="进店"]').exists and height > 100:
  3207. if self.d.xpath('//*[@text="进店"]').exists:
  3208. print('开始获取店铺名2')
  3209. break
  3210. shop = self.get_shop_name()
  3211. # 爬取日期
  3212. scrape_date = self.get_current_date()
  3213. # scrape_date = "2025-07-18"
  3214. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  3215. 'platform': '美团'}
  3216. print(f'当前数据:{dup_data}')
  3217. # 获取店铺信息开始
  3218. # 暂时不获取店铺信息 start
  3219. is_has_enter_shop = self.has_shop()
  3220. # 需要判断shop是否已经在数据库中存在,如果存在,则不再进入店铺,直接进入下一个商品
  3221. shop_is_exists = self.shop_is_exists_database(shop)
  3222. # 存在进店 并且店铺的名称不包含美团官方的字样
  3223. print(f"已采集{self.shop_data_num}家店铺数据")
  3224. if is_has_enter_shop and '美团官方' not in shop and '美团自营' not in shop and not shop_is_exists and self.shop_data_num < 500:
  3225. # license_info = self.get_license_info_ex()
  3226. license_info = self.safe_exec(self.get_license_info_ex)
  3227. contact_address = license_info['contact_address']
  3228. qualification_number = license_info['qualification_number']
  3229. business_license_company = license_info['business_license_company']
  3230. business_license_address = license_info['business_license_address']
  3231. save_shop_data = {
  3232. 'shop': shop,
  3233. 'contact_address': contact_address,
  3234. 'qualification_number': qualification_number,
  3235. 'scrape_date': scrape_date,
  3236. 'business_license_company': business_license_company,
  3237. 'business_license_address': business_license_address,
  3238. 'platform': '美团'
  3239. }
  3240. self.save_shop_info_to_database(save_shop_data)
  3241. self.shop_data_num += 1 # 店铺数据数量+1
  3242. self.swipe_back(2)
  3243. else:
  3244. print('不采集店铺信息')
  3245. # 获取店铺信息结束
  3246. # 暂时不获取店铺信息 end
  3247. if self.data_is_exists(dup_data):
  3248. print('存在相同数据不入库')
  3249. self.swipe_back(1)
  3250. return
  3251. if not shop:
  3252. print('未获取到店铺名:开始回退')
  3253. self.swipe_back(1)
  3254. return
  3255. if not shop or '自营' in shop:
  3256. self.swipe_back(1)
  3257. return
  3258. time.sleep(self.get_sleep_time())
  3259. # 生产日期为空
  3260. manufacture_date = ''
  3261. # License info
  3262. # if is_has_enter_shop:
  3263. # license_info = self.get_license_info()
  3264. # business_license_company = license_info["单位名称"]
  3265. # credit_code = license_info['社会信用代码']
  3266. # city_str = license_info['地址']
  3267. # # 先把省份啥的替换掉
  3268. # city_sub_str = re.sub(r'[u4e00-\u9fa5]+省', '', city_str)
  3269. # try:
  3270. # city = re.search(r'[\u4e00-\u9fa5]+?(市|区|县)', city_sub_str).group(0)
  3271. # except:
  3272. # city = city_sub_str
  3273. # try:
  3274. # province = self.city2province[city]
  3275. # except:
  3276. # province = ''
  3277. # self.swipe_back(2)
  3278. # else:
  3279. # business_license_company = ''
  3280. # credit_code = ''
  3281. # city = ''
  3282. # province = ''
  3283. business_license_company = ''
  3284. credit_code = ''
  3285. city = ''
  3286. province = ''
  3287. expiry_date = ''
  3288. manufacturer = ''
  3289. approval_number = ''
  3290. # 暂时不获取说明书信息 start
  3291. # 是否存在说明书
  3292. # is_has_instructions = self.has_instructions()
  3293. # 有的药品没有说明书,直接默认
  3294. if self.search_key == '今维多赐多康牌蛋白粉':
  3295. expiry_date = '18个月'
  3296. manufacturer = '华润圣海健康科技有限公司'
  3297. approval_number = '食健备G202437001992'
  3298. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  3299. expiry_date = '24个月'
  3300. manufacturer = '浙江华润三九众益制药有限公司'
  3301. approval_number = '国药准字H20090152'
  3302. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  3303. expiry_date = '3年'
  3304. manufacturer = '江苏萨瑞斯医疗科技有限公司'
  3305. approval_number = '苏械注准20212140025'
  3306. elif self.search_key == '999蒲地蓝消炎片':
  3307. expiry_date = '24个月'
  3308. manufacturer = '特一药业集团股份有限公司'
  3309. approval_number = '国药准字Z20063596'
  3310. elif self.search_key == '999养胃舒颗粒':
  3311. expiry_date = '36个月'
  3312. manufacturer = '合肥华润神鹿药业有限公司'
  3313. approval_number = '国药准字Z34020289'
  3314. elif self.search_key == '999糠酸莫米松凝胶15':
  3315. expiry_date = '36个月'
  3316. manufacturer = '华润三九(南昌)药业有限公司'
  3317. approval_number = '国药准字H20080010'
  3318. elif self.search_key == '999黄芪精':
  3319. expiry_date = '36个月'
  3320. manufacturer = '台州南峰药业有限公司'
  3321. approval_number = '国药准字Z33020783'
  3322. elif self.search_key == '999复方感冒灵颗粒':
  3323. expiry_date = '24个月'
  3324. manufacturer = '华润三九(郴州)制药有限公司'
  3325. approval_number = '国药准字Z43020334'
  3326. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  3327. expiry_date = '36个月'
  3328. manufacturer = '华润三九(南昌)药业有限公司'
  3329. approval_number = '国药准字H20074155'
  3330. elif self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  3331. expiry_date = '暂定24个月,具体有效期以实物说明书为准'
  3332. manufacturer = '史达德药业(北京)有限公司'
  3333. approval_number = '国药准字H11021837'
  3334. elif self.search_key == '999速复康布洛芬缓释胶囊':
  3335. expiry_date = '24个月'
  3336. manufacturer = '北京红林制药有限公司'
  3337. approval_number = '国药准字H20074172'
  3338. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  3339. expiry_date = '24个月'
  3340. manufacturer = '重庆科瑞东和制药有限责任公司'
  3341. approval_number = '国药准字Z50020420'
  3342. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  3343. expiry_date = '24个月'
  3344. manufacturer = '华润三九(南昌)药业有限公司'
  3345. approval_number = '国药准字H20073954'
  3346. elif self.search_key == '999维生素C咀嚼片':
  3347. expiry_date = '24个月'
  3348. manufacturer = '甘肃成纪生物药业有限公司'
  3349. approval_number = '国药准字H62021166'
  3350. elif self.search_key == '999强力枇杷露120ml':
  3351. expiry_date = '36个月'
  3352. manufacturer = '华润三九(南昌)药业有限公司'
  3353. approval_number = '国药准字Z36021533'
  3354. elif self.search_key == '999强力枇杷露150ml':
  3355. expiry_date = '36个月'
  3356. manufacturer = '华润三九(南昌)药业有限公司'
  3357. approval_number = '国药准字Z36021533'
  3358. elif self.search_key == '999抗病毒口服液10ml*10' or self.search_key == '999抗病毒口服液10ml*12':
  3359. expiry_date = '24个月'
  3360. manufacturer = '杭州华润老桐君药业有限公司'
  3361. approval_number = '国药准字Z33020518'
  3362. elif self.search_key == '999精氨酸布洛芬颗粒':
  3363. expiry_date = '暂定36个月'
  3364. manufacturer = '华润三九(唐山)药业有限公司'
  3365. approval_number = '国药准字H20070139'
  3366. elif self.search_key == '999糠酸莫米松乳膏10g支':
  3367. expiry_date = '36个月'
  3368. manufacturer = '华润三九(南昌)药业有限公司'
  3369. approval_number = '国药准字H20074090'
  3370. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  3371. expiry_date = '24个月'
  3372. manufacturer = '华润三九(南昌)药业有限公司'
  3373. approval_number = '国药准字H20074079'
  3374. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  3375. expiry_date = '36个月'
  3376. manufacturer = '合肥华润神鹿药业有限公司'
  3377. approval_number = '国药准字Z20055023'
  3378. elif self.search_key == '999银菊清咽颗粒':
  3379. expiry_date = '30个月'
  3380. manufacturer = '合肥华润神鹿药业有限公司'
  3381. approval_number = '国药准字Z20026680'
  3382. elif self.search_key == '999阿奇霉素片':
  3383. expiry_date = '48个月'
  3384. manufacturer = '浙江华润三九众益制药有限公司'
  3385. approval_number = '国药准字H20084458'
  3386. elif self.search_key == '999补脾益肠丸':
  3387. expiry_date = '24个月'
  3388. manufacturer = '惠州市九惠制药股份有限公司'
  3389. approval_number = '国药准字Z44023376'
  3390. elif self.search_key == '999壮骨关节丸6g*20':
  3391. expiry_date = '24个月'
  3392. manufacturer = '华润三九医药股份有限公司'
  3393. approval_number = '国药准字Z44023377'
  3394. elif self.search_key == '999壮骨关节胶囊':
  3395. expiry_date = '24个月'
  3396. manufacturer = '华润三九医药股份有限公司'
  3397. approval_number = '国药准字Z20080055'
  3398. elif self.search_key == '999正天丸6g*15':
  3399. expiry_date = '30个月'
  3400. manufacturer = '华润三九医药股份有限公司'
  3401. approval_number = '国药准字Z44020711'
  3402. elif self.search_key == '999正天胶囊':
  3403. expiry_date = '24个月'
  3404. manufacturer = '华润三九医药股份有限公司'
  3405. approval_number = '国药准字Z20010142'
  3406. elif self.search_key == '三九胃泰胶囊':
  3407. expiry_date = '24个月'
  3408. manufacturer = '华润三九医药股份有限公司'
  3409. approval_number = '国药准字Z44020704'
  3410. elif self.search_key == '三九胃泰颗粒20g*10':
  3411. expiry_date = '24个月'
  3412. manufacturer = '华润三九医药股份有限公司'
  3413. approval_number = '国药准字Z44020705'
  3414. elif self.search_key == '999感冒灵颗粒':
  3415. expiry_date = '24个月'
  3416. manufacturer = '华润三九(枣庄)药业有限公司'
  3417. approval_number = '国药准字Z44021940'
  3418. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  3419. expiry_date = '36个月'
  3420. manufacturer = '华润三九医药股份有限公司'
  3421. approval_number = '国药准字H44024170'
  3422. elif self.search_key == '三九胃泰颗粒20g*6袋':
  3423. expiry_date = '24个月'
  3424. manufacturer = '华润三九医药股份有限公司'
  3425. approval_number = '国药准字Z44020705'
  3426. elif self.search_key == '顺峰康王酮康他索乳膏':
  3427. expiry_date = '24个月'
  3428. manufacturer = '广东华润顺峰药业有限公司'
  3429. approval_number = '国药准字H10980204'
  3430. elif self.search_key == '999糠酸莫米松凝胶10':
  3431. expiry_date = '36个月'
  3432. manufacturer = '华润三九(南昌)药业有限公司'
  3433. approval_number = '国药准字H20080010'
  3434. elif self.search_key == '999板蓝根颗粒10g*20':
  3435. expiry_date = '36个月'
  3436. manufacturer = '广东恒诚制药股份有限公司'
  3437. approval_number = '国药准字Z44021520'
  3438. elif self.search_key == '999复方氨酚烷胺胶囊' or self.search_key == '999复方氨酚烷胺胶囊12粒' or self.search_key == '999复方氨酚烷胺胶囊10粒' or self.search_key == '999复方氨酚烷胺胶囊6粒':
  3439. expiry_date = '36个月'
  3440. manufacturer = '华润三九(唐山)药业有限公司'
  3441. approval_number = '国药准字H13021912'
  3442. elif self.search_key == '999咽炎片0.26g*12片*2板':
  3443. expiry_date = '24个月'
  3444. manufacturer = '华润三九(黄石)药业有限公司'
  3445. approval_number = '国药准字Z42021062'
  3446. elif self.search_key == '999小儿止咳糖浆120' or self.search_key == '999小儿止咳糖浆225':
  3447. expiry_date = '24个月'
  3448. manufacturer = '华润三九(雅安)药业有限公司'
  3449. approval_number = '国药准字Z51020675'
  3450. elif self.search_key == '999小儿感冒颗粒6g*10' or self.search_key == '999小儿感冒颗粒6g*24':
  3451. expiry_date = '36个月'
  3452. manufacturer = '华润三九(枣庄)药业有限公司'
  3453. approval_number = '国药准字Z37021392'
  3454. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋' or self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  3455. expiry_date = '36个月'
  3456. manufacturer = '华润三九(黄石)药业有限公司'
  3457. approval_number = '国药准字H42022510'
  3458. elif self.search_key == '999感冒灵胶囊':
  3459. expiry_date = '24个月'
  3460. manufacturer = '华润三九医药股份有限公司'
  3461. approval_number = '国药准字Z44021939'
  3462. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  3463. expiry_date = '24个月'
  3464. manufacturer = '华润三九(黄石)药业有限公司'
  3465. approval_number = '国药准字Z42021105'
  3466. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  3467. expiry_date = '18个月'
  3468. manufacturer = '华润三九医药股份有限公司'
  3469. approval_number = '国药准字Z20100067'
  3470. elif self.search_key == '999感冒清热颗粒12g*18':
  3471. expiry_date = '36个月'
  3472. manufacturer = '山东新大陆制药有限公司'
  3473. approval_number = '国药准字Z37020066'
  3474. elif self.search_key == '999小柴胡颗粒':
  3475. expiry_date = '24个月'
  3476. manufacturer = '华润三九医药股份有限公司'
  3477. approval_number = '国药准字Z44020709'
  3478. else:
  3479. is_has_instructions = self.safe_exec(self.has_instructions)
  3480. # 说明书等信息
  3481. if is_has_instructions:
  3482. print('开始获取说明书信息')
  3483. # instructions_info = self.get_instructions_data()
  3484. instructions_info = self.safe_exec(self.get_instructions_data)
  3485. if instructions_info['有效期'] is not None:
  3486. expiry_date = instructions_info['有效期'].strip('。')
  3487. if instructions_info['生产单位'] is not None:
  3488. manufacturer = instructions_info['生产单位'].strip('。')
  3489. if instructions_info['批准文号'] is not None:
  3490. approval_number = instructions_info['批准文号'].strip('。')
  3491. else:
  3492. # 没有说明书不入库
  3493. print('没有获取到说明书信息')
  3494. self.swipe_back(1)
  3495. return
  3496. # 暂时不获取说明书信息 end
  3497. self.unrelated_data = 0
  3498. # 爬取省份
  3499. scrape_province = '广东' # 这里先默认广东
  3500. # 是否有货
  3501. availability = ''
  3502. save_data = {
  3503. 'product': product,
  3504. 'min_price': min_price,
  3505. 'manufacture_date': manufacture_date,
  3506. 'expiry_date': expiry_date,
  3507. 'shop': shop,
  3508. 'business_license_company': business_license_company,
  3509. 'province': province,
  3510. 'city': city,
  3511. 'manufacturer': manufacturer,
  3512. 'specification': specifications,
  3513. 'approval_number': approval_number,
  3514. 'product_link': product_link,
  3515. 'scrape_date': scrape_date,
  3516. 'scrape_province': scrape_province,
  3517. 'availability': availability,
  3518. 'credit_code': credit_code,
  3519. 'platform': '美团',
  3520. 'search_key': self.search_key,
  3521. }
  3522. self.save_to_database(save_data)
  3523. # time.sleep(100000)
  3524. time.sleep(self.get_sleep_time())
  3525. if self.distinct_target():
  3526. print('已到达搜索列表页')
  3527. else:
  3528. for i in range(1):
  3529. print('在详情页')
  3530. self.swipe_back(1)
  3531. time.sleep(self.get_sleep_time())
  3532. # 最外部有个定位按钮
  3533. if self.distinct_target():
  3534. break
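  3535. # Main entry point: runs one scraping task on the given device
  3536. # start_page: first page to scrape (earlier pages are scrolled past)
  3537. # end_page: last page to scrape (inclusive)
  3538. # task_id: identifies the task when reporting status/progress
  3539. # max_duration_minutes: optional time limit in minutes; scraping stops once it is exceeded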
  3540. def main(self, device_id, start_page, end_page, task_id, product_name, product_specs, max_duration_minutes=None,
  3541. retry_count=0):
  3542. # === 新增:初始化任务信息 ===
  3543. self.task_id = task_id
  3544. self.task_start_page = start_page
  3545. self.task_end_page = end_page
  3546. self.task_product_specs = product_specs
  3547. self.task_product_name = product_name
  3548. # self.current_page = start_page
  3549. self.start_time = time.time()
  3550. # === 新增:线程启动成功后更新状态为2 ===
  3551. if self.task_id:
  3552. try:
  3553. self.update_task_status(2) # 状态2: 执行中
  3554. self.loggerMT.info(f"任务 {task_id} 线程启动成功,状态已更新为2")
  3555. except Exception as e:
  3556. self.loggerMT.error(f"更新任务状态失败: {e}")
  3557. # =====================================
  3558. # 记录任务开始
  3559. if task_id:
  3560. reporter.start_task(task_id, start_page, end_page)
  3561. # ========================
  3562. # task_start_time = time.time() #任务开始时间
  3563. task_scape_count = 0 # 任务采集数量初始化为0
  3564. MAX_RETRY = 3 # 最大重试次数
  3565. spider_no = 0
  3566. # 计算超时时间(秒)
  3567. timeout_seconds = None
  3568. if max_duration_minutes:
  3569. timeout_seconds = max_duration_minutes * 60
  3570. self.connect_devices(device_id)
  3571. time.sleep(self.get_sleep_time())
  3572. # self.d.toast.show("测试toast", 20)
  3573. # 启动全局弹窗监控
  3574. self.monitor = SpiderMonitor(self)
  3575. self.monitor.start()
  3576. try:
  3577. # 重新开启美团应用
  3578. self.restart_app()
  3579. # 搜索关键字
  3580. # self.enter_target_page()
  3581. self.safe_exec(self.enter_target_page)
  3582. # === 新增:跳过前面的页面直到start_page start===
  3583. if start_page > 1:
  3584. self.loggerMT.info(f"跳过前 {start_page - 1} 页,从第 {start_page} 页开始采集")
  3585. current_page = 1
  3586. while current_page < start_page:
  3587. # 检查是否需要暂停
  3588. if self.monitor.pausing.is_set():
  3589. self.wait_for_ready(self.monitor)
  3590. # 检查是否到达底部
  3591. if self.d.xpath('//*[@text="已经到底啦"]').exists:
  3592. self.loggerMT.info(f"在第 {current_page} 页已到达底部,无法继续翻页")
  3593. self.loggerMT.warning(f"未能到达目标页码 {start_page},实际只到达第 {current_page} 页")
  3594. if task_id:
  3595. reporter.end_task(
  3596. task_id=task_id,
  3597. status='completed',
  3598. finish_status=1,
  3599. force_end_page=end_page
  3600. # force_end_page=idx
  3601. )
  3602. return
  3603. # break
  3604. # 滑动到下一页
  3605. self.d.drag(300, 1400, 300, 400, 1)
  3606. time.sleep(self.get_sleep_time())
  3607. current_page += 1
  3608. # 可选:添加页码日志
  3609. self.loggerMT.debug(f"已翻到第 {current_page} 页")
  3610. # 验证是否到达目标页码
  3611. if current_page < start_page:
  3612. self.loggerMT.error(f"翻页失败!目标页码:{start_page},实际到达:{current_page}")
  3613. # 这里可以根据需要决定是否继续执行或抛出异常
  3614. # return False 或 raise Exception
  3615. else:
  3616. self.loggerMT.info(f"成功翻到第 {start_page} 页,开始采集")
  3617. for idx in range(start_page, end_page + 1):
  3618. # === 新增:检查是否超过结束页 ===
  3619. if idx > end_page:
  3620. self.loggerMT.info(f"已采集到指定结束页 {end_page},停止采集")
  3621. if task_id:
  3622. reporter.end_task(
  3623. task_id=task_id,
  3624. status='completed',
  3625. finish_status=1,
  3626. force_end_page=end_page
  3627. )
  3628. return
  3629. # === 新增:检查超时 ===
  3630. if timeout_seconds and (time.time() - self.start_time) > timeout_seconds:
  3631. print(f"任务 {task_id} 达到时间限制 {max_duration_minutes} 分钟,停止采集")
  3632. self.loggerMT.info(f"任务 {task_id} 达到时间限制 {max_duration_minutes} 分钟,停止采集")
  3633. # 上报未完成状态
  3634. if task_id:
  3635. reporter.end_task(
  3636. task_id=task_id,
  3637. status='completed',
  3638. finish_status=0, # 0:未完成
  3639. force_end_page=self.current_page
  3640. )
  3641. return
  3642. # ====================
  3643. # print(f'第{idx + 1}页')
  3644. print(f'第{idx}页(指定范围: {start_page}-{end_page})')
  3645. self.current_page = idx # 更新当前页码
  3646. # === 新增:更新上报进度 ===
  3647. if task_id:
  3648. reporter.update_task_progress(
  3649. task_id=task_id,
  3650. actual_end_page=self.current_page
  3651. )
  3652. # ========================
  3653. if spider_no > 30:
  3654. time.sleep(60)
  3655. spider_no = 0
  3656. print('目前无关数据量: ', self.unrelated_data)
  3657. # 检查是否需要暂停(验证码过多)
  3658. if self.monitor.verification_count >= self.monitor.MAX_VERIFICATION_RETRY:
  3659. print("频繁遇到验证码,暂停程序")
  3660. # self.d.toast("请处理验证码后点击继续", 30)
  3661. # 等待用户点击屏幕继续
  3662. self.d.click(0, 0) # 无效点击,等待用户操作
  3663. self.monitor.verification_count = 0
  3664. if self.unrelated_data > 20:
  3665. # 连续超过20个不达标的数据则停止采集
  3666. self.loggerMT.info(f"连续20个数据不达标,品规:{self.search_key}")
  3667. # === 新增:任务正常完成 ===
  3668. if task_id:
  3669. reporter.end_task(
  3670. task_id=task_id,
  3671. status='completed',
  3672. finish_status=1, # 1:已完成
  3673. force_end_page=end_page
  3674. )
  3675. # ========================
  3676. return
  3677. # 线程安全获取商品列表
  3678. # drug_lis = self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').all()
  3679. # drug_lis = self.safe_list('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout', self.monitor)
  3680. while True:
  3681. if self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').exists:
  3682. break
  3683. time.sleep(1)
  3684. drug_lis = self.safe_exec(
  3685. self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').all)
  3686. lis_len = len(drug_lis)
  3687. print(f'当前页面共有{lis_len}个商品')
  3688. for idxx, drug_one in enumerate(drug_lis, start=1):
  3689. bounds = drug_one.info['bounds']
  3690. top = bounds['top']
  3691. bottom = bounds['bottom']
  3692. # height = bottom - top
  3693. print(f'当前商品bottom:{bottom}')
  3694. print(f'当前商品top:{top}')
  3695. # if 304 <= top and bottom <= 1475: # 默认高度241的才行
  3696. if 304 <= top and bottom <= 1475: # 默认高度241的才行 1559
  3697. # print('目标-->', drug_one.info)
  3698. # drug_one.click()
  3699. # 获取当前元素中的属性来判断是否要点击进入采集
  3700. print(f"这页的第几个商品:{idxx}")
  3701. product_title = ''
  3702. price = ''
  3703. shop_name = ''
  3704. # 商品名称的xpath
  3705. product_tittle_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3706. product_tittle_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3707. if self.d.xpath(product_tittle_xpath).exists:
  3708. product_title = self.d.xpath(product_tittle_xpath).text
  3709. product_title = product_title[1:] if product_title.startswith('0') else product_title
  3710. print(f"product_tittle_xpath列表当前商品名称:{product_title}")
  3711. if self.task_product_name not in product_title or self.task_product_specs not in product_title:
  3712. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  3713. continue
  3714. elif self.d.xpath(product_tittle_xpath2).exists:
  3715. product_title = self.d.xpath(product_tittle_xpath2).text
  3716. product_title = product_title[1:] if product_title.startswith('0') else product_title
  3717. print(f"product_tittle_xpath2列表当前商品名称:{product_title}")
  3718. print(f"search_key:{self.search_key}")
  3719. if self.task_product_name not in product_title or self.task_product_specs not in product_title:
  3720. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  3721. continue
  3722. else:
  3723. print(f"列表当前商品名称不存在")
  3724. # 价格
  3725. price_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3726. price_xpath3 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3727. if self.d.xpath(price_xpath).exists:
  3728. price_str = self.d.xpath(price_xpath).text
  3729. print(f"price_xpath列表当前商品价格:{price_str}")
  3730. if price_str:
  3731. price = float(re.search('[\d\.]+', price_str).group())
  3732. elif self.d.xpath(price_xpath3).exists:
  3733. price_str = self.d.xpath(price_xpath3).text
  3734. print(f"price_xpath3列表当前商品价格:{price_str}")
  3735. if price_str:
  3736. price = float(re.search('[\d\.]+', price_str).group())
  3737. else:
  3738. price_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3739. if self.d.xpath(price_xpath2).exists:
  3740. price_str = self.d.xpath(price_xpath2).text
  3741. print(f"price_xpath2列表当前商品价格:{price_str}")
  3742. if price_str:
  3743. price = float(re.search('[\d\.]+', price_str).group())
  3744. else:
  3745. print(f"列表当前商品价格不存在")
  3746. # price_str = self.d.xpath(f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]//*[starts-with(@text,"¥")]').text
  3747. print(f'列表获取到价格:{price}')
  3748. # 店铺名称的xpath
  3749. shop_name_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.FrameLayout[last()]/android.widget.TextView[1]'
  3750. shop_name_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.FrameLayout[last()]/android.widget.TextView[1]'
  3751. if self.d.xpath(shop_name_xpath).exists:
  3752. shop_name = self.d.xpath(shop_name_xpath).text
  3753. print(f"shop_name_xpath列表当前商品店铺名称:{shop_name}")
  3754. elif self.d.xpath(shop_name_xpath2).exists:
  3755. shop_name = self.d.xpath(shop_name_xpath2).text
  3756. print(f"shop_name_xpath2列表当前商品店铺名称:{shop_name}")
  3757. else:
  3758. print(f"列表当前商品店铺名称不存在")
  3759. # 如果商品的名称、价格和生产厂家都不存在则直接下一条数据。 跳过一些不是商品的数据。
  3760. if product_title == '' and price == '' and shop_name == '':
  3761. continue
  3762. scrape_date = self.get_current_date()
  3763. if product_title and price and shop_name:
  3764. # 判断数据表中是否存在
  3765. dup_data = {'product': product_title, 'min_price': price, 'shop': shop_name,
  3766. 'scrape_date': scrape_date, 'platform': '美团'}
  3767. if self.data_is_exists(dup_data):
  3768. print('列表存在相同数据不入库')
  3769. continue
  3770. self.safe_exec(drug_one.click)
  3771. print('点击目标药品完毕')
  3772. time.sleep(2)
  3773. # 采集药品信息
  3774. try:
  3775. # self.integrate_data()
  3776. self.safe_exec(self.integrate_data)
  3777. # 检测下是否回退到列表页
  3778. if self.distinct_target():
  3779. print('回退到列表页', True)
  3780. else:
  3781. if self.d.xpath('//*[@text="搜索"]').exists:
  3782. print("检测到搜索按钮,重新开始采集流程")
  3783. if retry_count < MAX_RETRY:
  3784. # 停止当前监控线程
  3785. self.monitor.stop()
  3786. self.monitor.join()
  3787. # 递归重启采集
  3788. return self.main(device_id, retry_count + 1)
  3789. else:
  3790. print("超过最大重试次数,终止程序")
  3791. return
  3792. else:
  3793. print("无法恢复页面,终止采集")
  3794. return
  3795. # print('回退到列表页失败,终止采集')
  3796. # return
  3797. time.sleep(self.get_sleep_time())
  3798. spider_no += 1
  3799. except Exception as e:
  3800. print(f'采集药品详情数据出错:{e}')
  3801. # 增加阻塞的方法:
  3802. if not self.distinct_target():
  3803. for i in range(1):
  3804. self.swipe_back(1)
  3805. # 最外部有个定位按钮
  3806. if self.distinct_target():
  3807. break
  3808. if i == 0 and not self.distinct_target():
  3809. print('页面出错,退出采集')
  3810. return
  3811. else:
  3812. continue
  3813. # 翻页逻辑(如果是最后一页则不再翻页)
  3814. if idx < end_page:
  3815. if self.d.xpath('//*[@text="已经到底啦"]').exists:
  3816. self.loggerMT.info(f'在第 {idx} 页已到达列表最底部')
  3817. if task_id:
  3818. reporter.end_task(
  3819. task_id=task_id,
  3820. status='completed',
  3821. finish_status=1,
  3822. force_end_page=idx
  3823. )
  3824. return
  3825. # 翻页
  3826. print('开始滑动')
  3827. self.d.drag(300, 1400, 300, 400, 1)
  3828. print('滑动结束')
  3829. time.sleep(self.get_sleep_time())
  3830. # if self.d.xpath('//*[@text="已经到底啦"]').exists:
  3831. # print('已经到达列表页最底部')
  3832. # # === 新增:任务正常完成 ===
  3833. # if task_id:
  3834. # reporter.end_task(
  3835. # task_id=task_id,
  3836. # status='completed',
  3837. # finish_status=1, # 1:已完成
  3838. # force_end_page=end_page
  3839. # )
  3840. # # ========================
  3841. # return
  3842. # 采集完成,数据上报
  3843. if task_id:
  3844. reporter.end_task(
  3845. task_id=task_id,
  3846. status='completed',
  3847. finish_status=1,
  3848. force_end_page=end_page
  3849. )
  3850. '''
  3851. search_list = self.d.xpath('//android.support.v7.widget.RecyclerView').info
  3852. bounds = search_list['bounds']
  3853. #print('搜索列表高度', 1400 + bounds['top'] - bounds['bottom'])
  3854. # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'])
  3855. # 计算滑动距离
  3856. scroll_distance = bounds['bottom'] - bounds['top'] # 正数
  3857. start_y = 1600
  3858. end_y = start_y - scroll_distance # 向上滑动,y 坐标减小
  3859. # 确保 end_y 不小于 0
  3860. end_y = max(end_y, 304) # 留出一点边距,避免滑出屏幕
  3861. # print('滑动起点 y:', start_y, '终点 y:', end_y)
  3862. # self.d.swipe(200, start_y, 200, end_y, 0.4)
  3863. print('开始滑动')
  3864. self.d.drag(300, 1400, 300, 400, 1)
  3865. # self.safe_exec(self.d.drag, 300, 1400, 300, 400, 1)
  3866. print('滑动结束')
  3867. #print('搜索列表高度', 1400 + bounds['top'] - bounds['bottom'])
  3868. # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'])
  3869. # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'], 0.4)
  3870. time.sleep(self.get_sleep_time())
  3871. '''
  3872. except Exception as e:
  3873. print(f"采集任务异常: {e}")
  3874. # === 新增:异常结束上报 ===
  3875. if task_id:
  3876. reporter.end_task(
  3877. task_id=task_id,
  3878. status='failed',
  3879. finish_status=0, # 未完成
  3880. force_end_page=self.current_page
  3881. )
  3882. # ========================
  3883. raise
  3884. finally:
  3885. # 确保监控线程被停止
  3886. self.monitor.stop()
  3887. self.monitor.join()
  def unitest(self):
      """Manual smoke test: store one hard-coded sample record.

      Builds a fixed product record, hands it to ``self.save_to_database``
      and then blocks in ``time.sleep(100000)``.

      :return: None
      """
      sample_record = dict(
          product="[昆中药]舒肝颗粒(低糖型)",
          min_price=14.0,
          manufacture_date='',
          expiry_date='36个月',
          shop='美团自营大药房(快递电商)',
          business_license_company='',
          province='',
          city='',
          manufacturer='昆明中药厂有限公司',
          specification='3g*16袋/盒',
          approval_number='国药准字Z53021161',
          product_link='',
          scrape_date='2025/07/09',
          scrape_province='广东',
          availability='',
          credit_code='',
          platform='美团',
      )
      self.save_to_database(sample_record)
      # Long blocking sleep kept exactly as in the original helper.
      time.sleep(100000)
  3915. # retrieve database
  def get_retrieve_mysql():
      """Open and return a new PyMySQL connection to the retrieve database.

      Connection settings are read from the environment when present and
      otherwise fall back to the values that were previously hard-coded:

      * ``RETRIEVE_DB_HOST`` / ``RETRIEVE_DB_PORT``
      * ``RETRIEVE_DB_USER`` / ``RETRIEVE_DB_PASSWORD``
      * ``RETRIEVE_DB_NAME``

      :return: an open ``pymysql`` connection using the ``utf8mb4`` charset;
          the caller is responsible for closing it.
      :raises ValueError: if ``RETRIEVE_DB_PORT`` is set but is not an integer.
      """
      import os

      import pymysql

      # SECURITY: credentials do not belong in source control. The literals
      # below remain only as backward-compatible defaults; deployments should
      # set the RETRIEVE_DB_* variables, then rotate and remove these values.
      env = os.environ
      return pymysql.connect(
          host=env.get('RETRIEVE_DB_HOST', '39.108.116.125'),
          port=int(env.get('RETRIEVE_DB_PORT', 3306)),
          user=env.get('RETRIEVE_DB_USER', 'drug_retrieve'),
          password=env.get('RETRIEVE_DB_PASSWORD', 'Pem287cwM58jNpe2'),
          # `database` is the documented name; `db` is only a legacy alias.
          database=env.get('RETRIEVE_DB_NAME', 'drug_retrieve'),
          charset='utf8mb4',
      )
  3937. # def main():
  3938. # #从配置的系统里面读取采集用到的设备和搜索关键词
  3939. # #1、数据库的连接,从数据库中获取采集品规和设备adb码,启动程序进行采集,如果配置了采集时间,还需要支持到了时间终止采集,如果配置了采集的页数,需要滑动到指定的页数后再进行采集
  3940. # #2、代码要支持多线程(线程池)的管理,每个线程有自己的生命周期。
  3941. # #获取未开始的美团平台的采集任务
  3942. # retrieve_conn = get_retrieve_mysql()
  3943. # cursor = retrieve_conn.cursor()
  3944. # query = "SELECT id,collect_equipment_id,product_name,start_page,end_page FROM retrieve_collect_task_allocate WHERE status = 1 AND platform = 4"
  3945. # cursor.execute(query)
  3946. # result = cursor.fetchone()
  3947. # if result:
  3948. # collect_equipment_id = result [1]
  3949. # product_name = result[2]
  3950. # start_page = result[3]
  3951. # end_page = result[4]
  3952. # print(f"collect_equipment_id={collect_equipment_id}")
  3953. # print(f"product_name={product_name}")
  3954. # if collect_equipment_id == 0:
  3955. # print("设备id不存在")
  3956. # return
  3957. # if product_name == '':
  3958. # print("采集关键字获取失败")
  3959. # return
  3960. # #通过 collect_equipment_id 获取设别adb码
  3961. # device_query = "SELECT device_id FROM retrieve_collect_equipment WHERE id = %s and status = 0"
  3962. # cursor.execute(device_query, (collect_equipment_id))
  3963. # device_result = cursor.fetchone()
  3964. # if device_result:
  3965. # device_id = device_result[0]
  3966. # print(f"device_id={device_id}")
  3967. # else:
  3968. # # self.loggerMT.info("没有可用的设备进行数据采集")
  3969. # print("没有可用的设备进行数据采集")
  3970. # return
  3971. # else:
  3972. # # self.loggerMT.info("MT 没有要采集的品规")
  3973. # print("MT 没有要采集的品规")
  3974. # return
  3975. # key = product_name
  3976. # try:
  3977. # mt = MT(key) # 用当前关键字实例化
  3978. # mt.main(device_id,start_page,end_page) # 执行一次完整采集
  3979. # logging.info(f'关键字 {key} 本轮采集完成')
  3980. # except Exception as e:
  3981. # # 发生异常直接跳过该关键字,继续下一轮
  3982. # logging.exception(f'关键字 {key} 采集异常:{e}')
  3983. # finally:
  3984. # # 关闭当前 MT 实例资源(如有需要)
  3985. # if hasattr(mt, 'close'):
  3986. # mt.close()
  class TimeoutException(Exception):
      """Exception type named for timeout conditions; adds nothing to ``Exception``."""
  3989. # 如果需要并行处理(提高效率),可以使用线程池:
  def process_tasks_in_parallel(max_workers=12):
      """Load the pending MT collection tasks and run them on a thread pool.

      Reads every row of ``retrieve_collect_task_allocate`` with
      ``status = 1`` and ``platform = 4``, resolves each task's device id from
      ``retrieve_collect_equipment`` (rows with ``status = 0`` only) and runs
      one ``MT(key).main(...)`` call per task in a ``ThreadPoolExecutor``.
      A success/failure summary is printed at the end.

      A row is skipped when its equipment id is 0, its product name is blank,
      or no usable device row is found. A NULL ``duration`` falls back to
      30 minutes; a NULL ``product_specs`` is treated as an empty string.

      :param max_workers: maximum number of worker threads in the pool.
      :return: None
      """
      import concurrent.futures
      from concurrent.futures import ThreadPoolExecutor, as_completed

      default_duration_minutes = 30

      query = """
          SELECT id, collect_equipment_id, product_name, start_page, end_page, duration, product_specs
          FROM retrieve_collect_task_allocate
          WHERE status = 1 \
          AND platform = 4 \
          """
      device_query = "SELECT device_id FROM retrieve_collect_equipment WHERE id = %s AND status = 0"

      tasks = []
      # equipment id -> device id (or None), so each device is looked up once.
      device_map = {}

      retrieve_conn = get_retrieve_mysql()
      try:
          cursor = retrieve_conn.cursor()
          try:
              cursor.execute(query)
              results = cursor.fetchall()
              print(f"获取到的任务结果={results}")
              if not results:
                  print("MT 没有要采集的品规")
                  # The finally clauses below still release cursor and connection.
                  return

              for result in results:
                  (task_id, collect_equipment_id, product_name,
                   start_page, end_page, duration, product_specs) = result

                  if collect_equipment_id == 0:
                      continue
                  if not (product_name and product_name.strip()):
                      continue

                  if collect_equipment_id not in device_map:
                      cursor.execute(device_query, (collect_equipment_id,))
                      device_result = cursor.fetchone()
                      device_map[collect_equipment_id] = device_result[0] if device_result else None

                  device_id = device_map[collect_equipment_id]
                  if not device_id:
                      continue

                  # A NULL specs column must not crash task preparation.
                  specs = product_specs if product_specs is not None else ''
                  duration_minutes = duration if duration is not None else default_duration_minutes
                  tasks.append({
                      'task_id': task_id,
                      'device_id': device_id,
                      # Search keyword: product name immediately followed by specs.
                      'key': product_name.strip() + specs.strip(),
                      'start_page': start_page,
                      'end_page': end_page,
                      # Per-task time limit, in minutes.
                      'duration_minutes': duration_minutes,
                      # Passed through to MT.main as product_specs / product_name.
                      'product_specs': specs,
                      'product_name': product_name,
                  })
          finally:
              cursor.close()
      finally:
          retrieve_conn.close()

      if not tasks:
          print("没有有效的采集任务")
          return

      print(f"准备并行处理 {len(tasks)} 个任务")

      def process_single_task(task):
          """Run one task; ordinary exceptions become a failure result dict."""
          mt = None
          try:
              mt = MT(task['key'])
              mt.main(
                  device_id=task['device_id'],
                  start_page=task['start_page'],
                  end_page=task['end_page'],
                  task_id=task['task_id'],
                  product_name=task['product_name'],
                  product_specs=task['product_specs'],
                  max_duration_minutes=task['duration_minutes'],
              )
              return {
                  'task_id': task['task_id'],
                  'success': True,
                  'collected_count': mt.collected_count,
                  'final_page': mt.current_page,
              }
          except Exception as e:
              print(f"任务 {task['task_id']} 执行异常: {e}")
              return {
                  'task_id': task['task_id'],
                  'success': False,
                  'error': str(e),
              }
          finally:
              # Best-effort cleanup: a failing close() is reported, never raised.
              if mt is not None and hasattr(mt, 'close'):
                  try:
                      mt.close()
                  except Exception as close_err:
                      print(f"任务 {task['task_id']} 关闭资源异常: {close_err}")

      successful_tasks = 0
      failed_tasks = 0

      with ThreadPoolExecutor(max_workers=max_workers) as executor:
          future_to_task = {
              executor.submit(process_single_task, task): task
              for task in tasks
          }

          for future in as_completed(future_to_task):
              task = future_to_task[future]
              try:
                  # NOTE(review): as_completed only yields finished futures, so
                  # result() returns at once and this timeout cannot fire. Any
                  # real limit depends on MT.main honouring
                  # max_duration_minutes (not visible here) - confirm.
                  task_timeout = (task['duration_minutes'] + 5) * 60
                  result = future.result(timeout=task_timeout)

                  if result['success']:
                      successful_tasks += 1
                      print(f"任务 {result['task_id']}: 完成,采集 {result['collected_count']} 条数据")
                  else:
                      failed_tasks += 1
                      print(f"任务 {result['task_id']}: 失败,错误: {result['error']}")
              except concurrent.futures.TimeoutError:
                  failed_tasks += 1
                  print(f"任务 {task['task_id']}: 超时(限制 {task['duration_minutes']} 分钟)")
              except Exception as e:
                  failed_tasks += 1
                  print(f"任务 {task['task_id']}: 执行异常 {e}")

      print(f"\n并行采集完成:")
      print(f"成功: {successful_tasks} 个")
      print(f"失败: {failed_tasks} 个")
  if __name__ == '__main__':
      # Minutes between scheduled runs. The same value feeds both the schedule
      # and the startup message so the printed interval matches the real one.
      COLLECT_INTERVAL_MINUTES = 10

      def run_collection():
          """Run one collection round; exceptions are printed, not propagated."""
          try:
              print(f"【定时任务开始】时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
              process_tasks_in_parallel(max_workers=12)
              print(f"【定时任务结束】时间: {datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
          except Exception as e:
              print(f"【定时任务异常】: {e}")

      # Register the recurring job, then run one round immediately.
      schedule.every(COLLECT_INTERVAL_MINUTES).minutes.do(run_collection)
      run_collection()

      print(f"定时任务已设置,每{COLLECT_INTERVAL_MINUTES}分钟执行一次采集")

      # Poll the scheduler once a minute, forever.
      while True:
          schedule.run_pending()
          time.sleep(60)