new_mt_2.py 186 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740
  1. import os
  2. import sys
  3. script_dir = os.path.dirname(os.path.abspath(__file__))
  4. os.chdir(script_dir)
  5. import requests
  6. import base64
  7. import cv2
  8. import uiautomator2 as u2
  9. import time
  10. import subprocess
  11. import re
  12. import random
  13. import datetime
  14. import json
  15. from aip import AipOcr
  16. from apscheduler.schedulers.blocking import BlockingScheduler
  17. # from db_mysql import mysqlClient
  18. import threading
  19. from collections import deque
  20. import numpy as np
  21. import secrets
  22. import oss2
  23. import urllib.parse
  24. from certifi.core import where
  25. # import pyperclip
  26. from config import Config
  27. from logger import setup_logger
  28. import logging
  29. # from database import MySQLClient
  30. from PIL import Image
  31. from pathlib import Path
  32. from PIL import Image, ImageDraw, ImageFont
  33. # 配置日志
  34. # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  35. setup_logger("mt_spider") # 初始化日志
  36. class SpiderMonitor(threading.Thread):
  37. """全局弹窗监控线程(增强版)"""
  38. def __init__(self, spider_instance):
  39. super().__init__(daemon=True)
  40. self.spider = spider_instance
  41. self.running = True
  42. self.pausing = threading.Event() # 主线程同步事件
  43. self.last_verification_time = 0
  44. self.verification_count = 0
  45. self.MAX_VERIFICATION_RETRY = 10
  46. self.recent_clicks = deque(maxlen=10) # 防重复点击
  47. self.logger = logging.getLogger("SpiderMonitor")
  48. # 可配置化弹窗规则
  49. self.popup_rules = {
  50. "simple": [
  51. ('//*[@text="确定"]', "点击确定"),
  52. ('//*[@text="允许"]', "点击允许"),
  53. ('//*[@text="关闭"]', "点击关闭"),
  54. ('//*[@resource-id="com.sankuai.meituan:id/close"]', "关闭按钮"),
  55. ('//*[@resource-id="com.sankuai.meituan:id/address_center_location_close"]', "关闭按钮"),
  56. ('//*[@resource-id="com.sankuai.meituan:id/location_close"]', "关闭按钮"),
  57. ('//*[@resource-id="com.sankuai.meituan:id/btn_close"]', "关闭按钮"),
  58. ],
  59. "verification": [
  60. '//*[contains(@text, "验证")]',
  61. '//*[contains(@text, "滑块")]',
  62. '//*[contains(@text, "依次点击")]',
  63. '//*[contains(@text, "请点击")]',
  64. '//*[contains(@text, "拖动滑块刚")]', # 这个需要拖动滑块至最右边,然后再截图
  65. '//*[contains(@text, "请输入图片中的内容")]',
  66. '//*[contains(@text, "用最短线连接")]',
  67. '//*[contains(@text, "请按语序依次点击")]',
  68. '//*[contains(@text, "请向右滑动滑块")]',
  69. '//*[contains(@text, "请拖动下方滑块完成拼图")]',
  70. '//*[contains(@resource-id, "captcha")]'
  71. ]
  72. }
  73. def run(self):
  74. while self.running:
  75. try:
  76. handled = self.check_and_handle_popup()
  77. time.sleep(2 if handled else 1)
  78. except Exception as e:
  79. self.logger.exception("监控线程异常: %s", e)
  80. time.sleep(3)
  81. def _is_recent_click(self, xpath):
  82. """防止重复点击同一个弹窗"""
  83. key = f"{xpath}_{int(time.time())}"
  84. if key in self.recent_clicks:
  85. return True
  86. self.recent_clicks.append(key)
  87. return False
  88. def check_and_handle_popup(self):
  89. d = self.spider.d
  90. # 1. 处理简单弹窗
  91. for xpath, desc in self.popup_rules["simple"]:
  92. if d.xpath(xpath).exists and not self._is_recent_click(xpath):
  93. self.logger.info("检测到弹窗: %s", desc)
  94. d.xpath(xpath).click()
  95. return True
  96. # 2. 处理验证码弹窗
  97. for xpath in self.popup_rules["verification"]:
  98. if d.xpath(xpath).exists:
  99. now = time.time()
  100. if now - self.last_verification_time < 30:
  101. return False # 30秒内不重复触发
  102. self.last_verification_time = now
  103. self.verification_count += 1
  104. self.logger.warning("验证码弹窗触发,等待人工处理...")
  105. if self.verification_count > self.MAX_VERIFICATION_RETRY:
  106. self.logger.error("验证码重试超限,终止任务")
  107. self.spider.stop_all()
  108. return True
  109. self.pausing.set() # 通知主线程暂停
  110. # d.toast.show("需要人工处理验证码", 120)
  111. # 等待人工处理
  112. start = time.time()
  113. # while time.time() - start < 120*60:
  114. # if not d.xpath(xpath).exists:
  115. # self.logger.info("验证码已处理")
  116. # d.toast.show("验证完成", 2)
  117. # self.pausing.clear() # 放行主线程
  118. # return True
  119. # time.sleep(5)
  120. while True:
  121. if not d.xpath(xpath).exists:
  122. self.logger.info("验证码已处理")
  123. # d.toast.show("验证完成", 2)
  124. self.pausing.clear() # 放行主线程
  125. return True
  126. time.sleep(5)
  127. self.logger.warning("验证码超时,重启APP")
  128. self.spider.restart_app()
  129. return True
  130. # 3. 处理广告弹窗(点击右上角)
  131. if d.xpath('//*[contains(@text, "广告")]').exists:
  132. w, h = d.info['displayWidth'], d.info['displayHeight']
  133. d.click(w - 50, 50)
  134. self.logger.info("关闭广告弹窗")
  135. return True
  136. return False
  137. def stop(self):
  138. self.running = False
  139. class MTScreenshot:
  140. def __init__(self, d, oss_config, search_key, scroll_times=4, compress_quality=7, resize_ratio=0.8):
  141. # 接收外部已连接好的u2设备实例
  142. self.d = d
  143. self.search_key = search_key # 添加这行
  144. # 启动全局弹窗监控
  145. self.monitor = SpiderMonitor(self)
  146. self.monitor.start()
  147. self.loggerMT = logging.getLogger()
  148. # 日志初始化
  149. self.logger = self._init_logger()
  150. # OSS配置与初始化(核心配置,无冗余)
  151. self.oss_config = oss_config
  152. self.oss_bucket = self._init_oss_bucket()
  153. # 截图核心参数
  154. self.scroll_times = scroll_times
  155. self.compress_quality = compress_quality
  156. self.resize_ratio = resize_ratio
  157. # self.title_xpaths = [
  158. # '//*[@resource-id="com.jd.lib.productdetail.feature:id/db"]',
  159. # '//*[@resource-id="com.jd.lib.productdetail.feature:id/cx"]',
  160. # '//*[@resource-id="com.jd.lib.productdetail.feature:id/cj"]'
  161. # ]
  162. def _init_logger(self):
  163. # 极简日志配置,仅保留必要输出
  164. logger = logging.getLogger("mt_screenshot")
  165. logger.setLevel(logging.INFO)
  166. logger.handlers.clear()
  167. handler = logging.StreamHandler()
  168. handler.setFormatter(logging.Formatter("%(asctime)s - %(levelname)s - %(message)s"))
  169. logger.addHandler(handler)
  170. return logger
  171. def _init_oss_bucket(self):
  172. # 仅做OSS配置校验和Bucket连接,无额外功能
  173. if not all([self.oss_config.get("access_key_id"),
  174. self.oss_config.get("access_key_secret"),
  175. self.oss_config.get("endpoint"),
  176. self.oss_config.get("bucket_name")]):
  177. self.logger.warning("OSS配置不完整,无法上传")
  178. return None
  179. try:
  180. auth = oss2.Auth(self.oss_config["access_key_id"], self.oss_config["access_key_secret"])
  181. bucket = oss2.Bucket(auth, self.oss_config["endpoint"], self.oss_config["bucket_name"])
  182. bucket.get_bucket_info() # 验证连接
  183. self.logger.info("OSS Bucket连接成功")
  184. return bucket
  185. except Exception as e:
  186. self.logger.error(f"OSS Bucket连接失败: {e}")
  187. return None
  188. def _upload_to_oss(self, local_path):
  189. # 极简上传逻辑,仅返回OSS URL或None
  190. if not self.oss_bucket or not os.path.exists(local_path):
  191. return None
  192. file_name = os.path.basename(local_path)
  193. safe_name = re.sub(r'[^\w\.\-]', '_', file_name)
  194. oss_key = f"{self.oss_config.get('oss_prefix', 'scrape_data/')}{safe_name}"
  195. try:
  196. oss2.resumable_upload(self.oss_bucket, oss_key, local_path)
  197. # 生成并返回完整OSS URL
  198. oss_file_url = f"https://{self.oss_config['bucket_name']}.{self.oss_config['endpoint']}/{urllib.parse.quote(oss_key, safe='/')}"
  199. self.logger.info(f"OSS上传成功: {oss_file_url}")
  200. return oss_file_url
  201. except Exception as e:
  202. self.logger.error(f"OSS上传失败: {e}")
  203. return None
  204. # def _get_title(self):
  205. # # 仅提取标题,无冗余逻辑
  206. # for xpath in self.title_xpaths:
  207. # elem = self.d.xpath(xpath)
  208. # if elem.exists:
  209. # info = elem.info
  210. # title = (info.get("contentDescription") or info.get("content-desc") or info.get("text") or "").strip()
  211. # return title[:50] # 限制标题长度,避免文件名过长
  212. # return ""
  213. def safe_exec(self, func, *args, **kwargs):
  214. """
  215. 万能安全壳:执行 func 前检查验证码,
  216. 若监控线程已置位 pausing,则一直阻塞直到放行。
  217. """
  218. while self.monitor.pausing.is_set():
  219. time.sleep(1)
  220. # 执行真正逻辑
  221. return func(*args, **kwargs)
  222. def _get_title(self):
  223. # try:
  224. # title = self.d.xpath(
  225. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  226. # except:
  227. # title = self.d.xpath(
  228. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView').text
  229. # title = self.d.xpath('//*[contains(@text, "舒肝颗粒")]').text
  230. def _inner():
  231. temp_search_key = self.search_key
  232. if "999" in self.search_key:
  233. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  234. temp_search_key = self.search_key.replace("999皮炎平", "")
  235. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  236. temp_search_key = self.search_key.replace("999必无忧", "")
  237. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  238. temp_search_key = self.search_key.replace("999必无忧", "")
  239. elif self.search_key == '999速复康布洛芬缓释胶囊':
  240. temp_search_key = self.search_key.replace("999速复康", "")
  241. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  242. temp_search_key = self.search_key.replace("999选平", "")
  243. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  244. temp_search_key = self.search_key.replace("999皮炎平", "")
  245. else:
  246. temp_search_key = self.search_key.replace("999", "")
  247. else:
  248. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  249. temp_search_key = self.search_key.replace("史达功", "")
  250. temp_search_key = temp_search_key.replace("120", "")
  251. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  252. temp_search_key = self.search_key.replace("三九胃泰", "")
  253. temp_search_key = temp_search_key.replace("8袋", "")
  254. elif self.search_key == '今维多赐多康牌蛋白粉':
  255. temp_search_key = self.search_key.replace("今维多", "")
  256. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  257. temp_search_key = self.search_key.replace("佳美舒", "")
  258. temp_search_key = temp_search_key.replace("4", "")
  259. elif self.search_key == '三九胃泰颗粒20g*10':
  260. temp_search_key = self.search_key.replace("20g*10", "")
  261. elif self.search_key == '三九胃泰颗粒20g*6袋':
  262. temp_search_key = self.search_key.replace("20g*6袋", "")
  263. elif self.search_key == '顺峰康王酮康他索乳膏':
  264. temp_search_key = self.search_key.replace("顺峰康王", "")
  265. if self.search_key == '999糠酸莫米松凝胶15':
  266. temp_search_key = temp_search_key.replace("15", "")
  267. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  268. temp_search_key = temp_search_key.replace("30", "")
  269. elif self.search_key == '999复方感冒灵颗粒15':
  270. temp_search_key = temp_search_key.replace("15", "")
  271. elif self.search_key == '999复方金银花颗粒10g':
  272. temp_search_key = temp_search_key.replace("10g", "")
  273. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  274. temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
  275. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  276. temp_search_key = temp_search_key.replace("6粒", "")
  277. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  278. temp_search_key = temp_search_key.replace("50", "")
  279. elif self.search_key == '999止泻利颗粒15g*8':
  280. temp_search_key = temp_search_key.replace("15g*8", "")
  281. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  282. temp_search_key = temp_search_key.replace("30", "")
  283. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  284. temp_search_key = temp_search_key.replace("15g", "")
  285. elif self.search_key == '999复方苦参肠炎康片12片':
  286. temp_search_key = temp_search_key.replace("12片", "")
  287. elif self.search_key == '999强力枇杷露16袋':
  288. temp_search_key = temp_search_key.replace("16袋", "")
  289. elif self.search_key == '999三蛇胆川贝膏138':
  290. temp_search_key = temp_search_key.replace("138", "")
  291. elif self.search_key == '999强力枇杷露120ml':
  292. temp_search_key = temp_search_key.replace("120ml", "")
  293. elif self.search_key == '999强力枇杷露150ml':
  294. temp_search_key = temp_search_key.replace("150ml", "")
  295. elif self.search_key == '999抗病毒口服液10ml*10':
  296. temp_search_key = temp_search_key.replace("10ml*10", "")
  297. elif self.search_key == '999抗病毒口服液10ml*12':
  298. temp_search_key = temp_search_key.replace("10ml*12", "")
  299. elif self.search_key == '999糠酸莫米松乳膏10g支':
  300. temp_search_key = temp_search_key.replace("10g支", "")
  301. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  302. temp_search_key = temp_search_key.replace("20g", "")
  303. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  304. temp_search_key = temp_search_key.replace("6g", "")
  305. elif self.search_key == '999壮骨关节丸6g*20':
  306. temp_search_key = temp_search_key.replace("6g*20", "")
  307. elif self.search_key == '999正天丸6g*15':
  308. temp_search_key = temp_search_key.replace("6g*15", "")
  309. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  310. temp_search_key = temp_search_key.replace("20", "")
  311. elif self.search_key == '999糠酸莫米松凝胶10':
  312. temp_search_key = temp_search_key.replace("10", "")
  313. elif self.search_key == '999板蓝根颗粒10g*20':
  314. temp_search_key = temp_search_key.replace("10g*20", "")
  315. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  316. temp_search_key = temp_search_key.replace("10粒", "")
  317. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  318. temp_search_key = temp_search_key.replace("12粒", "")
  319. elif self.search_key == '999咽炎片0.26g*12片*2板':
  320. temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
  321. elif self.search_key == '999小儿止咳糖浆120':
  322. temp_search_key = temp_search_key.replace("120", "")
  323. elif self.search_key == '999小儿止咳糖浆225':
  324. temp_search_key = temp_search_key.replace("225", "")
  325. elif self.search_key == '999小儿感冒颗粒6g*10':
  326. temp_search_key = temp_search_key.replace("6g*10", "")
  327. elif self.search_key == '999小儿感冒颗粒6g*24':
  328. temp_search_key = temp_search_key.replace("6g*24", "")
  329. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  330. temp_search_key = temp_search_key.replace("6g*10袋", "")
  331. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  332. temp_search_key = temp_search_key.replace("6g*20袋", "")
  333. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  334. temp_search_key = temp_search_key.replace("8g*10袋", "")
  335. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  336. temp_search_key = temp_search_key.replace("2.5g*10袋", "")
  337. # elif self.search_key == '三九胃泰颗粒':
  338. # self.search_key = '三九胃泰' #兼容三九胃泰 温胃舒颗粒
  339. print(f'获取商品title时的搜索关键字:{temp_search_key}')
  340. # title = self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text
  341. # 初始化
  342. drugs_name = ''
  343. specifications = ''
  344. title = ''
  345. # 循环的获取title为了有时间来处理人机验证
  346. for m in range(1, 6000):
  347. if self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').exists:
  348. title = self.safe_exec(
  349. lambda: self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
  350. )
  351. self.loggerMT.info(f"第{m}次获取title成功")
  352. print(f"第{m}次获取title成功")
  353. break
  354. else:
  355. time.sleep(3)
  356. # return drugs_name, specifications
  357. # drugs_name = ''
  358. # specifications = ''
  359. # try:
  360. # title_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  361. # title_xpath_2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  362. # if self.d.xpath(title_xpath).exists:
  363. # title = self.d.xpath(title_xpath).text
  364. # print(f"title_xpath获取的title={title}")
  365. # if temp_search_key not in title:
  366. # return drugs_name, specifications
  367. # elif self.d.xpath(title_xpath_2).exists:
  368. # title = self.d.xpath(title_xpath_2).text
  369. # print(f"title_xpath_2获取的title={title}")
  370. # if temp_search_key not in title:
  371. # return drugs_name, specifications
  372. # else:
  373. # print('title_xpath不存在,请确认')
  374. # return drugs_name, specifications
  375. # # title = self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
  376. # except Exception as e:
  377. # print(f"发生异常: {e}")
  378. # return drugs_name, specifications
  379. # 奇怪:有的时候title取出来的记过第一位会多一个0
  380. # title = self.safe_exec(self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text)
  381. # title = self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  382. title = title[1:] if title.startswith('0') else title
  383. print(f'获取到药品标题:{title}')
  384. # 从里面匹配出药品名和规格
  385. # drugs_name
  386. # specifications
  387. # match = re.search(r'([^\d]+)([\d\D]+)', title)
  388. if self.search_key == '999赐多康大豆':
  389. return title, '1罐'
  390. if self.search_key == "999感冒清热颗粒":
  391. match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
  392. else:
  393. match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
  394. if match:
  395. # drugs_name = match.group(1).strip() + match.group(2).strip()
  396. drugs_name = title
  397. specifications = match.group(3).strip()
  398. print("药品名:", drugs_name)
  399. print("规格:", specifications)
  400. # print('完整药名:', drugs_name + specifications)
  401. return drugs_name # , specifications
  402. else:
  403. if title == '999抗病毒口服液10ml*12' or title == '999抗病毒口服液':
  404. drugs_name = title
  405. specifications = '10ml*12支/盒'
  406. return drugs_name # , specifications
  407. elif title == '999抗病毒口服液10ml*10':
  408. drugs_name = title
  409. specifications = '10ml*10支/盒'
  410. return drugs_name # , specifications
  411. elif title == '999小柴胡颗粒':
  412. drugs_name = title
  413. specifications = '10g*9袋/盒'
  414. return drugs_name # , specifications
  415. elif title == '999养胃舒颗粒':
  416. drugs_name = title
  417. specifications = '10g*10袋/盒'
  418. return drugs_name # , specifications
  419. elif title == '三九胃泰胶囊':
  420. drugs_name = title
  421. specifications = '0.5g*24粒/盒'
  422. return drugs_name # , specifications
  423. elif title == '999补脾益肠丸':
  424. drugs_name = title
  425. specifications = '6g*15袋/盒'
  426. return drugs_name # , specifications
  427. elif title == '999感冒灵颗粒':
  428. drugs_name = title
  429. specifications = '10g*9袋/盒'
  430. return drugs_name # , specifications
  431. elif title == '999感冒灵胶囊':
  432. drugs_name = title
  433. specifications = '0.5g*12粒/盒'
  434. return drugs_name # , specifications
  435. else:
  436. print("没有匹配到预期格式")
  437. drugs_name = title
  438. specifications = ''
  439. return drugs_name # , specifications
  440. # 用 safe_exec 包装内部逻辑,确保验证码阻塞
  441. return self.safe_exec(_inner)
  442. def _merge_screenshots(self, screens):
  443. # 仅拼接截图,无额外功能
  444. if len(screens) == 1:
  445. return screens[0].convert('RGB')
  446. rgb_screens = [s.convert('RGB') for s in screens]
  447. total_width = rgb_screens[0].width
  448. total_height = sum(s.height for s in rgb_screens)
  449. merged_img = Image.new('RGB', (total_width, total_height))
  450. y_offset = 0
  451. for img in rgb_screens:
  452. merged_img.paste(img, (0, y_offset))
  453. y_offset += img.height
  454. return merged_img
  def get_oss_url(self):
      """Capture a scrolling screenshot, upload it to OSS and return its URL.

      Steps: read the page title, build a timestamped file name under
      ``../scrape_data``, take one screenshot plus one per swipe (at most
      ``self.scroll_times`` swipes, stopping early once a node containing
      "商家服务" is on screen), stitch the frames, optionally downscale by
      ``self.resize_ratio``, save the result as JPEG and hand the file to
      ``self._upload_to_oss``.

      The local file is currently kept after a successful upload (removal
      is intentionally disabled while the pipeline is being checked).

      Returns:
          Whatever ``self._upload_to_oss`` returns, or None when any step
          raises.
      """
      local_file_path = None
      try:
          # 1. Page title (may be None/empty when extraction failed).
          title = self._get_title()
          self.logger.info(f"获取标题: {title[:20]}..." if title else "未获取到标题")

          # 2. Local file path. A missing title must not abort the capture,
          #    so it degrades to an empty name instead of crashing re.sub.
          timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
          safe_title = re.sub(r'[\\/*?:"<>|]', '_', title or '')
          local_dir = "../scrape_data"
          os.makedirs(local_dir, exist_ok=True)
          local_file_path = os.path.join(local_dir, f"{timestamp}_{safe_title}.jpg")

          # 3. Scroll and capture.
          screen_list = [self.d.screenshot()]
          w, h = self.d.window_size()
          for _ in range(self.scroll_times):
              # Swipe from 85% down to 15% of the screen height; a shorter
              # swipe could miss the shop name.
              self.d.swipe(w // 2, h * 0.85, w // 2, h * 0.15,
                           duration=random.uniform(0.8, 1.5))
              time.sleep(random.uniform(2.0, 4.0))
              screen_list.append(self.d.screenshot())
              if self.d(textContains='商家服务').exists:
                  break

          # 4. Stitch, optionally downscale, save.
          merged_img = self._merge_screenshots(screen_list)
          try:
              if 0.1 < self.resize_ratio < 1.0:
                  new_size = (int(merged_img.width * self.resize_ratio),
                              int(merged_img.height * self.resize_ratio))
                  # Pillow >= 9.1 exposes Image.Resampling; older releases
                  # only have the module-level constant.
                  resample_mode = Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS
                  resized = merged_img.resize(new_size, resample_mode)
                  merged_img.close()  # release the full-size image
                  merged_img = resized
              merged_img.save(local_file_path, format='JPEG', quality=self.compress_quality)
          finally:
              merged_img.close()  # release the stitched image even if save fails
          self.logger.info(f"临时本地保存: {local_file_path}")

          # 5. Upload.
          oss_url = self._upload_to_oss(local_file_path)

          # 6. The local copy is deliberately kept for now, so the log must
          #    not claim that it was deleted.
          if oss_url is not None:
              self.logger.info(f"✅ OSS上传成功,本地临时文件暂时保留: {local_file_path}")
          return oss_url
      except Exception as e:
          self.logger.error(f"截图/上传失败: {e}")
          return None
  def get_access_token():
      """Request an OAuth access token from the Baidu AI open platform.

      Sends a client-credentials POST to the token endpoint.

      Returns:
          The ``access_token`` field of the JSON response, or None when the
          request fails, times out, or the response carries no token.
      """
      # NOTE(review): these credentials are hard-coded in source control;
      # they should be moved to configuration / secret storage and rotated.
      app_key = "tRK2RhyItCSh6BzyT4CNVXQa"
      app_secret = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
      token_url = 'https://aip.baidubce.com/oauth/2.0/token'
      params = {
          'grant_type': 'client_credentials',
          'client_id': app_key,
          'client_secret': app_secret,
      }
      headers = {
          'Content-Type': 'application/json',
          'Accept': 'application/json'
      }
      try:
          # A timeout keeps a stalled connection from hanging start-up forever.
          response = requests.post(token_url, params=params, headers=headers, data="", timeout=10)
          return response.json()['access_token']
      except (requests.RequestException, ValueError, KeyError, TypeError):
          # Network failure, non-JSON body, or a JSON body without a token.
          return None
  def get_mysql():
      """Open and return a new pymysql connection.

      Host, port, user, password and database name are read from ``Config``;
      the connection charset is ``utf8mb4``.
      """
      import pymysql

      connection_settings = dict(
          host=Config.DB_HOST,
          port=Config.DB_PORT,
          user=Config.DB_USER,
          password=Config.DB_PASSWORD,
          db=Config.DB_NAME,
          charset='utf8mb4',
      )
      return pymysql.connect(**connection_settings)
  555. class MT:
  def __init__(self, key):
      """Prepare scraper state for one search keyword.

      :param key: the keyword to search for; stored as ``self.search_key``
      """
      # Application package and OCR access token.
      self.package_name = Config.PACKAGE_NAME
      self.access_token = get_access_token()

      # City name -> province name lookup loaded from the bundled JSON file.
      self.city2province = self.get_city_info()

      # OCR client; the three credentials are left empty here.
      self.APP_ID = self.API_KEY = self.SECRET_KEY = ''
      self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

      # Target tables and logger (the root logger is used).
      self.table_name = Config.DB_TABLE
      self.shop_table_name = Config.DB_SHOP_TABLE
      self.loggerMT = logging.getLogger()

      # Per-run state.
      self.search_key = key
      self.unrelated_data = 0  # count of unrelated records
      self.shop_data_num = 0  # count of shop records
      self.device_id = None  # device id
      self.monitor = None  # monitor thread
  def cleanup(self):
      """Release resources before the program exits; never raises.

      Stops the monitor (and joins it for up to 5 seconds when it offers
      ``join``), then clears the reference. When a device is attached only
      a log line is written: stopping the app and removing temporary files
      are currently disabled.
      """
      log = self.loggerMT
      log.info("开始清理资源...")
      try:
          # Stop the monitor thread.
          monitor = self.monitor
          if monitor:
              log.info("停止监控线程...")
              monitor.stop()
              if hasattr(monitor, 'join'):
                  monitor.join(timeout=5)
              self.monitor = None

          # Device-side shutdown (the actual stop_app call is disabled).
          if getattr(self, 'd', None) is not None:
              try:
                  log.info("停止美团应用...")
              except Exception as e:
                  log.warning(f"停止应用时发生异常: {e}")
          # Temporary-file cleanup is disabled as well.
      except Exception as e:
          log.error(f"清理资源时发生异常: {e}")
  598. def _cleanup_temp_files(self):
  599. """
  600. 清理临时文件
  601. """
  602. try:
  603. temp_patterns = [
  604. "screenshot*.png",
  605. "*.jpg",
  606. "*.jpeg",
  607. "temp_*.png"
  608. ]
  609. for pattern in temp_patterns:
  610. for file in Path(".").glob(pattern):
  611. try:
  612. file.unlink()
  613. self.loggerMT.debug(f"删除临时文件: {file}")
  614. except Exception as e:
  615. self.loggerMT.warning(f"删除文件 {file} 失败: {e}")
  616. except Exception as e:
  617. self.loggerMT.warning(f"清理临时文件时发生异常: {e}")
  def check_device_status(self):
      """Check that the attached device is reachable.

      The device info is fetched once; an empty result counts as a failure.
      When the info reports the screen as off, a wake-up is attempted on a
      best-effort basis and the device is still considered healthy.

      :return: True when the device answered, False when no device is
          attached, the info is empty, or any error occurred
      """
      try:
          if getattr(self, 'd', None) is None:
              self.loggerMT.warning("设备未连接")
              return False

          # One round trip is enough: the same snapshot serves both the
          # liveness check and the screen-state check.
          device_info = self.d.info
          if not device_info:
              self.loggerMT.warning("无法获取设备信息")
              return False

          if not device_info.get('screenOn', True):
              self.loggerMT.warning("设备屏幕关闭")
              # Waking the screen is best effort; a failure here must not
              # mark the device as broken.
              try:
                  self.d.screen_on()
                  time.sleep(2)
              except Exception as e:
                  self.loggerMT.debug(f"唤醒屏幕失败: {e}")

          self.loggerMT.debug(f"设备状态正常: {device_info.get('productName', 'Unknown')}")
          return True
      except u2.exceptions.SessionBrokenError as e:
          self.loggerMT.error(f"设备会话断开: {e}")
          return False
      except Exception as e:
          self.loggerMT.exception(f"检查设备状态时发生异常: {e}")
          return False
  def check_network_status(self, test_urls=None):
      """Probe a list of URLs and report whether the network looks usable.

      :param test_urls: URLs to request with GET; when None, three built-in
          sites are used
      :return: True if at least one URL answered with HTTP 200, else False
      """
      if test_urls is None:
          test_urls = [
              "https://www.baidu.com",
              "https://www.taobao.com",
              "https://www.meituan.com"
          ]
      max_timeout = 10  # per-request timeout in seconds
      reachable = 0
      log = self.loggerMT

      for url in test_urls:
          try:
              log.debug(f"测试网络连接: {url}")
              response = requests.get(url, timeout=max_timeout)
              if response.status_code != 200:
                  log.warning(f"网络测试失败: {url} (状态码: {response.status_code})")
              else:
                  reachable += 1
                  log.debug(f"网络测试成功: {url}")
          except requests.exceptions.Timeout:
              log.warning(f"网络测试超时: {url}")
          except requests.exceptions.ConnectionError:
              log.warning(f"网络连接错误: {url}")
          except Exception as e:
              log.warning(f"网络测试异常: {url} - {e}")

      # A single successful probe is enough to call the network healthy.
      if reachable == 0:
          log.error("网络状态异常,所有测试都失败")
          return False
      log.info(f"网络状态正常 ({reachable}/{len(test_urls)} 个测试成功)")
      return True
  def wait_for_network(self, max_wait_time=300, check_interval=30):
      """Block until the network is back or the time budget is used up.

      Time is measured with a monotonic clock, and the time spent inside
      each connectivity probe counts against the budget, so the call does
      not overshoot ``max_wait_time`` by a full probe plus a full interval.

      :param max_wait_time: maximum total wait in seconds
      :param check_interval: pause between two probes in seconds
      :return: True when ``check_network_status`` succeeded, False on timeout
      """
      start = time.monotonic()
      deadline = start + max_wait_time
      wait_count = 0
      self.loggerMT.info(f"等待网络恢复,最大等待时间: {max_wait_time}秒")

      while time.monotonic() < deadline:
          wait_count += 1
          recovered = self.check_network_status()
          # Measured after the probe, which can itself take a long time.
          elapsed_time = time.monotonic() - start
          if recovered:
              self.loggerMT.info(f"网络恢复,等待时间: {elapsed_time:.1f}秒")
              return True

          remaining_time = max(0.0, max_wait_time - elapsed_time)
          self.loggerMT.info(
              f"网络仍未恢复,已等待 {elapsed_time:.1f}秒,剩余 {remaining_time:.1f}秒 (第{wait_count}次检查)")
          # Never sleep past the deadline and never pass a negative value.
          time.sleep(max(0.0, min(check_interval, remaining_time)))

      self.loggerMT.error(f"网络恢复超时,已等待 {max_wait_time}秒")
      return False
  def wr_re(self, mod, device_id, title=None, shop=None, prices=None, page=1):
      """Write ("写") or read ("读") the scraping-progress file of a device.

      The file lives at ``./ycwj/{device_id}_{search_key}.txt`` and holds a
      JSON object with the keys ``device_id``, ``title``, ``shop``,
      ``prices`` and ``page``.

      Write mode stores the record through a temporary file followed by
      ``os.replace``, which overwrites the target in one step, so there is
      never a moment without a readable progress file. Failures are logged,
      not raised.

      Read mode returns the stored dict, or None when the file is missing,
      incomplete or unreadable. When the JSON is corrupt and a ``.bak``
      file exists, the backup is restored and read instead.

      :param mod: "写" to write, "读" to read; anything else is a no-op
      :param device_id: device identifier used in the file name
      :param title: product title to store (write mode)
      :param shop: shop name to store (write mode)
      :param prices: price to store; saved as ``¥`` + integer part
      :param page: page number to store; falsy values are stored as 1
      :return: the progress dict in read mode when available, else None
      """
      file_path = f'./ycwj/{device_id}_{self.search_key}.txt'
      backup_file = f'{file_path}.bak'

      if mod == "写":
          temp_file = f'{file_path}.tmp'
          try:
              data = {
                  "device_id": device_id,
                  "title": title if title else "",
                  "shop": shop if shop else "",
                  "prices": f"¥{int(prices)}" if prices else "",
                  "page": page if page else 1
              }
              os.makedirs(os.path.dirname(file_path), exist_ok=True)

              with open(temp_file, 'w', encoding='utf-8') as f:
                  f.write(json.dumps(data, ensure_ascii=False, indent=2))

              # os.replace overwrites an existing target atomically on both
              # POSIX and Windows; moving the old file aside first would
              # only open a window in which no progress file exists.
              os.replace(temp_file, file_path)
              self.loggerMT.info(f"进度保存成功: 第{page}页 - {shop if shop else '未知店铺'}")

              # Drop a stale backup left behind by an earlier run.
              if os.path.exists(backup_file):
                  try:
                      os.remove(backup_file)
                  except OSError:
                      pass
          except Exception as e:
              self.loggerMT.error(f"保存进度失败: {e}")
              # Do not leave a half-written temporary file behind.
              if os.path.exists(temp_file):
                  try:
                      os.remove(temp_file)
                  except OSError:
                      pass

      elif mod == "读":
          try:
              if not os.path.exists(file_path):
                  self.loggerMT.info("进度文件不存在,从头开始采集")
                  return None

              with open(file_path, 'r', encoding='utf-8') as f:
                  content = f.read()
              data = json.loads(content)

              # Only a record carrying every expected key is usable.
              required_fields = ['device_id', 'title', 'shop', 'prices', 'page']
              if all(field in data for field in required_fields):
                  self.loggerMT.info(f"从进度恢复: 第{data['page']}页 - {data['shop']}")
                  return data
              self.loggerMT.warning("进度文件数据不完整")
              return None
          except json.JSONDecodeError as e:
              self.loggerMT.error(f"进度文件JSON解析失败: {e}")
              # Fall back to a backup file if one is available.
              if os.path.exists(backup_file):
                  self.loggerMT.info("尝试从备份文件恢复进度")
                  try:
                      os.replace(backup_file, file_path)
                  except OSError:
                      return None
                  # The backup was consumed by the move, so this recursion
                  # runs at most once more.
                  return self.wr_re("读", device_id)
              return None
          except Exception as e:
              self.loggerMT.error(f"读取进度失败: {e}")
              return None

      return None
  def save_progress(self, device_id, current_page, current_shop=None, current_title=None, current_price=None):
      """Persist the current scraping position through ``safe_exec``.

      :param device_id: device id
      :param current_page: current page number
      :param current_shop: current shop name (optional)
      :param current_title: current product title (optional)
      :param current_price: current price (optional)
      :return: True when the wrapped ``wr_re`` write returned None (its
          normal result in write mode), False on any other result or error
      """
      try:
          # Argument order expected by wr_re: mode, device, title, shop,
          # price, page.
          write_args = ("写", device_id, current_title, current_shop, current_price, current_page)
          outcome = self.safe_exec(self.wr_re, *write_args, max_retries=20, retry_delay=1)

          if outcome is not None:
              self.loggerMT.warning("进度保存返回意外结果")
              return False

          self.loggerMT.debug(f"进度保存成功: 第{current_page}页")
          return True
      except Exception as e:
          self.loggerMT.error(f"保存进度时发生异常: {e}")
          return False
  def auto_save_progress(self, device_id, page_num, shop_name="", product_title="", price=""):
      """Save progress and drop a checkpoint marker every fifth page.

      The progress record is always written via ``save_progress``. When
      that succeeds and ``page_num`` is a multiple of 5, a small UTF-8
      text marker is written next to it; a failure to write the marker is
      logged at debug level and does not affect the result.

      :param device_id: device id
      :param page_num: current page number
      :param shop_name: current shop name; falsy values are stored as ""
      :param product_title: current product title; falsy values become ""
      :param price: current price; falsy values become ""
      :return: the result of ``save_progress``, or False when it raised
      """
      try:
          success = self.save_progress(
              device_id,
              page_num,
              shop_name if shop_name else "",
              product_title if product_title else "",
              price if price else ""
          )

          if success and page_num % 5 == 0:
              checkpoint_file = f'./ycwj/{device_id}_{self.search_key}_checkpoint_{page_num}.txt'
              try:
                  # The marker contains Chinese text, so the encoding is
                  # pinned instead of relying on the platform default.
                  with open(checkpoint_file, 'w', encoding='utf-8') as f:
                      f.write(f"检查点: 第{page_num}页, 时间: {datetime.datetime.now()}")
                  self.loggerMT.debug(f"创建检查点: {checkpoint_file}")
              except OSError as e:
                  # A missing checkpoint must not disturb the main flow.
                  self.loggerMT.debug(f"检查点保存失败: {e}")

          return success
      except Exception as e:
          self.loggerMT.warning(f"自动保存进度失败: {e}")
          return False
  853. def _cleanup_checkpoint_files(self, device_id, search_key):
  854. """
  855. 清理检查点文件
  856. """
  857. try:
  858. pattern = f'./ycwj/{device_id}_{search_key}_checkpoint_*.txt'
  859. for checkpoint_file in Path(".").glob(pattern):
  860. try:
  861. checkpoint_file.unlink()
  862. self.loggerMT.debug(f"清理检查点文件: {checkpoint_file}")
  863. except Exception as e:
  864. self.loggerMT.warning(f"清理检查点文件失败 {checkpoint_file}: {e}")
  865. except Exception as e:
  866. self.loggerMT.warning(f"清理检查点文件时发生异常: {e}")
  def li_or_lo(self, key):
      """Tap the "价格" sort header to choose the price ordering.

      "升序" taps the header once and "降序" taps it twice; any other value
      does nothing. Each tap is followed by a random pause.
      """
      taps = 0
      if key == "升序":
          taps = 1
      elif key == "降序":
          taps = 2

      for _ in range(taps):
          self.d.xpath('//*[@text="价格"]').click()
          time.sleep(self.get_sleep_time())
  def get_prices(self):
      """Collect the prices currently shown on screen.

      Every TextView whose text contains "¥" is inspected. Empty texts and
      texts mentioning a discount are skipped; the rest is stripped of the
      currency sign and converted to float (when it contains a dot) or int.
      Texts that cannot be converted are logged and skipped.

      :return: list of numeric prices; empty when nothing could be read
      """
      price_list = []
      log = self.loggerMT
      try:
          # Nothing can be read without a device.
          if getattr(self, 'd', None) is None:
              log.warning("设备未连接,无法获取价格")
              return price_list

          price_elements = self.d.xpath('//android.widget.TextView[contains(@text,"¥")]').all()
          if not price_elements:
              log.debug("未找到价格元素")
              return price_list

          for price_element in price_elements:
              try:
                  price_text = price_element.text
                  if not price_text:
                      continue
                  # Discount banners also contain the currency sign.
                  if any(marker in price_text for marker in ("优惠", "以优惠")):
                      continue
                  price_value = price_text.replace("¥", "").strip()
                  converter = float if "." in price_value else int
                  price_list.append(converter(price_value))
              except (ValueError, AttributeError) as e:
                  log.warning(
                      f"价格解析失败: {price_element.text if hasattr(price_element, 'text') else 'unknown'}, 错误: {e}")
                  continue
              except Exception as e:
                  log.warning(f"处理价格元素时发生异常: {e}")
                  continue

          log.debug(f"获取到价格列表: {price_list}")
          return price_list
      except u2.exceptions.UiObjectNotFoundError as e:
          log.warning(f"未找到价格UI元素: {e}")
          return price_list
      except u2.exceptions.SessionBrokenError as e:
          log.error(f"设备会话断开,无法获取价格: {e}")
          return price_list
      except Exception as e:
          log.exception(f"获取价格时发生未预料异常: {e}")
          return price_list
  def slide_n(self):
      """Scroll the result list up by one fixed drag gesture.

      Waits up to 8 seconds for the RecyclerView. When the list never
      appears, or the "已经到底啦" end marker is visible, nothing is dragged.
      Otherwise a drag from (300, 1400) to (300, 400) is performed.
      """
      recycler = self.d.xpath('//android.support.v7.widget.RecyclerView')
      if not recycler.wait(timeout=8):
          # There is no list to scroll; return instead of failing later on
          # a variable that was never assigned.
          print("超时,列表没出现")
          return

      if self.d.xpath('//*[@text="已经到底啦"]').exists:
          print('已经到达列表页最底部')
          return

      # The gesture uses fixed coordinates; the list bounds are not needed.
      print('开始滑动')
      self.d.drag(300, 1400, 300, 400, 1)
      print('滑动结束')
  def stop_app(self):
      """Stop the configured app on the device, then pause for 5 seconds."""
      package = self.package_name
      self.d.app_stop(package)
      time.sleep(5)
  def start_app(self):
      """Start the configured app on the device, then pause for 5 seconds."""
      package = self.package_name
      self.d.app_start(package)
      time.sleep(5)
  def restart_app(self):
      """Restart the app: stop it first, then start it again."""
      for step in (self.stop_app, self.start_app):
          step()
  961. @staticmethod
  962. def get_sleep_time():
  963. # return random.randint(5, 8)
  964. return random.randint(1, 3)
  965. @staticmethod
  966. def get_current_date():
  967. return datetime.datetime.now().strftime('%Y/%m/%d')
  968. @staticmethod
  969. def get_city_info():
  970. """
  971. 获取所有的省市数据
  972. :return:
  973. """
  974. file_path = 'D:\work\dfwy_spider\drug_data\mt\kailin_city.json'
  975. with open(file_path, 'r', encoding='utf-8') as f:
  976. data = json.load(f)
  977. province = {province_one["id"]: province_one for province_one in data['province']}
  978. city2province = dict()
  979. city = data['city']
  980. for city_one in city:
  981. name = city_one['name']
  982. pid = city_one['pid']
  983. if len(str(pid)) > 2:
  984. pid = int(re.match('^\d{2}', str(pid)).group())
  985. city2province[name] = province[pid]['name']
  986. return city2province
  def get_shop_name(self):
      """Read the shop name from the product detail page.

      Up to two attempts are made. Each attempt tries two layout-specific
      XPaths on the current page. On the last attempt the shop page itself
      is opened as a fallback, its header text is read, and the app is
      sent back one page whether or not a name was found, so the caller
      always ends up on the page it started from.

      :return: the stripped shop name, or None when no method produced one
          or the device session is broken
      """
      max_retries = 2
      shop_name = None

      # (xpath, log message prefix) for the two on-page lookups.
      page_lookups = (
          ('//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView',
           '获取到店铺名: '),
          ('//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView',
           '获取到店铺名(备用方法): '),
      )

      for attempt in range(max_retries):
          try:
              if getattr(self, 'd', None) is None:
                  self.loggerMT.warning("设备未连接,无法获取店铺名")
                  return None

              # Methods 1 and 2: read the name directly from the page.
              for xpath, found_prefix in page_lookups:
                  try:
                      shop_name = self.d.xpath(xpath).text
                      if shop_name and shop_name.strip():
                          self.loggerMT.info(f'{found_prefix}{shop_name}')
                          return shop_name.strip()
                  except u2.exceptions.UiObjectNotFoundError:
                      pass  # fall through to the next method

              # Method 3 (last attempt only): open the shop page.
              if attempt == max_retries - 1:
                  self.loggerMT.info("尝试点击店铺进入后获取店铺名称")
                  try:
                      self.enter_shop()
                      shop_xpath = '//*[@resource-id="com.sankuai.meituan:id/layout_header_view"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]//android.widget.FrameLayout[2]/android.widget.FrameLayout[1]/android.widget.TextView'
                      header_name = None
                      if self.d.xpath(shop_xpath).exists:
                          header_name = self.d.xpath(shop_xpath).text
                      found = bool(header_name and header_name.strip())
                      if found:
                          shop_name = header_name
                          self.loggerMT.info(f'通过店铺页面获取到店铺名: {shop_name}')
                      # Leave the shop page in every case; otherwise a
                      # missing header would strand the app on that page.
                      self.swipe_back(1)
                      if found:
                          return shop_name.strip()
                  except Exception as e:
                      self.loggerMT.warning(f"通过店铺页面获取店铺名失败: {e}")
                      # Make sure we return to the previous page.
                      try:
                          self.swipe_back(1)
                      except Exception:
                          pass

              # Nothing found yet: back off before the next attempt.
              if attempt < max_retries - 1:
                  wait_time = 2 * (attempt + 1)
                  self.loggerMT.debug(f"未找到店铺名,等待{wait_time}秒后重试...")
                  time.sleep(wait_time)
          except u2.exceptions.SessionBrokenError as e:
              self.loggerMT.error(f"设备会话断开,无法获取店铺名: {e}")
              return None
          except Exception as e:
              self.loggerMT.warning(f"获取店铺名时发生异常(尝试{attempt + 1}/{max_retries}): {e}")
              if attempt < max_retries - 1:
                  time.sleep(2 * (attempt + 1))

      self.loggerMT.warning("无法获取店铺名,所有方法都尝试失败")
      return None
  def safe_find_element(self, xpath, max_retries=20, retry_delay=2, timeout=10, default_return=None):
      """Look up a UI element by XPath, retrying on failure.

      :param xpath: XPath of the element to find
      :param max_retries: maximum number of attempts
      :param retry_delay: base delay in seconds; attempt ``n`` (1-based) is
          followed by a pause of ``retry_delay * n``
      :param timeout: seconds to wait for the element within one attempt
      :param default_return: value returned when the element is not found
      :return: the selector once its ``wait`` succeeds, else
          ``default_return`` (also when no device is attached or the device
          session is broken)
      """
      log = self.loggerMT
      last_attempt = max_retries - 1

      for attempt in range(max_retries):
          try:
              if getattr(self, 'd', None) is None:
                  log.warning("设备未连接,无法查找元素")
                  return default_return

              element = self.d.xpath(xpath)
              if element.wait(timeout=timeout):
                  return element
              log.debug(f"未找到元素: {xpath} (尝试 {attempt + 1}/{max_retries})")
          except u2.exceptions.SessionBrokenError as e:
              log.error(f"设备会话断开,无法查找元素: {e}")
              return default_return
          except Exception as e:
              log.warning(f"查找元素时发生异常: {e} (尝试 {attempt + 1}/{max_retries})")

          # Back off before every attempt except the final one.
          if attempt < last_attempt:
              wait_time = retry_delay * (attempt + 1)
              log.debug(f"等待 {wait_time} 秒后重试查找元素...")
              time.sleep(wait_time)

      log.warning(f"无法找到元素: {xpath},已达到最大重试次数")
      return default_return
  def safe_get_element_text(self, xpath, max_retries=20, retry_delay=2, timeout=10, default_return=None):
      """Return the stripped text of a UI element, or a default.

      :param xpath: XPath of the element to read
      :param max_retries: maximum number of lookup attempts
      :param retry_delay: base retry delay in seconds
      :param timeout: seconds to wait for the element within one attempt
      :param default_return: value returned when the element is missing,
          its text is empty or blank, or reading the text raises
      :return: the stripped text, or ``default_return``
      """
      element = self.safe_find_element(xpath, max_retries, retry_delay, timeout, None)
      if element is None:
          return default_return

      try:
          raw_text = element.text
          cleaned = raw_text.strip() if raw_text else ""
          return cleaned if cleaned else default_return
      except Exception as e:
          self.loggerMT.warning(f"获取元素文本时发生异常: {e}")
          return default_return
  def get_qualification_number(self):
      """Read the shop's qualification number from the qualification page.

      The on-screen text is expected to look like "资质编号:<number>". Only
      that leading label (followed by an ASCII or full-width colon) is
      removed; the number itself is left untouched.

      :return: the qualification number, or None when it is missing, empty
          or an error occurred
      """
      try:
          xpath = '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[2]'
          qualification_number_str = self.safe_get_element_text(xpath, max_retries=20, retry_delay=3,
                                                                default_return="")
          if qualification_number_str:
              qualification_number = qualification_number_str.strip()
              label = '资质编号'
              # str.strip(chars) removes a character set from both ends and
              # could eat into the number, so the label is removed only as
              # a prefix.
              if qualification_number.startswith(label):
                  qualification_number = qualification_number[len(label):].lstrip(':: \t')
              qualification_number = qualification_number.strip()
              if qualification_number:
                  self.loggerMT.info(f'获取到资质编号: {qualification_number}')
                  return qualification_number

          self.loggerMT.warning("未找到资质编号或资质编号为空")
          return None
      except Exception as e:
          self.loggerMT.exception(f"获取资质编号时发生异常: {e}")
          return None
  def get_shop_address(self):
      """Read the shop address from the shop information page.

      The first text node is used when present. If that text contains
      "发货时间" (a delivery-time line), the second node is read instead when
      it exists; otherwise the first text is kept.

      :return: the address text, "" when the first node does not exist, or
          None when reading fails
      """
      try:
          xpath = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView'
          if self.d.xpath(xpath).exists:
              shop_address = self.d.xpath(xpath).text
              print(f'111-获取到店铺地址:{shop_address}')
              if '发货时间' in shop_address:
                  print('店铺地址包含发货时间,再次获取店铺地址')
                  xpath2 = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.TextView'
                  if self.d.xpath(xpath2).exists:
                      shop_address = self.d.xpath(xpath2).text
                      print(f'222-获取到店铺地址:{shop_address}')
                  else:
                      print('222-xpath2获取店铺地址失败')
          else:
              shop_address = ''
          print(f'333-获取到店铺地址:{shop_address}')
          return shop_address
      except Exception:
          # Limited to Exception so Ctrl-C / interpreter exit still
          # propagate instead of being reported as a lookup failure.
          print('获取店铺地址出错-get_shop_address')
          return None
  def enter_detail(self):
      """Tap the first item of the result list, then pause briefly."""
      first_item_xpath = '//*[@resource-id="com.sankuai.meituan:id/recycler"]/android.widget.FrameLayout[1]'
      first_item = self.d.xpath(first_item_xpath)
      first_item.click()
      time.sleep(self.get_sleep_time())
  def save_to_database(self, data):
      """Insert one product record into ``self.table_name``.

      A fresh connection is opened for the insert and is always closed
      again, together with its cursor, even when the insert fails.

      :param data: mapping that provides a value for every column listed
          below
      :raises KeyError: when ``data`` lacks one of the columns
      """
      print(f'保存数据到数据库:{data}')
      columns = (
          'product', 'min_price', 'manufacture_date', 'expiry_date', 'shop',
          'business_license_company', 'province', 'city', 'manufacturer',
          'specification', 'approval_number', 'product_link', 'scrape_date',
          'scrape_province', 'availability', 'credit_code', 'platform',
          'search_key', 'sales', 'inventory', 'snapshot_url',
      )
      # Column list and placeholders come from the same tuple, so they
      # cannot drift apart. Values stay parameterized.
      add_sql = f"""
      INSERT INTO {self.table_name}
      ({', '.join(columns)})
      VALUES ({', '.join(['%s'] * len(columns))})
      """
      # Resolve the values before connecting: a missing key then fails
      # without opening a connection at all.
      values = tuple(data[column] for column in columns)

      conn = get_mysql()
      try:
          cur = conn.cursor()
          try:
              cur.execute(add_sql, values)
              conn.commit()
          finally:
              cur.close()
      finally:
          conn.close()
      print("存入数据库成功")
  def save_shop_info_to_database(self, data):
      """Insert one shop record into ``self.shop_table_name``.

      A fresh connection is opened for the insert and is always closed
      again, together with its cursor, even when the insert fails.

      :param data: mapping that provides a value for every column listed
          below
      :raises KeyError: when ``data`` lacks one of the columns
      """
      print(f'保存店铺数据到数据库:{data}')
      columns = (
          'shop', 'contact_address', 'qualification_number',
          'business_license_company', 'business_license_address',
          'scrape_date', 'platform',
      )
      add_sql = f"""
      INSERT INTO {self.shop_table_name}
      ({', '.join(columns)})
      VALUES ({', '.join(['%s'] * len(columns))})
      """
      # Resolve the values before connecting: a missing key then fails
      # without opening a connection at all.
      values = tuple(data[column] for column in columns)

      conn = get_mysql()
      try:
          cur = conn.cursor()
          try:
              cur.execute(add_sql, values)
              conn.commit()
          finally:
              cur.close()
      finally:
          conn.close()
      print('存入店铺信息到数据库成功')
  def swipe_up(self):
      """Swipe the screen upwards by almost a full screen height.

      The gesture runs along the vertical centre line, from 100 px above
      the bottom edge to 100 px below the top edge, with a random duration
      of up to 0.3 s. In roughly 15% of the calls a short extra swipe is
      added, because the page sometimes gets stuck. A random pause follows.
      """
      # Read the display size once; every access to ``info`` is a separate
      # request to the device.
      display = self.d.info
      screen_width = display['displayWidth']
      screen_height = display['displayHeight']

      duration_rate = random.uniform(0, 0.3)
      centre_x = screen_width // 2
      self.d.swipe(centre_x, screen_height - 100, centre_x, 100, duration=duration_rate)

      if random.uniform(0, 1) > 0.85:
          # Occasionally nudge a little further in case the page is stuck.
          self.d.swipe_ext("up", 0.1)
      time.sleep(self.get_sleep_time())
  def swipe_back(self, no):
      """Press the device's back key repeatedly.

      :param no: how many times to go back; each press is followed by a
          random pause
      :return: None
      """
      presses_left = no
      while presses_left > 0:
          self.d.press('back')
          time.sleep(self.get_sleep_time())
          presses_left -= 1
  def drug_price(self):
      """Read the product price shown on the current page.

      Takes the text of the first node whose text starts with "¥" and
      converts its first run of digits and dots to a float.

      :return: the price as float, or None when it cannot be extracted
      """
      try:
          price_node = self.d.xpath('//*[starts-with(@text,"¥")]')
          price_str = price_node.text
          number_match = re.search(r'[\d\.]+', price_str)
          price = float(number_match.group())
          print(f'获取到价格:{price}')
          return price
      except Exception as e:
          print(f'提取价格出错-->{e}')
          return None
  def drug_sale_num(self):
      """Read the "已售" (sold) figure shown on the current page.

      :return: the text after the "已售" label with surrounding whitespace
          removed, or None when no such node exists or reading fails
      """
      sold_xpath = '//*[starts-with(@text,"已售")]'
      try:
          if not self.d.xpath(sold_xpath).exists:
              return None
          # The node is looked up again to read its current text.
          sold_text = self.d.xpath(sold_xpath).text
          sales_num_str = sold_text.replace("已售", "").strip()
          print(f'获取到已售数量:{sales_num_str}')
          return sales_num_str
      except Exception as e:
          print(f'提取已售数量出错-->{e}')
          return None
  1243. def restart_uiautomator_services(self, device_id):
  1244. """
  1245. 重启atx的uiautomator 服务
  1246. :param device_id:
  1247. :return:
  1248. """
  1249. stop_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d --stop'
  1250. start_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d'
  1251. subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
  1252. time.sleep(self.get_sleep_time())
  1253. subprocess.run(start_uiautomator_services, capture_output=True, text=True, shell=True)
  1254. time.sleep(self.get_sleep_time())
  1255. def connect_devices(self, device_id):
  1256. """
  1257. 连接设备,支持重试机制
  1258. :return: True表示连接成功,False表示连接失败
  1259. """
  1260. max_retries = 3
  1261. retry_delay = 5 # 秒
  1262. for attempt in range(max_retries):
  1263. try:
  1264. self.loggerMT.info(f'尝试连接设备 {device_id},第 {attempt + 1} 次尝试')
  1265. self.d = u2.connect_usb(device_id)
  1266. # 测试连接是否有效
  1267. device_info = self.d.info
  1268. self.loggerMT.info(f'设备连接成功: {device_info}')
  1269. self.restart_uiautomator_services(device_id)
  1270. self.oss_config = {
  1271. "access_key_id": 'LTAI5tDwjfteBvivYN41r8sJ',
  1272. "access_key_secret": 'yowuOGi2nYYnrqGpO3qcz94C4brcPp',
  1273. "endpoint": "oss-cn-shenzhen.aliyuncs.com",
  1274. "bucket_name": "zhijiayun-jiansuo",
  1275. "oss_prefix": "scrape_data/"
  1276. }
  1277. print(f'✅ 成功连接到设备: {device_id}')
  1278. self.loggerMT.info(f'✅ 成功连接到设备: {device_id}')
  1279. return True
  1280. except Exception as e:
  1281. error_msg = f'{device_id} 连接错误 (尝试 {attempt + 1}/{max_retries}): {e}'
  1282. print(f'⚠️ {error_msg}')
  1283. self.loggerMT.error(error_msg)
  1284. if attempt < max_retries - 1:
  1285. print(f'等待 {retry_delay} 秒后重试...')
  1286. time.sleep(retry_delay)
  1287. retry_delay *= 2 # 指数退避
  1288. else:
  1289. print(f'❌ 设备 {device_id} 连接失败,已达到最大重试次数')
  1290. self.loggerMT.error(f'设备 {device_id} 连接失败,已达到最大重试次数')
  1291. return False
  1292. return False
  1293. def get_ocr_res(self, img):
  1294. try:
  1295. # img地址
  1296. print(f'开始识别图片:{img}')
  1297. request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
  1298. # 二进制方式打开图片文件
  1299. f = open(img, 'rb')
  1300. img = base64.b64encode(f.read())
  1301. params = {"image": img}
  1302. # access_token = get_access_token()
  1303. request_url = request_url + "?access_token=" + self.access_token
  1304. headers = {'content-type': 'application/x-www-form-urlencoded'}
  1305. response = requests.post(request_url, data=params, headers=headers)
  1306. if response:
  1307. res = response.json()
  1308. new_dic = dict()
  1309. for ite in res['words_result'].keys():
  1310. new_dic[ite] = res['words_result'][ite]['words']
  1311. print('资质数据信息', new_dic)
  1312. return new_dic
  1313. else:
  1314. return None
  1315. except:
  1316. return None
  1317. def remove_watermark(self, img_path):
  1318. """
  1319. 图片去水印(将水印部分变成白色背景)并将数据转化为二进制数据
  1320. :param img_path: 图片路径
  1321. :return: 二进制图片数据
  1322. """
  1323. img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
  1324. endswith = os.path.splitext(img_path)[1]
  1325. new = np.clip(1.4057577998008846 * img - 38.33089999653017, 0, 255).astype(np.uint8)
  1326. _, img_binary = cv2.imencode(endswith, new)
  1327. return img_binary
  1328. def get_ocr_res_image(self, img):
  1329. try:
  1330. image = self.remove_watermark(img)
  1331. res_image = self.client.basicGeneral(image)
  1332. data = res_image.get('words_result', '')
  1333. print(f'百度api返回结果:{data}')
  1334. return data
  1335. except:
  1336. return None
  1337. def screenshot_the_business_license(self, qualification_number):
  1338. screenshot_path = 'screenshot1.png'
  1339. self.d.screenshot(screenshot_path)
  1340. img = cv2.imread(screenshot_path)
  1341. # 指定裁剪区域 (left, top, right, bottom)
  1342. left = 0
  1343. top = 480
  1344. right = 720
  1345. bottom = 1420
  1346. cropped_img = img[top:bottom, left:right]
  1347. # 创建目录
  1348. SCREENSHOT_DIR = Path('screenshot') # 注意这里的变化和py文件同一级目录即可
  1349. SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True)
  1350. if qualification_number:
  1351. # cropped_screenshot_path = 'D:\\work\\dfwy_spider\\drug_data\\mt\\screenshot\\' + qualification_number + '.png'
  1352. cropped_screenshot_path = SCREENSHOT_DIR / f'{qualification_number}.png'
  1353. else:
  1354. cropped_screenshot_path = 'cropped_screenshot.png'
  1355. cv2.imwrite(cropped_screenshot_path, cropped_img)
  1356. return cropped_screenshot_path
  1357. def screenshot_instruction(self):
  1358. # 获取当前时间
  1359. current_time = datetime.datetime.now()
  1360. # 格式化为时分秒
  1361. time_str = current_time.strftime("%H-%M-%S")
  1362. # 生成随机的 8 位字符串
  1363. random_str = secrets.token_hex(4) # 生成 4 个字节的随机字符串,转换为 8 位十六进制字符串
  1364. print(time_str)
  1365. screenshot_path = 'instructionscreenshot1-' + time_str + '-' + random_str + '.png'
  1366. self.d.screenshot(screenshot_path)
  1367. return screenshot_path
  1368. def extract_specification(self, text):
  1369. """提取药品规格信息"""
  1370. # 方法1:简单去除到期信息
  1371. pattern = r'^[^【]+'
  1372. match = re.search(pattern, text)
  1373. if match:
  1374. return match.group(0).strip()
  1375. return text
  1376. # 获取商品title
  1377. def get_title(self):
  1378. # try:
  1379. # title = self.d.xpath(
  1380. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  1381. # except:
  1382. # title = self.d.xpath(
  1383. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView').text
  1384. # title = self.d.xpath('//*[contains(@text, "舒肝颗粒")]').text
  1385. def _inner():
  1386. temp_search_key = self.search_key
  1387. if "天力士" in self.search_key:
  1388. temp_search_key = '复方丹参滴丸'
  1389. # elif self.search_key == '三九胃泰颗粒':
  1390. # self.search_key = '三九胃泰' #兼容三九胃泰 温胃舒颗粒
  1391. print(f'获取商品title时的搜索关键字:{temp_search_key}')
  1392. # title = self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text
  1393. # 初始化
  1394. drugs_name = ''
  1395. specifications = ''
  1396. title = ''
  1397. # 循环的获取title为了有时间来处理人机验证
  1398. for m in range(1, 6000):
  1399. if self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').exists:
  1400. title = self.safe_exec(
  1401. lambda: self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
  1402. )
  1403. print(f"第{m}次获取title成功")
  1404. break
  1405. else:
  1406. time.sleep(1)
  1407. # return drugs_name, specifications
  1408. title = title[1:] if title.startswith('0') else title
  1409. print(f'获取到药品标题:{title}')
  1410. # 从里面匹配出药品名和规格
  1411. # drugs_name
  1412. # specifications
  1413. # match = re.search(r'([^\d]+)([\d\D]+)', title)
  1414. if self.search_key == '999赐多康大豆':
  1415. return title, '1罐'
  1416. if self.search_key == "999感冒清热颗粒":
  1417. match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
  1418. else:
  1419. match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
  1420. if match:
  1421. # drugs_name = match.group(1).strip() + match.group(2).strip()
  1422. drugs_name = title
  1423. specifications = match.group(3).strip()
  1424. print("药品名:", drugs_name)
  1425. print("规格:", specifications)
  1426. # 如果品规中包含到期则需要再次的正则处理
  1427. if '到期' in specifications:
  1428. specifications = self.extract_specification(specifications)
  1429. # print('完整药名:', drugs_name + specifications)
  1430. return drugs_name, specifications
  1431. else:
  1432. if title == '999抗病毒口服液10ml*12' or title == '999抗病毒口服液':
  1433. drugs_name = title
  1434. specifications = '10ml*12支/盒'
  1435. return drugs_name, specifications
  1436. elif title == '999抗病毒口服液10ml*10':
  1437. drugs_name = title
  1438. specifications = '10ml*10支/盒'
  1439. return drugs_name, specifications
  1440. elif title == '999小柴胡颗粒':
  1441. drugs_name = title
  1442. specifications = '10g*9袋/盒'
  1443. return drugs_name, specifications
  1444. elif title == '999养胃舒颗粒':
  1445. drugs_name = title
  1446. specifications = '10g*10袋/盒'
  1447. return drugs_name, specifications
  1448. elif title == '三九胃泰胶囊':
  1449. drugs_name = title
  1450. specifications = '0.5g*24粒/盒'
  1451. return drugs_name, specifications
  1452. elif title == '999补脾益肠丸':
  1453. drugs_name = title
  1454. specifications = '6g*15袋/盒'
  1455. return drugs_name, specifications
  1456. elif title == '999复方感冒灵颗粒':
  1457. drugs_name = title
  1458. specifications = '14g*9袋/盒'
  1459. return drugs_name, specifications
  1460. else:
  1461. print("没有匹配到预期格式")
  1462. drugs_name = title
  1463. specifications = ''
  1464. return drugs_name, specifications
  1465. # 用 safe_exec 包装内部逻辑,确保验证码阻塞
  1466. return self.safe_exec(_inner)
  1467. def enter_shop(self):
  1468. """
  1469. 进店,方便提取资质环境
  1470. :return:
  1471. """
  1472. # self.d.xpath('//*[@text="进店"]').click()
  1473. self.d.xpath('//*[@text="店铺"]').click()
  1474. time.sleep(self.get_sleep_time())
  1475. def enter_shoper(self):
  1476. """
  1477. 进入商家
  1478. :return:
  1479. """
  1480. is_shoper_exists = 0
  1481. for i in range(10):
  1482. if self.d.xpath('//*[@text="商家"]').exists:
  1483. print(f'第{i}次商家存在')
  1484. is_shoper_exists = 1
  1485. break
  1486. else:
  1487. print(f'第{i}次商家不存在')
  1488. time.sleep(self.get_sleep_time())
  1489. if is_shoper_exists == 1:
  1490. self.d.xpath('//*[@text="商家"]').click()
  1491. time.sleep(self.get_sleep_time())
  1492. return True
  1493. else:
  1494. return False
  1495. # 点击查看商家资质
  1496. def scan_shoper_license(self):
  1497. exist_shoper = 0
  1498. for i in range(10):
  1499. if self.d.xpath('//*[@text="查看商家资质"]').exists:
  1500. print(f'第{i}次查看商家资质存在')
  1501. exist_shoper = 1
  1502. break
  1503. else:
  1504. print(f'第{i}次查看商家资质不存在')
  1505. time.sleep(self.get_sleep_time())
  1506. if exist_shoper == 1:
  1507. self.d.xpath('//*[@text="查看商家资质"]').click()
  1508. time.sleep(self.get_sleep_time())
  1509. else:
  1510. self.swipe_back(1)
  1511. # 验证商品的信息是否在数据库中已存在
  1512. def data_is_exists(self, data):
  1513. """
  1514. 检查指定数据是否已存在于数据库表中(仅检查存在性)
  1515. 参数:
  1516. data: 包含查询条件的字典,键为列名,值为条件值
  1517. 返回:
  1518. True: 数据存在
  1519. False: 数据不存在
  1520. None: 检查过程中出错
  1521. """
  1522. # dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  1523. # 'platform': '美团'}
  1524. # 1. 验证必要字段
  1525. required_keys = ['product', 'min_price', 'shop', 'scrape_date', 'platform']
  1526. if not all(key in data for key in required_keys):
  1527. missing = [key for key in required_keys if key not in data]
  1528. logging.error(f"缺少必要字段: {', '.join(missing)}")
  1529. return None
  1530. try:
  1531. # 连接数据库
  1532. conn = get_mysql()
  1533. # 创建游标对象
  1534. cur = conn.cursor()
  1535. # query_sql = f"SELECT * FROM {self.table_name} WHERE product = '{data['product']}' AND min_price = '{data['min_price']}' AND shop = '{data['shop']}' AND scrape_date = '{data['scrape_date']}' AND platform = '{data['platform']}'"
  1536. # cur.execute(query_sql)
  1537. query_sql = """
  1538. SELECT * FROM {}
  1539. WHERE product = %s
  1540. AND min_price = %s
  1541. AND shop = %s
  1542. AND scrape_date = %s
  1543. AND platform = %s
  1544. """.format(self.table_name)
  1545. cur.execute(query_sql, (
  1546. data['product'],
  1547. data['min_price'],
  1548. data['shop'],
  1549. data['scrape_date'],
  1550. data['platform']
  1551. ))
  1552. result = cur.fetchone()
  1553. return bool(result) # 如果存在返回True,否则False
  1554. except Exception as e:
  1555. print(f"MySQL 错误: {str(e)}")
  1556. # 验证店铺信息是否在数据库中已存在
  1557. def shop_is_exists_database(self, shop):
  1558. try:
  1559. # 连接数据库
  1560. conn = get_mysql()
  1561. # 创建游标对象
  1562. cur = conn.cursor()
  1563. query_sql = """
  1564. SELECT * FROM {}
  1565. WHERE shop = %s
  1566. """.format(self.shop_table_name)
  1567. cur.execute(query_sql, (
  1568. shop
  1569. ))
  1570. result = cur.fetchone()
  1571. return bool(result) # 如果存在返回True,否则False
  1572. except Exception as e:
  1573. print(f"MySQL 错误: {str(e)}")
  1574. def wait_if_verifying(self, monitor, timeout=120):
  1575. """验证码处理期间阻塞主线程"""
  1576. start = time.time()
  1577. while monitor.pausing.is_set() and time.time() - start < timeout:
  1578. time.sleep(1)
  1579. def wait_for_ready(self, monitor, timeout=86400):
  1580. """进入每一页前都先等验证码"""
  1581. start = time.time()
  1582. while monitor.pausing.is_set() and time.time() - start < timeout:
  1583. time.sleep(1)
  1584. # 额外保险:如果验证码突然在这一秒才弹,再主动扫一次
  1585. monitor.check_and_handle_popup()
  1586. def safe_list(self, xpath, monitor):
  1587. """线程安全地拿商品列表"""
  1588. self.wait_for_ready(monitor)
  1589. return self.d.xpath(xpath).all()
  1590. def safe_exec(self, func, *args, max_retries=20, retry_delay=2, default_return=None, **kwargs):
  1591. """
  1592. 增强版安全执行器:执行 func 前检查验证码,支持重试机制和异常处理
  1593. :param func: 要执行的函数
  1594. :param args: 函数参数
  1595. :param max_retries: 最大重试次数
  1596. :param retry_delay: 重试延迟(秒)
  1597. :param default_return: 异常时的默认返回值
  1598. :param kwargs: 函数关键字参数
  1599. :return: 函数执行结果或默认返回值
  1600. """
  1601. # 等待验证码处理完成
  1602. while self.monitor.pausing.is_set():
  1603. time.sleep(1)
  1604. last_exception = None
  1605. for attempt in range(max_retries):
  1606. try:
  1607. # 检查设备连接状态
  1608. if not hasattr(self, 'd') or self.d is None:
  1609. self.loggerMT.warning("设备未连接,尝试重新连接")
  1610. if not self.connect_devices(self.device_id if hasattr(self, 'device_id') else '95b2c764'):
  1611. self.loggerMT.error("设备重新连接失败")
  1612. return default_return
  1613. # 执行函数
  1614. result = func(*args, **kwargs)
  1615. return result
  1616. except u2.exceptions.UiObjectNotFoundError as e:
  1617. # UI元素未找到异常
  1618. error_msg = f"UI元素未找到 (尝试 {attempt + 1}/{max_retries}): {e}"
  1619. self.loggerMT.warning(error_msg)
  1620. last_exception = e
  1621. except u2.exceptions.SessionBrokenError as e:
  1622. # 会话断开异常
  1623. error_msg = f"设备会话断开 (尝试 {attempt + 1}/{max_retries}): {e}"
  1624. self.loggerMT.error(error_msg)
  1625. last_exception = e
  1626. # 尝试重启应用
  1627. try:
  1628. self.loggerMT.info("尝试重启应用恢复会话")
  1629. self.restart_app()
  1630. except Exception as restart_error:
  1631. self.loggerMT.error(f"重启应用失败: {restart_error}")
  1632. except requests.exceptions.RequestException as e:
  1633. # 网络请求异常
  1634. error_msg = f"网络请求失败 (尝试 {attempt + 1}/{max_retries}): {e}"
  1635. self.loggerMT.error(error_msg)
  1636. last_exception = e
  1637. except Exception as e:
  1638. # 其他异常
  1639. error_msg = f"执行函数 {func.__name__ if hasattr(func, '__name__') else 'unknown'} 时发生异常 (尝试 {attempt + 1}/{max_retries}): {e}"
  1640. self.loggerMT.exception(error_msg)
  1641. last_exception = e
  1642. # 如果不是最后一次尝试,等待后重试
  1643. if attempt < max_retries - 1:
  1644. wait_time = retry_delay * (attempt + 1) # 指数退避
  1645. self.loggerMT.info(f"等待 {wait_time} 秒后重试...")
  1646. time.sleep(wait_time)
  1647. # 所有重试都失败
  1648. self.loggerMT.error(f"函数执行失败,已达到最大重试次数")
  1649. if last_exception:
  1650. self.loggerMT.error(f"最后异常: {last_exception}")
  1651. return default_return
  1652. def get_next_data(self, data, target):
  1653. for i, item in enumerate(data):
  1654. if item['words'] == target:
  1655. if i + 1 < len(data):
  1656. return data[i + 1]['words']
  1657. return None
  1658. def delete_instruction_screenshot(self, screenshot_path):
  1659. # 删除截图文件
  1660. try:
  1661. os.remove(screenshot_path)
  1662. print(f"截图文件已删除:{screenshot_path}")
  1663. except FileNotFoundError:
  1664. print(f"文件未找到,无法删除:{screenshot_path}")
  1665. except Exception as e:
  1666. print(f"删除文件时出错:{e}")
  1667. '''
  1668. def get_instructions_data(self):
  1669. """
  1670. 确定有说明书之后,提取所有的说明书数据
  1671. :return:
  1672. """
  1673. self.d.xpath('//*[@text="说明"]').click()
  1674. # time.sleep(random.randint(3, 5))
  1675. time.sleep(0.5)
  1676. self.d.xpath('//*[@text="查看详细说明"]').click()
  1677. # time.sleep(random.randint(3, 5))
  1678. time.sleep(0.5)
  1679. self.d.xpath('//*[@text="加载更多"]').click_exists()
  1680. loop_page = 5
  1681. # new_list = list()
  1682. new_list = []
  1683. for i in range(loop_page):
  1684. self.d.xpath('//*[@text="加载更多"]').click_exists()
  1685. time.sleep(0.2)
  1686. if i == 0:
  1687. self.d.swipe(200, 1000, 200, 300, 0.4)
  1688. else:
  1689. self.d.swipe(200, 1000, 200, 62)
  1690. time.sleep(0.2)
  1691. if self.d.xpath('//*[@text="加载更多"]').exists:
  1692. self.d.xpath('//*[@text="加载更多"]').click()
  1693. time.sleep(0.2)
  1694. all_tt = self.d.xpath(
  1695. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup').all()
  1696. for idx in range(1, len(all_tt) + 1):
  1697. all_tt1 = self.d.xpath(
  1698. f'//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[{idx}]//android.widget.TextView').all()
  1699. # print(f'当前说明书列表数据:{all_tt1}')
  1700. for tt in all_tt1:
  1701. if tt.text and tt.text != '展开全文':
  1702. new_list.append(tt.text)
  1703. if i == 0:
  1704. height = 938
  1705. else:
  1706. drug_box = self.d.xpath(
  1707. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]').info
  1708. bounds = drug_box['bounds']
  1709. height = bounds['bottom'] - bounds['top']
  1710. if height < 938:
  1711. # print('说明书翻页到底部')
  1712. break
  1713. # 展开全文
  1714. new_list = [item for item in new_list if item != '展开全文']
  1715. print(f'当前说明书列表数据:{new_list}')
  1716. # expiry_date_index = next(idx for idx, i in enumerate(new_list) if i == '有效期')
  1717. # manufacturer_index = next(idx for idx, i in enumerate(new_list) if i == '生产单位')
  1718. # approval_number_index = next(idx for idx, i in enumerate(new_list) if i == '批准文号')
  1719. # res_data = {
  1720. # "有效期": new_list[expiry_date_index + 1],
  1721. # "生产单位": new_list[manufacturer_index + 1],
  1722. # "批准文号": new_list[approval_number_index + 1]
  1723. # }
  1724. res_data = {
  1725. "有效期": (new_list[new_list.index("有效期") + 1]) if "有效期" in new_list and new_list.index("有效期") + 1 < len(new_list) else "",
  1726. "生产单位": (new_list[new_list.index("生产单位") + 1]) if "生产单位" in new_list and new_list.index("生产单位") + 1 < len(new_list) else "",
  1727. "批准文号": (new_list[new_list.index("批准文号") + 1]) if "批准文号" in new_list and new_list.index("批准文号") + 1 < len(new_list) else ""
  1728. }
  1729. print(f'当前说明书字典数据:{res_data}')
  1730. return res_data
  1731. '''
  1732. '''
  1733. def get_instructions_data(self):
  1734. """
  1735. 确定有说明书之后,提取所有的说明书数据
  1736. :return:
  1737. """
  1738. self.d.xpath('//*[@text="说明"]').click()
  1739. # time.sleep(random.randint(3, 5))
  1740. time.sleep(0.5)
  1741. self.d.xpath('//*[@text="查看详细说明"]').click()
  1742. # time.sleep(random.randint(3, 5))
  1743. time.sleep(0.5)
  1744. # 1) 先向上滑动一次,触发“加载更多”出现
  1745. self.d.swipe(200, 1000, 200, 300, 0.4)
  1746. time.sleep(0.3)
  1747. # 2) 再进入“出现就点”的循环
  1748. while self.d.xpath('//*[@text="加载更多"]').click_exists(timeout=1):
  1749. time.sleep(0.2)
  1750. self.d.swipe(200, 1000, 200, 300, 0.4)
  1751. # self.d.swipe(200, 1000, 200, 62)
  1752. time.sleep(0.2)
  1753. # 一次性获取所有文本
  1754. texts = [
  1755. node.text.strip()
  1756. # for node in self.d.xpath('//android.widget.TextView').all()
  1757. for node in self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.TextView').all()
  1758. if node.text and node.text.strip() and node.text != '加载更多'
  1759. ]
  1760. print(f'当前说明书列表数据:{texts}')
  1761. # 提取关键字段
  1762. def safe_get(key):
  1763. # try:
  1764. # idx = texts.index(key)
  1765. # return texts[idx + 1] if idx + 1 < len(texts) else ""
  1766. # except ValueError:
  1767. # return ""
  1768. try:
  1769. idx = next(i for i, text in enumerate(texts) if text == key)
  1770. return texts[idx + 1] if idx + 1 < len(texts) else ""
  1771. except StopIteration:
  1772. return ""
  1773. res_data = {
  1774. "有效期": safe_get("有效期"),
  1775. "生产单位": safe_get("生产单位"),
  1776. "批准文号": safe_get("批准文号")
  1777. }
  1778. print(f'当前说明书字典数据:{res_data}')
  1779. return res_data
  1780. '''
  1781. '''
  1782. def get_instructions_data(self):
  1783. """
  1784. 说明书键值对采集:连续两个 TextView 为一对,精确提取
  1785. """
  1786. # 1. 进入说明书
  1787. self.d(text="说明").click()
  1788. time.sleep(0.5)
  1789. self.d(text="查看详细说明").click()
  1790. time.sleep(0.5)
  1791. # self.d(text="加载更多").click_exists(timeout=0.5)
  1792. # 2. 找到说明书最外层 ScrollView(页面主体)
  1793. scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.ScrollView")
  1794. count = scroll_view.count
  1795. print(f"找到的 ScrollView 数量: {count}")
  1796. if not scroll_view.exists:
  1797. return {"有效期": "", "生产单位": "", "批准文号": ""}
  1798. # 3. 在 ScrollView 内再定位真正包含键值对的容器
  1799. # 绝大多数美团说明书页面对应的是 ScrollView > ViewGroup > 若干 TextView
  1800. kv_container = scroll_view.child(className="android.view.ViewGroup")
  1801. if not kv_container.exists:
  1802. kv_container = scroll_view # 降级:直接对 ScrollView 取子孙 TextView
  1803. # 4. 滑动到底并收集所有 TextView(保留顺序)
  1804. all_texts = []
  1805. max_swipe = 5
  1806. last_length = 0
  1807. for _ in range(max_swipe):
  1808. texts = kv_container.child(className="android.widget.TextView")
  1809. #获取texts中的文本
  1810. print(f'当前说明书列表数据:{texts}')
  1811. current_texts = []
  1812. self.loggerMT.info(f'说明书111')
  1813. for tv in texts:
  1814. try:
  1815. txt = tv.get_text().strip()
  1816. # txt = tv.info['text'].strip()
  1817. except Exception:
  1818. continue
  1819. if txt and txt != "展开全文":
  1820. current_texts.append(txt)
  1821. self.loggerMT.info(f'说明书222')
  1822. print(f'当前说明书列表数据:{current_texts}')
  1823. # 去重
  1824. if current_texts:
  1825. current_texts = [t for t in current_texts if t not in all_texts]
  1826. all_texts.extend(current_texts)
  1827. # 判断是否到底
  1828. # if not scroll_view.info.get("scrollable"):
  1829. # break
  1830. # 判断是否到底
  1831. if len(all_texts) == last_length:
  1832. break
  1833. last_length = len(all_texts)
  1834. # self.d.swipe_ext("up", scale=0.7)
  1835. #向上滑动一次
  1836. self.d.swipe(200, 1000, 200, 300, 0.2)
  1837. time.sleep(0.2)
  1838. if self.d.xpath('//*[@text="加载更多"]').exists:
  1839. self.d.xpath('//*[@text="加载更多"]').click()
  1840. # 5. 成对解析
  1841. res_data = {"有效期": "", "生产单位": "", "批准文号": ""}
  1842. for i in range(len(all_texts) - 1):
  1843. key = all_texts[i]
  1844. val = all_texts[i + 1]
  1845. if key in res_data:
  1846. res_data[key] = val
  1847. print(f'说明书文本共 {len(all_texts)} 条,提取结果: {res_data}')
  1848. # time.sleep(1000000)
  1849. return res_data
  1850. '''
  1851. def get_instructions_data(self):
  1852. """
  1853. 确定有说明书之后,提取所有的说明书数据
  1854. :return:
  1855. """
  1856. self.d.xpath('//*[@text="说明"]').click()
  1857. # time.sleep(random.randint(3, 5))
  1858. time.sleep(0.5)
  1859. if self.d.xpath('//*[@text="查看详细说明"]').exists:
  1860. self.d.xpath('//*[@text="查看详细说明"]').click()
  1861. else:
  1862. for i in range(8):
  1863. if self.d.xpath('//*[@text="查看全部"]').exists:
  1864. print('开始点击查看全部')
  1865. break
  1866. self.d.swipe_ext('down', 0.3)
  1867. time.sleep(1)
  1868. if self.d.xpath('//*[@text="查看全部"]').exists:
  1869. print('开始点击查看全部2')
  1870. break
  1871. if self.d.xpath('//*[@text="查看全部"]').exists:
  1872. self.d.xpath('//*[@text="查看全部"]').click()
  1873. else:
  1874. res_data = {
  1875. "有效期": '',
  1876. "生产单位": '',
  1877. "批准文号": ''
  1878. }
  1879. self.loggerMT.info('获取到的说明书信息为空。')
  1880. return res_data
  1881. # time.sleep(random.randint(3, 5))
  1882. time.sleep(0.5)
  1883. # self.d.xpath('//*[@text="加载更多"]').click_exists()
  1884. # loop_page = 5
  1885. # new_list = list()
  1886. # new_list = []
  1887. for ii in range(8):
  1888. if self.d.xpath('//*[@text="加载更多"]').exists:
  1889. self.d.xpath('//*[@text="加载更多"]').click()
  1890. time.sleep(0.2)
  1891. break
  1892. else:
  1893. self.d.swipe(200, 1000, 200, 300, 0.3)
  1894. # self.d.swipe_ext("up", scale=0.3)
  1895. for iii in range(10):
  1896. if self.d.xpath('//*[@text="生产单位"]').exists and self.d.xpath('//*[@text="批准文号"]').exists:
  1897. break
  1898. else:
  1899. self.d.swipe(200, 1300, 200, 300, 0.3)
  1900. # self.d.swipe_ext("up", scale=0.3)
  1901. instruction_path = self.screenshot_instruction()
  1902. print(f"instruction_path= {instruction_path}")
  1903. time.sleep(2)
  1904. ocr_res = self.get_ocr_res_image(instruction_path)
  1905. # print(f'ocr_res:{ocr_res}')
  1906. if ocr_res:
  1907. # 获取有效期的下一个数据
  1908. validity = self.get_next_data(ocr_res, '有效期')
  1909. # 获取批准文号的下一个数据
  1910. approval_number = self.get_next_data(ocr_res, '批准文号')
  1911. # 获取生产单位的下一个数据
  1912. manufacturer = self.get_next_data(ocr_res, '生产单位')
  1913. else:
  1914. validity = ''
  1915. approval_number = ''
  1916. manufacturer = ''
  1917. # print("有效期:", validity)
  1918. # print("批准文号:", approval_number)
  1919. # print("生产单位:", manufacturer)
  1920. res_data = {
  1921. "有效期": validity,
  1922. "生产单位": manufacturer,
  1923. "批准文号": approval_number
  1924. }
  1925. print(f"res_data={res_data}")
  1926. time.sleep(1)
  1927. self.delete_instruction_screenshot(instruction_path)
  1928. return res_data
  1929. def has_instructions(self):
  1930. """
  1931. 是否有说明书
  1932. :return:
  1933. """
  1934. # 没有说明书的无法采集具体数据
  1935. time.sleep(self.get_sleep_time())
  1936. is_has_instructions = False
  1937. for i in range(8):
  1938. if self.d.xpath('//*[@text="说明"]').exists:
  1939. print(f"第{i}次有说明书1")
  1940. is_has_instructions = True
  1941. break
  1942. self.d.swipe_ext('down', 0.3)
  1943. time.sleep(1)
  1944. # detail_info = self.d.xpath(
  1945. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[6]').info
  1946. # bounds = detail_info['bounds']
  1947. # height = bounds['bottom'] - bounds['top']
  1948. # if self.d.xpath('//*[@text="进店"]').exists and height > 100:
  1949. if self.d.xpath('//*[@text="说明"]').exists:
  1950. is_has_instructions = True
  1951. print(f"第{i}次有说明书2")
  1952. break
  1953. # is_has_instructions = self.d.xpath('//*[@text="说明"]').exists
  1954. return is_has_instructions
  1955. def has_shop(self):
  1956. """
  1957. 是否有进店按钮
  1958. :return:
  1959. """
  1960. # self.d.swipe_ext('up', 0.1)
  1961. time.sleep(self.get_sleep_time())
  1962. is_has_enter_shop = self.d.xpath('//*[@text="进店"]').exists
  1963. return is_has_enter_shop
  1964. # 获取商品对应的店铺信息
  1965. def get_license_info_ex(self):
  1966. # self.enter_shop()
  1967. self.safe_exec(self.enter_shop)
  1968. # self.enter_shoper()
  1969. result = self.safe_exec(self.enter_shoper)
  1970. if result == False:
  1971. license_info_data = {'contact_address': '', 'qualification_number': '', 'business_license_company': '',
  1972. 'business_license_address': ''}
  1973. return license_info_data
  1974. for i in range(10):
  1975. if self.d.xpath('//*[@text="查看商家资质"]').exists:
  1976. print(f"第{i}次有商家资质")
  1977. break
  1978. else:
  1979. print(f"第{i}次没有商家资质")
  1980. time.sleep(self.get_sleep_time())
  1981. # 获取地址
  1982. # contact_address = self.get_shop_address()
  1983. contact_address = self.safe_exec(self.get_shop_address)
  1984. # time.sleep(50000)
  1985. ###
  1986. # self.scan_shoper_license()
  1987. self.safe_exec(self.scan_shoper_license)
  1988. # 获取资质编码
  1989. # qualification_number = self.get_qualification_number()
  1990. qualification_number = self.safe_exec(self.get_qualification_number)
  1991. # qualification_number 不为None继续下一步
  1992. if qualification_number:
  1993. # 营业执照公司名称
  1994. business_license_company = ''
  1995. # 营业执照地址
  1996. business_license_address = ''
  1997. self.d.click(0.603, 0.27)
  1998. time.sleep(self.get_sleep_time())
  1999. cropped_screenshot_path = self.screenshot_the_business_license(qualification_number)
  2000. print(f'cropped_screenshot_path:{cropped_screenshot_path}')
  2001. # if qualification_number:
  2002. # cropped_screenshot_path = 'D:\\work\\dfwy_spider\\drug_data\\mt\\screenshot\\' + qualification_number + '.png'
  2003. # else:
  2004. # cropped_screenshot_path = 'cropped_screenshot.png'
  2005. # ocr_res = self.get_ocr_res('cropped_screenshot.png')
  2006. ocr_res = self.get_ocr_res(cropped_screenshot_path)
  2007. print(f'ocr_res:{ocr_res}')
  2008. # 获取ocr_res 中的地址、单位名称
  2009. if ocr_res:
  2010. if '单位名称' in ocr_res.keys():
  2011. business_license_company = ocr_res['单位名称']
  2012. if '地址' in ocr_res.keys():
  2013. business_license_address = ocr_res['地址']
  2014. license_info_data = {'contact_address': contact_address, 'qualification_number': qualification_number,
  2015. 'business_license_company': business_license_company,
  2016. 'business_license_address': business_license_address}
  2017. else:
  2018. license_info_data = {'contact_address': contact_address, 'qualification_number': '',
  2019. 'business_license_company': '', 'business_license_address': ''}
  2020. return license_info_data
  2021. """暂不用该功能
  2022. def get_license_info(self):
  2023. self.enter_shop()
  2024. self.enter_shoper()
  2025. self.scan_shoper_license()
  2026. # 获取资质编码
  2027. qualification_number = self.get_qualification_number()
  2028. if qualification_number:
  2029. table_license_info = self.get_table_license_info(qualification_number)
  2030. if table_license_info:
  2031. return {
  2032. '单位名称': table_license_info[0],
  2033. '地址': table_license_info[1],
  2034. '社会信用代码': table_license_info[2]
  2035. }
  2036. else:
  2037. # operate_no = random.randint(0, 1)
  2038. self.d.click(0.603, 0.27)
  2039. # if operate_no == 0:
  2040. # self.d.xpath('//*[@text="营业执照"]').click()
  2041. # else:
  2042. # self.d.click(0.603, 0.27)
  2043. time.sleep(self.get_sleep_time())
  2044. self.screenshot_the_business_license()
  2045. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  2046. return ocr_res
  2047. # operate_no = random.randint(0, 1)
  2048. self.d.click(0.603, 0.27)
  2049. # if operate_no == 0:
  2050. # self.d.xpath('//*[@text="营业执照"]').click()
  2051. # else:
  2052. # self.d.click(0.603, 0.27)
  2053. time.sleep(self.get_sleep_time())
  2054. self.screenshot_the_business_license()
  2055. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  2056. return ocr_res
  2057. """
  def distinct_target(self):
      """Report whether one of the known list-page elements is on screen.

      Six absolute XPaths are probed in order: two "position" elements and
      four variants of the last entry in a horizontal scroll strip. The
      variants differ only in the RelativeLayout / FrameLayout index and in
      whether a ScrollView wraps the RecyclerView.

      Returns:
          bool: True if at least one XPath currently matches, else False.
      """
      candidate_xpaths = (
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
      )

      # Decide first, report afterwards, so the printed status always agrees
      # with the returned value. Probing stops at the first XPath that matches.
      result = any(self.d.xpath(candidate).exists for candidate in candidate_xpaths)

      if result:
          print("---检测回到了列表页---")
      else:
          print("---检测没有回到列表页---")
      return result
  def enter_target_page(self):
      """Drive the app from its entry screen to the search results.

      Taps three elements in turn (the one whose content-desc is "看病买药",
      the search carousel text, and an element addressed by absolute XPath),
      types ``self.search_key`` with the field cleared first, taps the element
      whose text is "搜索", and finally calls ``click_express_send``. Every UI
      action is followed by a pause of ``self.get_sleep_time()`` seconds.
      """
      def tap_and_wait(target_xpath):
          # One UI tap followed by the usual pause.
          self.d.xpath(target_xpath).click()
          time.sleep(self.get_sleep_time())

      taps_before_typing = (
          '//*[@content-desc="看病买药"]',
          '//*[@resource-id="com.sankuai.meituan:id/vf_search_carousel_text"]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]',
      )
      for target_xpath in taps_before_typing:
          tap_and_wait(target_xpath)

      self.d.send_keys(self.search_key, clear=True)
      time.sleep(self.get_sleep_time())

      tap_and_wait('//*[@text="搜索"]')

      # Continue with the swipe-and-click step on the results screen.
      self.click_express_send()
  def click_express_send(self):
      """Swipe the horizontal strip left, then click its last entry.

      Step 1 looks for the HorizontalScrollView under four known layout
      variants (tried in order, at most twice). For the first variant found
      it swipes from x=500 to x=100 along the strip's vertical midpoint.

      Step 2 tries up to five times to click the last ViewGroup inside the
      strip, again checking the four variants in order, and pauses
      ``self.get_sleep_time()`` seconds after every attempt.

      Nothing is returned and nothing is raised if the strip or the entry is
      never found.
      """
      # The four variants differ in the RelativeLayout / FrameLayout index and
      # in whether a ScrollView wraps the RecyclerView.
      strip_xpaths = (
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
      )
      # Each clickable entry is the last ViewGroup two levels below its strip,
      # so the button XPaths are the strip XPaths plus a fixed suffix.
      last_entry_suffix = '/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
      button_xpaths = tuple(strip + last_entry_suffix for strip in strip_xpaths)

      def first_present(xpaths):
          # Return (1-based position, xpath) of the first XPath that matches
          # on screen, or None when none of them do.
          for position, candidate in enumerate(xpaths, start=1):
              if self.d.xpath(candidate).exists:
                  return position, candidate
          return None

      # Step 1: swipe the strip (two attempts, no pause between them).
      for _ in range(1, 3):
          located = first_present(strip_xpaths)
          if located is None:
              continue
          position, strip_xpath = located
          bounds = self.d.xpath(strip_xpath).info['bounds']
          top = bounds['top']
          bottom = bounds['bottom']
          print(f'top={top}')
          print(f'bottom={bottom}')
          y = (top + bottom) // 2
          print(f'y={y}')
          self.loggerMT.info(f'开始滑动{position}')
          self.d.swipe(500, y, 100, y, 0.5)
          time.sleep(self.get_sleep_time())
          break

      # Step 2: click the last entry, retrying while it is not on screen yet.
      max_retry = 5  # maximum number of attempts
      for idx in range(1, max_retry + 1):
          located = first_present(button_xpaths)
          if located is None:
              print(f"第{idx}次点击xpath或xpath2或xpath3或xpath4快递送都失败")
              time.sleep(self.get_sleep_time())
              continue
          position, button_xpath = located
          self.d.xpath(button_xpath).click()
          # Keep the historical labels: "xpath", "xpath2", "xpath3", "xpath4".
          label = 'xpath' if position == 1 else f'xpath{position}'
          print(f"第{idx}次点击{label}快递送成功")
          time.sleep(self.get_sleep_time())
          break
  2193. """暂不用该功能
  2194. def get_table_license_info(self, qualification_number):
  2195. try:
  2196. sql = f'select business_license_company,city,credit_code from mt_drug where credit_code = "{qualification_number}"'
  2197. self.mysql_client.cur.execute(sql)
  2198. res = self.mysql_client.cur.fetchone()
  2199. return res
  2200. except:
  2201. return None
  2202. """
  def get_clipboard(self):
      """Return the device clipboard text with surrounding whitespace removed.

      Waits one second before reading ``self.d.clipboard``. The clipboard is
      read exactly once, so the value written to the log is the same value
      that is returned.

      Returns:
          str: the stripped clipboard text, or '' when the clipboard is None.
      """
      time.sleep(1)
      clipboard_content = self.d.clipboard
      self.loggerMT.info(f"Clipboard content:{clipboard_content}")  # debug trace
      if clipboard_content is None:
          return ''
      return clipboard_content.strip()
  def clear_clipboard(self):
      """Replace the device clipboard content with an empty string."""
      device = self.d
      blank_text = ""
      second_argument = "text/plain"
      device.set_clipboard(blank_text, second_argument)
  def get_product_link(self):
      """Copy the product share link through the share menu and return it.

      Up to five rounds are made. In each round the three known XPaths of the
      "···" button are checked in order. For one that is present, the button
      is tapped, "分享商品" is tapped if present, and, when "复制链接" is present,
      it is tapped and the link is read via ``self.get_clipboard()``.
      A 0.5 s pause separates rounds while no link has been obtained.

      NOTE(review): the source this was rebuilt from had lost its
      indentation; the ``else`` branch is assumed to pair with the
      "复制链接" existence check. Confirm against the original file.

      Returns:
          str: the copied link, or '' if none was obtained.
      """
      # Known positions of the "···" button.
      share_menu_xpaths = (
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
      )
      copy_link_xpath = '//*[@text="复制链接"]'
      total_rounds = 5  # maximum number of attempts

      product_link = ''
      round_no = 1
      # Stop as soon as a link has been obtained or the rounds are used up.
      while round_no <= total_rounds and not product_link:
          for menu_xpath in share_menu_xpaths:
              if not self.d.xpath(menu_xpath).exists:
                  continue

              print(f'{round_no}-进入分享点点点')
              self.loggerMT.info(f'{round_no}-进入分享点点点')
              self.d.xpath(menu_xpath).click()
              time.sleep(0.2)
              self.d.xpath('//*[@text="分享商品"]').click_exists()
              time.sleep(0.2)

              if not self.d.xpath(copy_link_xpath).exists:
                  # No copy-link entry: log the (empty) link, then try the
                  # next button position.
                  print(f'{round_no}-商品链接:{product_link}')
                  self.loggerMT.info(f'{round_no}-商品链接:{product_link}')
                  product_link = ''
                  continue

              self.d.xpath(copy_link_xpath).click()
              time.sleep(1)
              product_link = self.get_clipboard()
              time.sleep(0.5)
              print(f'{round_no}-商品链接:{product_link}')
              self.loggerMT.info(f'{round_no}-商品链接:{product_link}')
              break  # copy attempted; leave the inner loop

          # No wait is needed after the final round.
          if not product_link and round_no < total_rounds:
              time.sleep(0.5)
          round_no += 1

      return product_link
  2252. def integrate_data(self):
  2253. # 测试说明书详情:
  2254. # instructions_info = self.safe_exec(self.get_instructions_data)
  2255. # time.sleep(1000000)
  2256. # 测试店铺信息
  2257. # license_info = self.safe_exec(self.get_license_info_ex)
  2258. # time.sleep(1000000)
  2259. # 测试定位地址
  2260. # 获取链接开始
  2261. # self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  2262. # 1、点击页面的... 先判断元素是否存在
  2263. '''
  2264. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2265. print('1-进入分享点点点111')
  2266. self.loggerMT.info('1-进入分享点点点111')
  2267. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2268. #点击分享商品
  2269. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2270. time.sleep(0.2)
  2271. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2272. time.sleep(0.2)
  2273. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2274. time.sleep(1)
  2275. #获取剪切板的数据
  2276. product_link = self.get_clipboard()
  2277. time.sleep(0.5)
  2278. print(f'1-商品链接:{product_link}')
  2279. self.loggerMT.info(f'1-商品链接:{product_link}')
  2280. #清空剪切板
  2281. # self.clear_clipboard()
  2282. # if self.d.xpath('//*[@text="加载更多"]').click_exists():
  2283. # self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2284. # if self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').exists:
  2285. # self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').click()
  2286. # #获取剪切板的数据
  2287. # product_link = self.get_clipboard()
  2288. # time.sleep(0.5)
  2289. # print(f'商品链接:{product_link}')
  2290. # #清空剪切板
  2291. # self.clear_clipboard()
  2292. # else:
  2293. # print('未找到分享按钮111')
  2294. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2295. print('1-进入分享点点点222')
  2296. self.loggerMT.info('1-进入分享点点点222')
  2297. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2298. time.sleep(0.2)
  2299. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2300. time.sleep(0.2)
  2301. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2302. time.sleep(1)
  2303. #获取剪切板的数据
  2304. product_link = self.get_clipboard()
  2305. time.sleep(0.5)
  2306. print(f'1-商品链接:{product_link}')
  2307. self.loggerMT.info(f'1-商品链接:{product_link}')
  2308. #如果为获取到product_link 则等待0.5秒再获取
  2309. if not product_link:
  2310. time.sleep(0.5)
  2311. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2312. print('2-进入分享点点点111')
  2313. self.loggerMT.info('2-进入分享点点点111')
  2314. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2315. #点击分享商品
  2316. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2317. time.sleep(0.2)
  2318. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2319. time.sleep(0.2)
  2320. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2321. time.sleep(1)
  2322. #获取剪切板的数据
  2323. product_link = self.get_clipboard()
  2324. time.sleep(0.5)
  2325. print(f'2-商品链接:{product_link}')
  2326. self.loggerMT.info(f'2-商品链接:{product_link}')
  2327. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2328. print('2-进入分享点点点222')
  2329. self.loggerMT.info('2-进入分享点点点222')
  2330. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2331. time.sleep(0.2)
  2332. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2333. time.sleep(0.2)
  2334. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2335. time.sleep(1)
  2336. #获取剪切板的数据
  2337. product_link = self.get_clipboard()
  2338. time.sleep(0.5)
  2339. print(f'2-商品链接:{product_link}')
  2340. self.loggerMT.info(f'2-商品链接:{product_link}')
  2341. #如果为获取到product_link 则等待0.5秒再获取
  2342. if not product_link:
  2343. time.sleep(0.5)
  2344. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2345. print('3-进入分享点点点111')
  2346. self.loggerMT.info('3-进入分享点点点111')
  2347. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2348. #点击分享商品
  2349. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2350. time.sleep(0.2)
  2351. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2352. time.sleep(0.2)
  2353. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2354. time.sleep(1)
  2355. #获取剪切板的数据
  2356. product_link = self.get_clipboard()
  2357. time.sleep(0.5)
  2358. print(f'3-商品链接:{product_link}')
  2359. self.loggerMT.info(f'3-商品链接:{product_link}')
  2360. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  2361. print('3-进入分享点点点222')
  2362. self.loggerMT.info('3-进入分享点点点222')
  2363. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  2364. time.sleep(0.2)
  2365. self.d.xpath('//*[@text="分享商品"]').click_exists()
  2366. time.sleep(0.2)
  2367. self.d.xpath('//*[@text="复制链接"]').click_exists()
  2368. time.sleep(1)
  2369. #获取剪切板的数据
  2370. product_link = self.get_clipboard()
  2371. time.sleep(0.5)
  2372. print(f'3-商品链接:{product_link}')
  2373. self.loggerMT.info(f'3-商品链接:{product_link}')
  2374. '''
  2375. # 获取链接结束
  2376. """
  2377. 整合数据
  2378. :return:
  2379. """
  2380. product, specifications = self.safe_exec(self.get_title) # 药品,规格
  2381. if product:
  2382. # product, specifications = title_info
  2383. # 如果关键字包含999 则 product必须包含999 和 999后面的那段字符串 ps 999感冒灵颗粒必须包含:"999"和"感冒灵颗粒"
  2384. if '天力士' in self.search_key:
  2385. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2386. temp_search_key = self.search_key.replace('天力士', '')
  2387. if '天力士' not in product or temp_search_key not in product:
  2388. self.swipe_back(1)
  2389. self.unrelated_data += 1
  2390. return
  2391. else:
  2392. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  2393. temp_search_key = self.search_key.replace('史达功', '')
  2394. temp_search_key = temp_search_key.replace('120', '')
  2395. if '史达功' not in product or temp_search_key not in product:
  2396. self.swipe_back(1)
  2397. self.unrelated_data += 1
  2398. return
  2399. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  2400. temp_search_key = self.search_key.replace('三九胃泰', '')
  2401. temp_search_key = temp_search_key.replace('8袋', '')
  2402. if '三九胃泰' not in product or temp_search_key not in product:
  2403. self.swipe_back(1)
  2404. self.unrelated_data += 1
  2405. return
  2406. elif self.search_key == '今维多赐多康牌蛋白粉':
  2407. temp_search_key = self.search_key.replace('今维多', '')
  2408. if '今维多' not in product or temp_search_key not in product:
  2409. self.swipe_back(1)
  2410. self.unrelated_data += 1
  2411. return
  2412. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  2413. temp_search_key = self.search_key.replace('佳美舒', '')
  2414. temp_search_key = temp_search_key.replace('4', '')
  2415. if '佳美舒' not in product or temp_search_key not in product:
  2416. self.swipe_back(1)
  2417. self.unrelated_data += 1
  2418. return
  2419. elif self.search_key == '三九胃泰颗粒20g*10':
  2420. temp_search_key = self.search_key.replace('20g*10', '')
  2421. if temp_search_key not in product:
  2422. self.swipe_back(1)
  2423. self.unrelated_data += 1
  2424. return
  2425. elif self.search_key == '三九胃泰颗粒20g*6袋':
  2426. temp_search_key = self.search_key.replace('20g*6袋', '')
  2427. if temp_search_key not in product:
  2428. self.swipe_back(1)
  2429. self.unrelated_data += 1
  2430. return
  2431. elif self.search_key == '顺峰康王酮康他索乳膏':
  2432. temp_search_key = self.search_key.replace('顺峰康王', '')
  2433. if '顺峰康王' not in product or temp_search_key not in product:
  2434. self.swipe_back(1)
  2435. self.unrelated_data += 1
  2436. return
  2437. else:
  2438. if self.search_key not in product.replace(' ', ''):
  2439. self.swipe_back(1)
  2440. self.unrelated_data += 1
  2441. return
  2442. # if self.search_key not in product.replace(' ', ''):
  2443. # self.swipe_back(1)
  2444. # self.unrelated_data += 1
  2445. # return
  2446. else:
  2447. self.swipe_back(1)
  2448. return
  2449. min_price = self.drug_price() # 最低价格
  2450. sales_num = self.drug_sale_num() # 销售数量
  2451. snapshot_url = '' # 网页快照
  2452. # 在这里截图存放到OSS;#采集图片存放的oss_url;
  2453. # mt_screenshot = MTScreenshot(
  2454. # d=self.d,
  2455. # oss_config=self.oss_config,
  2456. # search_key=self.search_key # 添加这行
  2457. # )
  2458. # snapshot_url = mt_screenshot.get_oss_url() #网页快照
  2459. # 判断是否有自营的文本,有的话不需要获取店铺的信息
  2460. if self.d.xpath('//*[@text="自营"]').exists:
  2461. shop = "美团自营大药房(快递电商)"
  2462. # 爬取日期
  2463. scrape_date = self.get_current_date()
  2464. # scrape_date = "2025-07-18"
  2465. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  2466. 'platform': '美团'}
  2467. print(f'当前数据:{dup_data}')
  2468. if self.data_is_exists(dup_data):
  2469. print('存在相同数据不入库')
  2470. self.swipe_back(1)
  2471. return
  2472. else:
  2473. for i in range(8):
  2474. if self.d.xpath('//*[@text="进店"]').exists:
  2475. print('开始获取店铺名1')
  2476. break
  2477. self.d.swipe_ext('up', 0.3)
  2478. time.sleep(1)
  2479. # detail_info = self.d.xpath(
  2480. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[6]').info
  2481. # bounds = detail_info['bounds']
  2482. # height = bounds['bottom'] - bounds['top']
  2483. # if self.d.xpath('//*[@text="进店"]').exists and height > 100:
  2484. if self.d.xpath('//*[@text="进店"]').exists:
  2485. print('开始获取店铺名2')
  2486. break
  2487. shop = self.get_shop_name()
  2488. # 爬取日期
  2489. scrape_date = self.get_current_date()
  2490. # scrape_date = "2025-07-18"
  2491. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  2492. 'platform': '美团'}
  2493. print(f'当前数据:{dup_data}')
  2494. # 获取店铺信息开始
  2495. # 暂时不获取店铺信息 start
  2496. is_has_enter_shop = self.has_shop()
  2497. # 需要判断shop是否已经在数据库中存在,如果存在,则不再进入店铺,直接进入下一个商品
  2498. shop_is_exists = self.shop_is_exists_database(shop)
  2499. # 存在进店 并且店铺的名称不包含美团官方的字样
  2500. print(f"已采集{self.shop_data_num}家店铺数据")
  2501. if is_has_enter_shop and '美团官方' not in shop and '美团自营' not in shop and not shop_is_exists and self.shop_data_num < 500:
  2502. # license_info = self.get_license_info_ex()
  2503. license_info = self.safe_exec(self.get_license_info_ex)
  2504. contact_address = license_info['contact_address']
  2505. qualification_number = license_info['qualification_number']
  2506. business_license_company = license_info['business_license_company']
  2507. business_license_address = license_info['business_license_address']
  2508. save_shop_data = {
  2509. 'shop': shop,
  2510. 'contact_address': contact_address,
  2511. 'qualification_number': qualification_number,
  2512. 'scrape_date': scrape_date,
  2513. 'business_license_company': business_license_company,
  2514. 'business_license_address': business_license_address,
  2515. 'platform': '美团'
  2516. }
  2517. self.save_shop_info_to_database(save_shop_data)
  2518. self.shop_data_num += 1 # 店铺数据数量+1
  2519. self.swipe_back(2)
  2520. else:
  2521. print('不采集店铺信息')
  2522. # 获取店铺信息结束
  2523. # 暂时不获取店铺信息 end
  2524. if self.data_is_exists(dup_data):
  2525. print('存在相同数据不入库')
  2526. self.swipe_back(1)
  2527. return
  2528. # 商品链接
  2529. product_link = self.get_product_link()
  2530. if not shop:
  2531. print('未获取到店铺名:开始回退')
  2532. self.swipe_back(1)
  2533. return
  2534. if not shop or '自营' in shop:
  2535. self.swipe_back(1)
  2536. return
  2537. time.sleep(self.get_sleep_time())
  2538. # 生产日期为空
  2539. manufacture_date = ''
  2540. # 执政信息
  2541. # if is_has_enter_shop:
  2542. # license_info = self.get_license_info()
  2543. # business_license_company = license_info["单位名称"]
  2544. # credit_code = license_info['社会信用代码']
  2545. # city_str = license_info['地址']
  2546. # # 先把省份啥的替换掉
  2547. # city_sub_str = re.sub(r'[u4e00-\u9fa5]+省', '', city_str)
  2548. # try:
  2549. # city = re.search(r'[\u4e00-\u9fa5]+?(市|区|县)', city_sub_str).group(0)
  2550. # except:
  2551. # city = city_sub_str
  2552. # try:
  2553. # province = self.city2province[city]
  2554. # except:
  2555. # province = ''
  2556. # self.swipe_back(2)
  2557. # else:
  2558. # business_license_company = ''
  2559. # credit_code = ''
  2560. # city = ''
  2561. # province = ''
  2562. business_license_company = ''
  2563. credit_code = ''
  2564. city = ''
  2565. province = ''
  2566. expiry_date = ''
  2567. manufacturer = ''
  2568. approval_number = ''
  2569. # 暂时不获取说明书信息 start
  2570. # 是否存在说明书
  2571. # is_has_instructions = self.has_instructions()
  2572. # 有的药品没有说明书,直接默认
  2573. if self.search_key == '今维多赐多康牌蛋白粉':
  2574. expiry_date = '18个月'
  2575. manufacturer = '华润圣海健康科技有限公司'
  2576. approval_number = '食健备G202437001992'
  2577. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  2578. expiry_date = '24个月'
  2579. manufacturer = '浙江华润三九众益制药有限公司'
  2580. approval_number = '国药准字H20090152'
  2581. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  2582. expiry_date = '3年'
  2583. manufacturer = '江苏萨瑞斯医疗科技有限公司'
  2584. approval_number = '苏械注准20212140025'
  2585. elif self.search_key == '999蒲地蓝消炎片':
  2586. expiry_date = '24个月'
  2587. manufacturer = '特一药业集团股份有限公司'
  2588. approval_number = '国药准字Z20063596'
  2589. elif self.search_key == '999养胃舒颗粒':
  2590. expiry_date = '36个月'
  2591. manufacturer = '合肥华润神鹿药业有限公司'
  2592. approval_number = '国药准字Z34020289'
  2593. elif self.search_key == '999糠酸莫米松凝胶15':
  2594. expiry_date = '36个月'
  2595. manufacturer = '华润三九(南昌)药业有限公司'
  2596. approval_number = '国药准字H20080010'
  2597. elif self.search_key == '999黄芪精':
  2598. expiry_date = '36个月'
  2599. manufacturer = '台州南峰药业有限公司'
  2600. approval_number = '国药准字Z33020783'
  2601. elif self.search_key == '999复方感冒灵颗粒':
  2602. expiry_date = '24个月'
  2603. manufacturer = '华润三九(郴州)制药有限公司'
  2604. approval_number = '国药准字Z43020334'
  2605. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2606. expiry_date = '36个月'
  2607. manufacturer = '华润三九(南昌)药业有限公司'
  2608. approval_number = '国药准字H20074155'
  2609. elif self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  2610. expiry_date = '暂定24个月,具体有效期以实物说明书为准'
  2611. manufacturer = '史达德药业(北京)有限公司'
  2612. approval_number = '国药准字H11021837'
  2613. elif self.search_key == '999速复康布洛芬缓释胶囊':
  2614. expiry_date = '24个月'
  2615. manufacturer = '北京红林制药有限公司'
  2616. approval_number = '国药准字H20074172'
  2617. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  2618. expiry_date = '24个月'
  2619. manufacturer = '重庆科瑞东和制药有限责任公司'
  2620. approval_number = '国药准字Z50020420'
  2621. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  2622. expiry_date = '24个月'
  2623. manufacturer = '华润三九(南昌)药业有限公司'
  2624. approval_number = '国药准字H20073954'
  2625. elif self.search_key == '999维生素C咀嚼片':
  2626. expiry_date = '24个月'
  2627. manufacturer = '甘肃成纪生物药业有限公司'
  2628. approval_number = '国药准字H62021166'
  2629. elif self.search_key == '999强力枇杷露120ml':
  2630. expiry_date = '36个月'
  2631. manufacturer = '华润三九(南昌)药业有限公司'
  2632. approval_number = '国药准字Z36021533'
  2633. elif self.search_key == '999强力枇杷露150ml':
  2634. expiry_date = '36个月'
  2635. manufacturer = '华润三九(南昌)药业有限公司'
  2636. approval_number = '国药准字Z36021533'
  2637. elif self.search_key == '999抗病毒口服液10ml*10' or self.search_key == '999抗病毒口服液10ml*12':
  2638. expiry_date = '24个月'
  2639. manufacturer = '杭州华润老桐君药业有限公司'
  2640. approval_number = '国药准字Z33020518'
  2641. elif self.search_key == '999精氨酸布洛芬颗粒':
  2642. expiry_date = '暂定36个月'
  2643. manufacturer = '华润三九(唐山)药业有限公司'
  2644. approval_number = '国药准字H20070139'
  2645. elif self.search_key == '999糠酸莫米松乳膏10g支':
  2646. expiry_date = '36个月'
  2647. manufacturer = '华润三九(南昌)药业有限公司'
  2648. approval_number = '国药准字H20074090'
  2649. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  2650. expiry_date = '24个月'
  2651. manufacturer = '华润三九(南昌)药业有限公司'
  2652. approval_number = '国药准字H20074079'
  2653. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  2654. expiry_date = '36个月'
  2655. manufacturer = '合肥华润神鹿药业有限公司'
  2656. approval_number = '国药准字Z20055023'
  2657. elif self.search_key == '999银菊清咽颗粒':
  2658. expiry_date = '30个月'
  2659. manufacturer = '合肥华润神鹿药业有限公司'
  2660. approval_number = '国药准字Z20026680'
  2661. elif self.search_key == '999阿奇霉素片':
  2662. expiry_date = '48个月'
  2663. manufacturer = '浙江华润三九众益制药有限公司'
  2664. approval_number = '国药准字H20084458'
  2665. elif self.search_key == '999补脾益肠丸':
  2666. expiry_date = '24个月'
  2667. manufacturer = '惠州市九惠制药股份有限公司'
  2668. approval_number = '国药准字Z44023376'
  2669. elif self.search_key == '999壮骨关节丸6g*20':
  2670. expiry_date = '24个月'
  2671. manufacturer = '华润三九医药股份有限公司'
  2672. approval_number = '国药准字Z44023377'
  2673. elif self.search_key == '999壮骨关节胶囊':
  2674. expiry_date = '24个月'
  2675. manufacturer = '华润三九医药股份有限公司'
  2676. approval_number = '国药准字Z20080055'
  2677. elif self.search_key == '999正天丸6g*15':
  2678. expiry_date = '30个月'
  2679. manufacturer = '华润三九医药股份有限公司'
  2680. approval_number = '国药准字Z44020711'
  2681. elif self.search_key == '999正天胶囊':
  2682. expiry_date = '24个月'
  2683. manufacturer = '华润三九医药股份有限公司'
  2684. approval_number = '国药准字Z20010142'
  2685. elif self.search_key == '三九胃泰胶囊':
  2686. expiry_date = '24个月'
  2687. manufacturer = '华润三九医药股份有限公司'
  2688. approval_number = '国药准字Z44020704'
  2689. elif self.search_key == '三九胃泰颗粒20g*10':
  2690. expiry_date = '24个月'
  2691. manufacturer = '华润三九医药股份有限公司'
  2692. approval_number = '国药准字Z44020705'
  2693. elif self.search_key == '999感冒灵颗粒':
  2694. expiry_date = '24个月'
  2695. manufacturer = '华润三九(枣庄)药业有限公司'
  2696. approval_number = '国药准字Z44021940'
  2697. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2698. expiry_date = '36个月'
  2699. manufacturer = '华润三九医药股份有限公司'
  2700. approval_number = '国药准字H44024170'
  2701. elif self.search_key == '三九胃泰颗粒20g*6袋':
  2702. expiry_date = '24个月'
  2703. manufacturer = '华润三九医药股份有限公司'
  2704. approval_number = '国药准字Z44020705'
  2705. elif self.search_key == '顺峰康王酮康他索乳膏':
  2706. expiry_date = '24个月'
  2707. manufacturer = '广东华润顺峰药业有限公司'
  2708. approval_number = '国药准字H10980204'
  2709. elif self.search_key == '999糠酸莫米松凝胶10':
  2710. expiry_date = '36个月'
  2711. manufacturer = '华润三九(南昌)药业有限公司'
  2712. approval_number = '国药准字H20080010'
  2713. elif self.search_key == '999板蓝根颗粒10g*20':
  2714. expiry_date = '36个月'
  2715. manufacturer = '广东恒诚制药股份有限公司'
  2716. approval_number = '国药准字Z44021520'
  2717. elif self.search_key == '999复方氨酚烷胺胶囊' or self.search_key == '999复方氨酚烷胺胶囊12粒' or self.search_key == '999复方氨酚烷胺胶囊10粒' or self.search_key == '999复方氨酚烷胺胶囊6粒':
  2718. expiry_date = '36个月'
  2719. manufacturer = '华润三九(唐山)药业有限公司'
  2720. approval_number = '国药准字H13021912'
  2721. elif self.search_key == '999咽炎片0.26g*12片*2板':
  2722. expiry_date = '24个月'
  2723. manufacturer = '华润三九(黄石)药业有限公司'
  2724. approval_number = '国药准字Z42021062'
  2725. elif self.search_key == '999小儿止咳糖浆120' or self.search_key == '999小儿止咳糖浆225':
  2726. expiry_date = '24个月'
  2727. manufacturer = '华润三九(雅安)药业有限公司'
  2728. approval_number = '国药准字Z51020675'
  2729. elif self.search_key == '999小儿感冒颗粒6g*10' or self.search_key == '999小儿感冒颗粒6g*24':
  2730. expiry_date = '36个月'
  2731. manufacturer = '华润三九(枣庄)药业有限公司'
  2732. approval_number = '国药准字Z37021392'
  2733. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋' or self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  2734. expiry_date = '36个月'
  2735. manufacturer = '华润三九(黄石)药业有限公司'
  2736. approval_number = '国药准字H42022510'
  2737. elif self.search_key == '999感冒灵胶囊':
  2738. expiry_date = '24个月'
  2739. manufacturer = '华润三九医药股份有限公司'
  2740. approval_number = '国药准字Z44021939'
  2741. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  2742. expiry_date = '24个月'
  2743. manufacturer = '华润三九(黄石)药业有限公司'
  2744. approval_number = '国药准字Z42021105'
  2745. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  2746. expiry_date = '18个月'
  2747. manufacturer = '华润三九医药股份有限公司'
  2748. approval_number = '国药准字Z20100067'
  2749. elif self.search_key == '999感冒清热颗粒12g*18':
  2750. expiry_date = '36个月'
  2751. manufacturer = '山东新大陆制药有限公司'
  2752. approval_number = '国药准字Z37020066'
  2753. elif self.search_key == '999小柴胡颗粒10g*15':
  2754. expiry_date = '24个月'
  2755. manufacturer = '广东一力罗定制药有限公司'
  2756. approval_number = '国药准字Z44020709'
  2757. elif self.search_key == '999布洛芬混悬液':
  2758. expiry_date = '24个月'
  2759. manufacturer = '华润三九(南昌)药业有限公司'
  2760. approval_number = '国药准字H20223755'
  2761. else:
  2762. is_has_instructions = self.safe_exec(self.has_instructions)
  2763. # 说明书等信息
  2764. if is_has_instructions:
  2765. print('开始获取说明书信息')
  2766. # instructions_info = self.get_instructions_data()
  2767. instructions_info = self.safe_exec(self.get_instructions_data)
  2768. if instructions_info['有效期'] is not None:
  2769. expiry_date = instructions_info['有效期'].strip('。')
  2770. if instructions_info['生产单位'] is not None:
  2771. manufacturer = instructions_info['生产单位'].strip('。')
  2772. if instructions_info['批准文号'] is not None:
  2773. approval_number = instructions_info['批准文号'].strip('。')
  2774. else:
  2775. # 没有说明书不入库
  2776. print('没有获取到说明书信息')
  2777. self.swipe_back(1)
  2778. return
  2779. # 暂时不获取说明书信息 end
  2780. self.unrelated_data = 0
  2781. if self.search_key == '999小柴胡颗粒10g*15':
  2782. save_search_key = '999小柴胡颗粒'
  2783. else:
  2784. save_search_key = self.search_key
  2785. # 爬取省份
  2786. scrape_province = '广东' # 这里先默认广东
  2787. # 是否有货
  2788. availability = ''
  2789. save_data = {
  2790. 'product': product,
  2791. 'min_price': min_price,
  2792. 'manufacture_date': manufacture_date,
  2793. 'expiry_date': expiry_date,
  2794. 'shop': shop,
  2795. 'business_license_company': business_license_company,
  2796. 'province': province,
  2797. 'city': city,
  2798. 'manufacturer': manufacturer,
  2799. 'specification': specifications,
  2800. 'approval_number': approval_number,
  2801. 'product_link': product_link,
  2802. 'scrape_date': scrape_date,
  2803. 'scrape_province': scrape_province,
  2804. 'availability': availability,
  2805. 'credit_code': credit_code,
  2806. 'platform': '美团',
  2807. 'search_key': save_search_key,
  2808. 'sales': sales_num,
  2809. 'inventory': '',
  2810. 'snapshot_url': snapshot_url
  2811. }
  2812. # ccc注释
  2813. self.save_to_database(save_data)
  2814. # time.sleep(100000)
  2815. time.sleep(self.get_sleep_time())
  2816. if self.distinct_target():
  2817. print('已到达搜索列表页')
  2818. else:
  2819. for i in range(1):
  2820. print('在详情页')
  2821. self.swipe_back(1)
  2822. time.sleep(self.get_sleep_time())
  2823. # 最外部有个定位按钮
  2824. if self.distinct_target():
  2825. break
  2826. # 主函数
  def main(self, device_id, kkk=None, interval_m=None, retry_count=0):
      """Run one collection pass over the search-result list on a device.

      Flow, as implemented below:
        1. Connect to the device and start a ``SpiderMonitor`` thread.
        2. Restart the app, enter the target page and, if ``wr_re`` returns
           saved progress, scroll until the saved shop name is on screen.
        3. For up to 100 pages (minus the resumed page number) walk the
           visible list cards, read price / title / shop from each card,
           save progress, skip rows already stored, open the card and call
           ``integrate_data``.
        4. Always stop the monitor; remove the progress files unless the
           pass is aborting with a re-raised exception.

      Args:
          device_id: Identifier handed to ``connect_devices`` and used for
              the progress files.
          kkk: Sort-order label. It is passed to ``li_or_lo`` and compared
              with "升序" (ascending) / "降序" (descending) for the price
              interval filter.
          interval_m: Optional two-item sequence ``[low, high]`` of price
              bounds. When falsy no price filtering is done.
          retry_count: How many recursive restarts already happened; a
              restart is only attempted while this is below 3.

      Returns:
          ``False`` when the device connection fails; the result of the
          recursive call when the pass restarts itself; otherwise ``None``.

      Raises:
          u2.exceptions.SessionBrokenError: Re-raised after a best-effort
              progress save.
          Exception: Any other unexpected error is logged, progress is
              saved on a best-effort basis, and the error is re-raised.
      """
      MAX_RETRY = 3  # maximum number of recursive restarts
      MAX_PAGES = 100
      # Only cards fully inside this vertical window are processed.
      VISIBLE_TOP, VISIBLE_BOTTOM = 304, 1475
      ITEM_ROOT = '//android.support.v7.widget.RecyclerView/android.widget.FrameLayout'

      def view_groups(*indices):
          """Build a '/android.view.ViewGroup[i]' chain for the given indices."""
          return ''.join(f'/android.view.ViewGroup[{i}]' for i in indices)

      # A card is wrapped in either seven or six nested ViewGroup[1] nodes;
      # the field-specific tail is appended to whichever wrapper applies.
      WRAP_DEEP = view_groups(1, 1, 1, 1, 1, 1, 1)
      WRAP_SHALLOW = view_groups(1, 1, 1, 1, 1, 1)
      TEXT_LEAF = '/android.widget.FrameLayout[1]/android.widget.TextView'
      PRICE_SHORT = view_groups(2, 2, 1, 1) + TEXT_LEAF
      PRICE_LONG = view_groups(2, 2, 1, 1, 1, 1, 1) + TEXT_LEAF
      TITLE_TAIL = view_groups(2, 1, 1) + TEXT_LEAF
      SHOP_TAIL = (view_groups(2, 2, 2, 1)
                   + '/android.widget.FrameLayout[last()]/android.widget.TextView[1]')

      def first_existing_text(candidates):
          """Return (label, text) of the first candidate xpath present on screen."""
          for label, xpath in candidates:
              node = self.d.xpath(xpath)
              if node.exists:
                  return label, node.text
          return None, None

      def parse_price(price_text):
          """Return the first number in price_text as float, or '' if there is none."""
          if not price_text:
              return ''
          found = re.search(r'[\d\.]+', price_text)
          if found is None:
              return ''
          try:
              return float(found.group())
          except ValueError:
              # e.g. a lone "." or "1.2.3" matched by the permissive pattern
              return ''

      def save_progress_best_effort(log_message):
          """Try to persist the current page; never let this mask the real error."""
          try:
              if hasattr(self, 'current_page') and hasattr(self, 'device_id'):
                  self.loggerMT.info(log_message)
                  self.save_progress(self.device_id, self.current_page)
          except Exception as save_error:
              self.loggerMT.warning(f"保存进度失败: {save_error}")

      print(kkk, "采集")
      spider_no = 0
      # Keep the device id on the instance so it can be used for reconnects.
      self.device_id = device_id

      # 1. Connect to the device.
      if not self.connect_devices(device_id):
          self.loggerMT.error(f"设备 {device_id} 连接失败,跳过本次采集")
          return False
      time.sleep(self.get_sleep_time())

      # 2. Start the global popup monitor.
      self.monitor = SpiderMonitor(self)
      self.monitor.start()

      # Set when an exception is re-raised after progress was saved, so the
      # cleanup in ``finally`` does not delete what was just written.
      keep_progress = False
      try:
          data_s = 0  # becomes non-zero once the sort order has been applied
          rang_page = MAX_PAGES
          self.restart_app()
          self.safe_exec(self.enter_target_page)

          # Try to resume from saved progress.
          progress_data = self.safe_exec(self.wr_re, "读", device_id)
          if progress_data:
              self.loggerMT.info(f"从进度恢复: 第{progress_data['page']}页 - {progress_data['shop']}")
              self.safe_exec(self.li_or_lo, kkk)
              data_s += 1
              rang_page = rang_page - progress_data['page']
              self.loggerMT.info("滑动到上次采集的位置...")
              # NOTE(review): unbounded - loops forever if the saved shop is
              # no longer in the result list.
              saved_shop = progress_data["shop"]
              while not self.d.xpath(f'//*[@text="{saved_shop}"]').exists:
                  self.safe_exec(self.slide_n)
          else:
              self.loggerMT.info("无进度文件或进度文件无效,从头开始采集")

          for idx in range(rang_page):
              current_page = idx + 1
              self.current_page = current_page  # read by the exception handlers
              print(f'第{current_page}页')

              # Throttle: rest a minute after more than 30 collected items.
              if spider_no > 30:
                  time.sleep(60)
                  spider_no = 0
              print('目前无关数据量: ', self.unrelated_data)

              # Too many verification prompts seen by the monitor.
              if self.monitor.verification_count >= self.monitor.MAX_VERIFICATION_RETRY:
                  print("频繁遇到验证码,暂停程序")
                  # NOTE(review): this tap does not actually block for user
                  # input; the counter is simply reset afterwards.
                  self.d.click(0, 0)
                  self.monitor.verification_count = 0

              if self.unrelated_data > 15:
                  # More than 15 consecutive unrelated items: stop collecting.
                  print("连续超过15个不达标的数据则停止采集")
                  return

              # Apply the sort order once; with a price interval also scroll
              # until some listed price reaches the lower bound.
              if interval_m and data_s == 0:
                  self.safe_exec(self.li_or_lo, kkk)
                  data_s += 1
                  while not any(x >= interval_m[0] for x in self.get_prices()):
                      self.safe_exec(self.slide_n)
              if data_s == 0:
                  self.safe_exec(self.li_or_lo, kkk)
                  data_s += 1

              # Wait until at least one list card is rendered.
              while not self.d.xpath(ITEM_ROOT).exists:
                  time.sleep(1)

              # Presumably safe_exec yields None when the call fails; fall
              # back to an empty list instead of crashing on len(None).
              drug_lis = self.safe_exec(self.d.xpath(ITEM_ROOT).all) or []
              lis_len = len(drug_lis)
              print(f'当前页面共有{lis_len}个商品')

              for idxx, drug_one in enumerate(drug_lis, start=1):
                  bounds = drug_one.info['bounds']
                  top = bounds['top']
                  bottom = bounds['bottom']
                  print(f'当前商品bottom:{bottom}')
                  print(f'当前商品top:{top}')
                  if not (VISIBLE_TOP <= top and bottom <= VISIBLE_BOTTOM):
                      continue

                  print(f"这页的第几个商品:{idxx}")
                  product_title = ''
                  price = ''
                  shop_name = ''
                  item_root = f'{ITEM_ROOT}[{idxx}]'

                  # Price: candidates are probed in the original order; the
                  # label is only used for the diagnostic print.
                  label, price_str = first_existing_text((
                      ('price_xpath', item_root + WRAP_DEEP + PRICE_SHORT),
                      ('price_xpath3', item_root + WRAP_SHALLOW + PRICE_LONG),
                      ('price_xpath1', item_root + WRAP_SHALLOW + PRICE_SHORT),
                      ('price_xpath2', item_root + WRAP_DEEP + PRICE_LONG),
                  ))
                  if label is None:
                      print("列表当前商品价格不存在")
                  else:
                      print(f"{label}列表当前商品价格:{price_str}")
                      price = parse_price(price_str)

                  if interval_m and kkk in ("升序", "降序"):
                      if price == '':
                          # Comparing '' with the numeric bounds would raise
                          # TypeError and abort the whole pass; skip the card.
                          print('列表当前商品价格缺失,无法判断价格区间,跳过该商品')
                          continue
                      if kkk == "升序":
                          if interval_m[0] > price:
                              continue
                          if price > interval_m[1]:
                              print("当前区间采集完成", interval_m)
                              return
                      else:
                          if interval_m[1] < price:
                              continue
                          if price < interval_m[0]:
                              print("当前区间采集完成", interval_m)
                              return
                  print(f'列表获取到价格:{price}')

                  # Product title.
                  label, title_text = first_existing_text((
                      ('product_tittle_xpath', item_root + WRAP_DEEP + TITLE_TAIL),
                      ('product_tittle_xpath2', item_root + WRAP_SHALLOW + TITLE_TAIL),
                  ))
                  if label is None:
                      print("列表当前商品名称不存在")
                  else:
                      product_title = title_text or ''
                      # Drop a single leading '0' from the title text.
                      if product_title.startswith('0'):
                          product_title = product_title[1:]
                      print(f"{label}列表当前商品名称:{product_title}")

                  # Shop name.
                  label, shop_text = first_existing_text((
                      ('shop_name_xpath', item_root + WRAP_DEEP + SHOP_TAIL),
                      ('shop_name_xpath2', item_root + WRAP_SHALLOW + SHOP_TAIL),
                  ))
                  if label is None:
                      print("列表当前商品店铺名称不存在")
                  else:
                      shop_name = shop_text or ''
                      print(f"{label}列表当前商品店铺名称:{shop_name}")

                  # Save progress for every card that reaches this point.
                  self.auto_save_progress(device_id, idx, shop_name, product_title, price)

                  # Nothing readable at all: not a product card, skip it.
                  if product_title == '' and price == '' and shop_name == '':
                      continue

                  scrape_date = self.get_current_date()
                  if product_title and price and shop_name:
                      # Skip cards whose row is already stored.
                      dup_data = {'product': product_title, 'min_price': price, 'shop': shop_name,
                                  'scrape_date': scrape_date, 'platform': '美团'}
                      if self.data_is_exists(dup_data):
                          print('列表存在相同数据不入库')
                          continue

                  self.safe_exec(drug_one.click)
                  print('点击目标药品完毕')
                  time.sleep(2)

                  # Collect the detail page, then make sure we are back on the list.
                  try:
                      self.integrate_data()
                      if self.distinct_target():
                          print('回退到列表页', True)
                      elif self.d.xpath('//*[@text="搜索"]').exists:
                          print("检测到搜索按钮,重新开始采集流程")
                          if retry_count < MAX_RETRY:
                              # Stop this pass's monitor before the nested
                              # pass starts its own.
                              self.monitor.stop()
                              self.monitor.join()
                              print("递归重启采集,保留当前进度")
                              self.loggerMT.warning(f"第{retry_count + 1}次重试,保留进度文件以便恢复")
                              return self.main(device_id, kkk, interval_m, retry_count + 1)
                          print("超过最大重试次数,终止程序")
                          return
                      else:
                          print("无法恢复页面,终止采集")
                          return
                      time.sleep(self.get_sleep_time())
                      spider_no += 1
                  except Exception as e:
                      print(f'采集药品详情数据出错:{e}')
                      if not self.distinct_target():
                          self.swipe_back(1)
                          # Probe twice, as the original did (presumably to
                          # give the page a moment to settle).
                          back_on_list = self.distinct_target() or self.distinct_target()
                          if not back_on_list:
                              print('页面出错,退出采集')
                              return

              if self.d.xpath('//*[@text="已经到底啦"]').exists:
                  print('已经到达列表页最底部')
                  return
              self.safe_exec(self.slide_n)
      except u2.exceptions.SessionBrokenError as e:
          # The device session dropped: save progress and let the caller reconnect.
          keep_progress = True
          self.loggerMT.error(f"设备会话断开: {e}")
          print(f"⚠️ 设备会话断开,需要重新连接: {e}")
          save_progress_best_effort("设备会话断开,尝试保存当前进度...")
          raise
      except u2.exceptions.UiObjectNotFoundError as e:
          # A UI element was missing: log it and try to go back one page.
          self.loggerMT.warning(f"UI元素未找到: {e}")
          print(f"⚠️ UI元素未找到,跳过当前操作: {e}")
          try:
              self.swipe_back(1)
          except Exception as back_error:
              # Best effort only, but leave a trace instead of hiding it.
              self.loggerMT.warning(f"返回上一页失败: {back_error}")
      except requests.exceptions.RequestException as e:
          # Network failure: log and wait before handing control back.
          self.loggerMT.error(f"网络请求失败: {e}")
          print(f"⚠️ 网络请求失败: {e}")
          print("等待30秒后继续...")
          time.sleep(30)
      except Exception as e:
          # Anything unexpected: save progress and propagate.
          keep_progress = True
          self.loggerMT.exception(f"采集过程中发生未预料异常: {e}")
          print(f"❌ 采集过程中发生未预料异常: {e}")
          save_progress_best_effort("发生未预料异常,尝试保存当前进度...")
          raise
      finally:
          try:
              # Make sure the monitor thread is stopped.
              self.monitor.stop()
              self.monitor.join()
              # Remove progress files once the pass is over - but not when an
              # exception is propagating, because the handlers above have
              # just saved progress for the next attempt.
              # NOTE(review): assumes save_progress writes the files removed
              # here - confirm against save_progress / wr_re.
              if hasattr(self, 'current_page') and not keep_progress:
                  try:
                      progress_file = f'./ycwj/{self.device_id}_{self.search_key}.txt'
                      if os.path.exists(progress_file):
                          self.loggerMT.info(f"采集完成,清理进度文件: {progress_file}")
                          os.remove(progress_file)
                      # Remove checkpoint files as well.
                      self._cleanup_checkpoint_files(self.device_id, self.search_key)
                  except Exception as e:
                      self.loggerMT.warning(f"清理进度文件失败: {e}")
          except Exception as e:
              self.loggerMT.error(f"finally块执行异常: {e}")
      # Signal that the loop has stopped.
      # NOTE(review): the source lost its indentation; this is kept at
      # function level (runs only when the try block is left without a
      # return/raise) - confirm it was not meant to live inside ``finally``.
      self.xh = False
  3120. def unitest(self):
  3121. time.sleep(100000)
  3122. """
  3123. 单元测试
  3124. :return:
  3125. """
  3126. save_data = {
  3127. 'product': "[昆中药]舒肝颗粒(低糖型)",
  3128. 'min_price': 14.0,
  3129. 'manufacture_date': '',
  3130. 'expiry_date': '36个月',
  3131. 'shop': '美团自营大药房(快递电商)',
  3132. 'business_license_company': '',
  3133. 'province': '',
  3134. 'city': '',
  3135. 'manufacturer': '昆明中药厂有限公司',
  3136. 'specification': '3g*16袋/盒',
  3137. 'approval_number': '国药准字Z53021161',
  3138. 'product_link': '',
  3139. 'scrape_date': '2025/07/09',
  3140. 'scrape_province': '广东',
  3141. 'availability': '',
  3142. 'credit_code': '',
  3143. 'platform': '美团',
  3144. 'search_key': '',
  3145. 'sales': '',
  3146. 'inventory': '',
  3147. 'snapshot_url': ''
  3148. }
  3149. self.save_to_database(save_data)
  3150. time.sleep(100000)
  3151. pass
  3152. def main():
  3153. """
  3154. 主程序入口,增强异常处理和恢复机制
  3155. """
  3156. keys_list = [
  3157. '天力士复方丹参滴丸27mg180丸盒',
  3158. # '999糠酸莫米松凝胶10', # 不低于26.9
  3159. # '999糠酸莫米松凝胶15',
  3160. ]
  3161. device_id = ''
  3162. cycle_no = 0 # 轮次计数
  3163. max_cycles = 100 # 最大循环次数,防止无限循环
  3164. cycle_cooldown = 60 # 每轮之间的冷却时间(秒)
  3165. # 是否循环采集
  3166. xh_d = True
  3167. while xh_d and cycle_no < max_cycles:
  3168. cycle_no += 1
  3169. logging.info(f'========== 第 {cycle_no} 轮采集开始 ==========')
  3170. # 记录本轮开始时间
  3171. cycle_start_time = time.time()
  3172. for idx, key in enumerate(keys_list, 1):
  3173. logging.info(f'[{idx}/{len(keys_list)}] 开始采集关键字:{key}')
  3174. mt = None
  3175. try:
  3176. # 创建MT实例
  3177. mt = MT(key)
  3178. # 执行采集
  3179. success = mt.main(device_id, )
  3180. if success:
  3181. logging.info(f'✅ 关键字 {key} 采集成功')
  3182. else:
  3183. logging.warning(f'⚠️ 关键字 {key} 采集失败或部分失败')
  3184. xh_d = mt.xh
  3185. # 如果采集成功,跳出循环进入下一轮
  3186. if success:
  3187. break
  3188. except u2.exceptions.SessionBrokenError as e:
  3189. # 设备会话断开异常
  3190. logging.error(f'❌ 设备会话断开,需要人工干预: {e}')
  3191. print(f"⚠️ 设备会话断开,请检查设备连接后继续...")
  3192. # 等待用户处理
  3193. input("请处理设备连接问题后按Enter键继续...")
  3194. except u2.exceptions.UiObjectNotFoundError as e:
  3195. # UI元素未找到异常
  3196. logging.warning(f'⚠️ UI元素未找到,跳过当前关键字: {e}')
  3197. print(f"⚠️ UI元素未找到,跳过关键字 {key}")
  3198. except requests.exceptions.RequestException as e:
  3199. # 网络异常
  3200. logging.error(f'🌐 网络异常,等待恢复: {e}')
  3201. print(f"⚠️ 网络异常,等待30秒后重试...")
  3202. time.sleep(30)
  3203. except KeyboardInterrupt:
  3204. # 用户中断
  3205. logging.info('用户中断采集')
  3206. print("\n⚠️ 采集被用户中断")
  3207. xh_d = False
  3208. break
  3209. except Exception as e:
  3210. # 其他未预料异常
  3211. logging.exception(f'❌ 关键字 {key} 采集发生未预料异常: {e}')
  3212. print(f"❌ 关键字 {key} 采集失败: {e}")
  3213. finally:
  3214. # 清理资源
  3215. if mt:
  3216. try:
  3217. # 调用清理方法(如果存在)
  3218. if hasattr(mt, 'cleanup'):
  3219. mt.cleanup()
  3220. elif hasattr(mt, 'close'):
  3221. mt.close()
  3222. except Exception as cleanup_error:
  3223. logging.warning(f'清理资源时发生错误: {cleanup_error}')
  3224. # 计算本轮耗时
  3225. cycle_duration = time.time() - cycle_start_time
  3226. logging.info(f'第 {cycle_no} 轮采集完成,耗时: {cycle_duration:.2f}秒')
  3227. # 如果不是最后一轮,添加冷却时间
  3228. if xh_d and cycle_no < max_cycles:
  3229. logging.info(f'等待 {cycle_cooldown} 秒后开始下一轮采集...')
  3230. print(f"等待 {cycle_cooldown} 秒后开始下一轮采集...")
  3231. time.sleep(cycle_cooldown)
  3232. if cycle_no >= max_cycles:
  3233. logging.warning(f'已达到最大循环次数 ({max_cycles}),停止采集')
  3234. print(f"⚠️ 已达到最大循环次数 ({max_cycles}),停止采集")
  3235. logging.info('========== 采集程序结束 ==========')
  3236. print("✅ 采集程序结束")
  3237. if __name__ == '__main__':
  3238. main()