no_search_another_way.py 129 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
7227822792280228122822283228422852286228722882289229022912292229322942295229622972298229923002301230223032304230523062307230823092310231123122313231423152316231723182319232023212322232323242325232623272328232923302331233223332334233523362337233823392340234123422343234423452346234723482349235023512352235323542355235623572358235923602361236223632364236523662367236823692370237123722373237423752376237723782379238023812382238323842385238623872388238923902391239223932394239523962397239823992400240124022403240424052406240724082409241024112412241324142415241624172418241924202421242224232424242524262427242824292430243124322433243424352436243724382439244024412442244324442445244624472448244924502451245224532454245524562457245824592460246124622463246424652466246724682469247024712472247324742475247624772478247924802481248224832484248524862487248824892490249124922493249424952496249724982499250025012502250325042505250625072508250925102511251225132514251525162517251825192520252125222523252425252526252725282529253025312532253325342535253625372538253925402541254225432544254525462547254825492550
  1. import requests
  2. import base64
  3. import cv2
  4. import uiautomator2 as u2
  5. import time
  6. import subprocess
  7. import re
  8. import random
  9. import datetime
  10. import json
  11. from aip import AipOcr
  12. from apscheduler.schedulers.blocking import BlockingScheduler
  13. # from db_mysql import mysqlClient
  14. import threading
  15. from collections import deque
  16. import numpy as np
  17. import secrets
  18. import os
  19. # import pyperclip
  20. from config import Config
  21. from logger import setup_logger
  22. import logging
  23. # from database import MySQLClient
  24. # 配置日志
  25. # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  26. setup_logger("mt_spider") # 初始化日志
  27. class SpiderMonitor(threading.Thread):
  28. """全局弹窗监控线程(增强版)"""
  29. def __init__(self, spider_instance):
  30. super().__init__(daemon=True)
  31. self.spider = spider_instance
  32. self.running = True
  33. self.pausing = threading.Event() # 主线程同步事件
  34. self.last_verification_time = 0
  35. self.verification_count = 0
  36. self.MAX_VERIFICATION_RETRY = 3
  37. self.recent_clicks = deque(maxlen=10) # 防重复点击
  38. self.logger = logging.getLogger("SpiderMonitor")
  39. # 可配置化弹窗规则
  40. self.popup_rules = {
  41. "simple": [
  42. ('//*[@text="确定"]', "点击确定"),
  43. ('//*[@text="允许"]', "点击允许"),
  44. ('//*[@text="关闭"]', "点击关闭"),
  45. ('//*[@resource-id="com.sankuai.meituan:id/close"]', "关闭按钮"),
  46. ],
  47. "verification": [
  48. '//*[contains(@text, "验证")]',
  49. '//*[contains(@text, "滑块")]',
  50. '//*[contains(@text, "依次点击")]',
  51. '//*[contains(@text, "请点击")]',
  52. '//*[contains(@text, "拖动滑块刚")]',
  53. '//*[contains(@text, "请输入图片中的内容")]',
  54. '//*[contains(@text, "用最短线连接")]',
  55. '//*[contains(@text, "请按语序依次点击")]',
  56. '//*[contains(@text, "请向右滑动滑块")]',
  57. '//*[contains(@text, "请拖动下方滑块完成拼图")]',
  58. '//*[contains(@resource-id, "captcha")]'
  59. ]
  60. }
  61. def run(self):
  62. while self.running:
  63. try:
  64. handled = self.check_and_handle_popup()
  65. time.sleep(2 if handled else 1)
  66. except Exception as e:
  67. self.logger.exception("监控线程异常: %s", e)
  68. time.sleep(3)
  69. def _is_recent_click(self, xpath):
  70. """防止重复点击同一个弹窗"""
  71. key = f"{xpath}_{int(time.time())}"
  72. if key in self.recent_clicks:
  73. return True
  74. self.recent_clicks.append(key)
  75. return False
  76. def check_and_handle_popup(self):
  77. d = self.spider.d
  78. # 1. 处理简单弹窗
  79. for xpath, desc in self.popup_rules["simple"]:
  80. if d.xpath(xpath).exists and not self._is_recent_click(xpath):
  81. self.logger.info("检测到弹窗: %s", desc)
  82. d.xpath(xpath).click()
  83. return True
  84. # 2. 处理验证码弹窗
  85. for xpath in self.popup_rules["verification"]:
  86. if d.xpath(xpath).exists:
  87. now = time.time()
  88. if now - self.last_verification_time < 30:
  89. return False # 30秒内不重复触发
  90. self.last_verification_time = now
  91. self.verification_count += 1
  92. self.logger.warning("验证码弹窗触发,等待人工处理...")
  93. if self.verification_count > self.MAX_VERIFICATION_RETRY:
  94. self.logger.error("验证码重试超限,终止任务")
  95. self.spider.stop_all()
  96. return True
  97. self.pausing.set() # 通知主线程暂停
  98. d.toast.show("需要人工处理验证码", 120)
  99. # 等待人工处理
  100. start = time.time()
  101. # while time.time() - start < 120*60:
  102. # if not d.xpath(xpath).exists:
  103. # self.logger.info("验证码已处理")
  104. # d.toast.show("验证完成", 2)
  105. # self.pausing.clear() # 放行主线程
  106. # return True
  107. # time.sleep(5)
  108. while True:
  109. if not d.xpath(xpath).exists:
  110. self.logger.info("验证码已处理")
  111. d.toast.show("验证完成", 2)
  112. self.pausing.clear() # 放行主线程
  113. return True
  114. time.sleep(5)
  115. self.logger.warning("验证码超时,重启APP")
  116. self.spider.restart_app()
  117. return True
  118. # 3. 处理广告弹窗(点击右上角)
  119. if d.xpath('//*[contains(@text, "广告")]').exists:
  120. w, h = d.info['displayWidth'], d.info['displayHeight']
  121. d.click(w - 50, 50)
  122. self.logger.info("关闭广告弹窗")
  123. return True
  124. return False
  125. def stop(self):
  126. self.running = False
  127. def get_access_token():
  128. AppKey = "tRK2RhyItCSh6BzyT4CNVXQa"
  129. AppSrcret = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
  130. token_url = 'https://aip.baidubce.com/oauth/2.0/token'
  131. url = f"{token_url}?grant_type=client_credentials&client_id={AppKey}&client_secret={AppSrcret}"
  132. payload = ""
  133. headers = {
  134. 'Content-Type': 'application/json',
  135. 'Accept': 'application/json'
  136. }
  137. response = requests.request("POST", url, headers=headers, data=payload)
  138. try:
  139. return response.json()['access_token']
  140. except:
  141. return None
  142. def get_mysql():
  143. """
  144. 建立并返回一个到数据库的连接对象
  145. """
  146. import pymysql
  147. return pymysql.connect(
  148. host = Config.DB_HOST, #"localhost", # 修改后的主机
  149. port = Config.DB_PORT, #3306, # 添加端口号
  150. user = Config.DB_USER, #'root', # 修改后的用户名
  151. password = Config.DB_PASSWORD, # 修改后的密码
  152. db = Config.DB_NAME, #"drug_data", # 修改后的数据库名
  153. charset='utf8mb4'
  154. )
  155. class MT:
  156. def __init__(self, key):
  157. # self.package_name = 'com.sankuai.meituan'
  158. self.package_name = Config.PACKAGE_NAME
  159. self.access_token = get_access_token()
  160. self.city2province = self.get_city_info()
  161. self.APP_ID = '116857964'
  162. self.API_KEY = '1gAzACJOAr7BeILKqkqPOETh'
  163. self.SECRET_KEY = 'ZNArANb9GwJYgLKg4EfYhukKBfPdl1n3'
  164. self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
  165. # host = Config.DB_HOST #"localhost"
  166. # user = Config.DB_USER #"root"
  167. # password = Config.DB_PASSWORD #"dfwy2025"
  168. # database = Config.DB_NAME #"drug_data"
  169. # port = Config.DB_PORT#3306
  170. # print(f'数据库配置:host:{host},user:{user},password:{password},database:{database},port:{port}')
  171. self.table_name = Config.DB_TABLE #"mt_drug"
  172. self.shop_table_name = Config.DB_SHOP_TABLE
  173. # print(f'数据库表名:table_name:{self.table_name},shop_table_name:{self.shop_table_name}')
  174. # self.mysql_client = mysqlClient(host, user, password, database, port)
  175. self.loggerMT = logging.getLogger()
  176. self.search_key = key # 参苓健脾胃颗粒 舒肝颗粒 清肺化痰丸 香砂平胃颗粒
  177. self.unrelated_data = 0 # 无关数据数量
  178. self.shop_data_num = 0 # 店铺数据数量
  179. def stop_app(self):
  180. self.d.app_stop(self.package_name)
  181. time.sleep(5)
  182. def start_app(self):
  183. self.d.app_start(self.package_name)
  184. time.sleep(5)
  185. def restart_app(self):
  186. """
  187. 重启app
  188. :return:
  189. """
  190. self.stop_app()
  191. self.start_app()
  192. @staticmethod
  193. def get_sleep_time():
  194. # return random.randint(5, 8)
  195. return random.randint(1, 3)
  196. @staticmethod
  197. def get_current_date():
  198. return datetime.datetime.now().strftime('%Y/%m/%d')
  199. @staticmethod
  200. def get_city_info():
  201. """
  202. 获取所有的省市数据
  203. :return:
  204. """
  205. file_path = '../kailin_city.json'
  206. with open(file_path, 'r', encoding='utf-8') as f:
  207. data = json.load(f)
  208. province = {province_one["id"]: province_one for province_one in data['province']}
  209. city2province = dict()
  210. city = data['city']
  211. for city_one in city:
  212. name = city_one['name']
  213. pid = city_one['pid']
  214. if len(str(pid)) > 2:
  215. pid = int(re.match('^\d{2}', str(pid)).group())
  216. city2province[name] = province[pid]['name']
  217. return city2province
  218. def get_shop_name(self):
  219. """
  220. 获取店铺名
  221. :return:
  222. """
  223. try:
  224. shop_name = self.d.xpath(
  225. '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView').text
  226. print(f'获取到店铺名:{shop_name}')
  227. return shop_name
  228. except:
  229. try:
  230. shop_name = self.d.xpath(
  231. '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView').text
  232. print(f'获取到店铺名2:{shop_name}')
  233. return shop_name
  234. except Exception as e:
  235. print(f'获取店铺名出错:{e}')
  236. return None
  237. def get_qualification_number(self):
  238. """
  239. 获取资质编号
  240. :return:
  241. """
  242. try:
  243. qualification_number_str = self.d.xpath(
  244. '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[2]').text
  245. qualification_number = qualification_number_str.strip('资质编号:').strip()
  246. return qualification_number
  247. except:
  248. return None
  249. def get_shop_address(self):
  250. try:
  251. xpath = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView'
  252. if self.d.xpath(xpath).exists:
  253. shop_address = self.d.xpath(xpath).text
  254. print(f'111-获取到店铺地址:{shop_address}')
  255. if '发货时间' in shop_address:
  256. print(f'店铺地址包含发货时间,再次获取店铺地址')
  257. xpath2 = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.TextView'
  258. if self.d.xpath(xpath2).exists:
  259. shop_address = self.d.xpath(xpath2).text
  260. print(f'222-获取到店铺地址:{shop_address}')
  261. else:
  262. print(f'222-xpath2获取店铺地址失败')
  263. else:
  264. shop_address = ''
  265. print(f'333-获取到店铺地址:{shop_address}')
  266. return shop_address
  267. except:
  268. print(f'获取店铺地址出错-get_shop_address')
  269. return None
  270. def enter_detail(self):
  271. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/recycler"]/android.widget.FrameLayout[1]').click()
  272. time.sleep(self.get_sleep_time())
  273. def save_to_database(self, data):
  274. print(f'保存数据到数据库:{data}')
  275. # 连接数据库
  276. conn = get_mysql()
  277. # 创建游标对象
  278. cur = conn.cursor()
  279. # add_sql = "insert into delete_friend_table(delete_user_name,delete_user_id,delete_content,delete_time) value(%s,%s,%s,%s)"
  280. add_sql = f"""
  281. INSERT INTO {self.table_name}
  282. (product, min_price, manufacture_date, expiry_date, shop, business_license_company, province, city, manufacturer, specification, approval_number, product_link, scrape_date, scrape_province, availability, credit_code, platform)
  283. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  284. """
  285. # cur.execute(add_sql, (data['product'], data['min_price'], data['manufacture_date'], data['expiry_date'], data['shop'], data['business_license_company'],data['province'], data['city'], data['manufacturer'], data['specification'], data['approval_number'], data['product_link'], self.get_current_date(), data['scrape_province'], data['availability'], data['credit_code'], data['platform']))
  286. cur.execute(add_sql, (data['product'], data['min_price'], data['manufacture_date'], data['expiry_date'], data['shop'], data['business_license_company'],data['province'], data['city'], data['manufacturer'], data['specification'], data['approval_number'], data['product_link'], data['scrape_date'], data['scrape_province'], data['availability'], data['credit_code'], data['platform']))
  287. conn.commit() # 提交数据
  288. #self.mysql_client.insert(self.table_name, data)
  289. print(f"存入数据库成功")
  290. def save_shop_info_to_database(self, data):
  291. print(f'保存店铺数据到数据库:{data}')
  292. # 连接数据库
  293. conn = get_mysql()
  294. # 创建游标对象
  295. cur = conn.cursor()
  296. add_sql = f"""
  297. INSERT INTO {self.shop_table_name}
  298. (shop, contact_address, qualification_number, business_license_company, business_license_address, scrape_date, platform)
  299. VALUES (%s, %s, %s, %s, %s, %s, %s)
  300. """
  301. cur.execute(add_sql, (data['shop'], data['contact_address'], data['qualification_number'], data['business_license_company'], data['business_license_address'], data['scrape_date'], data['platform']))
  302. conn.commit() # 提交数据
  303. #self.mysql_client.insert(self.shop_table_name, data)
  304. print(f'存入店铺信息到数据库成功')
  305. def swipe_up(self):
  306. """
  307. 上滑
  308. :return:
  309. """
  310. screen_width = self.d.info['displayWidth']
  311. screen_height = self.d.info['displayHeight']
  312. duration_rate = random.uniform(0, 0.3)
  313. self.d.swipe(screen_width // 2, screen_height - 100, screen_width // 2, 100, duration=duration_rate)
  314. no = random.uniform(0, 1)
  315. if no > 0.85:
  316. # 有的时候卡着 再稍微往上滑一点点
  317. self.d.swipe_ext("up", 0.1)
  318. time.sleep(self.get_sleep_time())
  319. def swipe_back(self, no):
  320. """
  321. 返回
  322. :param no: 回退次数
  323. :return:
  324. """
  325. for idx in range(no):
  326. self.d.press('back')
  327. time.sleep(self.get_sleep_time())
  328. def drug_price(self):
  329. """
  330. 获取药品价格
  331. :return:
  332. """
  333. try:
  334. price_str = self.d.xpath('//*[starts-with(@text,"¥")]').text
  335. price = float(re.search('[\d\.]+', price_str).group())
  336. print(f'获取到价格:{price}')
  337. return price
  338. except Exception as e:
  339. print(f'提取价格出错-->{e}')
  340. return None
  341. def restart_uiautomator_services(self, device_id):
  342. """
  343. 重启atx的uiautomator 服务
  344. :param device_id:
  345. :return:
  346. """
  347. stop_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d --stop'
  348. start_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d'
  349. # result = subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
  350. # print(result.stdout)
  351. subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
  352. time.sleep(self.get_sleep_time())
  353. subprocess.run(start_uiautomator_services, capture_output=True, text=True, shell=True)
  354. time.sleep(self.get_sleep_time())
  355. def connect_devices(self, device_id):
  356. """
  357. 连接设备
  358. :return:
  359. """
  360. try:
  361. self.d = u2.connect_usb(device_id)
  362. # 设置隐形等待时间
  363. # self.d.implicitly_wait(5)
  364. self.restart_uiautomator_services(device_id)
  365. print(f'连接到设备:{device_id}')
  366. except Exception as e:
  367. print(f'{device_id} 连接错误: {e}')
  368. raise Exception(e)
  369. def get_ocr_res(self, img):
  370. try:
  371. #img地址
  372. print(f'开始识别图片:{img}')
  373. request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
  374. # 二进制方式打开图片文件
  375. f = open(img, 'rb')
  376. img = base64.b64encode(f.read())
  377. params = {"image": img}
  378. # access_token = get_access_token()
  379. request_url = request_url + "?access_token=" + self.access_token
  380. headers = {'content-type': 'application/x-www-form-urlencoded'}
  381. response = requests.post(request_url, data=params, headers=headers)
  382. if response:
  383. res = response.json()
  384. new_dic = dict()
  385. for ite in res['words_result'].keys():
  386. new_dic[ite] = res['words_result'][ite]['words']
  387. print('资质数据信息', new_dic)
  388. return new_dic
  389. else:
  390. return None
  391. except:
  392. return None
  393. def remove_watermark(self, img_path):
  394. """
  395. 图片去水印(将水印部分变成白色背景)并将数据转化为二进制数据
  396. :param img_path: 图片路径
  397. :return: 二进制图片数据
  398. """
  399. img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
  400. endswith = os.path.splitext(img_path)[1]
  401. new = np.clip(1.4057577998008846 * img - 38.33089999653017, 0, 255).astype(np.uint8)
  402. _, img_binary = cv2.imencode(endswith, new)
  403. return img_binary
  404. def get_ocr_res_image(self, img):
  405. try:
  406. image = self.remove_watermark(img)
  407. # image_file = open(img,'wb')
  408. # image_file.write(image)
  409. # res_image = self.client.basicAccurate(image) # 高精度
  410. res_image = self.client.basicGeneral(image)
  411. # print(f'百度api返回结果:{res_image}')
  412. # print(res_image.get('words_result', ''))
  413. # new_dic = dict()
  414. data = res_image.get('words_result', '')
  415. print(f'百度api返回结果:{data}')
  416. # full_text = ';'.join(item['words'] for item in data)
  417. # address = ''
  418. # for item in data:
  419. # if '企业注册号' in item['words']:
  420. # print('come in 111')
  421. # reg_number = item['words'].split(':', 1)[1].strip()
  422. # elif '企业名称' in item['words']:
  423. # print('come in 222')
  424. # company_name = item['words'].split(':', 1)[1].strip()
  425. # elif '所:' in item['words']:
  426. # print('come in 333')
  427. # address = item['words'].split(':', 1)[1].strip()
  428. # # 输出结果
  429. # print("企业注册号:", reg_number)
  430. # print("企业名称:", company_name)
  431. # print("住所:", address)
  432. return data
  433. except:
  434. return None
  435. def screenshot_the_business_license(self, qualification_number):
  436. screenshot_path = 'screenshot1.png'
  437. self.d.screenshot(screenshot_path)
  438. img = cv2.imread(screenshot_path)
  439. # 指定裁剪区域 (left, top, right, bottom)
  440. left = 0
  441. top = 480
  442. right = 720
  443. bottom = 1420
  444. cropped_img = img[top:bottom, left:right]
  445. if qualification_number:
  446. cropped_screenshot_path = 'D:\\work\\dfwy_spider\\drug_data\\mt\\screenshot\\' + qualification_number + '.png'
  447. else:
  448. cropped_screenshot_path = 'cropped_screenshot.png'
  449. cv2.imwrite(cropped_screenshot_path, cropped_img)
  450. return cropped_screenshot_path
  451. def screenshot_instruction(self):
  452. # 获取当前时间
  453. current_time = datetime.datetime.now()
  454. # 格式化为时分秒
  455. time_str = current_time.strftime("%H-%M-%S")
  456. # 生成随机的 8 位字符串
  457. random_str = secrets.token_hex(4) # 生成 4 个字节的随机字符串,转换为 8 位十六进制字符串
  458. print(time_str)
  459. screenshot_path = 'instructionscreenshot1-' + time_str + '-' + random_str + '.png'
  460. self.d.screenshot(screenshot_path)
  461. return screenshot_path
  462. #获取商品title
  463. def get_title(self):
  464. # try:
  465. # title = self.d.xpath(
  466. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  467. # except:
  468. # title = self.d.xpath(
  469. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView').text
  470. # title = self.d.xpath('//*[contains(@text, "舒肝颗粒")]').text
  471. def _inner():
  472. temp_search_key = self.search_key
  473. if "999" in self.search_key:
  474. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  475. temp_search_key = self.search_key.replace("999皮炎平", "")
  476. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g/支':
  477. temp_search_key = self.search_key.replace("999必无忧", "")
  478. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  479. temp_search_key = self.search_key.replace("999必无忧", "")
  480. elif self.search_key == '999速复康布洛芬缓释胶囊':
  481. temp_search_key = self.search_key.replace("999速复康", "")
  482. else:
  483. temp_search_key = self.search_key.replace("999", "")
  484. else:
  485. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  486. temp_search_key = self.search_key.replace("史达功", "")
  487. temp_search_key = temp_search_key.replace("120", "")
  488. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  489. temp_search_key = self.search_key.replace("三九胃泰", "")
  490. temp_search_key = temp_search_key.replace("8袋", "")
  491. elif self.search_key == '今维多赐多康牌蛋白粉':
  492. temp_search_key = self.search_key.replace("今维多", "")
  493. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  494. temp_search_key = self.search_key.replace("佳美舒", "")
  495. temp_search_key = temp_search_key.replace("8", "")
  496. if self.search_key == '999糠酸莫米松凝胶15':
  497. temp_search_key = temp_search_key.replace("15", "")
  498. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  499. temp_search_key = temp_search_key.replace("30", "")
  500. elif self.search_key == '999复方感冒灵颗粒15':
  501. temp_search_key = temp_search_key.replace("15", "")
  502. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  503. temp_search_key = temp_search_key.replace("30", "")
  504. elif self.search_key == '999复方金银花颗粒10g':
  505. temp_search_key = temp_search_key.replace("10g", "")
  506. elif self.search_key == '999复方苦参肠炎康片12片':
  507. temp_search_key = temp_search_key.replace("12片", "")
  508. elif self.search_key == '999强力枇杷露16袋':
  509. temp_search_key = temp_search_key.replace("16袋", "")
  510. elif self.search_key == '999三蛇胆川贝膏138':
  511. temp_search_key = temp_search_key.replace("138", "")
  512. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g/支':
  513. temp_search_key = temp_search_key.replace("15g/支", "")
  514. elif self.search_key == '999止泻利颗粒15g*8':
  515. temp_search_key = temp_search_key.replace("15g*8", "")
  516. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  517. temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
  518. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  519. temp_search_key = temp_search_key.replace("6粒", "")
  520. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  521. temp_search_key = temp_search_key.replace("50", "")
  522. print(f'获取商品title时的搜索关键字:{temp_search_key}')
  523. # title = self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text
  524. # title_xpath = f'//*[contains(@text, "{temp_search_key}")]'
  525. # if self.d.xpath(title_xpath).exists:
  526. # title = self.safe_exec(
  527. # lambda: self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
  528. # )
  529. # else:
  530. # print("标题不包含关键字")
  531. # return None, None
  532. # if self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').exists:
  533. # else:
  534. # print("标题不包含关键字")
  535. # return None, None
  536. time.sleep(3)
  537. # container_view = self.d(resourceId="com.sankuai.meituan:id/container")
  538. # count = container_view.count
  539. # print(f"找到的 container 数量: {count}")
  540. # content_view = self.d(resourceId="com.sankuai.meituan:id/content")
  541. # count = content_view.count
  542. # print(f"找到的 content 数量: {count}")
  543. # time.sleep(1000)
  544. drugs_name = ''
  545. specifications = ''
  546. try:
  547. title_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  548. title_xpath_2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  549. if self.d.xpath(title_xpath).exists:
  550. title = self.d.xpath(title_xpath).text
  551. print(f"title_xpath获取的title={title}")
  552. if temp_search_key not in title:
  553. print (f"搜索关键字:{temp_search_key}, 不存在title={title}中")
  554. return drugs_name, specifications
  555. elif self.d.xpath(title_xpath_2).exists:
  556. title = self.d.xpath(title_xpath_2).text
  557. print(f"title_xpath_2获取的title={title}")
  558. if temp_search_key not in title:
  559. print (f"搜索关键字{temp_search_key} 不存在title={title}中")
  560. return drugs_name, specifications
  561. else:
  562. print('title_xpath不存在,请确认')
  563. return drugs_name, specifications
  564. # title = self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
  565. except Exception as e:
  566. print(f"发生异常: {e}")
  567. return drugs_name, specifications
  568. #奇怪:有的时候title取出来的记过第一位会多一个0
  569. # title = self.safe_exec(self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text)
  570. # title = self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  571. title = title[1:] if title.startswith('0') else title
  572. print(f'获取到药品标题:{title}')
  573. # 从里面匹配出药品名和规格
  574. # drugs_name
  575. # specifications
  576. # match = re.search(r'([^\d]+)([\d\D]+)', title)
  577. if self.search_key == '999赐多康大豆':
  578. return title, '1罐'
  579. if self.search_key == "999感冒清热颗粒" :
  580. match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
  581. else:
  582. match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
  583. if match:
  584. #drugs_name = match.group(1).strip() + match.group(2).strip()
  585. drugs_name = title
  586. specifications = match.group(3).strip()
  587. print("药品名:", drugs_name)
  588. print("规格:", specifications)
  589. # print('完整药名:', drugs_name + specifications)
  590. return drugs_name, specifications
  591. else:
  592. print("没有匹配到预期格式")
  593. return drugs_name, specifications
  594. # 用 safe_exec 包装内部逻辑,确保验证码阻塞
  595. return self.safe_exec(_inner)
  def enter_shop(self):
      """
      Open the shop tab so the shop's qualification data can be collected.

      Clicks the element whose text is "店铺" and then pauses for the
      interval returned by ``self.get_sleep_time()``.

      :return: None
      """
      shop_tab = self.d.xpath('//*[@text="店铺"]')
      shop_tab.click()
      pause_seconds = self.get_sleep_time()
      time.sleep(pause_seconds)
  def enter_shoper(self):
      """
      Switch to the "商家" (merchant) tab.

      Polls up to 10 times for an element whose text is "商家", sleeping
      ``self.get_sleep_time()`` seconds after every miss, then clicks the
      element and sleeps once more.

      :return: None
      """
      merchant_xpath = '//*[@text="商家"]'
      attempt = 0
      while attempt < 10:
          if self.d.xpath(merchant_xpath).exists:
              print(f'第{attempt}次商家存在')
              break
          print(f'第{attempt}次商家不存在')
          time.sleep(self.get_sleep_time())
          attempt += 1
      # The click is issued even if polling never saw the element.
      self.d.xpath(merchant_xpath).click()
      time.sleep(self.get_sleep_time())
  618. #点击查看商家资质
  def scan_shoper_license(self):
      """
      Click "查看商家资质" (view merchant qualification) when it is shown.

      Polls up to 10 times for the entry, sleeping
      ``self.get_sleep_time()`` seconds after every miss. When the entry
      is found it is clicked and the method sleeps once more; when it
      never appears the method calls ``self.swipe_back(1)`` instead.

      :return: None
      """
      license_xpath = '//*[@text="查看商家资质"]'
      for attempt in range(10):
          if self.d.xpath(license_xpath).exists:
              print(f'第{attempt}次查看商家资质存在')
              break
          print(f'第{attempt}次查看商家资质不存在')
          time.sleep(self.get_sleep_time())
      else:
          # Loop finished without a break: the entry never appeared.
          self.swipe_back(1)
          return
      self.d.xpath(license_xpath).click()
      time.sleep(self.get_sleep_time())
  634. #验证商品的信息是否在数据库中已存在
  def data_is_exists(self, data):
      """
      Check whether a scraped product row already exists in the table.

      Only existence is checked; the matching row itself is discarded.

      :param data: dict of query conditions; must contain the keys
          ``product``, ``min_price``, ``shop``, ``scrape_date`` and
          ``platform`` (column name -> value).
      :return: True when a matching row exists, False when none exists,
          None when a required key is missing or the query fails.
      """
      required_keys = ['product', 'min_price', 'shop', 'scrape_date', 'platform']
      missing = [key for key in required_keys if key not in data]
      if missing:
          logging.error(f"缺少必要字段: {', '.join(missing)}")
          return None

      cur = None
      try:
          conn = get_mysql()
          cur = conn.cursor()
          # The table name cannot be bound as a parameter, so it is
          # formatted in; all values go through driver placeholders.
          query_sql = """
              SELECT * FROM {}
              WHERE product = %s
              AND min_price = %s
              AND shop = %s
              AND scrape_date = %s
              AND platform = %s
              """.format(self.table_name)
          cur.execute(query_sql, (
              data['product'],
              data['min_price'],
              data['shop'],
              data['scrape_date'],
              data['platform'],
          ))
          return bool(cur.fetchone())
      except Exception as e:
          print(f"MySQL 错误: {str(e)}")
          return None
      finally:
          # Release the cursor on every path.
          # NOTE(review): the connection is left open because the
          # ownership rules of get_mysql() are not visible here - confirm.
          if cur is not None:
              try:
                  cur.close()
              except Exception as close_err:
                  print(f"MySQL 错误: {str(close_err)}")
  679. #验证店铺信息是否在数据库中已存在
  def shop_is_exists_database(self, shop):
      """
      Check whether a shop is already stored in the shop table.

      :param shop: value compared against the ``shop`` column.
      :return: True when a matching row exists, False when none exists,
          None when the query fails.
      """
      cur = None
      try:
          conn = get_mysql()
          cur = conn.cursor()
          query_sql = """
              SELECT * FROM {}
              WHERE shop = %s
              """.format(self.shop_table_name)
          # Parameters must be a sequence: "(shop)" is just the bare
          # value, "(shop,)" is the intended one-element tuple.
          cur.execute(query_sql, (shop,))
          return bool(cur.fetchone())
      except Exception as e:
          print(f"MySQL 错误: {str(e)}")
          return None
      finally:
          # Release the cursor on every path.
          # NOTE(review): the connection is left open because the
          # ownership rules of get_mysql() are not visible here - confirm.
          if cur is not None:
              try:
                  cur.close()
              except Exception as close_err:
                  print(f"MySQL 错误: {str(close_err)}")
  def wait_if_verifying(self, monitor, timeout=120):
      """
      Block the calling thread while a captcha is being handled.

      Sleeps in one-second steps for as long as ``monitor.pausing`` is
      set, giving up once ``timeout`` seconds have elapsed.

      :param monitor: object exposing a ``pausing`` event with ``is_set()``.
      :param timeout: maximum number of seconds to wait.
      :return: None
      """
      began = time.time()
      while True:
          if not monitor.pausing.is_set():
              break
          if not (time.time() - began < timeout):
              break
          time.sleep(1)
  702. # def safe_xpath(self, xpath, timeout=10):
  703. # """线程安全 xpath 查找"""
  704. # self.wait_if_verifying(self.monitor)
  705. # return self.d.xpath(xpath).wait(timeout=timeout)
  def wait_for_ready(self, monitor, timeout=86400):
      """
      Wait for any captcha handling to finish before entering a page.

      Sleeps in one-second steps while ``monitor.pausing`` is set (at
      most ``timeout`` seconds), then asks the monitor to scan once more
      in case a captcha popped up at the last moment.

      :param monitor: object exposing a ``pausing`` event and
          ``check_and_handle_popup()``.
      :param timeout: maximum number of seconds to wait.
      :return: None
      """
      began = time.time()
      while True:
          if not monitor.pausing.is_set():
              break
          if not (time.time() - began < timeout):
              break
          time.sleep(1)
      # Extra safety net: actively scan one more time.
      monitor.check_and_handle_popup()
  def safe_list(self, xpath, monitor):
      """
      Fetch the elements matching ``xpath`` once captcha handling is done.

      :param xpath: XPath expression handed to ``self.d.xpath``.
      :param monitor: captcha monitor passed on to ``self.wait_for_ready``.
      :return: whatever ``self.d.xpath(xpath).all()`` returns.
      """
      self.wait_for_ready(monitor)
      selector = self.d.xpath(xpath)
      matches = selector.all()
      return matches
  def safe_exec(self, func, *args, **kwargs):
      """
      Run ``func`` only once no captcha handling is in progress.

      While ``self.monitor.pausing`` is set the call blocks, re-checking
      every second with no upper bound; afterwards ``func`` is invoked
      with the given arguments.

      :param func: callable to execute.
      :param args: positional arguments forwarded to ``func``.
      :param kwargs: keyword arguments forwarded to ``func``.
      :return: the value returned by ``func``.
      """
      while True:
          if not self.monitor.pausing.is_set():
              # Gate is open: run the real logic.
              outcome = func(*args, **kwargs)
              return outcome
          time.sleep(1)
  def get_next_data(self, data, target):
      """
      Return the ``'words'`` value of the entry following ``target``.

      :param data: sequence of dicts, each with a ``'words'`` key.
      :param target: text to look for in the ``'words'`` values.
      :return: ``'words'`` of the entry right after the first match that
          has a successor, or None when there is no such entry.
      """
      for position, entry in enumerate(data):
          if entry['words'] != target:
              continue
          successor = position + 1
          if successor < len(data):
              return data[successor]['words']
      return None
  def delete_instruction_screenshot(self, screenshot_path):
      """
      Delete a screenshot file, reporting the outcome on stdout.

      A missing file or any other failure is printed, never raised.

      :param screenshot_path: path of the file to remove.
      :return: None
      """
      try:
          os.remove(screenshot_path)
      except FileNotFoundError:
          print(f"文件未找到,无法删除:{screenshot_path}")
      except Exception as err:
          print(f"删除文件时出错:{err}")
      else:
          print(f"截图文件已删除:{screenshot_path}")
  741. '''
  742. def get_instructions_data(self):
  743. """
  744. 确定有说明书之后,提取所有的说明书数据
  745. :return:
  746. """
  747. self.d.xpath('//*[@text="说明"]').click()
  748. # time.sleep(random.randint(3, 5))
  749. time.sleep(0.5)
  750. self.d.xpath('//*[@text="查看详细说明"]').click()
  751. # time.sleep(random.randint(3, 5))
  752. time.sleep(0.5)
  753. self.d.xpath('//*[@text="加载更多"]').click_exists()
  754. loop_page = 5
  755. # new_list = list()
  756. new_list = []
  757. for i in range(loop_page):
  758. self.d.xpath('//*[@text="加载更多"]').click_exists()
  759. time.sleep(0.2)
  760. if i == 0:
  761. self.d.swipe(200, 1000, 200, 300, 0.4)
  762. else:
  763. self.d.swipe(200, 1000, 200, 62)
  764. time.sleep(0.2)
  765. if self.d.xpath('//*[@text="加载更多"]').exists:
  766. self.d.xpath('//*[@text="加载更多"]').click()
  767. time.sleep(0.2)
  768. all_tt = self.d.xpath(
  769. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup').all()
  770. for idx in range(1, len(all_tt) + 1):
  771. all_tt1 = self.d.xpath(
  772. f'//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[{idx}]//android.widget.TextView').all()
  773. # print(f'当前说明书列表数据:{all_tt1}')
  774. for tt in all_tt1:
  775. if tt.text and tt.text != '展开全文':
  776. new_list.append(tt.text)
  777. if i == 0:
  778. height = 938
  779. else:
  780. drug_box = self.d.xpath(
  781. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]').info
  782. bounds = drug_box['bounds']
  783. height = bounds['bottom'] - bounds['top']
  784. if height < 938:
  785. # print('说明书翻页到底部')
  786. break
  787. # 展开全文
  788. new_list = [item for item in new_list if item != '展开全文']
  789. print(f'当前说明书列表数据:{new_list}')
  790. # expiry_date_index = next(idx for idx, i in enumerate(new_list) if i == '有效期')
  791. # manufacturer_index = next(idx for idx, i in enumerate(new_list) if i == '生产单位')
  792. # approval_number_index = next(idx for idx, i in enumerate(new_list) if i == '批准文号')
  793. # res_data = {
  794. # "有效期": new_list[expiry_date_index + 1],
  795. # "生产单位": new_list[manufacturer_index + 1],
  796. # "批准文号": new_list[approval_number_index + 1]
  797. # }
  798. res_data = {
  799. "有效期": (new_list[new_list.index("有效期") + 1]) if "有效期" in new_list and new_list.index("有效期") + 1 < len(new_list) else "",
  800. "生产单位": (new_list[new_list.index("生产单位") + 1]) if "生产单位" in new_list and new_list.index("生产单位") + 1 < len(new_list) else "",
  801. "批准文号": (new_list[new_list.index("批准文号") + 1]) if "批准文号" in new_list and new_list.index("批准文号") + 1 < len(new_list) else ""
  802. }
  803. print(f'当前说明书字典数据:{res_data}')
  804. return res_data
  805. '''
  806. '''
  807. def get_instructions_data(self):
  808. """
  809. 确定有说明书之后,提取所有的说明书数据
  810. :return:
  811. """
  812. self.d.xpath('//*[@text="说明"]').click()
  813. # time.sleep(random.randint(3, 5))
  814. time.sleep(0.5)
  815. self.d.xpath('//*[@text="查看详细说明"]').click()
  816. # time.sleep(random.randint(3, 5))
  817. time.sleep(0.5)
  818. # 1) 先向上滑动一次,触发“加载更多”出现
  819. self.d.swipe(200, 1000, 200, 300, 0.4)
  820. time.sleep(0.3)
  821. # 2) 再进入“出现就点”的循环
  822. while self.d.xpath('//*[@text="加载更多"]').click_exists(timeout=1):
  823. time.sleep(0.2)
  824. self.d.swipe(200, 1000, 200, 300, 0.4)
  825. # self.d.swipe(200, 1000, 200, 62)
  826. time.sleep(0.2)
  827. # 一次性获取所有文本
  828. texts = [
  829. node.text.strip()
  830. # for node in self.d.xpath('//android.widget.TextView').all()
  831. for node in self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.TextView').all()
  832. if node.text and node.text.strip() and node.text != '加载更多'
  833. ]
  834. print(f'当前说明书列表数据:{texts}')
  835. # 提取关键字段
  836. def safe_get(key):
  837. # try:
  838. # idx = texts.index(key)
  839. # return texts[idx + 1] if idx + 1 < len(texts) else ""
  840. # except ValueError:
  841. # return ""
  842. try:
  843. idx = next(i for i, text in enumerate(texts) if text == key)
  844. return texts[idx + 1] if idx + 1 < len(texts) else ""
  845. except StopIteration:
  846. return ""
  847. res_data = {
  848. "有效期": safe_get("有效期"),
  849. "生产单位": safe_get("生产单位"),
  850. "批准文号": safe_get("批准文号")
  851. }
  852. print(f'当前说明书字典数据:{res_data}')
  853. return res_data
  854. '''
  855. '''
  856. def get_instructions_data(self):
  857. """
  858. 说明书键值对采集:连续两个 TextView 为一对,精确提取
  859. """
  860. # 1. 进入说明书
  861. self.d(text="说明").click()
  862. time.sleep(0.5)
  863. self.d(text="查看详细说明").click()
  864. time.sleep(0.5)
  865. # self.d(text="加载更多").click_exists(timeout=0.5)
  866. # 2. 找到说明书最外层 ScrollView(页面主体)
  867. scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.ScrollView")
  868. count = scroll_view.count
  869. print(f"找到的 ScrollView 数量: {count}")
  870. if not scroll_view.exists:
  871. return {"有效期": "", "生产单位": "", "批准文号": ""}
  872. # 3. 在 ScrollView 内再定位真正包含键值对的容器
  873. # 绝大多数美团说明书页面对应的是 ScrollView > ViewGroup > 若干 TextView
  874. kv_container = scroll_view.child(className="android.view.ViewGroup")
  875. if not kv_container.exists:
  876. kv_container = scroll_view # 降级:直接对 ScrollView 取子孙 TextView
  877. # 4. 滑动到底并收集所有 TextView(保留顺序)
  878. all_texts = []
  879. max_swipe = 5
  880. last_length = 0
  881. for _ in range(max_swipe):
  882. texts = kv_container.child(className="android.widget.TextView")
  883. #获取texts中的文本
  884. print(f'当前说明书列表数据:{texts}')
  885. current_texts = []
  886. self.loggerMT.info(f'说明书111')
  887. for tv in texts:
  888. try:
  889. txt = tv.get_text().strip()
  890. # txt = tv.info['text'].strip()
  891. except Exception:
  892. continue
  893. if txt and txt != "展开全文":
  894. current_texts.append(txt)
  895. self.loggerMT.info(f'说明书222')
  896. print(f'当前说明书列表数据:{current_texts}')
  897. # 去重
  898. if current_texts:
  899. current_texts = [t for t in current_texts if t not in all_texts]
  900. all_texts.extend(current_texts)
  901. # 判断是否到底
  902. # if not scroll_view.info.get("scrollable"):
  903. # break
  904. # 判断是否到底
  905. if len(all_texts) == last_length:
  906. break
  907. last_length = len(all_texts)
  908. # self.d.swipe_ext("up", scale=0.7)
  909. #向上滑动一次
  910. self.d.swipe(200, 1000, 200, 300, 0.2)
  911. time.sleep(0.2)
  912. if self.d.xpath('//*[@text="加载更多"]').exists:
  913. self.d.xpath('//*[@text="加载更多"]').click()
  914. # 5. 成对解析
  915. res_data = {"有效期": "", "生产单位": "", "批准文号": ""}
  916. for i in range(len(all_texts) - 1):
  917. key = all_texts[i]
  918. val = all_texts[i + 1]
  919. if key in res_data:
  920. res_data[key] = val
  921. print(f'说明书文本共 {len(all_texts)} 条,提取结果: {res_data}')
  922. # time.sleep(1000000)
  923. return res_data
  924. '''
  def get_instructions_data(self):
      """
      Extract key fields from the drug instruction page via OCR.

      Call this once the product is known to have an instruction sheet.
      It opens the detailed instruction view, expands or scrolls it
      until "生产单位" and "批准文号" are both visible (bounded number of
      swipes), takes a screenshot, runs OCR on it and reads the text
      that follows each label.

      :return: dict with keys "有效期", "生产单位" and "批准文号". All
          three are '' when OCR yields nothing; a single value is None
          when its label was not found in the OCR result.
      """
      self.d.xpath('//*[@text="说明"]').click()
      time.sleep(0.5)
      self.d.xpath('//*[@text="查看详细说明"]').click()
      time.sleep(0.5)

      # Scroll until "加载更多" (load more) shows up and click it once.
      load_more_xpath = '//*[@text="加载更多"]'
      for _ in range(8):
          if self.d.xpath(load_more_xpath).exists:
              self.d.xpath(load_more_xpath).click()
              time.sleep(0.2)
              break
          self.d.swipe(200, 1000, 200, 300, 0.3)

      # Keep scrolling until both labels needed by OCR are on screen.
      for _ in range(10):
          if (self.d.xpath('//*[@text="生产单位"]').exists
                  and self.d.xpath('//*[@text="批准文号"]').exists):
              break
          self.d.swipe(200, 1000, 200, 300, 0.3)

      instruction_path = self.screenshot_instruction()
      print(f"instruction_path= {instruction_path}")
      try:
          time.sleep(2)
          ocr_res = self.get_ocr_res_image(instruction_path)
          if ocr_res:
              # Each value is the OCR entry right after its label.
              validity = self.get_next_data(ocr_res, '有效期')
              approval_number = self.get_next_data(ocr_res, '批准文号')
              manufacturer = self.get_next_data(ocr_res, '生产单位')
          else:
              validity = ''
              approval_number = ''
              manufacturer = ''
          res_data = {
              "有效期": validity,
              "生产单位": manufacturer,
              "批准文号": approval_number
          }
          print(f"res_data={res_data}")
          time.sleep(1)
          return res_data
      finally:
          # Remove the screenshot even when OCR or parsing raises, so
          # failed runs do not pile up image files.
          self.delete_instruction_screenshot(instruction_path)
  def has_instructions(self):
      """
      Tell whether the product page offers an instruction sheet.

      After an initial pause, looks for an element with text "说明" up
      to 8 times; between checks the page is swiped with
      ``swipe_ext('down', 0.3)`` and given one second to settle.
      Products without instructions cannot have detail data collected.

      :return: True when the "说明" entry was found, otherwise False.
      """
      time.sleep(self.get_sleep_time())
      manual_xpath = '//*[@text="说明"]'
      for attempt in range(8):
          if self.d.xpath(manual_xpath).exists:
              print(f"第{attempt}次有说明书1")
              return True
          self.d.swipe_ext('down', 0.3)
          time.sleep(1)
          if self.d.xpath(manual_xpath).exists:
              print(f"第{attempt}次有说明书2")
              return True
      return False
  def has_shop(self):
      """
      Tell whether the page shows an "进店" (enter shop) button.

      :return: the ``exists`` result for the element with text "进店",
          read after pausing for ``self.get_sleep_time()`` seconds.
      """
      pause_seconds = self.get_sleep_time()
      time.sleep(pause_seconds)
      enter_button = self.d.xpath('//*[@text="进店"]')
      return enter_button.exists
  1015. #获取商品对应的店铺信息
  def get_license_info_ex(self):
      """
      Collect the shop's address and business-license details.

      Navigates shop -> merchant tab (each step wrapped in
      ``self.safe_exec``), reads the shop address, opens the merchant
      qualification view and reads the qualification number. When a
      number is available, the license image is opened, screenshotted
      and OCR'd for the company name and address.

      :return: dict with keys ``contact_address``,
          ``qualification_number``, ``business_license_company`` and
          ``business_license_address``; the last three are '' when no
          qualification number could be read.
      """
      self.safe_exec(self.enter_shop)
      self.safe_exec(self.enter_shoper)

      # Give the qualification entry up to 10 polls to appear.
      license_xpath = '//*[@text="查看商家资质"]'
      attempt = 0
      while attempt < 10:
          if self.d.xpath(license_xpath).exists:
              print(f"第{attempt}次有商家资质")
              break
          print(f"第{attempt}次没有商家资质")
          time.sleep(self.get_sleep_time())
          attempt += 1

      contact_address = self.safe_exec(self.get_shop_address)
      self.safe_exec(self.scan_shoper_license)
      qualification_number = self.safe_exec(self.get_qualification_number)

      if not qualification_number:
          return {'contact_address': contact_address, 'qualification_number': '', 'business_license_company': '', 'business_license_address': ''}

      # Tap the fixed relative position of the license thumbnail.
      self.d.click(0.603, 0.27)
      time.sleep(self.get_sleep_time())
      cropped_screenshot_path = self.screenshot_the_business_license(qualification_number)
      print(f'cropped_screenshot_path:{cropped_screenshot_path}')
      ocr_res = self.get_ocr_res(cropped_screenshot_path)
      print(f'ocr_res:{ocr_res}')

      # Company name and address stay '' unless OCR supplies them.
      extracted = {'单位名称': '', '地址': ''}
      if ocr_res:
          for label in extracted:
              if label in ocr_res.keys():
                  extracted[label] = ocr_res[label]

      return {'contact_address': contact_address, 'qualification_number': qualification_number, 'business_license_company': extracted['单位名称'], 'business_license_address': extracted['地址']}
  1065. """暂不用该功能
  1066. def get_license_info(self):
  1067. self.enter_shop()
  1068. self.enter_shoper()
  1069. self.scan_shoper_license()
  1070. # 获取资质编码
  1071. qualification_number = self.get_qualification_number()
  1072. if qualification_number:
  1073. table_license_info = self.get_table_license_info(qualification_number)
  1074. if table_license_info:
  1075. return {
  1076. '单位名称': table_license_info[0],
  1077. '地址': table_license_info[1],
  1078. '社会信用代码': table_license_info[2]
  1079. }
  1080. else:
  1081. # operate_no = random.randint(0, 1)
  1082. self.d.click(0.603, 0.27)
  1083. # if operate_no == 0:
  1084. # self.d.xpath('//*[@text="营业执照"]').click()
  1085. # else:
  1086. # self.d.click(0.603, 0.27)
  1087. time.sleep(self.get_sleep_time())
  1088. self.screenshot_the_business_license()
  1089. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  1090. return ocr_res
  1091. # operate_no = random.randint(0, 1)
  1092. self.d.click(0.603, 0.27)
  1093. # if operate_no == 0:
  1094. # self.d.xpath('//*[@text="营业执照"]').click()
  1095. # else:
  1096. # self.d.click(0.603, 0.27)
  1097. time.sleep(self.get_sleep_time())
  1098. self.screenshot_the_business_license()
  1099. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  1100. return ocr_res
  1101. """
  def distinct_target(self):
      """
      Report whether the ``filter_container`` element is on screen.

      The element appears to be the delivery-address filter bar - TODO
      confirm against the app layout.

      :return: True when the element exists, otherwise False.
      """
      position_xpath = '//*[@resource-id="com.sankuai.meituan:id/filter_container"]'
      return True if self.d.xpath(position_xpath).exists else False
  def enter_target_page_ex(self):
      """
      Search for ``self.search_key`` and open the "快递" (express) tab.

      Steps: focus the search box, type the keyword, run the search,
      open the address filter, pick the selected address and confirm,
      then click "快递". For a fixed set of keywords the tab strip is
      first swiped left, because the tab may be off screen.

      :return: None
      """
      def pause():
          time.sleep(self.get_sleep_time())

      self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/search_edit_flipper"]').click()
      pause()
      self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/search_edit_linear_layout"]/android.widget.EditText[1]').click()
      pause()
      self.d.send_keys(self.search_key, clear=True)
      pause()
      self.d.xpath('//*[@text="搜索"]').click()
      pause()
      # Open the address filter.
      self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/filter_container"]').click()
      pause()
      # Pick the selected entry, then confirm.
      self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/selected_icon"]').click()
      self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/bottom_confirm"]').click()
      pause()

      # Keywords whose result page needs a swipe before "快递" is visible.
      keys_needing_swipe = (
          '今维多赐多康牌蛋白粉',
          '999可调式生理性海水鼻腔喷雾50',
          '999复方氨酚烷胺胶囊6粒',
          '999复方板蓝根颗粒15g*15袋/盒',
          '999必无忧盐酸特比萘芬乳膏15g/支',
          '三九胃泰养胃舒颗粒8袋',
          '999三蛇胆川贝膏138',
          '999强力枇杷露16袋',
          '999复方苦参肠炎康片12片',
          '999复方金银花颗粒10g',
          '999冰连清咽',
          '999必无忧盐酸特比萘芬喷雾剂30',
          '佳美舒阿奇霉素肠溶胶囊4',
          '999速复康布洛芬缓释胶囊',
          '999皮炎平曲安奈德益康唑乳膏30',
      )
      if self.search_key in keys_needing_swipe:
          # Fixed-coordinate swipe along the tab strip.
          self.d.swipe(400, 190, 100, 190, 0.3)
          time.sleep(0.5)
      self.d.xpath('//*[@text="快递"]').click()
      pause()
  def enter_target_page(self):
      """
      Go through the "看病买药" entry, search for ``self.search_key`` and
      switch to express delivery.

      Each UI action is followed by a pause of
      ``self.get_sleep_time()`` seconds.

      :return: None
      """
      # Entry point, search carousel and the search input, in click order.
      entry_xpaths = (
          '//*[@content-desc="看病买药"]',
          '//*[@resource-id="com.sankuai.meituan:id/vf_search_carousel_text"]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]',
      )
      for step_xpath in entry_xpaths:
          self.d.xpath(step_xpath).click()
          time.sleep(self.get_sleep_time())

      self.d.send_keys(self.search_key, clear=True)
      time.sleep(self.get_sleep_time())
      self.d.xpath('//*[@text="搜索"]').click()
      time.sleep(self.get_sleep_time())

      # Switch the result list to express delivery.
      self.click_express_send()
      time.sleep(self.get_sleep_time())
  def click_express_send(self):
      """Click the "快递送" (express delivery) entry, retrying up to 5 times.

      Four known layout variants are probed in a fixed order on every
      attempt; each XPath addresses the last ``ViewGroup`` nested inside the
      page's ``HorizontalScrollView``.  The first variant that exists is
      clicked, after which the method sleeps for ``self.get_sleep_time()``
      seconds and returns.  When no variant exists, the failure is printed,
      the method sleeps for ``self.get_sleep_time()`` seconds and tries
      again.  After the last failed attempt it gives up without raising.

      :return: None in every case (success is only reported via ``print``).
      """
      max_retry = 5  # maximum number of attempts
      # (label used in the log message, XPath) pairs, in probing order.
      # Defined once, outside the retry loop, because they never change.
      candidates = (
          ('xpath', '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'),
          ('xpath2', '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'),
          ('xpath3', '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'),
          ('xpath4', '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'),
      )
      for idx in range(1, max_retry + 1):
          for label, candidate in candidates:
              target = self.d.xpath(candidate)
              if target.exists:
                  target.click()
                  print(f"第{idx}次点击{label}快递送成功")
                  time.sleep(self.get_sleep_time())
                  return
          # None of the four layouts is on screen yet: report, wait, retry.
          print(f"第{idx}次点击xpath或xpath2或xpath3或xpath4快递送都失败")
          time.sleep(self.get_sleep_time())
  1218. """暂不用该功能
  1219. def get_table_license_info(self, qualification_number):
  1220. try:
  1221. sql = f'select business_license_company,city,credit_code from mt_drug where credit_code = "{qualification_number}"'
  1222. self.mysql_client.cur.execute(sql)
  1223. res = self.mysql_client.cur.fetchone()
  1224. return res
  1225. except:
  1226. return None
  1227. """
  1228. # def get_clipboard(self):
  1229. # """通过ADB获取Android手机剪贴板内容"""
  1230. # try:
  1231. # result = subprocess.run(
  1232. # ["adb", "shell", "am", "broadcast", "-a", "clipper.get"],
  1233. # capture_output=True,
  1234. # text=True,
  1235. # timeout=5
  1236. # )
  1237. # print(f"获取剪贴板结果: {result.stdout}")
  1238. # # 解析返回信息中的剪贴板内容
  1239. # for line in result.stdout.splitlines():
  1240. # if "data=" in line:
  1241. # return line.split("data=")[1].strip()
  1242. # return ""
  1243. # except Exception as e:
  1244. # print("获取剪贴板失败:", e)
  1245. # return ""
  1246. # def get_clipboard(self):
  1247. # """读取 Android 剪贴板(系统自带命令)"""
  1248. # try:
  1249. # text = subprocess.check_output(
  1250. # ["adb", "shell", "cmd", "clipboard", "get"],
  1251. # text=True, timeout=5, stderr=subprocess.STDOUT
  1252. # ).strip()
  1253. # print(f"获取剪贴板结果: {text}")
  1254. # return text if text else ""
  1255. # except Exception as e:
  1256. # print("获取剪贴板失败:", e)
  1257. # return ""
  def get_clipboard(self):
      """Return the device clipboard text with surrounding whitespace removed.

      Sleeps for one second before reading (presumably to give the app time
      to finish writing the clipboard -- confirm), logs the raw value through
      ``self.loggerMT`` and returns it stripped.

      :return: the stripped clipboard text, or ``''`` when the device
          reports ``None``.
      """
      time.sleep(1)
      # Read the clipboard exactly once so that the logged value is the value
      # that is returned, and to avoid a second device round-trip.
      clipboard_content = self.d.clipboard
      self.loggerMT.info(f"Clipboard content:{clipboard_content}")  # debug trace
      if clipboard_content is None:
          return ''
      return clipboard_content.strip()
  def clear_clipboard(self):
      """Overwrite the device clipboard with an empty string."""
      empty_text = ""
      # NOTE(review): forwarded positionally as the second argument; in
      # uiautomator2 that parameter appears to be a clip label rather than a
      # MIME type -- confirm against the installed version.
      second_arg = "text/plain"
      self.d.set_clipboard(empty_text, second_arg)
  1268. # def clear_clipboard(self):
  1269. # """清空手机剪贴板:写入空字符串(subprocess 版)"""
  1270. # try:
  1271. # subprocess.run(
  1272. # ["adb", "shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", " "],
  1273. # check=True,
  1274. # capture_output=True,
  1275. # text=True,
  1276. # timeout=5
  1277. # )
  1278. # except subprocess.CalledProcessError as e:
  1279. # print("ADB 清空失败:", e.stderr)
  1280. # def clear_clipboard():
  1281. # """清空手机剪贴板:写入空字符串"""
  1282. # try:
  1283. # adb_shell(["shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", ""])
  1284. # except subprocess.CalledProcessError as e:
  1285. # print("ADB 清空失败:", e.output)
  1286. #获取一个商品的数据、商品对应的店铺的数据
  def get_product_link(self):
      """Copy the product's share link through the "···" menu and return it.

      Up to five rounds are made.  In each round the two known positions of
      the "···" button are probed in order; for a position that exists the
      method clicks it, clicks "分享商品" if present, and then -- only when
      "复制链接" is on screen -- clicks it and reads the link back with
      ``self.get_clipboard()``.  A round stops probing once the clipboard has
      been read; the whole search stops as soon as a non-empty link is
      obtained.  Rounds that end without a link are separated by a
      0.5 second pause.

      :return: the copied link, or ``''`` when none could be obtained.
      """
      share_entry = '//*[@text="分享商品"]'
      copy_entry = '//*[@text="复制链接"]'
      # The two known positions of the "···" button.
      more_buttons = (
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
      )
      rounds = 5  # maximum number of rounds
      link = ''
      for attempt in range(1, rounds + 1):
          for button in more_buttons:
              if not self.d.xpath(button).exists:
                  continue
              entered_msg = f'{attempt}-进入分享点点点'
              print(entered_msg)
              self.loggerMT.info(entered_msg)
              self.d.xpath(button).click()
              time.sleep(0.2)
              self.d.xpath(share_entry).click_exists()
              time.sleep(0.2)
              if not self.d.xpath(copy_entry).exists:
                  # No "copy link" entry appeared: record the (still empty)
                  # link and move on to the next button position.
                  print(f'{attempt}-商品链接:{link}')
                  self.loggerMT.info(f'{attempt}-商品链接:{link}')
                  continue
              self.d.xpath(copy_entry).click()
              time.sleep(1)
              link = self.get_clipboard()
              time.sleep(0.5)
              print(f'{attempt}-商品链接:{link}')
              self.loggerMT.info(f'{attempt}-商品链接:{link}')
              break  # clipboard was read; stop probing button positions
          if link:
              return link
          if attempt < rounds:
              time.sleep(0.5)  # no pause needed after the final round
      return link
  1332. def integrate_data(self):
  1333. #测试说明书详情:
  1334. # instructions_info = self.safe_exec(self.get_instructions_data)
  1335. # time.sleep(1000000)
  1336. #测试店铺信息
  1337. # license_info = self.safe_exec(self.get_license_info_ex)
  1338. # time.sleep(1000000)
  1339. #测试定位地址
  1340. #获取链接开始
  1341. #self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  1342. #1、点击页面的... 先判断元素是否存在
  1343. '''
  1344. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1345. print('1-进入分享点点点111')
  1346. self.loggerMT.info('1-进入分享点点点111')
  1347. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1348. #点击分享商品
  1349. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1350. time.sleep(0.2)
  1351. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1352. time.sleep(0.2)
  1353. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1354. time.sleep(1)
  1355. #获取剪切板的数据
  1356. product_link = self.get_clipboard()
  1357. time.sleep(0.5)
  1358. print(f'1-商品链接:{product_link}')
  1359. self.loggerMT.info(f'1-商品链接:{product_link}')
  1360. #清空剪切板
  1361. # self.clear_clipboard()
  1362. # if self.d.xpath('//*[@text="加载更多"]').click_exists():
  1363. # self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1364. # if self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').exists:
  1365. # self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').click()
  1366. # #获取剪切板的数据
  1367. # product_link = self.get_clipboard()
  1368. # time.sleep(0.5)
  1369. # print(f'商品链接:{product_link}')
  1370. # #清空剪切板
  1371. # self.clear_clipboard()
  1372. # else:
  1373. # print('未找到分享按钮111')
  1374. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1375. print('1-进入分享点点点222')
  1376. self.loggerMT.info('1-进入分享点点点222')
  1377. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1378. time.sleep(0.2)
  1379. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1380. time.sleep(0.2)
  1381. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1382. time.sleep(1)
  1383. #获取剪切板的数据
  1384. product_link = self.get_clipboard()
  1385. time.sleep(0.5)
  1386. print(f'1-商品链接:{product_link}')
  1387. self.loggerMT.info(f'1-商品链接:{product_link}')
  1388. #如果为获取到product_link 则等待0.5秒再获取
  1389. if not product_link:
  1390. time.sleep(0.5)
  1391. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1392. print('2-进入分享点点点111')
  1393. self.loggerMT.info('2-进入分享点点点111')
  1394. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1395. #点击分享商品
  1396. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1397. time.sleep(0.2)
  1398. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1399. time.sleep(0.2)
  1400. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1401. time.sleep(1)
  1402. #获取剪切板的数据
  1403. product_link = self.get_clipboard()
  1404. time.sleep(0.5)
  1405. print(f'2-商品链接:{product_link}')
  1406. self.loggerMT.info(f'2-商品链接:{product_link}')
  1407. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1408. print('2-进入分享点点点222')
  1409. self.loggerMT.info('2-进入分享点点点222')
  1410. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1411. time.sleep(0.2)
  1412. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1413. time.sleep(0.2)
  1414. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1415. time.sleep(1)
  1416. #获取剪切板的数据
  1417. product_link = self.get_clipboard()
  1418. time.sleep(0.5)
  1419. print(f'2-商品链接:{product_link}')
  1420. self.loggerMT.info(f'2-商品链接:{product_link}')
  1421. #如果为获取到product_link 则等待0.5秒再获取
  1422. if not product_link:
  1423. time.sleep(0.5)
  1424. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1425. print('3-进入分享点点点111')
  1426. self.loggerMT.info('3-进入分享点点点111')
  1427. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1428. #点击分享商品
  1429. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1430. time.sleep(0.2)
  1431. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1432. time.sleep(0.2)
  1433. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1434. time.sleep(1)
  1435. #获取剪切板的数据
  1436. product_link = self.get_clipboard()
  1437. time.sleep(0.5)
  1438. print(f'3-商品链接:{product_link}')
  1439. self.loggerMT.info(f'3-商品链接:{product_link}')
  1440. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1441. print('3-进入分享点点点222')
  1442. self.loggerMT.info('3-进入分享点点点222')
  1443. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1444. time.sleep(0.2)
  1445. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1446. time.sleep(0.2)
  1447. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1448. time.sleep(1)
  1449. #获取剪切板的数据
  1450. product_link = self.get_clipboard()
  1451. time.sleep(0.5)
  1452. print(f'3-商品链接:{product_link}')
  1453. self.loggerMT.info(f'3-商品链接:{product_link}')
  1454. '''
  1455. #获取链接结束
  1456. """
  1457. 整合数据
  1458. :return:
  1459. """
  1460. # title_info = self.get_title() # 药品,规格
  1461. title_info = self.safe_exec(self.get_title) # 药品,规格
  1462. print(f"title_info={title_info}")
  1463. if title_info:
  1464. product, specifications = title_info
  1465. #如果关键字包含999 则 product必须包含999 和 999后面的那段字符串 ps 999感冒灵颗粒必须包含:"999"和"感冒灵颗粒"
  1466. if '999' in self.search_key:
  1467. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1468. temp_search_key = self.search_key.replace('999皮炎平', '')
  1469. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g/支':
  1470. temp_search_key = self.search_key.replace("999必无忧", "")
  1471. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  1472. temp_search_key = self.search_key.replace("999必无忧", "")
  1473. elif self.search_key == '999速复康布洛芬缓释胶囊':
  1474. temp_search_key = self.search_key.replace("999速复康", "")
  1475. else:
  1476. temp_search_key = self.search_key.replace('999', '')
  1477. if self.search_key == '999糠酸莫米松凝胶15':
  1478. temp_search_key = temp_search_key.replace('15', '')
  1479. if '999' not in product or temp_search_key not in product:
  1480. self.swipe_back(2) #需要回退2步
  1481. self.unrelated_data += 1
  1482. return
  1483. elif '15' not in product:
  1484. self.swipe_back(2) #需要回退2步
  1485. self.unrelated_data += 1
  1486. return
  1487. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1488. temp_search_key = temp_search_key.replace('30', '')
  1489. if '999' not in product or temp_search_key not in product:
  1490. self.swipe_back(2) #需要回退2步
  1491. self.unrelated_data += 1
  1492. return
  1493. elif '30' not in product:
  1494. self.swipe_back(2) #需要回退2步
  1495. self.unrelated_data += 1
  1496. return
  1497. elif self.search_key == '999复方感冒灵颗粒15':
  1498. temp_search_key = temp_search_key.replace('15', '')
  1499. if '999' not in product or temp_search_key not in product:
  1500. self.swipe_back(2) #需要回退2步
  1501. self.unrelated_data += 1
  1502. return
  1503. elif '15' not in product:
  1504. self.swipe_back(2) #需要回退2步
  1505. self.unrelated_data += 1
  1506. return
  1507. elif self.search_key == '999抗病毒口服液10ml*6支/盒':
  1508. temp_search_key = temp_search_key.replace("10ml*6支/盒", "")
  1509. if '999' not in product or temp_search_key not in product:
  1510. self.swipe_back(2) #需要回退2步
  1511. self.unrelated_data += 1
  1512. return
  1513. elif '10ml*6' not in product:
  1514. self.swipe_back(2) #需要回退2步
  1515. self.unrelated_data += 1
  1516. return
  1517. elif self.search_key == '999复方金银花颗粒10g':
  1518. temp_search_key = temp_search_key.replace("10g", "")
  1519. if temp_search_key not in product:
  1520. self.swipe_back(2) #需要回退2步
  1521. self.unrelated_data += 1
  1522. return
  1523. elif ('999' not in product) and ('三九' not in product):
  1524. self.swipe_back(2) #需要回退2步
  1525. self.unrelated_data += 1
  1526. return
  1527. elif '10g*8' not in product:
  1528. self.swipe_back(2) #需要回退2步
  1529. self.unrelated_data += 1
  1530. return
  1531. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  1532. temp_search_key = temp_search_key.replace("30", "")
  1533. if '999' not in product or temp_search_key not in product:
  1534. self.swipe_back(2) #需要回退2步
  1535. self.unrelated_data += 1
  1536. return
  1537. elif '30' not in product:
  1538. self.swipe_back(2) #需要回退2步
  1539. self.unrelated_data += 1
  1540. return
  1541. elif self.search_key == '999复方苦参肠炎康片12片':
  1542. temp_search_key = temp_search_key.replace("12片", "")
  1543. if '999' not in product or temp_search_key not in product:
  1544. self.swipe_back(2) #需要回退2步
  1545. self.unrelated_data += 1
  1546. return
  1547. elif '12' not in product:
  1548. self.swipe_back(2) #需要回退2步
  1549. self.unrelated_data += 1
  1550. return
  1551. elif self.search_key == '999强力枇杷露16袋':
  1552. temp_search_key = temp_search_key.replace("16袋", "")
  1553. if '999' not in product or temp_search_key not in product:
  1554. self.swipe_back(2) #需要回退2步
  1555. self.unrelated_data += 1
  1556. return
  1557. elif '16' not in product:
  1558. self.swipe_back(2) #需要回退2步
  1559. self.unrelated_data += 1
  1560. return
  1561. elif self.search_key == '999三蛇胆川贝膏138':
  1562. temp_search_key = temp_search_key.replace("138", "")
  1563. if '999' not in product or temp_search_key not in product:
  1564. self.swipe_back(2) #需要回退2步
  1565. self.unrelated_data += 1
  1566. return
  1567. elif '138' not in product:
  1568. self.swipe_back(2) #需要回退2步
  1569. self.unrelated_data += 1
  1570. return
  1571. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g/支':
  1572. temp_search_key = temp_search_key.replace("15g/支", "")
  1573. if '999' not in product or temp_search_key not in product:
  1574. print(f"关键字temp_search_key={temp_search_key}不存在于product={product}中")
  1575. self.swipe_back(2) #需要回退2步
  1576. self.unrelated_data += 1
  1577. return
  1578. elif '15g' not in product:
  1579. print(f"15g不存在于product={product}中")
  1580. self.swipe_back(2) #需要回退2步
  1581. self.unrelated_data += 1
  1582. return
  1583. elif self.search_key == '999止泻利颗粒15g*8':
  1584. temp_search_key = temp_search_key.replace("15g*8", "")
  1585. if '999' not in product or temp_search_key not in product:
  1586. self.swipe_back(2) #需要回退2步
  1587. self.unrelated_data += 1
  1588. return
  1589. elif '15g*8' not in product:
  1590. self.swipe_back(2) #需要回退2步
  1591. self.unrelated_data += 1
  1592. return
  1593. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  1594. temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
  1595. if '999' not in product or temp_search_key not in product:
  1596. print(f"关键字temp_search_key={temp_search_key}不存在于product={product}中")
  1597. self.swipe_back(2) #需要回退2步
  1598. self.unrelated_data += 1
  1599. return
  1600. elif '15g*15' not in product:
  1601. print(f"15g*15不存在于product={product}中")
  1602. self.swipe_back(2) #需要回退2步
  1603. self.unrelated_data += 1
  1604. return
  1605. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  1606. temp_search_key = temp_search_key.replace("6粒", "")
  1607. if '999' not in product or temp_search_key not in product:
  1608. self.swipe_back(2) #需要回退2步
  1609. self.unrelated_data += 1
  1610. return
  1611. elif '6' not in product:
  1612. self.swipe_back(2) #需要回退2步
  1613. self.unrelated_data += 1
  1614. return
  1615. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  1616. temp_search_key = temp_search_key.replace("50", "")
  1617. if '999' not in product or temp_search_key not in product:
  1618. self.swipe_back(2) #需要回退2步
  1619. self.unrelated_data += 1
  1620. return
  1621. elif '50' not in product:
  1622. self.swipe_back(2) #需要回退2步
  1623. self.unrelated_data += 1
  1624. return
  1625. elif self.search_key == '999速复康布洛芬缓释胶囊':
  1626. if temp_search_key not in product:
  1627. self.swipe_back(2) #需要回退2步
  1628. self.unrelated_data += 1
  1629. return
  1630. elif ('999' not in product) and ('三九' not in product):
  1631. self.swipe_back(2) #需要回退2步
  1632. self.unrelated_data += 1
  1633. return
  1634. else:
  1635. if '999' not in product or temp_search_key not in product:
  1636. self.swipe_back(2) #需要回退2步
  1637. self.unrelated_data += 1
  1638. return
  1639. else:
  1640. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  1641. temp_search_key = self.search_key.replace('史达功', '')
  1642. temp_search_key = temp_search_key.replace('120', '')
  1643. if '史达功' not in product or temp_search_key not in product:
  1644. self.swipe_back(2) #需要回退2步到列表
  1645. self.unrelated_data += 1
  1646. return
  1647. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  1648. temp_search_key = self.search_key.replace('三九胃泰', '')
  1649. temp_search_key = temp_search_key.replace('8袋', '')
  1650. if '三九胃泰' not in product or temp_search_key not in product:
  1651. self.swipe_back(2) #需要回退2步到列表
  1652. self.unrelated_data += 1
  1653. return
  1654. elif self.search_key == '今维多赐多康牌蛋白粉':
  1655. temp_search_key = self.search_key.replace('今维多', '')
  1656. if '今维多' not in product or temp_search_key not in product:
  1657. self.swipe_back(2) #需要回退2步到列表
  1658. self.unrelated_data += 1
  1659. return
  1660. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  1661. temp_search_key = self.search_key.replace('佳美舒', '')
  1662. temp_search_key = temp_search_key.replace('8', '')
  1663. if '佳美舒' not in product or temp_search_key not in product:
  1664. self.swipe_back(2) #需要回退2步到列表
  1665. self.unrelated_data += 1
  1666. return
  1667. elif ('8' not in title_info ) and ('4' not in title_info):
  1668. print(f"当前商品名称:{title_info} 不包含8和4品规")
  1669. self.swipe_back(2)
  1670. self.unrelated_data += 1
  1671. return
  1672. else:
  1673. if self.search_key not in product.replace(' ', ''):
  1674. self.swipe_back(2) #需要回退2步到列表
  1675. self.unrelated_data += 1
  1676. return
  1677. # if self.search_key not in product.replace(' ', ''):
  1678. # self.swipe_back(1)
  1679. # self.unrelated_data += 1
  1680. # return
  1681. else:
  1682. self.swipe_back(2) # 需要回退2步
  1683. return
  1684. min_price = self.drug_price() # 最低价格
  1685. # 商品链接
  1686. product_link = self.get_product_link()
  1687. #判断是否有自营的文本,有的话不需要获取店铺的信息
  1688. if self.d.xpath('//*[@text="自营"]').exists:
  1689. shop = "美团自营大药房(快递电商)"
  1690. # 爬取日期
  1691. scrape_date = self.get_current_date()
  1692. # scrape_date = "2025-07-18"
  1693. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  1694. 'platform': '美团'}
  1695. print(f'当前数据:{dup_data}')
  1696. if self.data_is_exists(dup_data):
  1697. print('存在相同数据不入库')
  1698. self.swipe_back(2) # 需要回退2步
  1699. return
  1700. else:
  1701. for i in range(8):
  1702. if self.d.xpath('//*[@text="进店"]').exists:
  1703. print('开始获取店铺名1')
  1704. break
  1705. self.d.swipe_ext('up', 0.3)
  1706. time.sleep(1)
  1707. # detail_info = self.d.xpath(
  1708. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[6]').info
  1709. # bounds = detail_info['bounds']
  1710. # height = bounds['bottom'] - bounds['top']
  1711. # if self.d.xpath('//*[@text="进店"]').exists and height > 100:
  1712. if self.d.xpath('//*[@text="进店"]').exists:
  1713. print('开始获取店铺名2')
  1714. break
  1715. shop = self.get_shop_name()
  1716. # 爬取日期
  1717. scrape_date = self.get_current_date()
  1718. # scrape_date = "2025-07-18"
  1719. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  1720. 'platform': '美团'}
  1721. print(f'当前数据:{dup_data}')
  1722. #获取店铺信息开始
  1723. #暂时不获取店铺信息 start
  1724. is_has_enter_shop = self.has_shop()
  1725. #需要判断shop是否已经在数据库中存在,如果存在,则不再进入店铺,直接进入下一个商品
  1726. shop_is_exists = self.shop_is_exists_database(shop)
  1727. #存在进店 并且店铺的名称不包含美团官方的字样
  1728. print(f"已采集{self.shop_data_num}家店铺数据")
  1729. if is_has_enter_shop and '美团官方' not in shop and not shop_is_exists and self.shop_data_num < 50:
  1730. # license_info = self.get_license_info_ex()
  1731. license_info = self.safe_exec(self.get_license_info_ex)
  1732. contact_address = license_info['contact_address']
  1733. qualification_number = license_info['qualification_number']
  1734. business_license_company = license_info['business_license_company']
  1735. business_license_address = license_info['business_license_address']
  1736. save_shop_data = {
  1737. 'shop': shop,
  1738. 'contact_address': contact_address,
  1739. 'qualification_number': qualification_number,
  1740. 'scrape_date': scrape_date,
  1741. 'business_license_company':business_license_company,
  1742. 'business_license_address':business_license_address,
  1743. 'platform': '美团'
  1744. }
  1745. self.save_shop_info_to_database(save_shop_data)
  1746. self.shop_data_num += 1 # 店铺数据数量+1
  1747. self.swipe_back(2) #这里执行完后会推到药品主页面
  1748. else:
  1749. print('不采集店铺信息')
  1750. #获取店铺信息结束
  1751. #暂时不获取店铺信息 end
  1752. if self.data_is_exists(dup_data):
  1753. print('存在相同数据不入库')
  1754. # self.swipe_back(1)
  1755. self.swipe_back(2) #需要回退2步到列表页
  1756. return
  1757. if not shop:
  1758. print('未获取到店铺名:开始回退')
  1759. #self.swipe_back(1)
  1760. self.swipe_back(2) #需要回退2步到列表页
  1761. return
  1762. if not shop or '自营' in shop:
  1763. #self.swipe_back(1)
  1764. self.swipe_back(2) #需要回退2步到列表页
  1765. return
  1766. time.sleep(self.get_sleep_time())
  1767. # 生产日期为空
  1768. manufacture_date = ''
  1769. # 执政信息
  1770. # if is_has_enter_shop:
  1771. # license_info = self.get_license_info()
  1772. # business_license_company = license_info["单位名称"]
  1773. # credit_code = license_info['社会信用代码']
  1774. # city_str = license_info['地址']
  1775. # # 先把省份啥的替换掉
  1776. # city_sub_str = re.sub(r'[u4e00-\u9fa5]+省', '', city_str)
  1777. # try:
  1778. # city = re.search(r'[\u4e00-\u9fa5]+?(市|区|县)', city_sub_str).group(0)
  1779. # except:
  1780. # city = city_sub_str
  1781. # try:
  1782. # province = self.city2province[city]
  1783. # except:
  1784. # province = ''
  1785. # self.swipe_back(2)
  1786. # else:
  1787. # business_license_company = ''
  1788. # credit_code = ''
  1789. # city = ''
  1790. # province = ''
  1791. business_license_company = ''
  1792. credit_code = ''
  1793. city = ''
  1794. province = ''
  1795. expiry_date = ''
  1796. manufacturer = ''
  1797. approval_number = ''
  1798. #暂时不获取说明书信息 start
  1799. #是否存在说明书
  1800. # is_has_instructions = self.has_instructions()
  1801. #有的药品没有说明书,直接默认
  1802. if self.search_key == '今维多赐多康牌蛋白粉':
  1803. expiry_date = '18个月'
  1804. manufacturer = '华润圣海健康科技有限公司'
  1805. approval_number = '食健备G202437001992'
  1806. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  1807. expiry_date = '24个月'
  1808. manufacturer = '浙江华润三九众益制药有限公司'
  1809. approval_number = '国药准字H20090152'
  1810. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  1811. expiry_date = '3年'
  1812. manufacturer = '江苏萨瑞斯医疗科技有限公司'
  1813. approval_number = '苏械注准20212140025'
  1814. elif self.search_key == '999蒲地蓝消炎片':
  1815. expiry_date = '24个月'
  1816. manufacturer = '特一药业集团股份有限公司'
  1817. approval_number = '国药准字Z20063596'
  1818. elif self.search_key == '999养胃舒颗粒':
  1819. expiry_date = '36个月'
  1820. manufacturer = '合肥华润神鹿药业有限公司'
  1821. approval_number = '国药准字Z34020289'
  1822. elif self.search_key == '999糠酸莫米松凝胶15':
  1823. expiry_date = '36个月'
  1824. manufacturer = '华润三九(南昌)药业有限公司'
  1825. approval_number = '国药准字H20080010'
  1826. elif self.search_key == '999黄芪精':
  1827. expiry_date = '36个月'
  1828. manufacturer = '台州南峰药业有限公司'
  1829. approval_number = '国药准字Z33020783'
  1830. elif self.search_key == '999复方感冒灵颗粒15':
  1831. expiry_date = '24个月'
  1832. manufacturer = '华润三九(郴州)制药有限公司'
  1833. approval_number = '国药准字Z43020334'
  1834. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1835. expiry_date = '36个月'
  1836. manufacturer = '华润三九(南昌)药业有限公司'
  1837. approval_number = '国药准字H20074155'
  1838. elif self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  1839. expiry_date = '暂定24个月,具体有效期以实物说明书为准'
  1840. manufacturer = '史达德药业(北京)有限公司'
  1841. approval_number = '国药准字H11021837'
  1842. elif self.search_key == '999速复康布洛芬缓释胶囊':
  1843. expiry_date = '24个月'
  1844. manufacturer = '北京红林制药有限公司'
  1845. approval_number = '国药准字H20074172'
  1846. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  1847. expiry_date = '24个月'
  1848. manufacturer = '重庆科瑞东和制药有限责任公司'
  1849. approval_number = '国药准字Z50020420'
  1850. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g/支':
  1851. expiry_date = '24个月'
  1852. manufacturer = '华润三九(南昌)药业有限公司'
  1853. approval_number = '国药准字H20073954'
  1854. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  1855. expiry_date = '24个月'
  1856. manufacturer = '浙江华润三九众益制药有限公司'
  1857. approval_number = '国药准字H20090152'
  1858. else:
  1859. is_has_instructions = self.safe_exec(self.has_instructions)
  1860. # 说明书等信息
  1861. if is_has_instructions:
  1862. print('开始获取说明书信息')
  1863. # instructions_info = self.get_instructions_data()
  1864. instructions_info = self.safe_exec(self.get_instructions_data)
  1865. if instructions_info['有效期'] is not None:
  1866. expiry_date = instructions_info['有效期'].strip('。')
  1867. if instructions_info['生产单位'] is not None:
  1868. manufacturer = instructions_info['生产单位'].strip('。')
  1869. if instructions_info['批准文号'] is not None:
  1870. approval_number = instructions_info['批准文号'].strip('。')
  1871. else:
  1872. # 没有说明书不入库
  1873. print('没有获取到说明书信息')
  1874. # self.swipe_back(1)
  1875. self.swipe_back(2) #需要回退2步到列表页
  1876. return
  1877. #暂时不获取说明书信息 end
  1878. self.unrelated_data = 0
  1879. # 爬取省份
  1880. scrape_province = '广东' # 这里先默认广东
  1881. # 是否有货
  1882. availability = ''
  1883. save_data = {
  1884. 'product': product,
  1885. 'min_price': min_price,
  1886. 'manufacture_date': manufacture_date,
  1887. 'expiry_date': expiry_date,
  1888. 'shop': shop,
  1889. 'business_license_company': business_license_company,
  1890. 'province': province,
  1891. 'city': city,
  1892. 'manufacturer': manufacturer,
  1893. 'specification': specifications,
  1894. 'approval_number': approval_number,
  1895. 'product_link': product_link,
  1896. 'scrape_date': scrape_date,
  1897. 'scrape_province': scrape_province,
  1898. 'availability': availability,
  1899. 'credit_code': credit_code,
  1900. 'platform': '美团'
  1901. }
  1902. self.save_to_database(save_data)
  1903. time.sleep(1)
  1904. self.swipe_back(2) #需要回退2步到列表页
  1905. # time.sleep(100000)
  1906. if self.distinct_target():
  1907. print('已到达搜索列表页')
  1908. else:
  1909. for i in range(1):
  1910. # self.swipe_back(1)
  1911. self.swipe_back(2) #需要回退2步到列表页
  1912. # 最外部有个定位按钮
  1913. if self.distinct_target():
  1914. break
  1915. #主函数
  def main(self, device_id, retry_count=0):
      """Run one scraping session on the given device.

      Flow:

      1. Connect to the device, show a test toast and start a
         ``SpiderMonitor`` thread.
      2. Restart the app and open the search-result list.
      3. For up to 300 list screens: tap every list item that lies fully
         inside the usable vertical window, collect its data through
         ``integrate_data`` and make sure the UI is back on the list.
      4. Drag the list upwards to reveal the next screen, until the
         "已经到底啦" (end of list) label is visible.

      If the UI ends up on a page that shows a "搜索" button instead of
      the list, the whole session is restarted recursively.

      :param device_id: device serial passed to ``connect_devices``.
      :param retry_count: number of restarts already performed; the
          session restarts itself only while this is below ``MAX_RETRY``.
      :return: None
      """
      MAX_RETRY = 3  # maximum number of recursive restarts
      list_item_xpath = '//android.support.v7.widget.RecyclerView/android.widget.FrameLayout'
      related_xpath = '//*[contains(@text, "相关结果")]'
      # Only items whose bounds lie completely inside this vertical pixel
      # window are tapped (values are hard-coded for the target screen).
      visible_top, visible_bottom = 304, 1559
      spider_no = 0  # items collected since the last cool-down

      self.connect_devices(device_id)
      time.sleep(self.get_sleep_time())
      self.d.toast.show("测试toast", 20)
      # Start the background monitor; its verification counter is polled below.
      self.monitor = SpiderMonitor(self)
      self.monitor.start()
      try:
          # Relaunch the app and navigate to the search-result list.
          self.restart_app()
          self.enter_target_page_ex()
          for idx in range(300):
              print(f'第{idx + 1}页')
              # Cool down for a minute after more than 30 collected items.
              if spider_no > 30:
                  time.sleep(60)
                  spider_no = 0
              print('目前无关数据量: ', self.unrelated_data)
              # Too many verification prompts: notify the operator and
              # reset the counter.
              if self.monitor.verification_count >= self.monitor.MAX_VERIFICATION_RETRY:
                  print("频繁遇到验证码,暂停程序")
                  # The toast helper is an object; the message is sent
                  # through its show() method, as at session start.
                  self.d.toast.show("请处理验证码后点击继续", 30)
                  # NOTE(review): this tap at (0, 0) does not block by
                  # itself; confirm how the pause for the operator is
                  # actually implemented.
                  self.d.click(0, 0)
                  self.monitor.verification_count = 0

              # Block until at least one list item is present.
              while not self.d.xpath(list_item_xpath).exists:
                  time.sleep(1)
              drug_lis = self.safe_exec(self.d.xpath(list_item_xpath).all)
              print(f'当前页面共有{len(drug_lis)}个商品')

              for idxx, drug_one in enumerate(drug_lis, start=1):
                  bounds = drug_one.info['bounds']
                  top = bounds['top']
                  bottom = bounds['bottom']
                  print(f'当前商品bottom:{bottom}')
                  print(f'当前商品top:{top}')
                  # Skip items that are not fully inside the usable window.
                  if not (visible_top <= top and bottom <= visible_bottom):
                      continue

                  print(f"这页的第几个商品:{idxx}")
                  self.safe_exec(drug_one.click)
                  print('点击目标药品完毕')
                  time.sleep(5)

                  # When a "相关结果" (related results) label is shown, a
                  # second tap 150 px below its centre is needed to open
                  # the product. `.exists` is required here: the selector
                  # object itself is always truthy.
                  related = self.d.xpath(related_xpath)
                  if related.exists:
                      print("存在相关结果")
                      box = related.info['bounds']
                      x1, y1 = box['left'], box['top']
                      x2, y2 = box['right'], box['bottom']
                      print(f"元素的边界坐标为:左上角({x1}, {y1}),右下角({x2}, {y2})")
                      center_x = (x1 + x2) / 2
                      target_y = (y1 + y2) / 2 + 150
                      self.d.click(center_x, target_y)
                      print(f"已点击位置:({center_x}, {target_y})")

                  # Collect the product data.
                  try:
                      self.safe_exec(self.integrate_data)
                      # Verify that the UI is back on the result list.
                      if self.distinct_target():
                          print('回退到列表页', True)
                      elif self.d.xpath('//*[@text="搜索"]').exists:
                          print("检测到搜索按钮,重新开始采集流程")
                          if retry_count < MAX_RETRY:
                              # Stop this session's monitor before the
                              # recursive restart creates a new one.
                              self.monitor.stop()
                              self.monitor.join()
                              return self.main(device_id, retry_count + 1)
                          print("超过最大重试次数,终止程序")
                          return
                      else:
                          print("无法恢复页面,终止采集")
                          return
                      time.sleep(self.get_sleep_time())
                      spider_no += 1
                  except Exception as e:
                      print(f'采集药品详情数据出错:{e}')
                      # Try a single back step to recover the list page;
                      # give up on the session if that does not help.
                      if not self.distinct_target():
                          self.swipe_back(1)
                          if not self.distinct_target():
                              print('页面出错,退出采集')
                              return

              if self.d.xpath('//*[@text="已经到底啦"]').exists:
                  print('已经到达列表页最底部')
                  return
              # Scroll to the next screen with a fixed-coordinate drag.
              print('开始滑动')
              self.d.drag(300, 1400, 300, 400, 1)
              print('滑动结束')
              time.sleep(self.get_sleep_time())
      finally:
          # Always stop the monitor thread, whatever path ended the session.
          self.monitor.stop()
          self.monitor.join()
  def unitest(self):
      """
      Ad-hoc test helper: write one hard-coded sample record through
      ``save_to_database`` and then sleep for 100000 seconds.

      :return: None
      """
      # Column order of the stored record.
      field_order = (
          'product',
          'min_price',
          'manufacture_date',
          'expiry_date',
          'shop',
          'business_license_company',
          'province',
          'city',
          'manufacturer',
          'specification',
          'approval_number',
          'product_link',
          'scrape_date',
          'scrape_province',
          'availability',
          'credit_code',
          'platform',
      )
      # Fields that carry a value in the sample; all others stay empty.
      filled_fields = {
          'product': "[昆中药]舒肝颗粒(低糖型)",
          'min_price': 14.0,
          'expiry_date': '36个月',
          'shop': '美团自营大药房(快递电商)',
          'manufacturer': '昆明中药厂有限公司',
          'specification': '3g*16袋/盒',
          'approval_number': '国药准字Z53021161',
          'scrape_date': '2025/07/09',
          'scrape_province': '广东',
          'platform': '美团',
      }
      sample_record = {name: filled_fields.get(name, '') for name in field_order}
      self.save_to_database(sample_record)
      # NOTE(review): presumably keeps the process alive for manual
      # inspection after the write - confirm before relying on it.
      time.sleep(100000)
  def main():
      """Scrape every enabled search keyword on one device, forever.

      Each cycle walks ``keys_list``; for every keyword a fresh ``MT``
      instance is created and its ``main`` is run against ``device_id``.
      A failure for one keyword is logged and the loop moves on to the
      next keyword. The function never returns normally.
      """
      keys_list = [
          # Earlier keyword set (disabled):
          # '三九胃泰颗粒',
          # '999小柴胡颗粒',
          # '999强力枇杷露',
          # '[999]感冒清热颗粒',
          # '999抗病毒口服液',
          # '999皮炎平',
          # '999盐酸特比萘芬乳膏',
          # '999盐酸特比萘芬',
          # '999藿香正气合剂',
          # '999必无忧盐酸特比萘芬乳膏',
          # '999复方感冒灵颗粒',
          # '999糠酸莫米松凝胶',
          # '999铝碳酸镁咀嚼片',
          # '999阿奇霉素片',
          # '999选平硝酸咪康唑乳膏',
          # Add more keywords as needed.
          #
          # Pack sizes as of 2025-08-01: 藿香正气合剂 10 and 6 vials;
          # 抗病毒口服液 12 and 18 vials; 蒲地蓝 24, 36 and 44 tablets;
          # 枇杷露 225ml; 小柴胡颗粒 9 and 15 sachets; 养胃舒 6 sachets;
          # 复方感冒灵颗粒 15 sachets; 曲安奈德益康唑乳膏 30g;
          # 葡萄糖酸锌口服溶液 12, 18, 24 and 30 vials.
          # No data found for: 999止泻利颗粒15g*8, 999维生素ec颗粒,
          # 999三蛇胆川贝膏138, 999强力枇杷露16袋, 999冰连清咽.
          #
          # Every entry ends with a comma so that enabling several of
          # them never concatenates adjacent string literals.
          # '999藿香正气合剂',
          # '999糠酸莫米松凝胶15',
          # '999抗病毒口服液',
          # '999蒲地蓝消炎片',
          # '999强力枇杷露225ml',
          # '999小柴胡颗粒',
          # '999养胃舒颗粒',
          # '999复方感冒灵颗粒15',
          # '999黄芪精',
          '999皮炎平曲安奈德益康唑乳膏30',
          # '999葡萄糖酸锌口服溶液',
          # '今维多赐多康牌蛋白粉',
          # '佳美舒阿奇霉素肠溶胶囊4',
          # '999必无忧盐酸特比萘芬喷雾剂30',
          # '999冰连清咽',
          # '999复方金银花颗粒10g',
          # '999复方苦参肠炎康片12片',
          # '999强力枇杷露16袋',
          # '999三蛇胆川贝膏138',
          # '999维生素ec颗粒',
          # '三九胃泰养胃舒颗粒8袋',
          # '999必无忧盐酸特比萘芬乳膏15g/支',
          # '999止泻利颗粒15g*8',
          # '999复方板蓝根颗粒15g*15袋/盒',
          # '史达功右美沙芬愈创甘油醚糖浆120',
          # '999复方氨酚烷胺胶囊6粒',
          # '999可调式生理性海水鼻腔喷雾50',
          # '999速复康布洛芬缓释胶囊',
      ]
      # Serial number of the device to drive. Other serials used before:
      # '21885f5', '97ae80e0', '2e58510', 'UCQGF6CQFMU8WKHI',
      # 'B6JVE6AYSWU4LRLZ'.
      device_id = 'ea4e4eb8'

      cycle_no = 0  # number of the current pass over keys_list
      while True:
          cycle_no += 1
          logging.info(f'========== 第 {cycle_no} 轮采集开始 ==========')
          for idx, key in enumerate(keys_list, 1):
              logging.info(f'[{idx}/{len(keys_list)}] 开始采集关键字:{key}')
              # Reset per keyword: if MT(key) raises, the cleanup below
              # must neither hit an unbound name nor close the instance
              # left over from the previous keyword.
              mt = None
              try:
                  mt = MT(key)  # one instance per keyword
                  mt.main(device_id)  # one complete scraping session
                  logging.info(f'关键字 {key} 本轮采集完成')
              except Exception as e:
                  # Log with traceback and carry on with the next keyword.
                  logging.exception(f'关键字 {key} 采集异常:{e}')
              finally:
                  # Release the instance's resources when it offers close().
                  if mt is not None and hasattr(mt, 'close'):
                      mt.close()
          # Optional pause between cycles (currently disabled):
          # logging.info('本轮全部关键字采集完成,等待 2 小时后下一轮...')
          # time.sleep(1 * 3600)  # 1 hour
  2190. # keys = '小柴胡颗粒' # 参苓健脾胃颗粒 舒肝颗粒 清肺化痰丸 香砂平胃颗粒 小柴胡颗粒
  2191. # mt = MT(keys) # 参苓健脾胃颗粒 舒肝颗粒 清肺化痰丸 香砂平胃颗粒
  2192. # # mt.main('95b2c764')
  2193. # mt.main('fcb3c749')
  # Script entry point: start the endless scraping loop when the file is
  # executed directly.
  if __name__ == '__main__':
      main()
      # Disabled alternative: instead of running immediately, schedule
      # main() with a cron trigger at 21:30.
      # scheduler = BlockingScheduler()
      # scheduler.add_job(main, 'cron', hour=21, minute=30, misfire_grace_time=120)
      # try:
      #     scheduler.start()
      # except (KeyboardInterrupt, SystemExit):
      #     pass