mt_auto_scrape .py 244 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
71278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392139313941395139613971398139914001401140214031404140514061407140814091410141114121413141414151416141714181419142014211422142314241425142614271428142914301431143214331434143514361437143814391440144114421443144414451446144714481449145014511452145314541455145614571458145914601461146214631464146514661467146814691470147114721473147414751476147714781479148014811482148314841485148614871488148914901491149214931494149514961497149814991500150115021503150415051506150715081509151015111512151315141515151615171518151915201521152215231524152515261527152815291530153115321533153415351536153715381539154015411542154315441545154615471548154915501551155215531554155515561557155815591560156115621563156415651566156715681569157015711572157315741575157615771578157915801581158215831584158515861587158815891590159115921593159415951596159715981599160016011602160316041605160616071608160916101611161216131614161516161617161816191620162116221623162416251626162716281629163016311632163316341635163616371638163916401641164216431644164516461647164816491650165116521653165416551656165716581659166016611662166316641665166616671668166916701671167216731674167516761677167816791680168116821683168416851686168716881689169016911692169316941695169616971698169917001701170217031704170517061707170817091710171117121713171417151716171717181719172017211722172317241725172617271728172917301731173217331734173517361737173817391740174117421743174417451746174717481749175017511752175317541755175617571758175917601761176217631764176517661767176817691770177117721773177417751776177
71778177917801781178217831784178517861787178817891790179117921793179417951796179717981799180018011802180318041805180618071808180918101811181218131814181518161817181818191820182118221823182418251826182718281829183018311832183318341835183618371838183918401841184218431844184518461847184818491850185118521853185418551856185718581859186018611862186318641865186618671868186918701871187218731874187518761877187818791880188118821883188418851886188718881889189018911892189318941895189618971898189919001901190219031904190519061907190819091910191119121913191419151916191719181919192019211922192319241925192619271928192919301931193219331934193519361937193819391940194119421943194419451946194719481949195019511952195319541955195619571958195919601961196219631964196519661967196819691970197119721973197419751976197719781979198019811982198319841985198619871988198919901991199219931994199519961997199819992000200120022003200420052006200720082009201020112012201320142015201620172018201920202021202220232024202520262027202820292030203120322033203420352036203720382039204020412042204320442045204620472048204920502051205220532054205520562057205820592060206120622063206420652066206720682069207020712072207320742075207620772078207920802081208220832084208520862087208820892090209120922093209420952096209720982099210021012102210321042105210621072108210921102111211221132114211521162117211821192120212121222123212421252126212721282129213021312132213321342135213621372138213921402141214221432144214521462147214821492150215121522153215421552156215721582159216021612162216321642165216621672168216921702171217221732174217521762177217821792180218121822183218421852186218721882189219021912192219321942195219621972198219922002201220222032204220522062207220822092210221122122213221422152216221722182219222022212222222322242225222622272228222922302231223222332234223522362237223822392240224122422243224422452246224722482249225022512252225322542255225622572258225922602261226222632264226522662267226822692270227122722273227422752276227
72278227922802281228222832284228522862287228822892290229122922293229422952296229722982299230023012302230323042305230623072308230923102311231223132314231523162317231823192320232123222323232423252326232723282329233023312332233323342335233623372338233923402341234223432344234523462347234823492350235123522353235423552356235723582359236023612362236323642365236623672368236923702371237223732374237523762377237823792380238123822383238423852386238723882389239023912392239323942395239623972398239924002401240224032404240524062407240824092410241124122413241424152416241724182419242024212422242324242425242624272428242924302431243224332434243524362437243824392440244124422443244424452446244724482449245024512452245324542455245624572458245924602461246224632464246524662467246824692470247124722473247424752476247724782479248024812482248324842485248624872488248924902491249224932494249524962497249824992500250125022503250425052506250725082509251025112512251325142515251625172518251925202521252225232524252525262527252825292530253125322533253425352536253725382539254025412542254325442545254625472548254925502551255225532554255525562557255825592560256125622563256425652566256725682569257025712572257325742575257625772578257925802581258225832584258525862587258825892590259125922593259425952596259725982599260026012602260326042605260626072608260926102611261226132614261526162617261826192620262126222623262426252626262726282629263026312632263326342635263626372638263926402641264226432644264526462647264826492650265126522653265426552656265726582659266026612662266326642665266626672668266926702671267226732674267526762677267826792680268126822683268426852686268726882689269026912692269326942695269626972698269927002701270227032704270527062707270827092710271127122713271427152716271727182719272027212722272327242725272627272728272927302731273227332734273527362737273827392740274127422743274427452746274727482749275027512752275327542755275627572758275927602761276227632764276527662767276827692770277127722773277427752776277
72778277927802781278227832784278527862787278827892790279127922793279427952796279727982799280028012802280328042805280628072808280928102811281228132814281528162817281828192820282128222823282428252826282728282829283028312832283328342835283628372838283928402841284228432844284528462847284828492850285128522853285428552856285728582859286028612862286328642865286628672868286928702871287228732874287528762877287828792880288128822883288428852886288728882889289028912892289328942895289628972898289929002901290229032904290529062907290829092910291129122913291429152916291729182919292029212922292329242925292629272928292929302931293229332934293529362937293829392940294129422943294429452946294729482949295029512952295329542955295629572958295929602961296229632964296529662967296829692970297129722973297429752976297729782979298029812982298329842985298629872988298929902991299229932994299529962997299829993000300130023003300430053006300730083009301030113012301330143015301630173018301930203021302230233024302530263027302830293030303130323033303430353036303730383039304030413042304330443045304630473048304930503051305230533054305530563057305830593060306130623063306430653066306730683069307030713072307330743075307630773078307930803081308230833084308530863087308830893090309130923093309430953096309730983099310031013102310331043105310631073108310931103111311231133114311531163117311831193120312131223123312431253126312731283129313031313132313331343135313631373138313931403141314231433144314531463147314831493150315131523153315431553156315731583159316031613162316331643165316631673168316931703171317231733174317531763177317831793180318131823183318431853186318731883189319031913192319331943195319631973198319932003201320232033204320532063207320832093210321132123213321432153216321732183219322032213222322332243225322632273228322932303231323232333234323532363237323832393240324132423243324432453246324732483249325032513252325332543255325632573258325932603261326232633264326532663267326832693270327132723273327432753276327
73278327932803281328232833284328532863287328832893290329132923293329432953296329732983299330033013302330333043305330633073308330933103311331233133314331533163317331833193320332133223323332433253326332733283329333033313332333333343335333633373338333933403341334233433344334533463347334833493350335133523353335433553356335733583359336033613362336333643365336633673368336933703371337233733374337533763377337833793380338133823383338433853386338733883389339033913392339333943395339633973398339934003401340234033404340534063407340834093410341134123413341434153416341734183419342034213422342334243425342634273428342934303431343234333434343534363437343834393440344134423443344434453446344734483449345034513452345334543455345634573458345934603461346234633464346534663467346834693470347134723473347434753476347734783479348034813482348334843485348634873488348934903491349234933494349534963497349834993500350135023503350435053506350735083509351035113512351335143515351635173518351935203521352235233524352535263527352835293530353135323533353435353536353735383539354035413542354335443545354635473548354935503551355235533554355535563557355835593560356135623563356435653566356735683569357035713572357335743575357635773578357935803581358235833584358535863587358835893590359135923593359435953596359735983599360036013602360336043605360636073608360936103611361236133614361536163617361836193620362136223623362436253626362736283629363036313632363336343635363636373638363936403641364236433644364536463647364836493650365136523653365436553656365736583659366036613662366336643665366636673668366936703671367236733674367536763677367836793680368136823683368436853686368736883689369036913692369336943695369636973698369937003701370237033704370537063707370837093710371137123713371437153716371737183719372037213722372337243725372637273728372937303731373237333734373537363737373837393740374137423743374437453746374737483749375037513752375337543755375637573758375937603761376237633764376537663767376837693770377137723773377437753776377
73778377937803781378237833784378537863787378837893790379137923793379437953796379737983799380038013802380338043805380638073808380938103811381238133814381538163817381838193820382138223823382438253826382738283829383038313832383338343835383638373838383938403841384238433844384538463847384838493850385138523853385438553856385738583859386038613862386338643865386638673868386938703871387238733874387538763877387838793880388138823883388438853886388738883889389038913892389338943895389638973898389939003901390239033904390539063907390839093910391139123913391439153916391739183919392039213922392339243925392639273928392939303931393239333934393539363937393839393940394139423943394439453946394739483949395039513952395339543955395639573958395939603961396239633964396539663967396839693970397139723973397439753976397739783979398039813982398339843985398639873988398939903991399239933994399539963997399839994000400140024003400440054006400740084009401040114012401340144015401640174018401940204021402240234024402540264027402840294030403140324033403440354036403740384039404040414042404340444045404640474048404940504051405240534054405540564057405840594060406140624063406440654066406740684069407040714072407340744075407640774078
  1. import requests
  2. import base64
  3. import cv2
  4. import uiautomator2 as u2
  5. import time
  6. import subprocess
  7. import re
  8. import random
  9. import datetime
  10. import json
  11. from aip import AipOcr
  12. from apscheduler.schedulers.blocking import BlockingScheduler
  13. # from db_mysql import mysqlClient
  14. import threading
  15. from collections import deque
  16. import numpy as np
  17. import secrets
  18. import os
  19. import math
  20. import schedule
  21. # import pyperclip
  22. from config import Config
  23. from logger import setup_logger
  24. import logging
  25. from contextlib import contextmanager
  26. from typing import Dict, Any
  27. # from database import MySQLClient
  28. # 配置日志
  29. # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
  30. setup_logger("mt_spider") # 初始化日志
  31. class SpiderMonitor(threading.Thread):
  32. """全局弹窗监控线程(增强版)"""
  33. def __init__(self, spider_instance):
  34. super().__init__(daemon=True)
  35. self.spider = spider_instance
  36. self.running = True
  37. self.pausing = threading.Event() # 主线程同步事件
  38. self.last_verification_time = 0
  39. self.verification_count = 0
  40. self.MAX_VERIFICATION_RETRY = 10
  41. self.recent_clicks = deque(maxlen=10) # 防重复点击
  42. self.logger = logging.getLogger("SpiderMonitor")
  43. # 可配置化弹窗规则
  44. self.popup_rules = {
  45. "simple": [
  46. ('//*[@text="确定"]', "点击确定"),
  47. ('//*[@text="允许"]', "点击允许"),
  48. ('//*[@text="关闭"]', "点击关闭"),
  49. ('//*[@resource-id="com.sankuai.meituan:id/close"]', "关闭按钮"),
  50. ('//*[@resource-id="com.sankuai.meituan:id/address_center_location_close"]', "关闭按钮"),
  51. ('//*[@resource-id="com.sankuai.meituan:id/location_close"]', "关闭按钮"),
  52. ],
  53. "verification": [
  54. '//*[contains(@text, "验证")]',
  55. '//*[contains(@text, "滑块")]',
  56. '//*[contains(@text, "依次点击")]',
  57. '//*[contains(@text, "请点击")]',
  58. '//*[contains(@text, "拖动滑块刚")]', #这个需要拖动滑块至最右边,然后再截图
  59. '//*[contains(@text, "请输入图片中的内容")]',
  60. '//*[contains(@text, "用最短线连接")]',
  61. '//*[contains(@text, "请按语序依次点击")]',
  62. '//*[contains(@text, "请向右滑动滑块")]',
  63. '//*[contains(@text, "请拖动下方滑块完成拼图")]',
  64. '//*[contains(@resource-id, "captcha")]'
  65. ]
  66. }
  67. def run(self):
  68. while self.running:
  69. try:
  70. handled = self.check_and_handle_popup()
  71. time.sleep(2 if handled else 1)
  72. except Exception as e:
  73. self.logger.exception("监控线程异常: %s", e)
  74. time.sleep(3)
  75. def _is_recent_click(self, xpath):
  76. """防止重复点击同一个弹窗"""
  77. key = f"{xpath}_{int(time.time())}"
  78. if key in self.recent_clicks:
  79. return True
  80. self.recent_clicks.append(key)
  81. return False
  82. def check_and_handle_popup(self):
  83. d = self.spider.d
  84. # 1. 处理简单弹窗
  85. for xpath, desc in self.popup_rules["simple"]:
  86. if d.xpath(xpath).exists and not self._is_recent_click(xpath):
  87. self.logger.info("检测到弹窗: %s", desc)
  88. d.xpath(xpath).click()
  89. return True
  90. # 2. 处理验证码弹窗
  91. for xpath in self.popup_rules["verification"]:
  92. if d.xpath(xpath).exists:
  93. now = time.time()
  94. if now - self.last_verification_time < 30:
  95. return False # 30秒内不重复触发
  96. self.last_verification_time = now
  97. self.verification_count += 1
  98. self.logger.warning("验证码弹窗触发,等待人工处理...")
  99. if self.verification_count > self.MAX_VERIFICATION_RETRY:
  100. self.logger.error("验证码重试超限,终止任务")
  101. self.spider.stop_all()
  102. return True
  103. self.pausing.set() # 通知主线程暂停
  104. d.toast.show("需要人工处理验证码", 120)
  105. # 等待人工处理
  106. start = time.time()
  107. # while time.time() - start < 120*60:
  108. # if not d.xpath(xpath).exists:
  109. # self.logger.info("验证码已处理")
  110. # d.toast.show("验证完成", 2)
  111. # self.pausing.clear() # 放行主线程
  112. # return True
  113. # time.sleep(5)
  114. while True:
  115. if not d.xpath(xpath).exists:
  116. self.logger.info("验证码已处理")
  117. d.toast.show("验证完成", 2)
  118. self.pausing.clear() # 放行主线程
  119. return True
  120. time.sleep(5)
  121. self.logger.warning("验证码超时,重启APP")
  122. self.spider.restart_app()
  123. return True
  124. # 3. 处理广告弹窗(点击右上角)
  125. if d.xpath('//*[contains(@text, "广告")]').exists:
  126. w, h = d.info['displayWidth'], d.info['displayHeight']
  127. d.click(w - 50, 50)
  128. self.logger.info("关闭广告弹窗")
  129. return True
  130. return False
  131. def stop(self):
  132. self.running = False
  133. def get_access_token():
  134. AppKey = "tRK2RhyItCSh6BzyT4CNVXQa"
  135. AppSrcret = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
  136. token_url = 'https://aip.baidubce.com/oauth/2.0/token'
  137. url = f"{token_url}?grant_type=client_credentials&client_id={AppKey}&client_secret={AppSrcret}"
  138. payload = ""
  139. headers = {
  140. 'Content-Type': 'application/json',
  141. 'Accept': 'application/json'
  142. }
  143. response = requests.request("POST", url, headers=headers, data=payload)
  144. try:
  145. return response.json()['access_token']
  146. except:
  147. return None
  148. def get_mysql():
  149. """
  150. 建立并返回一个到数据库的连接对象
  151. """
  152. import pymysql
  153. return pymysql.connect(
  154. host = Config.DB_HOST, #"localhost", # 修改后的主机
  155. port = Config.DB_PORT, #3306, # 添加端口号
  156. user = Config.DB_USER, #'root', # 修改后的用户名
  157. password = Config.DB_PASSWORD, # 修改后的密码
  158. db = Config.DB_NAME, #"drug_data", # 修改后的数据库名
  159. charset='utf8mb4'
  160. )
  161. class MT:
  162. def __init__(self, key):
  163. # self.package_name = 'com.sankuai.meituan'
  164. self.package_name = Config.PACKAGE_NAME
  165. self.access_token = get_access_token()
  166. self.city2province = self.get_city_info()
  167. self.APP_ID = '116857964'
  168. self.API_KEY = '1gAzACJOAr7BeILKqkqPOETh'
  169. self.SECRET_KEY = 'ZNArANb9GwJYgLKg4EfYhukKBfPdl1n3'
  170. self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
  171. # host = Config.DB_HOST #"localhost"
  172. # user = Config.DB_USER #"root"
  173. # password = Config.DB_PASSWORD #"dfwy2025"
  174. # database = Config.DB_NAME #"drug_data"
  175. # port = Config.DB_PORT#3306
  176. # print(f'数据库配置:host:{host},user:{user},password:{password},database:{database},port:{port}')
  177. self.table_name = Config.DB_TABLE #"mt_drug"
  178. self.shop_table_name = Config.DB_SHOP_TABLE
  179. # print(f'数据库表名:table_name:{self.table_name},shop_table_name:{self.shop_table_name}')
  180. # self.mysql_client = mysqlClient(host, user, password, database, port)
  181. self.loggerMT = logging.getLogger()
  182. self.search_key = key # 参苓健脾胃颗粒 舒肝颗粒 清肺化痰丸 香砂平胃颗粒
  183. self.unrelated_data = 0 # 无关数据数量
  184. self.shop_data_num = 0 # 店铺数据数量
  185. def stop_app(self):
  186. self.d.app_stop(self.package_name)
  187. time.sleep(5)
  188. def start_app(self):
  189. self.d.app_start(self.package_name)
  190. time.sleep(5)
  191. def restart_app(self):
  192. """
  193. 重启app
  194. :return:
  195. """
  196. self.stop_app()
  197. self.start_app()
  198. @staticmethod
  199. def get_sleep_time():
  200. # return random.randint(5, 8)
  201. return random.randint(1, 3)
  202. @staticmethod
  203. def get_current_date():
  204. return datetime.datetime.now().strftime('%Y/%m/%d')
  205. @staticmethod
  206. def get_city_info():
  207. """
  208. 获取所有的省市数据
  209. :return:
  210. """
  211. file_path = '../kailin_city.json'
  212. with open(file_path, 'r', encoding='utf-8') as f:
  213. data = json.load(f)
  214. province = {province_one["id"]: province_one for province_one in data['province']}
  215. city2province = dict()
  216. city = data['city']
  217. for city_one in city:
  218. name = city_one['name']
  219. pid = city_one['pid']
  220. if len(str(pid)) > 2:
  221. pid = int(re.match('^\d{2}', str(pid)).group())
  222. city2province[name] = province[pid]['name']
  223. return city2province
  224. def get_shop_name(self):
  225. """
  226. 获取店铺名
  227. :return:
  228. """
  229. try:
  230. shop_name = self.d.xpath(
  231. '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView').text
  232. print(f'获取到店铺名:{shop_name}')
  233. return shop_name
  234. except:
  235. try:
  236. shop_name = self.d.xpath(
  237. '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView').text
  238. print(f'获取到店铺名2:{shop_name}')
  239. return shop_name
  240. except Exception as e:
  241. #点击店铺曲获取店铺名称
  242. print("点击店铺进入后获取店铺名称")
  243. self.enter_shop()
  244. shop_xpath = '//*[@resource-id="com.sankuai.meituan:id/layout_header_view"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]//android.widget.FrameLayout[2]/android.widget.FrameLayout[1]/android.widget.TextView'
  245. if self.d.xpath(shop_xpath).exists:
  246. shop_name = self.d.xpath(shop_xpath).text
  247. self.swipe_back(1)
  248. return shop_name
  249. else:
  250. print(f'获取店铺名出错:{e}')
  251. return None
  252. def get_qualification_number(self):
  253. """
  254. 获取资质编号
  255. :return:
  256. """
  257. try:
  258. qualification_number_str = self.d.xpath(
  259. '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[2]').text
  260. qualification_number = qualification_number_str.strip('资质编号:').strip()
  261. return qualification_number
  262. except:
  263. return None
  264. def get_shop_address(self):
  265. try:
  266. xpath = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView'
  267. if self.d.xpath(xpath).exists:
  268. shop_address = self.d.xpath(xpath).text
  269. print(f'111-获取到店铺地址:{shop_address}')
  270. if '发货时间' in shop_address:
  271. print(f'店铺地址包含发货时间,再次获取店铺地址')
  272. xpath2 = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.TextView'
  273. if self.d.xpath(xpath2).exists:
  274. shop_address = self.d.xpath(xpath2).text
  275. print(f'222-获取到店铺地址:{shop_address}')
  276. else:
  277. print(f'222-xpath2获取店铺地址失败')
  278. else:
  279. shop_address = ''
  280. print(f'333-获取到店铺地址:{shop_address}')
  281. return shop_address
  282. except:
  283. print(f'获取店铺地址出错-get_shop_address')
  284. return None
  285. def enter_detail(self):
  286. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/recycler"]/android.widget.FrameLayout[1]').click()
  287. time.sleep(self.get_sleep_time())
  288. def save_to_database(self, data):
  289. print(f'保存数据到数据库:{data}')
  290. # 连接数据库
  291. conn = get_mysql()
  292. # 创建游标对象
  293. cur = conn.cursor()
  294. # add_sql = "insert into delete_friend_table(delete_user_name,delete_user_id,delete_content,delete_time) value(%s,%s,%s,%s)"
  295. add_sql = f"""
  296. INSERT INTO {self.table_name}
  297. (product, min_price, manufacture_date, expiry_date, shop, business_license_company, province, city, manufacturer, specification, approval_number, product_link, scrape_date, scrape_province, availability, credit_code, platform, search_key)
  298. VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
  299. """
  300. # cur.execute(add_sql, (data['product'], data['min_price'], data['manufacture_date'], data['expiry_date'], data['shop'], data['business_license_company'],data['province'], data['city'], data['manufacturer'], data['specification'], data['approval_number'], data['product_link'], self.get_current_date(), data['scrape_province'], data['availability'], data['credit_code'], data['platform']))
  301. cur.execute(add_sql, (data['product'], data['min_price'], data['manufacture_date'], data['expiry_date'], data['shop'], data['business_license_company'],data['province'], data['city'], data['manufacturer'], data['specification'], data['approval_number'], data['product_link'], data['scrape_date'], data['scrape_province'], data['availability'], data['credit_code'], data['platform'], data['search_key']))
  302. conn.commit() # 提交数据
  303. #self.mysql_client.insert(self.table_name, data)
  304. print(f"存入数据库成功")
  305. def save_shop_info_to_database(self, data):
  306. print(f'保存店铺数据到数据库:{data}')
  307. # 连接数据库
  308. conn = get_mysql()
  309. # 创建游标对象
  310. cur = conn.cursor()
  311. add_sql = f"""
  312. INSERT INTO {self.shop_table_name}
  313. (shop, contact_address, qualification_number, business_license_company, business_license_address, scrape_date, platform)
  314. VALUES (%s, %s, %s, %s, %s, %s, %s)
  315. """
  316. cur.execute(add_sql, (data['shop'], data['contact_address'], data['qualification_number'], data['business_license_company'], data['business_license_address'], data['scrape_date'], data['platform']))
  317. conn.commit() # 提交数据
  318. #self.mysql_client.insert(self.shop_table_name, data)
  319. print(f'存入店铺信息到数据库成功')
  320. def swipe_up(self):
  321. """
  322. 上滑
  323. :return:
  324. """
  325. screen_width = self.d.info['displayWidth']
  326. screen_height = self.d.info['displayHeight']
  327. duration_rate = random.uniform(0, 0.3)
  328. self.d.swipe(screen_width // 2, screen_height - 100, screen_width // 2, 100, duration=duration_rate)
  329. no = random.uniform(0, 1)
  330. if no > 0.85:
  331. # 有的时候卡着 再稍微往上滑一点点
  332. self.d.swipe_ext("up", 0.1)
  333. time.sleep(self.get_sleep_time())
  334. def swipe_back(self, no):
  335. """
  336. 返回
  337. :param no: 回退次数
  338. :return:
  339. """
  340. for idx in range(no):
  341. self.d.press('back')
  342. time.sleep(self.get_sleep_time())
  343. def drug_price(self):
  344. """
  345. 获取药品价格
  346. :return:
  347. """
  348. try:
  349. price_str = self.d.xpath('//*[starts-with(@text,"¥")]').text
  350. price = float(re.search('[\d\.]+', price_str).group())
  351. print(f'获取到价格:{price}')
  352. return price
  353. except Exception as e:
  354. print(f'提取价格出错-->{e}')
  355. return None
  356. def restart_uiautomator_services(self, device_id):
  357. """
  358. 重启atx的uiautomator 服务
  359. :param device_id:
  360. :return:
  361. """
  362. stop_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d --stop'
  363. start_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d'
  364. # result = subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
  365. # print(result.stdout)
  366. subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
  367. time.sleep(self.get_sleep_time())
  368. subprocess.run(start_uiautomator_services, capture_output=True, text=True, shell=True)
  369. time.sleep(self.get_sleep_time())
  370. def connect_devices(self, device_id):
  371. """
  372. 连接设备
  373. :return:
  374. """
  375. try:
  376. self.d = u2.connect_usb(device_id)
  377. # 设置隐形等待时间
  378. # self.d.implicitly_wait(5)
  379. self.restart_uiautomator_services(device_id)
  380. print(f'连接到设备:{device_id}')
  381. except Exception as e:
  382. print(f'{device_id} 连接错误: {e}')
  383. raise Exception(e)
  def get_ocr_res(self, img):
      """Run the Baidu business-license OCR endpoint on an image file.

      :param img: path of the image file to recognise
      :return: dict mapping every field name found in the response's
          ``words_result`` to that field's ``words`` value, or None when
          the HTTP response is falsy or any step fails
      """
      try:
          print(f'开始识别图片:{img}')
          request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
          # The context manager closes the file even when reading fails.
          with open(img, 'rb') as image_file:
              encoded_image = base64.b64encode(image_file.read())
          params = {"image": encoded_image}
          request_url = request_url + "?access_token=" + self.access_token
          headers = {'content-type': 'application/x-www-form-urlencoded'}
          response = requests.post(request_url, data=params, headers=headers)
          if not response:
              return None
          words_result = response.json()['words_result']
          new_dic = {field: value['words'] for field, value in words_result.items()}
          print('资质数据信息', new_dic)
          return new_dic
      except Exception as e:
          # Still best-effort (callers get None), but the reason is now
          # reported and KeyboardInterrupt/SystemExit are no longer swallowed.
          print(f'营业执照识别出错-->{e}')
          return None
  def remove_watermark(self, img_path):
      """Wash the watermark out of an image and re-encode the result.

      Every pixel value is passed through a fixed linear transform and
      clipped to the 0..255 range; per the original author's note this is
      meant to turn the watermark into white background. The result is
      encoded again using the file's own extension.

      :param img_path: path of the image file
      :return: the encoded image buffer produced by ``cv2.imencode``
      """
      raw_bytes = np.fromfile(img_path, dtype=np.uint8)
      decoded = cv2.imdecode(raw_bytes, -1)
      extension = os.path.splitext(img_path)[1]
      adjusted = 1.4057577998008846 * decoded - 38.33089999653017
      washed = np.clip(adjusted, 0, 255).astype(np.uint8)
      _, encoded = cv2.imencode(extension, washed)
      return encoded
  def get_ocr_res_image(self, img):
      """Run general-purpose OCR on an image after watermark removal.

      The image is first passed through ``self.remove_watermark`` and the
      result is handed to ``self.client.basicGeneral``.

      :param img: path of the image file
      :return: the ``words_result`` entry of the OCR response ('' when the
          response has no such key), or None when any step raises
      """
      try:
          image = self.remove_watermark(img)
          res_image = self.client.basicGeneral(image)
          data = res_image.get('words_result', '')
          print(f'百度api返回结果:{data}')
          return data
      except Exception as e:
          # Still best-effort (callers get None), but the reason is now
          # reported and KeyboardInterrupt/SystemExit are no longer swallowed.
          print(f'图片OCR识别出错-->{e}')
          return None
  def screenshot_the_business_license(self, qualification_number):
      """Screenshot the device and save a fixed crop of the screen.

      The full screenshot is written to ``screenshot1.png``; the crop is
      written to a file named after the qualification number, or to
      ``cropped_screenshot.png`` when no number is given.

      :param qualification_number: used as the crop's file name when truthy
      :return: path the cropped image was written to
      """
      full_shot_path = 'screenshot1.png'
      self.d.screenshot(full_shot_path)
      frame = cv2.imread(full_shot_path)
      # Fixed crop window in pixels: rows 480..1420, columns 0..720.
      license_area = frame[480:1420, 0:720]
      if not qualification_number:
          target_path = 'cropped_screenshot.png'
      else:
          target_path = 'D:\\work\\dfwy_spider\\drug_data\\mt\\screenshot\\' + qualification_number + '.png'
      cv2.imwrite(target_path, license_area)
      return target_path
  def screenshot_instruction(self):
      """Take a device screenshot under a unique file name.

      The name combines the current time (hour-minute-second) with an
      8-character random hex string.

      :return: path of the saved screenshot
      """
      time_str = datetime.datetime.now().strftime("%H-%M-%S")
      # 4 random bytes rendered as 8 hexadecimal characters.
      random_str = secrets.token_hex(4)
      print(time_str)
      screenshot_path = f'instructionscreenshot1-{time_str}-{random_str}.png'
      self.d.screenshot(screenshot_path)
      return screenshot_path
  477. #获取商品title
  def get_title(self):
      """Read the product title from the current page and derive its specification.

      ``self.search_key`` is first reduced to the fragment expected to show
      up in the on-screen title (brand prefix and pack-size suffix removed).
      The page is then polled for a node whose text contains that fragment,
      and the specification is parsed out of the node's text. The whole
      routine runs through ``self.safe_exec``.

      :return: tuple ``(drugs_name, specifications)``; ``drugs_name`` is the
          title text and ``specifications`` is '' when none could be
          determined
      """
      # search_key -> fragments removed (in order) in place of the default
      # handling. By default a key containing "999" just loses "999" and
      # any other key is left untouched.
      brand_fragments = {
          '999皮炎平曲安奈德益康唑乳膏30': ("999皮炎平",),
          '999必无忧盐酸特比萘芬喷雾剂30': ("999必无忧",),
          '999必无忧盐酸特比萘芬乳膏15g': ("999必无忧",),
          '999速复康布洛芬缓释胶囊': ("999速复康",),
          '999选平硝酸咪康唑乳膏20g': ("999选平",),
          '999皮炎平复方醋酸地塞米松乳膏20': ("999皮炎平",),
          '史达功右美沙芬愈创甘油醚糖浆120': ("史达功", "120"),
          '三九胃泰养胃舒颗粒8袋': ("三九胃泰", "8袋"),
          '今维多赐多康牌蛋白粉': ("今维多",),
          '佳美舒阿奇霉素肠溶胶囊4': ("佳美舒", "4"),
          '三九胃泰颗粒20g*10': ("20g*10",),
          '三九胃泰颗粒20g*6袋': ("20g*6袋",),
          '顺峰康王酮康他索乳膏': ("顺峰康王",),
      }
      # search_key -> pack-size fragment removed after the brand handling.
      size_fragments = {
          '999糠酸莫米松凝胶15': "15",
          '999皮炎平曲安奈德益康唑乳膏30': "30",
          '999复方金银花颗粒10g': "10g",
          '999复方板蓝根颗粒15g*15袋/盒': "15g*15袋/盒",
          '999复方氨酚烷胺胶囊6粒': "6粒",
          '999可调式生理性海水鼻腔喷雾50': "50",
          '999止泻利颗粒15g*8': "15g*8",
          '999必无忧盐酸特比萘芬喷雾剂30': "30",
          '999必无忧盐酸特比萘芬乳膏15g': "15g",
          '999复方苦参肠炎康片12片': "12片",
          '999强力枇杷露16袋': "16袋",
          '999三蛇胆川贝膏138': "138",
          '999强力枇杷露120ml': "120ml",
          '999强力枇杷露150ml': "150ml",
          '999抗病毒口服液10ml*10': "10ml*10",
          '999抗病毒口服液10ml*12': "10ml*12",
          '999糠酸莫米松乳膏10g支': "10g支",
          '999选平硝酸咪康唑乳膏20g': "20g",
          '999感冒清热颗粒(无糖)6g': "(无糖)6g",
          '999壮骨关节丸6g*20': "6g*20",
          '999正天丸6g*15': "6g*15",
          '999皮炎平复方醋酸地塞米松乳膏20': "20",
          '999糠酸莫米松凝胶10': "10",
          '999板蓝根颗粒10g*20': "10g*20",
          '999复方氨酚烷胺胶囊10粒': "10粒",
          '999复方氨酚烷胺胶囊12粒': "12粒",
          '999咽炎片0.26g*12片*2板': "0.26g*12片*2板",
          '999小儿止咳糖浆120': "120",
          '999小儿止咳糖浆225': "225",
          '999小儿感冒颗粒6g*10': "6g*10",
          '999小儿感冒颗粒6g*24': "6g*24",
          '999小儿氨酚黄那敏颗粒6g*10袋': "6g*10袋",
          '999小儿氨酚黄那敏颗粒6g*20袋': "6g*20袋",
          '999小儿咽扁颗粒8g*10袋': "8g*10袋",
          '999小儿感冒宁颗粒2.5g*10袋': "2.5g*10袋",
          '999感冒清热颗粒12g*18': "12g*18",
      }
      # Titles that carry no parsable specification -> hard-coded specification.
      fallback_specifications = {
          '999抗病毒口服液10ml*12': '10ml*12支/盒',
          '999抗病毒口服液': '10ml*12支/盒',
          '999抗病毒口服液10ml*10': '10ml*10支/盒',
          '999小柴胡颗粒': '10g*9袋/盒',
          '999养胃舒颗粒': '10g*6袋/盒',
          '三九胃泰胶囊': '0.5g*24粒/盒',
          '999补脾益肠丸': '6g*15袋/盒',
          '999复方感冒灵颗粒': '14g*9袋/盒',
      }

      def _inner():
          search_key = self.search_key
          default_fragments = ("999",) if "999" in search_key else ()
          temp_search_key = search_key
          for fragment in brand_fragments.get(search_key, default_fragments):
              temp_search_key = temp_search_key.replace(fragment, "")
          if search_key in size_fragments:
              temp_search_key = temp_search_key.replace(size_fragments[search_key], "")
          print(f'获取商品title时的搜索关键字:{temp_search_key}')

          title_xpath = f'//*[contains(@text, "{temp_search_key}")]'
          title = ''
          # Poll instead of reading once, so a human-verification prompt
          # has time to be dealt with before the title is read.
          for m in range(1, 6000):
              if self.d.xpath(title_xpath).exists:
                  title = self.safe_exec(lambda: self.d.xpath(title_xpath).text)
                  print(f"第{m}次获取title成功")
                  break
              time.sleep(3)
          # A node without text yields None; normalise it so the string
          # handling below cannot fail with AttributeError.
          title = title or ''
          # Oddly, the title sometimes comes back with an extra leading "0".
          if title.startswith('0'):
              title = title[1:]
          print(f'获取到药品标题:{title}')

          if search_key == '999赐多康大豆':
              return title, '1罐'
          # Expected title shape: "[brand]name spec"; group 3 is the spec.
          if search_key == "999感冒清热颗粒":
              match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
          else:
              match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
          if match:
              specifications = match.group(3).strip()
              print("药品名:", title)
              print("规格:", specifications)
              return title, specifications
          if title in fallback_specifications:
              return title, fallback_specifications[title]
          print("没有匹配到预期格式")
          return title, ''

      # Run through safe_exec so the work is held back while a captcha
      # is being handled.
      return self.safe_exec(_inner)
  def enter_shop(self):
      """Tap the "店铺" (shop) entry, then pause.

      Entering the shop is the first step towards reading its
      qualification details.

      :return: None
      """
      shop_entry = self.d.xpath('//*[@text="店铺"]')
      shop_entry.click()
      time.sleep(self.get_sleep_time())
  def enter_shoper(self):
      """Open the "商家" (merchant) tab once it shows up.

      Looks for the tab up to 10 times, sleeping ``self.get_sleep_time()``
      seconds after every miss.

      :return: True when the tab was found and clicked, False otherwise
      """
      merchant_xpath = '//*[@text="商家"]'
      for attempt in range(10):
          if self.d.xpath(merchant_xpath).exists:
              print(f'第{attempt}次商家存在')
              break
          print(f'第{attempt}次商家不存在')
          time.sleep(self.get_sleep_time())
      else:
          # The tab never appeared within the allowed attempts.
          return False
      self.d.xpath(merchant_xpath).click()
      time.sleep(self.get_sleep_time())
      return True
  721. #点击查看商家资质
  def scan_shoper_license(self):
      """Tap "查看商家资质" (view merchant qualification) once it shows up.

      Looks for the entry up to 10 times, sleeping
      ``self.get_sleep_time()`` seconds after every miss. When it never
      appears, ``self.swipe_back(1)`` is called instead.

      :return: None
      """
      license_xpath = '//*[@text="查看商家资质"]'
      for attempt in range(10):
          if self.d.xpath(license_xpath).exists:
              print(f'第{attempt}次查看商家资质存在')
              break
          print(f'第{attempt}次查看商家资质不存在')
          time.sleep(self.get_sleep_time())
      else:
          # The entry never appeared: go back instead of clicking.
          self.swipe_back(1)
          return
      self.d.xpath(license_xpath).click()
      time.sleep(self.get_sleep_time())
  737. #验证商品的信息是否在数据库中已存在
  def data_is_exists(self, data):
      """Check whether a product record already exists in ``self.table_name``.

      Only existence is checked; no row data is returned.

      :param data: dict of lookup values; must contain the keys
          ``product``, ``min_price``, ``shop``, ``scrape_date`` and
          ``platform``
      :return: True when a matching row exists, False when it does not,
          None when a required key is missing or the query fails
      """
      required_keys = ('product', 'min_price', 'shop', 'scrape_date', 'platform')
      missing = [key for key in required_keys if key not in data]
      if missing:
          logging.error(f"缺少必要字段: {', '.join(missing)}")
          return None

      cur = None
      try:
          conn = get_mysql()
          cur = conn.cursor()
          # Only existence matters: select a constant and stop at the first
          # hit, which also leaves no unread rows behind on the cursor.
          query_sql = """
              SELECT 1 FROM {}
              WHERE product = %s
              AND min_price = %s
              AND shop = %s
              AND scrape_date = %s
              AND platform = %s
              LIMIT 1
          """.format(self.table_name)
          cur.execute(query_sql, tuple(data[key] for key in required_keys))
          return bool(cur.fetchone())
      except Exception as e:
          print(f"MySQL 错误: {str(e)}")
          return None
      finally:
          # NOTE(review): who owns the connection returned by get_mysql()
          # is not visible here, so only the cursor is released - confirm
          # whether the connection should be closed as well.
          if cur is not None:
              try:
                  cur.close()
              except Exception as close_error:
                  print(f"MySQL 错误: {str(close_error)}")
  782. #验证店铺信息是否在数据库中已存在
  def shop_is_exists_database(self, shop):
      """Check whether a shop already exists in ``self.shop_table_name``.

      :param shop: shop value to look up
      :return: True when a matching row exists, False when it does not,
          None when the query fails
      """
      cur = None
      try:
          conn = get_mysql()
          cur = conn.cursor()
          # Only existence matters: select a constant and stop at the first
          # hit, which also leaves no unread rows behind on the cursor.
          query_sql = """
              SELECT 1 FROM {}
              WHERE shop = %s
              LIMIT 1
          """.format(self.shop_table_name)
          # Parameters must be a sequence: "(shop)" is just the bare string,
          # "(shop,)" is the one-element tuple DB-API drivers expect.
          cur.execute(query_sql, (shop,))
          return bool(cur.fetchone())
      except Exception as e:
          print(f"MySQL 错误: {str(e)}")
          return None
      finally:
          # NOTE(review): who owns the connection returned by get_mysql()
          # is not visible here, so only the cursor is released - confirm
          # whether the connection should be closed as well.
          if cur is not None:
              try:
                  cur.close()
              except Exception as close_error:
                  print(f"MySQL 错误: {str(close_error)}")
  def wait_if_verifying(self, monitor, timeout=120):
      """Block the calling thread while a captcha is being handled.

      Sleeps in one-second steps for as long as ``monitor.pausing`` is set,
      giving up once ``timeout`` seconds have elapsed.

      :param monitor: object exposing a ``pausing`` event
      :param timeout: maximum number of seconds to wait
      :return: None
      """
      started_at = time.time()
      while monitor.pausing.is_set():
          waited = time.time() - started_at
          if waited < timeout:
              time.sleep(1)
          else:
              break
  805. # def safe_xpath(self, xpath, timeout=10):
  806. # """线程安全 xpath 查找"""
  807. # self.wait_if_verifying(self.monitor)
  808. # return self.d.xpath(xpath).wait(timeout=timeout)
  def wait_for_ready(self, monitor, timeout=86400):
      """Wait for captcha handling to finish before working on a page.

      Sleeps in one-second steps for as long as ``monitor.pausing`` is set
      (at most ``timeout`` seconds), then asks the monitor to scan for a
      popup one more time.

      :param monitor: object exposing a ``pausing`` event and
          ``check_and_handle_popup()``
      :param timeout: maximum number of seconds to wait
      :return: None
      """
      started_at = time.time()
      while monitor.pausing.is_set():
          waited = time.time() - started_at
          if waited < timeout:
              time.sleep(1)
          else:
              break
      # Extra safety net: a captcha may pop up at this very moment, so
      # actively scan once more.
      monitor.check_and_handle_popup()
  def safe_list(self, xpath, monitor):
      """Fetch all nodes matching an XPath once captcha handling is done.

      :param xpath: XPath expression to query on the device
      :param monitor: captcha monitor handed to ``self.wait_for_ready``
      :return: result of ``.all()`` on the XPath selector
      """
      self.wait_for_ready(monitor)
      selector = self.d.xpath(xpath)
      return selector.all()
  def safe_exec(self, func, *args, **kwargs):
      """Run ``func`` once no captcha handling is in progress.

      Blocks, checking once per second, for as long as
      ``self.monitor.pausing`` is set; there is no timeout.

      :param func: callable to execute
      :param args: positional arguments forwarded to ``func``
      :param kwargs: keyword arguments forwarded to ``func``
      :return: whatever ``func`` returns
      """
      while True:
          if not self.monitor.pausing.is_set():
              break
          time.sleep(1)
      # The gate is open: run the real logic.
      outcome = func(*args, **kwargs)
      return outcome
  def get_next_data(self, data, target):
      """Return the ``words`` of the entry that follows the entry equal to ``target``.

      :param data: sequence of dicts, each with a ``words`` key
      :param target: ``words`` value to search for
      :return: ``words`` of the entry right after the first match, or None
          when there is no match or the match is the last entry
      """
      last_index = len(data) - 1
      for position, entry in enumerate(data):
          if entry['words'] != target:
              continue
          if position < last_index:
              return data[position + 1]['words']
      return None
  def delete_instruction_screenshot(self, screenshot_path):
      """Delete a screenshot file, reporting the outcome instead of raising.

      :param screenshot_path: path of the file to delete
      :return: None
      """
      try:
          os.remove(screenshot_path)
          print(f"截图文件已删除:{screenshot_path}")
      except FileNotFoundError:
          # Nothing to delete.
          print(f"文件未找到,无法删除:{screenshot_path}")
      except Exception as error:
          # Any other failure (for example a permission problem) is only reported.
          print(f"删除文件时出错:{error}")
  844. '''
  845. def get_instructions_data(self):
  846. """
  847. 确定有说明书之后,提取所有的说明书数据
  848. :return:
  849. """
  850. self.d.xpath('//*[@text="说明"]').click()
  851. # time.sleep(random.randint(3, 5))
  852. time.sleep(0.5)
  853. self.d.xpath('//*[@text="查看详细说明"]').click()
  854. # time.sleep(random.randint(3, 5))
  855. time.sleep(0.5)
  856. self.d.xpath('//*[@text="加载更多"]').click_exists()
  857. loop_page = 5
  858. # new_list = list()
  859. new_list = []
  860. for i in range(loop_page):
  861. self.d.xpath('//*[@text="加载更多"]').click_exists()
  862. time.sleep(0.2)
  863. if i == 0:
  864. self.d.swipe(200, 1000, 200, 300, 0.4)
  865. else:
  866. self.d.swipe(200, 1000, 200, 62)
  867. time.sleep(0.2)
  868. if self.d.xpath('//*[@text="加载更多"]').exists:
  869. self.d.xpath('//*[@text="加载更多"]').click()
  870. time.sleep(0.2)
  871. all_tt = self.d.xpath(
  872. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup').all()
  873. for idx in range(1, len(all_tt) + 1):
  874. all_tt1 = self.d.xpath(
  875. f'//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[{idx}]//android.widget.TextView').all()
  876. # print(f'当前说明书列表数据:{all_tt1}')
  877. for tt in all_tt1:
  878. if tt.text and tt.text != '展开全文':
  879. new_list.append(tt.text)
  880. if i == 0:
  881. height = 938
  882. else:
  883. drug_box = self.d.xpath(
  884. '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]').info
  885. bounds = drug_box['bounds']
  886. height = bounds['bottom'] - bounds['top']
  887. if height < 938:
  888. # print('说明书翻页到底部')
  889. break
  890. # 展开全文
  891. new_list = [item for item in new_list if item != '展开全文']
  892. print(f'当前说明书列表数据:{new_list}')
  893. # expiry_date_index = next(idx for idx, i in enumerate(new_list) if i == '有效期')
  894. # manufacturer_index = next(idx for idx, i in enumerate(new_list) if i == '生产单位')
  895. # approval_number_index = next(idx for idx, i in enumerate(new_list) if i == '批准文号')
  896. # res_data = {
  897. # "有效期": new_list[expiry_date_index + 1],
  898. # "生产单位": new_list[manufacturer_index + 1],
  899. # "批准文号": new_list[approval_number_index + 1]
  900. # }
  901. res_data = {
  902. "有效期": (new_list[new_list.index("有效期") + 1]) if "有效期" in new_list and new_list.index("有效期") + 1 < len(new_list) else "",
  903. "生产单位": (new_list[new_list.index("生产单位") + 1]) if "生产单位" in new_list and new_list.index("生产单位") + 1 < len(new_list) else "",
  904. "批准文号": (new_list[new_list.index("批准文号") + 1]) if "批准文号" in new_list and new_list.index("批准文号") + 1 < len(new_list) else ""
  905. }
  906. print(f'当前说明书字典数据:{res_data}')
  907. return res_data
  908. '''
  909. '''
  910. def get_instructions_data(self):
  911. """
  912. 确定有说明书之后,提取所有的说明书数据
  913. :return:
  914. """
  915. self.d.xpath('//*[@text="说明"]').click()
  916. # time.sleep(random.randint(3, 5))
  917. time.sleep(0.5)
  918. self.d.xpath('//*[@text="查看详细说明"]').click()
  919. # time.sleep(random.randint(3, 5))
  920. time.sleep(0.5)
  921. # 1) 先向上滑动一次,触发“加载更多”出现
  922. self.d.swipe(200, 1000, 200, 300, 0.4)
  923. time.sleep(0.3)
  924. # 2) 再进入“出现就点”的循环
  925. while self.d.xpath('//*[@text="加载更多"]').click_exists(timeout=1):
  926. time.sleep(0.2)
  927. self.d.swipe(200, 1000, 200, 300, 0.4)
  928. # self.d.swipe(200, 1000, 200, 62)
  929. time.sleep(0.2)
  930. # 一次性获取所有文本
  931. texts = [
  932. node.text.strip()
  933. # for node in self.d.xpath('//android.widget.TextView').all()
  934. for node in self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.TextView').all()
  935. if node.text and node.text.strip() and node.text != '加载更多'
  936. ]
  937. print(f'当前说明书列表数据:{texts}')
  938. # 提取关键字段
  939. def safe_get(key):
  940. # try:
  941. # idx = texts.index(key)
  942. # return texts[idx + 1] if idx + 1 < len(texts) else ""
  943. # except ValueError:
  944. # return ""
  945. try:
  946. idx = next(i for i, text in enumerate(texts) if text == key)
  947. return texts[idx + 1] if idx + 1 < len(texts) else ""
  948. except StopIteration:
  949. return ""
  950. res_data = {
  951. "有效期": safe_get("有效期"),
  952. "生产单位": safe_get("生产单位"),
  953. "批准文号": safe_get("批准文号")
  954. }
  955. print(f'当前说明书字典数据:{res_data}')
  956. return res_data
  957. '''
  958. '''
  959. def get_instructions_data(self):
  960. """
  961. 说明书键值对采集:连续两个 TextView 为一对,精确提取
  962. """
  963. # 1. 进入说明书
  964. self.d(text="说明").click()
  965. time.sleep(0.5)
  966. self.d(text="查看详细说明").click()
  967. time.sleep(0.5)
  968. # self.d(text="加载更多").click_exists(timeout=0.5)
  969. # 2. 找到说明书最外层 ScrollView(页面主体)
  970. scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.ScrollView")
  971. count = scroll_view.count
  972. print(f"找到的 ScrollView 数量: {count}")
  973. if not scroll_view.exists:
  974. return {"有效期": "", "生产单位": "", "批准文号": ""}
  975. # 3. 在 ScrollView 内再定位真正包含键值对的容器
  976. # 绝大多数美团说明书页面对应的是 ScrollView > ViewGroup > 若干 TextView
  977. kv_container = scroll_view.child(className="android.view.ViewGroup")
  978. if not kv_container.exists:
  979. kv_container = scroll_view # 降级:直接对 ScrollView 取子孙 TextView
  980. # 4. 滑动到底并收集所有 TextView(保留顺序)
  981. all_texts = []
  982. max_swipe = 5
  983. last_length = 0
  984. for _ in range(max_swipe):
  985. texts = kv_container.child(className="android.widget.TextView")
  986. #获取texts中的文本
  987. print(f'当前说明书列表数据:{texts}')
  988. current_texts = []
  989. self.loggerMT.info(f'说明书111')
  990. for tv in texts:
  991. try:
  992. txt = tv.get_text().strip()
  993. # txt = tv.info['text'].strip()
  994. except Exception:
  995. continue
  996. if txt and txt != "展开全文":
  997. current_texts.append(txt)
  998. self.loggerMT.info(f'说明书222')
  999. print(f'当前说明书列表数据:{current_texts}')
  1000. # 去重
  1001. if current_texts:
  1002. current_texts = [t for t in current_texts if t not in all_texts]
  1003. all_texts.extend(current_texts)
  1004. # 判断是否到底
  1005. # if not scroll_view.info.get("scrollable"):
  1006. # break
  1007. # 判断是否到底
  1008. if len(all_texts) == last_length:
  1009. break
  1010. last_length = len(all_texts)
  1011. # self.d.swipe_ext("up", scale=0.7)
  1012. #向上滑动一次
  1013. self.d.swipe(200, 1000, 200, 300, 0.2)
  1014. time.sleep(0.2)
  1015. if self.d.xpath('//*[@text="加载更多"]').exists:
  1016. self.d.xpath('//*[@text="加载更多"]').click()
  1017. # 5. 成对解析
  1018. res_data = {"有效期": "", "生产单位": "", "批准文号": ""}
  1019. for i in range(len(all_texts) - 1):
  1020. key = all_texts[i]
  1021. val = all_texts[i + 1]
  1022. if key in res_data:
  1023. res_data[key] = val
  1024. print(f'说明书文本共 {len(all_texts)} 条,提取结果: {res_data}')
  1025. # time.sleep(1000000)
  1026. return res_data
  1027. '''
  def get_instructions_data(self):
      """Open the product's instruction sheet and OCR three fields from it.

      Flow: tap "说明"; open the detailed view through "查看详细说明" or,
      failing that, through "查看全部" (swiping to look for it); tap
      "加载更多" if it can be found; swipe until both "生产单位" and
      "批准文号" are on screen; then take a screenshot, OCR it and delete
      the screenshot file.

      :return: dict with the keys "有效期", "生产单位" and "批准文号". All
          three are '' when the sheet cannot be opened or OCR yields
          nothing; otherwise each value is whatever ``self.get_next_data``
          returns for that key
      """
      detail_xpath = '//*[@text="查看详细说明"]'
      view_all_xpath = '//*[@text="查看全部"]'
      load_more_xpath = '//*[@text="加载更多"]'

      self.d.xpath('//*[@text="说明"]').click()
      time.sleep(0.5)
      if self.d.xpath(detail_xpath).exists:
          self.d.xpath(detail_xpath).click()
      else:
          # No direct entry: look for "查看全部", swiping between checks.
          for _ in range(8):
              if self.d.xpath(view_all_xpath).exists:
                  print('开始点击查看全部')
                  break
              self.d.swipe_ext('down', 0.3)
              time.sleep(1)
              if self.d.xpath(view_all_xpath).exists:
                  print('开始点击查看全部2')
                  break
          if not self.d.xpath(view_all_xpath).exists:
              # Neither entry is available: report empty fields.
              self.loggerMT.info('获取到的说明书信息为空。')
              return {
                  "有效期": '',
                  "生产单位": '',
                  "批准文号": ''
              }
          self.d.xpath(view_all_xpath).click()
      time.sleep(0.5)

      # Tap "加载更多" once, swiping between checks until it is found.
      for _ in range(8):
          if self.d.xpath(load_more_xpath).exists:
              self.d.xpath(load_more_xpath).click()
              time.sleep(0.2)
              break
          self.d.swipe(200, 1000, 200, 300, 0.3)

      # Swipe until both labels of interest are on screen together.
      for _ in range(10):
          if self.d.xpath('//*[@text="生产单位"]').exists and self.d.xpath('//*[@text="批准文号"]').exists:
              break
          self.d.swipe(200, 1300, 200, 300, 0.3)

      instruction_path = self.screenshot_instruction()
      print(f"instruction_path= {instruction_path}")
      time.sleep(2)
      ocr_res = self.get_ocr_res_image(instruction_path)
      if ocr_res:
          # Each value is the OCR entry that follows its label.
          validity = self.get_next_data(ocr_res, '有效期')
          approval_number = self.get_next_data(ocr_res, '批准文号')
          manufacturer = self.get_next_data(ocr_res, '生产单位')
      else:
          validity = approval_number = manufacturer = ''
      res_data = {
          "有效期": validity,
          "生产单位": manufacturer,
          "批准文号": approval_number
      }
      print(f"res_data={res_data}")
      time.sleep(1)
      self.delete_instruction_screenshot(instruction_path)
      return res_data
  def has_instructions(self):
      """Tell whether the current product page offers an instruction sheet.

      Products without a sheet cannot have their detailed data collected.
      After an initial pause the "说明" entry is looked for up to 8 times;
      each round checks, swipes, waits one second and checks again.

      :return: True when the "说明" entry was found, False otherwise
      """
      time.sleep(self.get_sleep_time())
      instructions_xpath = '//*[@text="说明"]'
      for attempt in range(8):
          if self.d.xpath(instructions_xpath).exists:
              print(f"第{attempt}次有说明书1")
              return True
          self.d.swipe_ext('down', 0.3)
          time.sleep(1)
          if self.d.xpath(instructions_xpath).exists:
              print(f"第{attempt}次有说明书2")
              return True
      return False
  def has_shop(self):
      """Report whether an element with the text "进店" is present.

      Sleeps for ``self.get_sleep_time()`` seconds before looking.

      :return: the ``exists`` value of the element lookup.
      """
      time.sleep(self.get_sleep_time())
      enter_shop_button = self.d.xpath('//*[@text="进店"]')
      return enter_shop_button.exists
  1139. #获取商品对应的店铺信息
  def get_license_info_ex(self):
      """Collect shop address and business-licence details into a dict.

      Steps, each device-facing helper being run through ``self.safe_exec``:
      ``enter_shop``, ``enter_shoper``, a wait (up to ten checks) for the
      "查看商家资质" element, ``get_shop_address``, ``scan_shoper_license`` and
      ``get_qualification_number``. When a qualification number is obtained,
      the screen is tapped at (0.603, 0.27), a screenshot is taken via
      ``screenshot_the_business_license`` and passed to ``get_ocr_res``; the
      "单位名称" and "地址" entries of the OCR result, when present, fill the
      company and licence-address fields.

      :return: dict with the keys ``contact_address``,
          ``qualification_number``, ``business_license_company`` and
          ``business_license_address``. Fields that could not be obtained
          are empty strings; all four are empty when ``enter_shoper``
          yields a value equal to ``False``.
      """
      def record(address, number='', company='', licence_address=''):
          # Single place that fixes the shape and key order of the result.
          return {
              'contact_address': address,
              'qualification_number': number,
              'business_license_company': company,
              'business_license_address': licence_address,
          }

      self.safe_exec(self.enter_shop)
      entered = self.safe_exec(self.enter_shoper)
      # Equality (not identity) comparison is kept on purpose: it is what
      # the rest of the flow has always relied on.
      if entered == False:
          return record('')

      # Wait for the merchant-qualification entry to appear.
      for attempt in range(10):
          if self.d.xpath('//*[@text="查看商家资质"]').exists:
              print(f"第{attempt}次有商家资质")
              break
          print(f"第{attempt}次没有商家资质")
          time.sleep(self.get_sleep_time())

      # Shop address.
      contact_address = self.safe_exec(self.get_shop_address)
      self.safe_exec(self.scan_shoper_license)
      # Qualification number; the licence is only processed when it is truthy.
      qualification_number = self.safe_exec(self.get_qualification_number)
      if not qualification_number:
          return record(contact_address)

      # NOTE(review): (0.603, 0.27) is a fixed relative screen position;
      # presumably where the business-licence thumbnail sits - confirm.
      self.d.click(0.603, 0.27)
      time.sleep(self.get_sleep_time())
      cropped_screenshot_path = self.screenshot_the_business_license(qualification_number)
      print(f'cropped_screenshot_path:{cropped_screenshot_path}')
      ocr_res = self.get_ocr_res(cropped_screenshot_path)
      print(f'ocr_res:{ocr_res}')

      # Pull company name and address out of the OCR result when available.
      company_name = ''
      licence_address = ''
      if ocr_res:
          if '单位名称' in ocr_res.keys():
              company_name = ocr_res['单位名称']
          if '地址' in ocr_res.keys():
              licence_address = ocr_res['地址']
      return record(contact_address, qualification_number, company_name, licence_address)
  1192. """暂不用该功能
  1193. def get_license_info(self):
  1194. self.enter_shop()
  1195. self.enter_shoper()
  1196. self.scan_shoper_license()
  1197. # 获取资质编码
  1198. qualification_number = self.get_qualification_number()
  1199. if qualification_number:
  1200. table_license_info = self.get_table_license_info(qualification_number)
  1201. if table_license_info:
  1202. return {
  1203. '单位名称': table_license_info[0],
  1204. '地址': table_license_info[1],
  1205. '社会信用代码': table_license_info[2]
  1206. }
  1207. else:
  1208. # operate_no = random.randint(0, 1)
  1209. self.d.click(0.603, 0.27)
  1210. # if operate_no == 0:
  1211. # self.d.xpath('//*[@text="营业执照"]').click()
  1212. # else:
  1213. # self.d.click(0.603, 0.27)
  1214. time.sleep(self.get_sleep_time())
  1215. self.screenshot_the_business_license()
  1216. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  1217. return ocr_res
  1218. # operate_no = random.randint(0, 1)
  1219. self.d.click(0.603, 0.27)
  1220. # if operate_no == 0:
  1221. # self.d.xpath('//*[@text="营业执照"]').click()
  1222. # else:
  1223. # self.d.click(0.603, 0.27)
  1224. time.sleep(self.get_sleep_time())
  1225. self.screenshot_the_business_license()
  1226. ocr_res = self.get_ocr_res('cropped_screenshot.png')
  1227. return ocr_res
  1228. """
  def distinct_target(self):
      """Return True when any of six known layout xpaths exists on screen.

      The candidates are two "position" nodes (under ``RelativeLayout[1]``
      and ``RelativeLayout[2]``) followed by four variants of the last
      ``ViewGroup`` inside a ``HorizontalScrollView``. They are probed in
      that order and probing stops at the first one that exists, so a hit
      on an early candidate avoids the remaining element lookups.

      :return: True if at least one candidate exists, otherwise False.
      """
      candidate_xpaths = (
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
      )
      # any() over a generator stops at the first existing candidate instead
      # of always performing all six lookups.
      return any(self.d.xpath(candidate).exists for candidate in candidate_xpaths)
  def enter_target_page(self):
      """Navigate from the current screen to the search results for ``self.search_key``.

      Clicks, in order, the element with content-desc "看病买药", the search
      carousel text and a node addressed by an absolute layout xpath; then
      types ``self.search_key`` (clearing existing input), clicks "搜索" and
      finally calls ``click_express_send``. Every step is followed by a
      sleep of ``self.get_sleep_time()``.
      """
      tap_sequence = (
          '//*[@content-desc="看病买药"]',
          '//*[@resource-id="com.sankuai.meituan:id/vf_search_carousel_text"]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]',
      )
      for target_xpath in tap_sequence:
          self.d.xpath(target_xpath).click()
          time.sleep(self.get_sleep_time())

      # Enter the search keyword and submit it.
      self.d.send_keys(self.search_key, clear=True)
      time.sleep(self.get_sleep_time())
      self.d.xpath('//*[@text="搜索"]').click()
      time.sleep(self.get_sleep_time())

      # Additionally click the "快递送" entry on the results page.
      self.click_express_send()
      time.sleep(self.get_sleep_time())
  def click_express_send(self):
      """Swipe the horizontal filter strip, then click its last entry ("快递送").

      Four layout variants of the strip (a ``HorizontalScrollView``) are
      known; they are always tried in the same order.

      1. Swipe phase (two passes): the first variant that exists is dragged
         from x=500 to x=100 along its vertical centre, after which the
         method sleeps ``self.get_sleep_time()`` and leaves the phase.
      2. Click phase (up to five attempts): the last ``ViewGroup`` inside the
         first variant that exists is clicked and the method returns after a
         sleep; when no variant exists the failure is printed and the
         attempt is repeated after a sleep.

      Nothing is returned; when every click attempt fails the method simply
      ends after the last failure message.
      """
      slide_xpaths = (
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]',
      )
      # The clickable entry is the last ViewGroup two levels below each strip,
      # so every click xpath is its strip xpath plus this common suffix.
      last_entry_suffix = '/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
      # Names used in the success messages, one per layout variant.
      variant_labels = ('xpath', 'xpath2', 'xpath3', 'xpath4')

      # --- Swipe phase -----------------------------------------------------
      # NOTE(review): the two passes run back to back with no wait between
      # them when no strip is found; confirm whether a pause was intended.
      swiped = False
      for _ in range(2):
          for number, slide_xpath in enumerate(slide_xpaths, start=1):
              strip = self.d.xpath(slide_xpath)
              if not strip.exists:
                  continue
              bounds = strip.info['bounds']
              top = bounds['top']
              bottom = bounds['bottom']
              print(f'top={top}')
              print(f'bottom={bottom}')
              # Swipe along the vertical centre of the strip.
              y = (top + bottom) // 2
              print(f'y={y}')
              self.loggerMT.info(f'开始滑动{number}')
              self.d.swipe(500, y, 100, y, 0.5)
              time.sleep(self.get_sleep_time())
              swiped = True
              break
          if swiped:
              break

      # --- Click phase -----------------------------------------------------
      max_retry = 5  # maximum number of click attempts
      for idx in range(1, max_retry + 1):
          clicked = False
          for label, slide_xpath in zip(variant_labels, slide_xpaths):
              entry = self.d.xpath(slide_xpath + last_entry_suffix)
              if not entry.exists:
                  continue
              entry.click()
              print(f"第{idx}次点击{label}快递送成功")
              time.sleep(self.get_sleep_time())
              clicked = True
              break
          if clicked:
              break
          print(f"第{idx}次点击xpath或xpath2或xpath3快递送都失败")
          time.sleep(self.get_sleep_time())
  1371. """暂不用该功能
  1372. def get_table_license_info(self, qualification_number):
  1373. try:
  1374. sql = f'select business_license_company,city,credit_code from mt_drug where credit_code = "{qualification_number}"'
  1375. self.mysql_client.cur.execute(sql)
  1376. res = self.mysql_client.cur.fetchone()
  1377. return res
  1378. except:
  1379. return None
  1380. """
  1381. # def get_clipboard(self):
  1382. # """通过ADB获取Android手机剪贴板内容"""
  1383. # try:
  1384. # result = subprocess.run(
  1385. # ["adb", "shell", "am", "broadcast", "-a", "clipper.get"],
  1386. # capture_output=True,
  1387. # text=True,
  1388. # timeout=5
  1389. # )
  1390. # print(f"获取剪贴板结果: {result.stdout}")
  1391. # # 解析返回信息中的剪贴板内容
  1392. # for line in result.stdout.splitlines():
  1393. # if "data=" in line:
  1394. # return line.split("data=")[1].strip()
  1395. # return ""
  1396. # except Exception as e:
  1397. # print("获取剪贴板失败:", e)
  1398. # return ""
  1399. # def get_clipboard(self):
  1400. # """读取 Android 剪贴板(系统自带命令)"""
  1401. # try:
  1402. # text = subprocess.check_output(
  1403. # ["adb", "shell", "cmd", "clipboard", "get"],
  1404. # text=True, timeout=5, stderr=subprocess.STDOUT
  1405. # ).strip()
  1406. # print(f"获取剪贴板结果: {text}")
  1407. # return text if text else ""
  1408. # except Exception as e:
  1409. # print("获取剪贴板失败:", e)
  1410. # return ""
  def get_clipboard(self):
      """Return the device clipboard text with surrounding whitespace removed.

      Waits one second, reads ``self.d.clipboard`` exactly once, logs the
      value through ``self.loggerMT`` and returns it stripped. Reading once
      guarantees that the logged value is the value that is returned.

      :return: the stripped clipboard text, or '' when the clipboard is None.
      """
      time.sleep(1)
      clipboard_content = self.d.clipboard
      self.loggerMT.info(f"Clipboard content:{clipboard_content}")  # debug output
      if clipboard_content is None:
          return ''
      return clipboard_content.strip()
  def clear_clipboard(self):
      """Overwrite the device clipboard with an empty string.

      Calls ``self.d.set_clipboard`` with ``""`` and ``"text/plain"`` as its
      two positional arguments.
      """
      empty_text = ""
      second_argument = "text/plain"
      self.d.set_clipboard(empty_text, second_argument)
  1421. # def clear_clipboard(self):
  1422. # """清空手机剪贴板:写入空字符串(subprocess 版)"""
  1423. # try:
  1424. # subprocess.run(
  1425. # ["adb", "shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", " "],
  1426. # check=True,
  1427. # capture_output=True,
  1428. # text=True,
  1429. # timeout=5
  1430. # )
  1431. # except subprocess.CalledProcessError as e:
  1432. # print("ADB 清空失败:", e.stderr)
  1433. # def clear_clipboard():
  1434. # """清空手机剪贴板:写入空字符串"""
  1435. # try:
  1436. # adb_shell(["shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", ""])
  1437. # except subprocess.CalledProcessError as e:
  1438. # print("ADB 清空失败:", e.output)
  1439. #获取一个商品的数据、商品对应的店铺的数据
  def get_product_link(self):
      """Obtain the product share link through the "copy link" menu entry.

      Up to five attempts are made while no link has been obtained. In each
      attempt the three known xpaths of the "···" button are tried in order;
      for every one that exists the button is clicked, "分享商品" is clicked if
      present, and when "复制链接" exists it is clicked and the link is read
      with ``self.get_clipboard()``, which ends the scan of the remaining
      button xpaths for that attempt. Between unsuccessful attempts the
      method sleeps 0.5 seconds.

      :return: the link read from the clipboard, or '' when none was obtained.
      """
      # Known locations of the "···" button (three layout variants).
      menu_button_xpaths = (
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
          '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]',
      )
      copy_link_xpath = '//*[@text="复制链接"]'
      max_attempts = 5  # maximum number of attempts

      link = ''
      attempt = 1
      # Stop as soon as a link has been obtained or the attempts run out.
      while attempt <= max_attempts and not link:
          for button_xpath in menu_button_xpaths:
              if not self.d.xpath(button_xpath).exists:
                  continue
              print(f'{attempt}-进入分享点点点')
              self.loggerMT.info(f'{attempt}-进入分享点点点')
              self.d.xpath(button_xpath).click()
              time.sleep(0.2)
              self.d.xpath('//*[@text="分享商品"]').click_exists()
              time.sleep(0.2)
              if not self.d.xpath(copy_link_xpath).exists:
                  # No "copy link" entry: report the (empty) link and move on
                  # to the next button xpath.
                  print(f'{attempt}-商品链接:{link}')
                  self.loggerMT.info(f'{attempt}-商品链接:{link}')
                  link = ''
                  continue
              self.d.xpath(copy_link_xpath).click()
              time.sleep(1)
              link = self.get_clipboard()
              time.sleep(0.5)
              print(f'{attempt}-商品链接:{link}')
              self.loggerMT.info(f'{attempt}-商品链接:{link}')
              break  # handled this attempt; leave the inner scan
          if not link and attempt < max_attempts:
              time.sleep(0.5)  # no wait needed after the final attempt
          attempt += 1
      return link
  1486. def integrate_data(self):
  1487. #测试说明书详情:
  1488. # instructions_info = self.safe_exec(self.get_instructions_data)
  1489. # time.sleep(1000000)
  1490. #测试店铺信息
  1491. # license_info = self.safe_exec(self.get_license_info_ex)
  1492. # time.sleep(1000000)
  1493. #测试定位地址
  1494. #获取链接开始
  1495. #self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
  1496. #1、点击页面的... 先判断元素是否存在
  1497. '''
  1498. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1499. print('1-进入分享点点点111')
  1500. self.loggerMT.info('1-进入分享点点点111')
  1501. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1502. #点击分享商品
  1503. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1504. time.sleep(0.2)
  1505. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1506. time.sleep(0.2)
  1507. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1508. time.sleep(1)
  1509. #获取剪切板的数据
  1510. product_link = self.get_clipboard()
  1511. time.sleep(0.5)
  1512. print(f'1-商品链接:{product_link}')
  1513. self.loggerMT.info(f'1-商品链接:{product_link}')
  1514. #清空剪切板
  1515. # self.clear_clipboard()
  1516. # if self.d.xpath('//*[@text="加载更多"]').click_exists():
  1517. # self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1518. # if self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').exists:
  1519. # self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').click()
  1520. # #获取剪切板的数据
  1521. # product_link = self.get_clipboard()
  1522. # time.sleep(0.5)
  1523. # print(f'商品链接:{product_link}')
  1524. # #清空剪切板
  1525. # self.clear_clipboard()
  1526. # else:
  1527. # print('未找到分享按钮111')
  1528. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1529. print('1-进入分享点点点222')
  1530. self.loggerMT.info('1-进入分享点点点222')
  1531. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1532. time.sleep(0.2)
  1533. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1534. time.sleep(0.2)
  1535. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1536. time.sleep(1)
  1537. #获取剪切板的数据
  1538. product_link = self.get_clipboard()
  1539. time.sleep(0.5)
  1540. print(f'1-商品链接:{product_link}')
  1541. self.loggerMT.info(f'1-商品链接:{product_link}')
  1542. #如果为获取到product_link 则等待0.5秒再获取
  1543. if not product_link:
  1544. time.sleep(0.5)
  1545. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1546. print('2-进入分享点点点111')
  1547. self.loggerMT.info('2-进入分享点点点111')
  1548. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1549. #点击分享商品
  1550. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1551. time.sleep(0.2)
  1552. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1553. time.sleep(0.2)
  1554. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1555. time.sleep(1)
  1556. #获取剪切板的数据
  1557. product_link = self.get_clipboard()
  1558. time.sleep(0.5)
  1559. print(f'2-商品链接:{product_link}')
  1560. self.loggerMT.info(f'2-商品链接:{product_link}')
  1561. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1562. print('2-进入分享点点点222')
  1563. self.loggerMT.info('2-进入分享点点点222')
  1564. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1565. time.sleep(0.2)
  1566. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1567. time.sleep(0.2)
  1568. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1569. time.sleep(1)
  1570. #获取剪切板的数据
  1571. product_link = self.get_clipboard()
  1572. time.sleep(0.5)
  1573. print(f'2-商品链接:{product_link}')
  1574. self.loggerMT.info(f'2-商品链接:{product_link}')
  1575. #如果为获取到product_link 则等待0.5秒再获取
  1576. if not product_link:
  1577. time.sleep(0.5)
  1578. if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1579. print('3-进入分享点点点111')
  1580. self.loggerMT.info('3-进入分享点点点111')
  1581. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1582. #点击分享商品
  1583. # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1584. time.sleep(0.2)
  1585. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1586. time.sleep(0.2)
  1587. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1588. time.sleep(1)
  1589. #获取剪切板的数据
  1590. product_link = self.get_clipboard()
  1591. time.sleep(0.5)
  1592. print(f'3-商品链接:{product_link}')
  1593. self.loggerMT.info(f'3-商品链接:{product_link}')
  1594. elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
  1595. print('3-进入分享点点点222')
  1596. self.loggerMT.info('3-进入分享点点点222')
  1597. self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
  1598. time.sleep(0.2)
  1599. self.d.xpath('//*[@text="分享商品"]').click_exists()
  1600. time.sleep(0.2)
  1601. self.d.xpath('//*[@text="复制链接"]').click_exists()
  1602. time.sleep(1)
  1603. #获取剪切板的数据
  1604. product_link = self.get_clipboard()
  1605. time.sleep(0.5)
  1606. print(f'3-商品链接:{product_link}')
  1607. self.loggerMT.info(f'3-商品链接:{product_link}')
  1608. '''
  1609. #获取链接结束
  1610. """
  1611. 整合数据
  1612. :return:
  1613. """
  1614. # title_info = self.get_title() # 药品,规格
  1615. # title_info = self.safe_exec(self.get_title) # 药品,规格
  1616. product, specifications = self.safe_exec(self.get_title) # 药品,规格
  1617. if product:
  1618. # product, specifications = title_info
  1619. #如果关键字包含999 则 product必须包含999 和 999后面的那段字符串 ps 999感冒灵颗粒必须包含:"999"和"感冒灵颗粒"
  1620. if '999' in self.search_key:
  1621. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1622. temp_search_key = self.search_key.replace('999皮炎平', '')
  1623. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  1624. temp_search_key = self.search_key.replace('999必无忧', '')
  1625. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  1626. temp_search_key = self.search_key.replace('999必无忧', '')
  1627. elif self.search_key == '999速复康布洛芬缓释胶囊':
  1628. temp_search_key = self.search_key.replace('999速复康', '')
  1629. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  1630. temp_search_key = self.search_key.replace('999选平', '')
  1631. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  1632. temp_search_key = self.search_key.replace('999皮炎平', '')
  1633. else:
  1634. temp_search_key = self.search_key.replace('999', '')
  1635. if self.search_key == '999糠酸莫米松凝胶15':
  1636. temp_search_key = temp_search_key.replace('15', '')
  1637. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1638. temp_search_key = temp_search_key.replace('30', '')
  1639. elif self.search_key == '999抗病毒口服液10ml*6支/盒':
  1640. temp_search_key = temp_search_key.replace("10ml*6支/盒", "")
  1641. elif self.search_key == '999复方金银花颗粒10g':
  1642. temp_search_key = temp_search_key.replace("10g", "")
  1643. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  1644. temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
  1645. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  1646. temp_search_key = temp_search_key.replace("6粒", "")
  1647. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  1648. temp_search_key = temp_search_key.replace("50", "")
  1649. elif self.search_key == '999止泻利颗粒15g*8':
  1650. temp_search_key = temp_search_key.replace("15g*8", "")
  1651. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  1652. temp_search_key = temp_search_key.replace("30", "")
  1653. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  1654. temp_search_key = temp_search_key.replace("15g", "")
  1655. elif self.search_key == '999复方苦参肠炎康片12片':
  1656. temp_search_key = temp_search_key.replace("12片", "")
  1657. elif self.search_key == '999强力枇杷露16袋':
  1658. temp_search_key = temp_search_key.replace("16袋", "")
  1659. elif self.search_key == '999三蛇胆川贝膏138':
  1660. temp_search_key = temp_search_key.replace("138", "")
  1661. elif self.search_key == '999抗病毒口服液10ml*12':
  1662. temp_search_key = temp_search_key.replace("10ml*12", "")
  1663. elif self.search_key == '999抗病毒口服液10ml*10':
  1664. temp_search_key = temp_search_key.replace("10ml*10", "")
  1665. elif self.search_key == '999强力枇杷露120ml':
  1666. temp_search_key = temp_search_key.replace("120ml", "")
  1667. elif self.search_key == '999强力枇杷露150ml':
  1668. temp_search_key = temp_search_key.replace("150ml", "")
  1669. elif self.search_key == '999糠酸莫米松乳膏10g支':
  1670. temp_search_key = temp_search_key.replace("10g支", "")
  1671. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  1672. temp_search_key = temp_search_key.replace("20g", "")
  1673. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  1674. temp_search_key = temp_search_key.replace("(无糖)6g", "")
  1675. elif self.search_key == '999壮骨关节丸6g*20':
  1676. temp_search_key = temp_search_key.replace("6g*20", "")
  1677. elif self.search_key == '999正天丸6g*15':
  1678. temp_search_key = temp_search_key.replace("6g*15", "")
  1679. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  1680. temp_search_key = temp_search_key.replace("20", "")
  1681. elif self.search_key == '999糠酸莫米松凝胶10':
  1682. temp_search_key = temp_search_key.replace("10", "")
  1683. elif self.search_key == '999板蓝根颗粒10g*20':
  1684. temp_search_key = temp_search_key.replace("10g*20", "")
  1685. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  1686. temp_search_key = temp_search_key.replace("10粒", "")
  1687. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  1688. temp_search_key = temp_search_key.replace("12粒", "")
  1689. elif self.search_key == '999咽炎片0.26g*12片*2板':
  1690. temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
  1691. elif self.search_key == '999小儿止咳糖浆120':
  1692. temp_search_key = temp_search_key.replace("120", "")
  1693. elif self.search_key == '999小儿止咳糖浆225':
  1694. temp_search_key = temp_search_key.replace("225", "")
  1695. elif self.search_key == '999小儿感冒颗粒6g*10':
  1696. temp_search_key = temp_search_key.replace("6g*10", "")
  1697. elif self.search_key == '999小儿感冒颗粒6g*24':
  1698. temp_search_key = temp_search_key.replace("6g*24", "")
  1699. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  1700. temp_search_key = temp_search_key.replace("6g*10袋", "")
  1701. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  1702. temp_search_key = temp_search_key.replace("6g*20袋", "")
  1703. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  1704. temp_search_key = temp_search_key.replace("8g*10袋", "")
  1705. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  1706. temp_search_key = temp_search_key.replace("2.5g*10袋", "")
  1707. elif self.search_key == '999感冒清热颗粒12g*18':
  1708. temp_search_key = temp_search_key.replace("12g*18", "")
  1709. if '999' not in product or temp_search_key not in product:
  1710. self.swipe_back(1)
  1711. self.unrelated_data += 1
  1712. return
  1713. else:
  1714. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  1715. temp_search_key = self.search_key.replace('史达功', '')
  1716. temp_search_key = temp_search_key.replace('120', '')
  1717. if '史达功' not in product or temp_search_key not in product:
  1718. self.swipe_back(1)
  1719. self.unrelated_data += 1
  1720. return
  1721. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  1722. temp_search_key = self.search_key.replace('三九胃泰', '')
  1723. temp_search_key = temp_search_key.replace('8袋', '')
  1724. if '三九胃泰' not in product or temp_search_key not in product:
  1725. self.swipe_back(1)
  1726. self.unrelated_data += 1
  1727. return
  1728. elif self.search_key == '今维多赐多康牌蛋白粉':
  1729. temp_search_key = self.search_key.replace('今维多', '')
  1730. if '今维多' not in product or temp_search_key not in product:
  1731. self.swipe_back(1)
  1732. self.unrelated_data += 1
  1733. return
  1734. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  1735. temp_search_key = self.search_key.replace('佳美舒', '')
  1736. temp_search_key = temp_search_key.replace('4', '')
  1737. if '佳美舒' not in product or temp_search_key not in product:
  1738. self.swipe_back(1)
  1739. self.unrelated_data += 1
  1740. return
  1741. elif self.search_key == '三九胃泰颗粒20g*10':
  1742. temp_search_key = self.search_key.replace('20g*10', '')
  1743. if temp_search_key not in product:
  1744. self.swipe_back(1)
  1745. self.unrelated_data += 1
  1746. return
  1747. elif self.search_key == '三九胃泰颗粒20g*6袋':
  1748. temp_search_key = self.search_key.replace('20g*6袋', '')
  1749. if temp_search_key not in product:
  1750. self.swipe_back(1)
  1751. self.unrelated_data += 1
  1752. return
  1753. elif self.search_key == '顺峰康王酮康他索乳膏':
  1754. temp_search_key = self.search_key.replace('顺峰康王', '')
  1755. if '顺峰康王' not in product or temp_search_key not in product:
  1756. self.swipe_back(1)
  1757. self.unrelated_data += 1
  1758. return
  1759. else:
  1760. if self.search_key not in product.replace(' ', ''):
  1761. self.swipe_back(1)
  1762. self.unrelated_data += 1
  1763. return
  1764. # if self.search_key not in product.replace(' ', ''):
  1765. # self.swipe_back(1)
  1766. # self.unrelated_data += 1
  1767. # return
  1768. else:
  1769. self.swipe_back(1)
  1770. return
  1771. min_price = self.drug_price() # 最低价格
  1772. # 商品链接
  1773. product_link = self.get_product_link()
  1774. #判断是否有自营的文本,有的话不需要获取店铺的信息
  1775. if self.d.xpath('//*[@text="自营"]').exists:
  1776. shop = "美团自营大药房(快递电商)"
  1777. # 爬取日期
  1778. scrape_date = self.get_current_date()
  1779. # scrape_date = "2025-07-18"
  1780. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  1781. 'platform': '美团'}
  1782. print(f'当前数据:{dup_data}')
  1783. if self.data_is_exists(dup_data):
  1784. print('存在相同数据不入库')
  1785. self.swipe_back(1)
  1786. return
  1787. else:
  1788. for i in range(8):
  1789. if self.d.xpath('//*[@text="进店"]').exists:
  1790. print('开始获取店铺名1')
  1791. break
  1792. self.d.swipe_ext('up', 0.3)
  1793. time.sleep(1)
  1794. # detail_info = self.d.xpath(
  1795. # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[6]').info
  1796. # bounds = detail_info['bounds']
  1797. # height = bounds['bottom'] - bounds['top']
  1798. # if self.d.xpath('//*[@text="进店"]').exists and height > 100:
  1799. if self.d.xpath('//*[@text="进店"]').exists:
  1800. print('开始获取店铺名2')
  1801. break
  1802. shop = self.get_shop_name()
  1803. # 爬取日期
  1804. scrape_date = self.get_current_date()
  1805. # scrape_date = "2025-07-18"
  1806. dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
  1807. 'platform': '美团'}
  1808. print(f'当前数据:{dup_data}')
  1809. #获取店铺信息开始
  1810. #暂时不获取店铺信息 start
  1811. is_has_enter_shop = self.has_shop()
  1812. #需要判断shop是否已经在数据库中存在,如果存在,则不再进入店铺,直接进入下一个商品
  1813. shop_is_exists = self.shop_is_exists_database(shop)
  1814. #存在进店 并且店铺的名称不包含美团官方的字样
  1815. print(f"已采集{self.shop_data_num}家店铺数据")
  1816. if is_has_enter_shop and '美团官方' not in shop and '美团自营' not in shop and not shop_is_exists and self.shop_data_num < 500:
  1817. # license_info = self.get_license_info_ex()
  1818. license_info = self.safe_exec(self.get_license_info_ex)
  1819. contact_address = license_info['contact_address']
  1820. qualification_number = license_info['qualification_number']
  1821. business_license_company = license_info['business_license_company']
  1822. business_license_address = license_info['business_license_address']
  1823. save_shop_data = {
  1824. 'shop': shop,
  1825. 'contact_address': contact_address,
  1826. 'qualification_number': qualification_number,
  1827. 'scrape_date': scrape_date,
  1828. 'business_license_company':business_license_company,
  1829. 'business_license_address':business_license_address,
  1830. 'platform': '美团'
  1831. }
  1832. self.save_shop_info_to_database(save_shop_data)
  1833. self.shop_data_num += 1 # 店铺数据数量+1
  1834. self.swipe_back(2)
  1835. else:
  1836. print('不采集店铺信息')
  1837. #获取店铺信息结束
  1838. #暂时不获取店铺信息 end
  1839. if self.data_is_exists(dup_data):
  1840. print('存在相同数据不入库')
  1841. self.swipe_back(1)
  1842. return
  1843. if not shop:
  1844. print('未获取到店铺名:开始回退')
  1845. self.swipe_back(1)
  1846. return
  1847. if not shop or '自营' in shop:
  1848. self.swipe_back(1)
  1849. return
  1850. time.sleep(self.get_sleep_time())
  1851. # 生产日期为空
  1852. manufacture_date = ''
  1853. # License information (business licence details of the shop)
  1854. # if is_has_enter_shop:
  1855. # license_info = self.get_license_info()
  1856. # business_license_company = license_info["单位名称"]
  1857. # credit_code = license_info['社会信用代码']
  1858. # city_str = license_info['地址']
  1859. # # 先把省份啥的替换掉
  1860. # city_sub_str = re.sub(r'[u4e00-\u9fa5]+省', '', city_str)
  1861. # try:
  1862. # city = re.search(r'[\u4e00-\u9fa5]+?(市|区|县)', city_sub_str).group(0)
  1863. # except:
  1864. # city = city_sub_str
  1865. # try:
  1866. # province = self.city2province[city]
  1867. # except:
  1868. # province = ''
  1869. # self.swipe_back(2)
  1870. # else:
  1871. # business_license_company = ''
  1872. # credit_code = ''
  1873. # city = ''
  1874. # province = ''
  1875. business_license_company = ''
  1876. credit_code = ''
  1877. city = ''
  1878. province = ''
  1879. expiry_date = ''
  1880. manufacturer = ''
  1881. approval_number = ''
  1882. #暂时不获取说明书信息 start
  1883. #是否存在说明书
  1884. # is_has_instructions = self.has_instructions()
  1885. #有的药品没有说明书,直接默认
  1886. if self.search_key == '今维多赐多康牌蛋白粉':
  1887. expiry_date = '18个月'
  1888. manufacturer = '华润圣海健康科技有限公司'
  1889. approval_number = '食健备G202437001992'
  1890. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  1891. expiry_date = '24个月'
  1892. manufacturer = '浙江华润三九众益制药有限公司'
  1893. approval_number = '国药准字H20090152'
  1894. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  1895. expiry_date = '3年'
  1896. manufacturer = '江苏萨瑞斯医疗科技有限公司'
  1897. approval_number = '苏械注准20212140025'
  1898. elif self.search_key == '999蒲地蓝消炎片':
  1899. expiry_date = '24个月'
  1900. manufacturer = '特一药业集团股份有限公司'
  1901. approval_number = '国药准字Z20063596'
  1902. elif self.search_key == '999养胃舒颗粒':
  1903. expiry_date = '36个月'
  1904. manufacturer = '合肥华润神鹿药业有限公司'
  1905. approval_number = '国药准字Z34020289'
  1906. elif self.search_key == '999糠酸莫米松凝胶15':
  1907. expiry_date = '36个月'
  1908. manufacturer = '华润三九(南昌)药业有限公司'
  1909. approval_number = '国药准字H20080010'
  1910. elif self.search_key == '999黄芪精':
  1911. expiry_date = '36个月'
  1912. manufacturer = '台州南峰药业有限公司'
  1913. approval_number = '国药准字Z33020783'
  1914. elif self.search_key == '999复方感冒灵颗粒':
  1915. expiry_date = '24个月'
  1916. manufacturer = '华润三九(郴州)制药有限公司'
  1917. approval_number = '国药准字Z43020334'
  1918. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  1919. expiry_date = '36个月'
  1920. manufacturer = '华润三九(南昌)药业有限公司'
  1921. approval_number = '国药准字H20074155'
  1922. elif self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  1923. expiry_date = '暂定24个月,具体有效期以实物说明书为准'
  1924. manufacturer = '史达德药业(北京)有限公司'
  1925. approval_number = '国药准字H11021837'
  1926. elif self.search_key == '999速复康布洛芬缓释胶囊':
  1927. expiry_date = '24个月'
  1928. manufacturer = '北京红林制药有限公司'
  1929. approval_number = '国药准字H20074172'
  1930. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  1931. expiry_date = '24个月'
  1932. manufacturer = '重庆科瑞东和制药有限责任公司'
  1933. approval_number = '国药准字Z50020420'
  1934. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  1935. expiry_date = '24个月'
  1936. manufacturer = '华润三九(南昌)药业有限公司'
  1937. approval_number = '国药准字H20073954'
  1938. elif self.search_key == '999维生素C咀嚼片':
  1939. expiry_date = '24个月'
  1940. manufacturer = '甘肃成纪生物药业有限公司'
  1941. approval_number = '国药准字H62021166'
  1942. elif self.search_key == '999强力枇杷露120ml':
  1943. expiry_date = '36个月'
  1944. manufacturer = '华润三九(南昌)药业有限公司'
  1945. approval_number = '国药准字Z36021533'
  1946. elif self.search_key == '999强力枇杷露150ml':
  1947. expiry_date = '36个月'
  1948. manufacturer = '华润三九(南昌)药业有限公司'
  1949. approval_number = '国药准字Z36021533'
  1950. elif self.search_key == '999抗病毒口服液10ml*10' or self.search_key == '999抗病毒口服液10ml*12':
  1951. expiry_date = '24个月'
  1952. manufacturer = '杭州华润老桐君药业有限公司'
  1953. approval_number = '国药准字Z33020518'
  1954. elif self.search_key == '999精氨酸布洛芬颗粒':
  1955. expiry_date = '暂定36个月'
  1956. manufacturer = '华润三九(唐山)药业有限公司'
  1957. approval_number = '国药准字H20070139'
  1958. elif self.search_key == '999糠酸莫米松乳膏10g支':
  1959. expiry_date = '36个月'
  1960. manufacturer = '华润三九(南昌)药业有限公司'
  1961. approval_number = '国药准字H20074090'
  1962. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  1963. expiry_date = '24个月'
  1964. manufacturer = '华润三九(南昌)药业有限公司'
  1965. approval_number = '国药准字H20074079'
  1966. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  1967. expiry_date = '36个月'
  1968. manufacturer = '合肥华润神鹿药业有限公司'
  1969. approval_number = '国药准字Z20055023'
  1970. elif self.search_key == '999银菊清咽颗粒':
  1971. expiry_date = '30个月'
  1972. manufacturer = '合肥华润神鹿药业有限公司'
  1973. approval_number = '国药准字Z20026680'
  1974. elif self.search_key == '999阿奇霉素片':
  1975. expiry_date = '48个月'
  1976. manufacturer = '浙江华润三九众益制药有限公司'
  1977. approval_number = '国药准字H20084458'
  1978. elif self.search_key == '999补脾益肠丸':
  1979. expiry_date = '24个月'
  1980. manufacturer = '惠州市九惠制药股份有限公司'
  1981. approval_number = '国药准字Z44023376'
  1982. elif self.search_key == '999壮骨关节丸6g*20':
  1983. expiry_date = '24个月'
  1984. manufacturer = '华润三九医药股份有限公司'
  1985. approval_number = '国药准字Z44023377'
  1986. elif self.search_key == '999壮骨关节胶囊':
  1987. expiry_date = '24个月'
  1988. manufacturer = '华润三九医药股份有限公司'
  1989. approval_number = '国药准字Z20080055'
  1990. elif self.search_key == '999正天丸6g*15':
  1991. expiry_date = '30个月'
  1992. manufacturer = '华润三九医药股份有限公司'
  1993. approval_number = '国药准字Z44020711'
  1994. elif self.search_key == '999正天胶囊':
  1995. expiry_date = '24个月'
  1996. manufacturer = '华润三九医药股份有限公司'
  1997. approval_number = '国药准字Z20010142'
  1998. elif self.search_key == '三九胃泰胶囊':
  1999. expiry_date = '24个月'
  2000. manufacturer = '华润三九医药股份有限公司'
  2001. approval_number = '国药准字Z44020704'
  2002. elif self.search_key == '三九胃泰颗粒20g*10':
  2003. expiry_date = '24个月'
  2004. manufacturer = '华润三九医药股份有限公司'
  2005. approval_number = '国药准字Z44020705'
  2006. elif self.search_key == '999感冒灵颗粒':
  2007. expiry_date = '24个月'
  2008. manufacturer = '华润三九(枣庄)药业有限公司'
  2009. approval_number = '国药准字Z44021940'
  2010. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2011. expiry_date = '36个月'
  2012. manufacturer = '华润三九医药股份有限公司'
  2013. approval_number = '国药准字H44024170'
  2014. elif self.search_key == '三九胃泰颗粒20g*6袋':
  2015. expiry_date = '24个月'
  2016. manufacturer = '华润三九医药股份有限公司'
  2017. approval_number = '国药准字Z44020705'
  2018. elif self.search_key == '顺峰康王酮康他索乳膏':
  2019. expiry_date = '24个月'
  2020. manufacturer = '广东华润顺峰药业有限公司'
  2021. approval_number = '国药准字H10980204'
  2022. elif self.search_key == '999糠酸莫米松凝胶10':
  2023. expiry_date = '36个月'
  2024. manufacturer = '华润三九(南昌)药业有限公司'
  2025. approval_number = '国药准字H20080010'
  2026. elif self.search_key == '999板蓝根颗粒10g*20':
  2027. expiry_date = '36个月'
  2028. manufacturer = '广东恒诚制药股份有限公司'
  2029. approval_number = '国药准字Z44021520'
  2030. elif self.search_key == '999复方氨酚烷胺胶囊' or self.search_key == '999复方氨酚烷胺胶囊12粒' or self.search_key == '999复方氨酚烷胺胶囊10粒' or self.search_key == '999复方氨酚烷胺胶囊6粒' :
  2031. expiry_date = '36个月'
  2032. manufacturer = '华润三九(唐山)药业有限公司'
  2033. approval_number = '国药准字H13021912'
  2034. elif self.search_key == '999咽炎片0.26g*12片*2板':
  2035. expiry_date = '24个月'
  2036. manufacturer = '华润三九(黄石)药业有限公司'
  2037. approval_number = '国药准字Z42021062'
  2038. elif self.search_key == '999小儿止咳糖浆120' or self.search_key == '999小儿止咳糖浆225':
  2039. expiry_date = '24个月'
  2040. manufacturer = '华润三九(雅安)药业有限公司'
  2041. approval_number = '国药准字Z51020675'
  2042. elif self.search_key == '999小儿感冒颗粒6g*10' or self.search_key == '999小儿感冒颗粒6g*24':
  2043. expiry_date = '36个月'
  2044. manufacturer = '华润三九(枣庄)药业有限公司'
  2045. approval_number = '国药准字Z37021392'
  2046. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋' or self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  2047. expiry_date = '36个月'
  2048. manufacturer = '华润三九(黄石)药业有限公司'
  2049. approval_number = '国药准字H42022510'
  2050. elif self.search_key == '999感冒灵胶囊':
  2051. expiry_date = '24个月'
  2052. manufacturer = '华润三九医药股份有限公司'
  2053. approval_number = '国药准字Z44021939'
  2054. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  2055. expiry_date = '24个月'
  2056. manufacturer = '华润三九(黄石)药业有限公司'
  2057. approval_number = '国药准字Z42021105'
  2058. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  2059. expiry_date = '18个月'
  2060. manufacturer = '华润三九医药股份有限公司'
  2061. approval_number = '国药准字Z20100067'
  2062. elif self.search_key == '999感冒清热颗粒12g*18':
  2063. expiry_date = '36个月'
  2064. manufacturer = '山东新大陆制药有限公司'
  2065. approval_number = '国药准字Z37020066'
  2066. elif self.search_key == '999小柴胡颗粒':
  2067. expiry_date = '24个月'
  2068. manufacturer = '华润三九医药股份有限公司'
  2069. approval_number = '国药准字Z44020709'
  2070. else:
  2071. is_has_instructions = self.safe_exec(self.has_instructions)
  2072. # 说明书等信息
  2073. if is_has_instructions:
  2074. print('开始获取说明书信息')
  2075. # instructions_info = self.get_instructions_data()
  2076. instructions_info = self.safe_exec(self.get_instructions_data)
  2077. if instructions_info['有效期'] is not None:
  2078. expiry_date = instructions_info['有效期'].strip('。')
  2079. if instructions_info['生产单位'] is not None:
  2080. manufacturer = instructions_info['生产单位'].strip('。')
  2081. if instructions_info['批准文号'] is not None:
  2082. approval_number = instructions_info['批准文号'].strip('。')
  2083. else:
  2084. # 没有说明书不入库
  2085. print('没有获取到说明书信息')
  2086. self.swipe_back(1)
  2087. return
  2088. #暂时不获取说明书信息 end
  2089. self.unrelated_data = 0
  2090. # 爬取省份
  2091. scrape_province = '广东' # 这里先默认广东
  2092. # 是否有货
  2093. availability = ''
  2094. save_data = {
  2095. 'product': product,
  2096. 'min_price': min_price,
  2097. 'manufacture_date': manufacture_date,
  2098. 'expiry_date': expiry_date,
  2099. 'shop': shop,
  2100. 'business_license_company': business_license_company,
  2101. 'province': province,
  2102. 'city': city,
  2103. 'manufacturer': manufacturer,
  2104. 'specification': specifications,
  2105. 'approval_number': approval_number,
  2106. 'product_link': product_link,
  2107. 'scrape_date': scrape_date,
  2108. 'scrape_province': scrape_province,
  2109. 'availability': availability,
  2110. 'credit_code': credit_code,
  2111. 'platform': '美团',
  2112. 'search_key': self.search_key,
  2113. }
  2114. self.save_to_database(save_data)
  2115. # time.sleep(100000)
  2116. time.sleep(self.get_sleep_time())
  2117. if self.distinct_target():
  2118. print('已到达搜索列表页')
  2119. else:
  2120. for i in range(1):
  2121. print('在详情页')
  2122. self.swipe_back(1)
  2123. time.sleep(self.get_sleep_time())
  2124. # 最外部有个定位按钮
  2125. if self.distinct_target():
  2126. break
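  2127. # Main entry point: connects to the device, starts the global popup monitor, then iterates over the search-result pages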
  2128. def main(self, device_id, start_page, end_page, retry_count=0):
  2129. MAX_RETRY = 3 # 最大重试次数
  2130. spider_no = 0
  2131. self.connect_devices(device_id)
  2132. time.sleep(self.get_sleep_time())
  2133. self.d.toast.show("测试toast", 20)
  2134. # 启动全局弹窗监控
  2135. self.monitor = SpiderMonitor(self)
  2136. self.monitor.start()
  2137. try:
  2138. # 重新开启美团应用
  2139. self.restart_app()
  2140. # 搜索关键字
  2141. self.enter_target_page()
  2142. # print('开始滑动')
  2143. # self.d.drag(300, 1400, 300, 400, 1)
  2144. # time.sleep(100000)
  2145. for idx in range(300):
  2146. print(f'第{idx + 1}页')
  2147. if spider_no > 30:
  2148. time.sleep(60)
  2149. spider_no = 0
  2150. print('目前无关数据量: ', self.unrelated_data)
  2151. # 检查是否需要暂停(验证码过多)
  2152. if self.monitor.verification_count >= self.monitor.MAX_VERIFICATION_RETRY:
  2153. print("频繁遇到验证码,暂停程序")
  2154. self.d.toast("请处理验证码后点击继续", 30)
  2155. # 等待用户点击屏幕继续
  2156. self.d.click(0, 0) # 无效点击,等待用户操作
  2157. self.monitor.verification_count = 0
  2158. # if self.unrelated_data > 10:
  2159. # # 连续超过5个不达标的数据则停止采集
  2160. # break
  2161. # 线程安全获取商品列表
  2162. # drug_lis = self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').all()
  2163. # drug_lis = self.safe_list('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout', self.monitor)
  2164. while True:
  2165. if self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').exists:
  2166. break
  2167. time.sleep(1)
  2168. drug_lis = self.safe_exec(self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').all)
  2169. lis_len = len(drug_lis)
  2170. print(f'当前页面共有{lis_len}个商品')
  2171. for idxx,drug_one in enumerate(drug_lis,start = 1):
  2172. bounds = drug_one.info['bounds']
  2173. top = bounds['top']
  2174. bottom = bounds['bottom']
  2175. # height = bottom - top
  2176. print(f'当前商品bottom:{bottom}')
  2177. print(f'当前商品top:{top}')
  2178. # if 304 <= top and bottom <= 1475: # 默认高度241的才行
  2179. if 304 <= top and bottom <= 1475: # 默认高度241的才行 1559
  2180. # print('目标-->', drug_one.info)
  2181. # drug_one.click()
  2182. #获取当前元素中的属性来判断是否要点击进入采集
  2183. print(f"这页的第几个商品:{idxx}")
  2184. product_title = ''
  2185. price = ''
  2186. shop_name = ''
  2187. #商品名称的xpath
  2188. product_tittle_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  2189. product_tittle_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  2190. if self.d.xpath(product_tittle_xpath).exists:
  2191. product_title = self.d.xpath(product_tittle_xpath).text
  2192. product_title = product_title[1:] if product_title.startswith('0') else product_title
  2193. print(f"product_tittle_xpath列表当前商品名称:{product_title}")
  2194. if '999' in self.search_key:
  2195. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2196. temp_search_key = self.search_key.replace('999皮炎平', '')
  2197. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  2198. temp_search_key = self.search_key.replace('999必无忧', '')
  2199. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  2200. temp_search_key = self.search_key.replace('999必无忧', '')
  2201. elif self.search_key == '999速复康布洛芬缓释胶囊':
  2202. temp_search_key = self.search_key.replace('999速复康', '')
  2203. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  2204. temp_search_key = self.search_key.replace('999选平', '')
  2205. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2206. temp_search_key = self.search_key.replace('999皮炎平', '')
  2207. else:
  2208. temp_search_key = self.search_key.replace('999', '')
  2209. if self.search_key == '999糠酸莫米松凝胶15':
  2210. temp_search_key = temp_search_key.replace('15', '')
  2211. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2212. temp_search_key = temp_search_key.replace('30', '')
  2213. elif self.search_key == '999复方金银花颗粒10g':
  2214. temp_search_key = temp_search_key.replace('10g', '')
  2215. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  2216. temp_search_key = temp_search_key.replace('15g*15袋/盒', '')
  2217. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  2218. temp_search_key = temp_search_key.replace('6粒', '')
  2219. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  2220. temp_search_key = temp_search_key.replace('50', '')
  2221. elif self.search_key == '999止泻利颗粒15g*8':
  2222. temp_search_key = temp_search_key.replace('15g*8', '')
  2223. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  2224. temp_search_key = temp_search_key.replace('30', '')
  2225. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  2226. temp_search_key = temp_search_key.replace('15g', '')
  2227. elif self.search_key == '999复方苦参肠炎康片12片':
  2228. temp_search_key = temp_search_key.replace("12片", "")
  2229. elif self.search_key == '999强力枇杷露16袋':
  2230. temp_search_key = temp_search_key.replace("16袋", "")
  2231. elif self.search_key == '999三蛇胆川贝膏138':
  2232. temp_search_key = temp_search_key.replace("138", "")
  2233. elif self.search_key == '999抗病毒口服液10ml*12':
  2234. temp_search_key = temp_search_key.replace("10ml*12", "")
  2235. elif self.search_key == '999抗病毒口服液10ml*10':
  2236. temp_search_key = temp_search_key.replace("10ml*10", "")
  2237. elif self.search_key == '999强力枇杷露120ml':
  2238. temp_search_key = temp_search_key.replace("120ml", "")
  2239. elif self.search_key == '999强力枇杷露150ml':
  2240. temp_search_key = temp_search_key.replace("150ml", "")
  2241. elif self.search_key == '999糠酸莫米松乳膏10g支':
  2242. temp_search_key = temp_search_key.replace("10g支", "")
  2243. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  2244. temp_search_key = temp_search_key.replace("20g", "")
  2245. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  2246. temp_search_key = temp_search_key.replace("(无糖)6g", "")
  2247. elif self.search_key == '999壮骨关节丸6g*20':
  2248. temp_search_key = temp_search_key.replace("6g*20", "")
  2249. elif self.search_key == '999正天丸6g*15':
  2250. temp_search_key = temp_search_key.replace("6g*15", "")
  2251. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2252. temp_search_key = temp_search_key.replace("20", "")
  2253. elif self.search_key == '999糠酸莫米松凝胶10':
  2254. temp_search_key = temp_search_key.replace("10", "")
  2255. elif self.search_key == '999板蓝根颗粒10g*20':
  2256. temp_search_key = temp_search_key.replace("10g*20", "")
  2257. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  2258. temp_search_key = temp_search_key.replace("10粒", "")
  2259. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  2260. temp_search_key = temp_search_key.replace("12粒", "")
  2261. elif self.search_key == '999咽炎片0.26g*12片*2板':
  2262. temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
  2263. elif self.search_key == '999小儿止咳糖浆120':
  2264. temp_search_key = temp_search_key.replace("120", "")
  2265. elif self.search_key == '999小儿止咳糖浆225':
  2266. temp_search_key = temp_search_key.replace("225", "")
  2267. elif self.search_key == '999小儿感冒颗粒6g*10':
  2268. temp_search_key = temp_search_key.replace("6g*10", "")
  2269. elif self.search_key == '999小儿感冒颗粒6g*24':
  2270. temp_search_key = temp_search_key.replace("6g*24", "")
  2271. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  2272. temp_search_key = temp_search_key.replace("6g*10袋", "")
  2273. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  2274. temp_search_key = temp_search_key.replace("6g*20袋", "")
  2275. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  2276. temp_search_key = temp_search_key.replace("8g*10袋", "")
  2277. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  2278. temp_search_key = temp_search_key.replace("2.5g*10袋", "")
  2279. elif self.search_key == '999感冒清热颗粒12g*18':
  2280. temp_search_key = temp_search_key.replace("12g*18", "")
  2281. if self.search_key == '999抗病毒口服液':
  2282. if '999' not in product_title or temp_search_key not in product_title :
  2283. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2284. continue
  2285. elif '10ml*12' not in product_title and '10ml*10' not in product_title:
  2286. print(f"当前商品名称:{product_title} 不包含10ml*12和10ml*10品规")
  2287. continue
  2288. elif self.search_key == '999抗病毒口服液10ml*12':
  2289. if '999' not in product_title or temp_search_key not in product_title :
  2290. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2291. continue
  2292. elif '10ml*12' not in product_title:
  2293. print(f"当前商品名称:{product_title} 不包含10ml*12品规")
  2294. continue
  2295. elif self.search_key == '999抗病毒口服液10ml*10':
  2296. if '999' not in product_title or temp_search_key not in product_title :
  2297. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2298. continue
  2299. elif '10ml*10' not in product_title:
  2300. print(f"当前商品名称:{product_title} 不包含10ml*10品规")
  2301. continue
  2302. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2303. if '999' not in product_title or temp_search_key not in product_title :
  2304. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2305. continue
  2306. elif '30' not in product_title:
  2307. print(f"当前商品名称:{product_title} 不包含30品规")
  2308. continue
  2309. elif self.search_key == '999复方感冒灵颗粒':
  2310. if '999' not in product_title or temp_search_key not in product_title :
  2311. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2312. continue
  2313. elif '14g*15' not in product_title and '14g*9' not in product_title:
  2314. print(f"当前商品名称:{product_title} 不包含14g*15 和 14g*9品规")
  2315. continue
  2316. elif self.search_key == '999养胃舒颗粒':
  2317. if '999' not in product_title or temp_search_key not in product_title :
  2318. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2319. continue
  2320. elif '10g*6' not in product_title:
  2321. print(f"当前商品名称:{product_title} 不包含10g*6品规")
  2322. continue
  2323. elif self.search_key == '999糠酸莫米松凝胶15':
  2324. if '999' not in product_title or temp_search_key not in product_title :
  2325. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2326. continue
  2327. elif '15' not in product_title:
  2328. print(f"当前商品名称:{product_title} 不包含15品规")
  2329. continue
  2330. elif self.search_key == '999复方金银花颗粒10g':
  2331. if '999' not in product_title or temp_search_key not in product_title :
  2332. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2333. continue
  2334. elif '10g' not in product_title:
  2335. print(f"当前商品名称:{product_title} 不包含10g品规")
  2336. continue
  2337. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  2338. if '999' not in product_title or temp_search_key not in product_title :
  2339. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2340. continue
  2341. elif '15g*15' not in product_title:
  2342. print(f"当前商品名称:{product_title} 不包含15g*15品规")
  2343. continue
  2344. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  2345. if '999' not in product_title or temp_search_key not in product_title :
  2346. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2347. continue
  2348. elif '6' not in product_title:
  2349. print(f"当前商品名称:{product_title} 不包含6品规")
  2350. continue
  2351. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  2352. if '999' not in product_title or temp_search_key not in product_title :
  2353. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2354. continue
  2355. elif '50' not in product_title:
  2356. print(f"当前商品名称:{product_title} 不包含50品规")
  2357. continue
  2358. elif self.search_key == '999止泻利颗粒15g*8':
  2359. if '999' not in product_title or temp_search_key not in product_title :
  2360. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2361. continue
  2362. elif '15g*8' not in product_title:
  2363. print(f"当前商品名称:{product_title} 不包含15g*8品规")
  2364. continue
  2365. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  2366. if '999' not in product_title or temp_search_key not in product_title :
  2367. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2368. continue
  2369. elif '30' not in product_title:
  2370. print(f"当前商品名称:{product_title} 不包含30品规")
  2371. continue
  2372. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  2373. if '999' not in product_title or temp_search_key not in product_title :
  2374. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2375. continue
  2376. elif '15' not in product_title:
  2377. print(f"当前商品名称:{product_title} 不包含15品规")
  2378. continue
  2379. elif self.search_key == '999复方苦参肠炎康片12片':
  2380. if '999' not in product_title or temp_search_key not in product_title :
  2381. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2382. continue
  2383. elif '12' not in product_title:
  2384. print(f"当前商品名称:{product_title} 不包含12品规")
  2385. continue
  2386. elif self.search_key == '999强力枇杷露16袋':
  2387. if '999' not in product_title or temp_search_key not in product_title :
  2388. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2389. continue
  2390. elif '16' not in product_title:
  2391. print(f"当前商品名称:{product_title} 不包含16品规")
  2392. continue
  2393. elif self.search_key == '999三蛇胆川贝膏138':
  2394. if '999' not in product_title or temp_search_key not in product_title :
  2395. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2396. continue
  2397. elif '138' not in product_title:
  2398. print(f"当前商品名称:{product_title} 不包含16品规")
  2399. continue
  2400. elif self.search_key == '999速复康布洛芬缓释胶囊':
  2401. if '999' not in product_title or temp_search_key not in product_title :
  2402. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2403. continue
  2404. elif self.search_key == '999维生素C咀嚼片':
  2405. if '999' not in product_title or temp_search_key not in product_title :
  2406. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2407. continue
  2408. elif '80' not in product_title:
  2409. print(f"当前商品名称:{product_title} 不包含80品规")
  2410. elif self.search_key == '999精氨酸布洛芬颗粒':
  2411. if '999' not in product_title or temp_search_key not in product_title :
  2412. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2413. continue
  2414. elif '9' not in product_title:
  2415. print(f"当前商品名称:{product_title} 不包含9品规")
  2416. continue
  2417. elif self.search_key == '999强力枇杷露120ml':
  2418. if '999' not in product_title or temp_search_key not in product_title :
  2419. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2420. continue
  2421. elif '120' not in product_title:
  2422. print(f"当前商品名称:{product_title} 不包含120品规")
  2423. continue
  2424. elif self.search_key == '999强力枇杷露150ml':
  2425. if '999' not in product_title or temp_search_key not in product_title :
  2426. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2427. continue
  2428. elif '150' not in product_title:
  2429. print(f"当前商品名称:{product_title} 不包含120品规")
  2430. continue
  2431. elif self.search_key == '999糠酸莫米松乳膏10g支':
  2432. if '999' not in product_title or temp_search_key not in product_title :
  2433. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2434. continue
  2435. elif '10' not in product_title:
  2436. print(f"当前商品名称:{product_title} 不包含10品规")
  2437. continue
  2438. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  2439. if '999' not in product_title or temp_search_key not in product_title :
  2440. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2441. continue
  2442. elif '20' not in product_title:
  2443. print(f"当前商品名称:{product_title} 不包含20品规")
  2444. continue
  2445. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  2446. if '999' not in product_title or temp_search_key not in product_title :
  2447. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2448. continue
  2449. elif '6' not in product_title:
  2450. print(f"当前商品名称:{product_title} 不包含6品规")
  2451. continue
  2452. elif self.search_key == '999阿奇霉素片':
  2453. if '999' not in product_title or temp_search_key not in product_title :
  2454. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2455. continue
  2456. elif '0.25g*6' not in product_title:
  2457. print(f"当前商品名称:{product_title} 不包含0.25g*6品规")
  2458. continue
  2459. elif self.search_key == '999补脾益肠丸':
  2460. if '999' not in product_title or temp_search_key not in product_title :
  2461. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2462. continue
  2463. elif '6g*15' not in product_title:
  2464. print(f"当前商品名称:{product_title} 不包含6g*15品规")
  2465. continue
  2466. elif self.search_key == '999壮骨关节丸6g*20':
  2467. if '999' not in product_title or temp_search_key not in product_title :
  2468. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2469. continue
  2470. elif '6g*20' not in product_title:
  2471. print(f"当前商品名称:{product_title} 不包含6g*20品规")
  2472. continue
  2473. elif self.search_key == '999正天丸6g*15':
  2474. if '999' not in product_title or temp_search_key not in product_title :
  2475. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2476. continue
  2477. elif '6g*15' not in product_title:
  2478. print(f"当前商品名称:{product_title} 不包含6g*15品规")
  2479. continue
  2480. elif self.search_key == '999感冒灵颗粒':
  2481. if '999' not in product_title or temp_search_key not in product_title :
  2482. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2483. continue
  2484. elif '10g*9' not in product_title:
  2485. print(f"当前商品名称:{product_title} 不包含10g*9品规")
  2486. continue
  2487. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2488. if '999' not in product_title or temp_search_key not in product_title :
  2489. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2490. continue
  2491. elif '20' not in product_title:
  2492. print(f"当前商品名称:{product_title} 不包含20品规")
  2493. continue
  2494. elif self.search_key == '999糠酸莫米松凝胶10':
  2495. if '999' not in product_title or temp_search_key not in product_title :
  2496. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2497. continue
  2498. elif '10' not in product_title:
  2499. print(f"当前商品名称:{product_title} 不包含10品规")
  2500. continue
  2501. elif self.search_key == '999板蓝根颗粒10g*20':
  2502. if '999' not in product_title or temp_search_key not in product_title :
  2503. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2504. continue
  2505. elif '10g*20' not in product_title:
  2506. print(f"当前商品名称:{product_title} 不包含10g*20品规")
  2507. continue
  2508. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  2509. if '999' not in product_title or temp_search_key not in product_title :
  2510. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2511. continue
  2512. elif '10' not in product_title:
  2513. print(f"当前商品名称:{product_title} 不包含10品规")
  2514. continue
  2515. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  2516. if '999' not in product_title or temp_search_key not in product_title :
  2517. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2518. continue
  2519. elif '12' not in product_title:
  2520. print(f"当前商品名称:{product_title} 不包含12品规")
  2521. continue
  2522. elif self.search_key == '999复方氨酚烷胺胶囊':
  2523. if '999' not in product_title or temp_search_key not in product_title :
  2524. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2525. continue
  2526. elif self.search_key == '999咽炎片0.26g*12片*2板':
  2527. if '999' not in product_title or temp_search_key not in product_title :
  2528. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2529. continue
  2530. elif '0.26g*12' not in product_title:
  2531. print(f"当前商品名称:{product_title} 不包含0.26g*12品规")
  2532. continue
  2533. elif self.search_key == '999小儿止咳糖浆120':
  2534. if '999' not in product_title or temp_search_key not in product_title :
  2535. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2536. continue
  2537. elif '120' not in product_title:
  2538. print(f"当前商品名称:{product_title} 不包含120品规")
  2539. continue
  2540. elif self.search_key == '999小儿止咳糖浆225':
  2541. if '999' not in product_title or temp_search_key not in product_title :
  2542. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2543. continue
  2544. elif '225' not in product_title:
  2545. print(f"当前商品名称:{product_title} 不包含225品规")
  2546. continue
  2547. elif self.search_key == '999小儿感冒颗粒6g*10':
  2548. if '999' not in product_title or temp_search_key not in product_title :
  2549. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2550. continue
  2551. elif '6g*10' not in product_title:
  2552. print(f"当前商品名称:{product_title} 不包含6g*10品规")
  2553. continue
  2554. elif self.search_key == '999小儿感冒颗粒6g*24':
  2555. if '999' not in product_title or temp_search_key not in product_title :
  2556. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2557. continue
  2558. elif '6g*24' not in product_title:
  2559. print(f"当前商品名称:{product_title} 不包含6g*24品规")
  2560. continue
  2561. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  2562. if '999' not in product_title or temp_search_key not in product_title :
  2563. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2564. continue
  2565. elif '6g*10' not in product_title:
  2566. print(f"当前商品名称:{product_title} 不包含6g*10品规")
  2567. continue
  2568. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  2569. if '999' not in product_title or temp_search_key not in product_title :
  2570. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2571. continue
  2572. elif '6g*20' not in product_title:
  2573. print(f"当前商品名称:{product_title} 不包含6g*20品规")
  2574. continue
  2575. elif self.search_key == '999感冒灵胶囊':
  2576. if '999' not in product_title or temp_search_key not in product_title :
  2577. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2578. continue
  2579. elif '0.5g*12' not in product_title:
  2580. print(f"当前商品名称:{product_title} 不包含0.5g*12品规")
  2581. continue
  2582. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  2583. if '999' not in product_title or temp_search_key not in product_title :
  2584. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2585. continue
  2586. elif '8g*10' not in product_title:
  2587. print(f"当前商品名称:{product_title} 不包含8g*10品规")
  2588. continue
  2589. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  2590. if '999' not in product_title or temp_search_key not in product_title :
  2591. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2592. continue
  2593. elif '2.5g*10' not in product_title:
  2594. print(f"当前商品名称:{product_title} 不包含2.5g*10品规")
  2595. continue
  2596. elif self.search_key == '999感冒清热颗粒12g*18':
  2597. if '999' not in product_title or temp_search_key not in product_title :
  2598. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2599. continue
  2600. elif '12g*18' not in product_title:
  2601. print(f"当前商品名称:{product_title} 不包含12g*18品规")
  2602. continue
  2603. elif self.search_key == '999藿香正气合剂':
  2604. if '999' not in product_title or temp_search_key not in product_title :
  2605. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2606. continue
  2607. elif '10ml*6' not in product_title and '10ml*10' not in product_title:
  2608. print(f"当前商品名称:{product_title} 不包含10ml*6 和 10ml*10品规")
  2609. continue
  2610. else:
  2611. if '999' not in product_title or temp_search_key not in product_title:
  2612. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2613. continue
  2614. else:
  2615. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  2616. temp_search_key = self.search_key.replace('史达功', '')
  2617. temp_search_key = temp_search_key.replace('120', '')
  2618. if '史达功' not in product_title or temp_search_key not in product_title :
  2619. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2620. continue
  2621. elif '120' not in product_title:
  2622. print(f"当前商品名称:{product_title} 不包含120品规")
  2623. continue
  2624. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  2625. temp_search_key = self.search_key.replace('三九胃泰', '')
  2626. temp_search_key = temp_search_key.replace('8袋', '')
  2627. if '三九胃泰' not in product_title or temp_search_key not in product_title :
  2628. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2629. continue
  2630. elif '8' not in product_title:
  2631. print(f"当前商品名称:{product_title} 不包含8品规")
  2632. continue
  2633. elif self.search_key == '今维多赐多康牌蛋白粉':
  2634. temp_search_key = self.search_key.replace('今维多', '')
  2635. if '今维多' not in product_title or temp_search_key not in product_title :
  2636. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2637. continue
  2638. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  2639. temp_search_key = self.search_key.replace('佳美舒', '')
  2640. temp_search_key = temp_search_key.replace('4', '')
  2641. if '佳美舒' not in product_title or temp_search_key not in product_title :
  2642. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2643. continue
  2644. elif '4' not in product_title and '8' not in product_title:
  2645. print(f"当前商品名称:{product_title} 不包含4和或8品规")
  2646. continue
  2647. elif self.search_key == '三九胃泰颗粒20g*10':
  2648. temp_search_key = self.search_key.replace('20g*10', '')
  2649. if temp_search_key not in product_title :
  2650. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2651. continue
  2652. elif '20g*10' not in product_title:
  2653. print(f"当前商品名称:{product_title} 不包含20g*10品规")
  2654. continue
  2655. elif self.search_key == '三九胃泰颗粒20g*6袋':
  2656. temp_search_key = self.search_key.replace('20g*6袋', '')
  2657. if temp_search_key not in product_title :
  2658. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2659. continue
  2660. elif '20g*6' not in product_title:
  2661. print(f"当前商品名称:{product_title} 不包含20g*6品规")
  2662. continue
  2663. elif self.search_key == '顺峰康王酮康他索乳膏':
  2664. temp_search_key = self.search_key.replace('顺峰康王', '')
  2665. if '顺峰康王' not in product_title or temp_search_key not in product_title :
  2666. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2667. continue
  2668. else:
  2669. if self.search_key not in product_title.replace(' ', ''):
  2670. continue
  2671. elif self.d.xpath(product_tittle_xpath2).exists:
  2672. product_title = self.d.xpath(product_tittle_xpath2).text
  2673. product_title = product_title[1:] if product_title.startswith('0') else product_title
  2674. print(f"product_tittle_xpath2列表当前商品名称:{product_title}")
  2675. if '999' in self.search_key:
  2676. if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2677. temp_search_key = self.search_key.replace('999皮炎平', '')
  2678. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  2679. temp_search_key = self.search_key.replace('999必无忧', '')
  2680. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  2681. temp_search_key = self.search_key.replace('999必无忧', '')
  2682. elif self.search_key == '999速复康布洛芬缓释胶囊':
  2683. temp_search_key = self.search_key.replace('999速复康', '')
  2684. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  2685. temp_search_key = self.search_key.replace('999选平', '')
  2686. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2687. temp_search_key = self.search_key.replace('999皮炎平', '')
  2688. else:
  2689. temp_search_key = self.search_key.replace('999', '')
  2690. if self.search_key == '999糠酸莫米松凝胶15':
  2691. temp_search_key = temp_search_key.replace('15', '')
  2692. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2693. temp_search_key = temp_search_key.replace('30', '')
  2694. elif self.search_key == '999复方金银花颗粒10g':
  2695. temp_search_key = temp_search_key.replace('10g', '')
  2696. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  2697. temp_search_key = temp_search_key.replace('15g*15袋/盒', '')
  2698. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  2699. temp_search_key = temp_search_key.replace('6粒', '')
  2700. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  2701. temp_search_key = temp_search_key.replace('50', '')
  2702. elif self.search_key == '999止泻利颗粒15g*8':
  2703. temp_search_key = temp_search_key.replace('15g*8', '')
  2704. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  2705. temp_search_key = temp_search_key.replace('30', '')
  2706. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  2707. temp_search_key = temp_search_key.replace('15g', '')
  2708. elif self.search_key == '999复方苦参肠炎康片12片':
  2709. temp_search_key = temp_search_key.replace("12片", "")
  2710. elif self.search_key == '999强力枇杷露16袋':
  2711. temp_search_key = temp_search_key.replace("16袋", "")
  2712. elif self.search_key == '999三蛇胆川贝膏138':
  2713. temp_search_key = temp_search_key.replace("138", "")
  2714. elif self.search_key == '999抗病毒口服液10ml*12':
  2715. temp_search_key = temp_search_key.replace("10ml*12", "")
  2716. elif self.search_key == '999抗病毒口服液10ml*10':
  2717. temp_search_key = temp_search_key.replace("10ml*10", "")
  2718. elif self.search_key == '999强力枇杷露120ml':
  2719. temp_search_key = temp_search_key.replace("120ml", "")
  2720. elif self.search_key == '999强力枇杷露150ml':
  2721. temp_search_key = temp_search_key.replace("150ml", "")
  2722. elif self.search_key == '999糠酸莫米松乳膏10g支':
  2723. temp_search_key = temp_search_key.replace("10g支", "")
  2724. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  2725. temp_search_key = temp_search_key.replace("20g", "")
  2726. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  2727. temp_search_key = temp_search_key.replace("(无糖)6g", "")
  2728. elif self.search_key == '999壮骨关节丸6g*20':
  2729. temp_search_key = temp_search_key.replace("6g*20", "")
  2730. elif self.search_key == '999正天丸6g*15':
  2731. temp_search_key = temp_search_key.replace("6g*15", "")
  2732. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2733. temp_search_key = temp_search_key.replace("20", "")
  2734. elif self.search_key == '999糠酸莫米松凝胶10':
  2735. temp_search_key = temp_search_key.replace("10", "")
  2736. elif self.search_key == '999板蓝根颗粒10g*20':
  2737. temp_search_key = temp_search_key.replace("10g*20", "")
  2738. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  2739. temp_search_key = temp_search_key.replace("10粒", "")
  2740. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  2741. temp_search_key = temp_search_key.replace("12粒", "")
  2742. elif self.search_key == '999咽炎片0.26g*12片*2板':
  2743. temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
  2744. elif self.search_key == '999小儿止咳糖浆120':
  2745. temp_search_key = temp_search_key.replace("120", "")
  2746. elif self.search_key == '999小儿止咳糖浆225':
  2747. temp_search_key = temp_search_key.replace("225", "")
  2748. elif self.search_key == '999小儿感冒颗粒6g*10':
  2749. temp_search_key = temp_search_key.replace("6g*10", "")
  2750. elif self.search_key == '999小儿感冒颗粒6g*24':
  2751. temp_search_key = temp_search_key.replace("6g*24", "")
  2752. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  2753. temp_search_key = temp_search_key.replace("6g*10袋", "")
  2754. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  2755. temp_search_key = temp_search_key.replace("6g*20袋", "")
  2756. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  2757. temp_search_key = temp_search_key.replace("8g*10袋", "")
  2758. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  2759. temp_search_key = temp_search_key.replace("2.5g*10袋", "")
  2760. elif self.search_key == '999感冒清热颗粒12g*18':
  2761. temp_search_key = temp_search_key.replace("12g*18", "")
  2762. if self.search_key == '999抗病毒口服液':
  2763. if '999' not in product_title or temp_search_key not in product_title :
  2764. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2765. continue
  2766. elif '10ml*12' not in product_title and '10ml*10' not in product_title:
  2767. print(f"当前商品名称:{product_title} 不包含10ml*12和10ml*10品规")
  2768. continue
  2769. elif self.search_key == '999抗病毒口服液10ml*12':
  2770. if '999' not in product_title or temp_search_key not in product_title :
  2771. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2772. continue
  2773. elif '10ml*12' not in product_title:
  2774. print(f"当前商品名称:{product_title} 不包含10ml*12品规")
  2775. continue
  2776. elif self.search_key == '999抗病毒口服液10ml*10':
  2777. if '999' not in product_title or temp_search_key not in product_title :
  2778. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2779. continue
  2780. elif '10ml*10' not in product_title:
  2781. print(f"当前商品名称:{product_title} 不包含10ml*10品规")
  2782. continue
  2783. elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
  2784. if '999' not in product_title or temp_search_key not in product_title :
  2785. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2786. continue
  2787. elif '30' not in product_title:
  2788. print(f"当前商品名称:{product_title} 不包含30品规")
  2789. continue
  2790. elif self.search_key == '999复方感冒灵颗粒':
  2791. if '999' not in product_title or temp_search_key not in product_title :
  2792. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2793. continue
  2794. elif '14g*15' not in product_title and '14g*9' not in product_title:
  2795. print(f"当前商品名称:{product_title} 不包含14g*15 和 14g*9品规")
  2796. continue
  2797. elif self.search_key == '999养胃舒颗粒':
  2798. if '999' not in product_title or temp_search_key not in product_title :
  2799. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2800. continue
  2801. elif '10g*6' not in product_title:
  2802. print(f"当前商品名称:{product_title} 不包含10g*6品规")
  2803. continue
  2804. elif self.search_key == '999糠酸莫米松凝胶15':
  2805. if '999' not in product_title or temp_search_key not in product_title :
  2806. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2807. continue
  2808. elif '15' not in product_title:
  2809. print(f"当前商品名称:{product_title} 不包含15品规")
  2810. continue
  2811. elif self.search_key == '999复方金银花颗粒10g':
  2812. if '999' not in product_title or temp_search_key not in product_title :
  2813. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2814. continue
  2815. elif '10g' not in product_title:
  2816. print(f"当前商品名称:{product_title} 不包含10g品规")
  2817. continue
  2818. elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
  2819. if '999' not in product_title or temp_search_key not in product_title :
  2820. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2821. continue
  2822. elif '15g*15' not in product_title:
  2823. print(f"当前商品名称:{product_title} 不包含15g*15品规")
  2824. continue
  2825. elif self.search_key == '999复方氨酚烷胺胶囊6粒':
  2826. if '999' not in product_title or temp_search_key not in product_title :
  2827. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2828. continue
  2829. elif '6' not in product_title:
  2830. print(f"当前商品名称:{product_title} 不包含6品规")
  2831. continue
  2832. elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
  2833. if '999' not in product_title or temp_search_key not in product_title :
  2834. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2835. continue
  2836. elif '50' not in product_title:
  2837. print(f"当前商品名称:{product_title} 不包含50品规")
  2838. continue
  2839. elif self.search_key == '999止泻利颗粒15g*8':
  2840. if '999' not in product_title or temp_search_key not in product_title :
  2841. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2842. continue
  2843. elif '15g*8' not in product_title:
  2844. print(f"当前商品名称:{product_title} 不包含15g*8品规")
  2845. continue
  2846. elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
  2847. if '999' not in product_title or temp_search_key not in product_title :
  2848. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2849. continue
  2850. elif '30' not in product_title:
  2851. print(f"当前商品名称:{product_title} 不包含30品规")
  2852. continue
  2853. elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
  2854. if '999' not in product_title or temp_search_key not in product_title :
  2855. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2856. continue
  2857. elif '15' not in product_title:
  2858. print(f"当前商品名称:{product_title} 不包含15品规")
  2859. continue
  2860. elif self.search_key == '999复方苦参肠炎康片12片':
  2861. if '999' not in product_title or temp_search_key not in product_title :
  2862. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2863. continue
  2864. elif '12' not in product_title:
  2865. print(f"当前商品名称:{product_title} 不包含12品规")
  2866. continue
  2867. elif self.search_key == '999强力枇杷露16袋':
  2868. if '999' not in product_title or temp_search_key not in product_title :
  2869. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2870. continue
  2871. elif '16' not in product_title:
  2872. print(f"当前商品名称:{product_title} 不包含16品规")
  2873. continue
  2874. elif self.search_key == '999三蛇胆川贝膏138':
  2875. if '999' not in product_title or temp_search_key not in product_title :
  2876. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2877. continue
  2878. elif '138' not in product_title:
  2879. print(f"当前商品名称:{product_title} 不包含138品规")
  2880. elif self.search_key == '999速复康布洛芬缓释胶囊':
  2881. if '999' not in product_title or temp_search_key not in product_title :
  2882. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  2883. continue
  2884. elif self.search_key == '999维生素C咀嚼片':
  2885. if '999' not in product_title or temp_search_key not in product_title :
  2886. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2887. continue
  2888. elif '80' not in product_title:
  2889. print(f"当前商品名称:{product_title} 不包含80品规")
  2890. continue
  2891. elif self.search_key == '999精氨酸布洛芬颗粒':
  2892. if '999' not in product_title or temp_search_key not in product_title :
  2893. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2894. continue
  2895. elif '9' not in product_title:
  2896. print(f"当前商品名称:{product_title} 不包含9品规")
  2897. continue
  2898. elif self.search_key == '999强力枇杷露120ml':
  2899. if '999' not in product_title or temp_search_key not in product_title :
  2900. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2901. continue
  2902. elif '120' not in product_title:
  2903. print(f"当前商品名称:{product_title} 不包含120品规")
  2904. continue
  2905. elif self.search_key == '999强力枇杷露150ml':
  2906. if '999' not in product_title or temp_search_key not in product_title :
  2907. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2908. continue
  2909. elif '150' not in product_title:
  2910. print(f"当前商品名称:{product_title} 不包含150品规")
  2911. continue
  2912. elif self.search_key == '999糠酸莫米松乳膏10g支':
  2913. if '999' not in product_title or temp_search_key not in product_title :
  2914. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2915. continue
  2916. elif '10' not in product_title:
  2917. print(f"当前商品名称:{product_title} 不包含10品规")
  2918. continue
  2919. elif self.search_key == '999选平硝酸咪康唑乳膏20g':
  2920. if '999' not in product_title or temp_search_key not in product_title :
  2921. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2922. continue
  2923. elif '20' not in product_title:
  2924. print(f"当前商品名称:{product_title} 不包含20品规")
  2925. continue
  2926. elif self.search_key == '999感冒清热颗粒(无糖)6g':
  2927. if '999' not in product_title or temp_search_key not in product_title :
  2928. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2929. continue
  2930. elif '6' not in product_title:
  2931. print(f"当前商品名称:{product_title} 不包含6品规")
  2932. continue
  2933. elif self.search_key == '999阿奇霉素片':
  2934. if '999' not in product_title or temp_search_key not in product_title :
  2935. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2936. continue
  2937. elif '0.25g*6' not in product_title:
  2938. print(f"当前商品名称:{product_title} 不包含0.25g*6品规")
  2939. continue
  2940. elif self.search_key == '999补脾益肠丸':
  2941. if '999' not in product_title or temp_search_key not in product_title :
  2942. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2943. continue
  2944. elif '6g*15' not in product_title:
  2945. print(f"当前商品名称:{product_title} 不包含6g*15品规")
  2946. continue
  2947. elif self.search_key == '999壮骨关节丸6g*20':
  2948. if '999' not in product_title or temp_search_key not in product_title :
  2949. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2950. continue
  2951. elif '6g*20' not in product_title:
  2952. print(f"当前商品名称:{product_title} 不包含6g*20品规")
  2953. continue
  2954. elif self.search_key == '999正天丸6g*15':
  2955. if '999' not in product_title or temp_search_key not in product_title :
  2956. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2957. continue
  2958. elif '6g*15' not in product_title:
  2959. print(f"当前商品名称:{product_title} 不包含6g*15品规")
  2960. continue
  2961. elif self.search_key == '999感冒灵颗粒':
  2962. if '999' not in product_title or temp_search_key not in product_title :
  2963. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2964. continue
  2965. elif '10g*9' not in product_title:
  2966. print(f"当前商品名称:{product_title} 不包含10g*9品规")
  2967. continue
  2968. elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
  2969. if '999' not in product_title or temp_search_key not in product_title :
  2970. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2971. continue
  2972. elif '20' not in product_title:
  2973. print(f"当前商品名称:{product_title} 不包含20品规")
  2974. continue
  2975. elif self.search_key == '999糠酸莫米松凝胶10':
  2976. if '999' not in product_title or temp_search_key not in product_title :
  2977. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2978. continue
  2979. elif '10' not in product_title:
  2980. print(f"当前商品名称:{product_title} 不包含10品规")
  2981. continue
  2982. elif self.search_key == '999板蓝根颗粒10g*20':
  2983. if '999' not in product_title or temp_search_key not in product_title :
  2984. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2985. continue
  2986. elif '10g*20' not in product_title:
  2987. print(f"当前商品名称:{product_title} 不包含10g*20品规")
  2988. continue
  2989. elif self.search_key == '999复方氨酚烷胺胶囊10粒':
  2990. if '999' not in product_title or temp_search_key not in product_title :
  2991. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2992. continue
  2993. elif '10' not in product_title:
  2994. print(f"当前商品名称:{product_title} 不包含10品规")
  2995. continue
  2996. elif self.search_key == '999复方氨酚烷胺胶囊12粒':
  2997. if '999' not in product_title or temp_search_key not in product_title :
  2998. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  2999. continue
  3000. elif '12' not in product_title:
  3001. print(f"当前商品名称:{product_title} 不包含12品规")
  3002. continue
  3003. elif self.search_key == '999复方氨酚烷胺胶囊':
  3004. if '999' not in product_title or temp_search_key not in product_title :
  3005. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3006. continue
  3007. elif self.search_key == '999咽炎片0.26g*12片*2板':
  3008. if '999' not in product_title or temp_search_key not in product_title :
  3009. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3010. continue
  3011. elif '0.26g*12' not in product_title:
  3012. print(f"当前商品名称:{product_title} 不包含0.26g*12品规")
  3013. continue
  3014. elif self.search_key == '999小儿止咳糖浆120':
  3015. if '999' not in product_title or temp_search_key not in product_title :
  3016. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3017. continue
  3018. elif '120' not in product_title:
  3019. print(f"当前商品名称:{product_title} 不包含120品规")
  3020. continue
  3021. elif self.search_key == '999小儿止咳糖浆225':
  3022. if '999' not in product_title or temp_search_key not in product_title :
  3023. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3024. continue
  3025. elif '225' not in product_title:
  3026. print(f"当前商品名称:{product_title} 不包含225品规")
  3027. continue
  3028. elif self.search_key == '999小儿感冒颗粒6g*10':
  3029. if '999' not in product_title or temp_search_key not in product_title :
  3030. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3031. continue
  3032. elif '6g*10' not in product_title:
  3033. print(f"当前商品名称:{product_title} 不包含6g*10品规")
  3034. continue
  3035. elif self.search_key == '999小儿感冒颗粒6g*24':
  3036. if '999' not in product_title or temp_search_key not in product_title :
  3037. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3038. continue
  3039. elif '6g*24' not in product_title:
  3040. print(f"当前商品名称:{product_title} 不包含6g*24品规")
  3041. continue
  3042. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
  3043. if '999' not in product_title or temp_search_key not in product_title :
  3044. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3045. continue
  3046. elif '6g*10' not in product_title:
  3047. print(f"当前商品名称:{product_title} 不包含6g*10品规")
  3048. continue
  3049. elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
  3050. if '999' not in product_title or temp_search_key not in product_title :
  3051. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3052. continue
  3053. elif '6g*20' not in product_title:
  3054. print(f"当前商品名称:{product_title} 不包含6g*20品规")
  3055. continue
  3056. elif self.search_key == '999感冒灵胶囊':
  3057. if '999' not in product_title or temp_search_key not in product_title :
  3058. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3059. continue
  3060. elif '0.5g*12' not in product_title:
  3061. print(f"当前商品名称:{product_title} 不包含0.5g*12品规")
  3062. continue
  3063. elif self.search_key == '999小儿咽扁颗粒8g*10袋':
  3064. if '999' not in product_title or temp_search_key not in product_title :
  3065. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3066. continue
  3067. elif '8g*10' not in product_title:
  3068. print(f"当前商品名称:{product_title} 不包含8g*10品规")
  3069. continue
  3070. elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
  3071. if '999' not in product_title or temp_search_key not in product_title :
  3072. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3073. continue
  3074. elif '2.5g*10' not in product_title:
  3075. print(f"当前商品名称:{product_title} 不包含2.5g*10品规")
  3076. continue
  3077. elif self.search_key == '999感冒清热颗粒12g*18':
  3078. if '999' not in product_title or temp_search_key not in product_title :
  3079. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3080. continue
  3081. elif '12g*18' not in product_title:
  3082. print(f"当前商品名称:{product_title} 不包含12g*18品规")
  3083. continue
  3084. elif self.search_key == '999藿香正气合剂':
  3085. if '999' not in product_title or temp_search_key not in product_title :
  3086. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3087. continue
  3088. elif '10ml*6' not in product_title and '10ml*10' not in product_title:
  3089. print(f"当前商品名称:{product_title} 不包含10ml*6 和 10ml*10品规")
  3090. continue
  3091. else:
  3092. if '999' not in product_title or temp_search_key not in product_title:
  3093. print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  3094. continue
  3095. # if '999' not in product_title or temp_search_key not in product_title:
  3096. # print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
  3097. # continue
  3098. else:
  3099. if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
  3100. temp_search_key = self.search_key.replace('史达功', '')
  3101. temp_search_key = temp_search_key.replace('120', '')
  3102. if '史达功' not in product_title or temp_search_key not in product_title :
  3103. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3104. continue
  3105. elif '120' not in product_title:
  3106. print(f"当前商品名称:{product_title} 不包含120品规")
  3107. continue
  3108. elif self.search_key == '三九胃泰养胃舒颗粒8袋':
  3109. temp_search_key = self.search_key.replace('三九胃泰', '')
  3110. temp_search_key = temp_search_key.replace('8袋', '')
  3111. if '三九胃泰' not in product_title or temp_search_key not in product_title :
  3112. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3113. continue
  3114. elif '8' not in product_title:
  3115. print(f"当前商品名称:{product_title} 不包含8品规")
  3116. continue
  3117. elif self.search_key == '今维多赐多康牌蛋白粉':
  3118. temp_search_key = self.search_key.replace('今维多', '')
  3119. if '今维多' not in product_title or temp_search_key not in product_title :
  3120. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3121. continue
  3122. elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
  3123. temp_search_key = self.search_key.replace('佳美舒', '')
  3124. temp_search_key = temp_search_key.replace('4', '')
  3125. if '佳美舒' not in product_title or temp_search_key not in product_title :
  3126. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3127. continue
  3128. elif '4' not in product_title and '8' not in product_title:
  3129. print(f"当前商品名称:{product_title} 不包含4和或8品规")
  3130. continue
  3131. elif self.search_key == '三九胃泰颗粒20g*10':
  3132. temp_search_key = self.search_key.replace('20g*10', '')
  3133. if temp_search_key not in product_title :
  3134. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3135. continue
  3136. elif '20g*10' not in product_title:
  3137. print(f"当前商品名称:{product_title} 不包含20g*10品规")
  3138. continue
  3139. elif self.search_key == '三九胃泰颗粒20g*6袋':
  3140. temp_search_key = self.search_key.replace('20g*6袋', '')
  3141. if temp_search_key not in product_title :
  3142. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3143. continue
  3144. elif '20g*6' not in product_title:
  3145. print(f"当前商品名称:{product_title} 不包含20g*6品规")
  3146. continue
  3147. elif self.search_key == '顺峰康王酮康他索乳膏':
  3148. temp_search_key = self.search_key.replace('顺峰康王', '')
  3149. if '顺峰康王' not in product_title or temp_search_key not in product_title :
  3150. print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
  3151. continue
  3152. else:
  3153. if self.search_key not in product_title.replace(' ', ''):
  3154. continue
  3155. else:
  3156. print(f"列表当前商品名称不存在")
  3157. #价格
  3158. price_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3159. price_xpath3 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3160. if self.d.xpath(price_xpath).exists:
  3161. price_str = self.d.xpath(price_xpath).text
  3162. print(f"price_xpath列表当前商品价格:{price_str}")
  3163. if price_str:
  3164. price = float(re.search('[\d\.]+', price_str).group())
  3165. elif self.d.xpath(price_xpath3).exists:
  3166. price_str = self.d.xpath(price_xpath3).text
  3167. print(f"price_xpath3列表当前商品价格:{price_str}")
  3168. if price_str:
  3169. price = float(re.search('[\d\.]+', price_str).group())
  3170. else:
  3171. price_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
  3172. if self.d.xpath(price_xpath2).exists:
  3173. price_str = self.d.xpath(price_xpath2).text
  3174. print(f"price_xpath2列表当前商品价格:{price_str}")
  3175. if price_str:
  3176. price = float(re.search('[\d\.]+', price_str).group())
  3177. else:
  3178. print(f"列表当前商品价格不存在")
  3179. # price_str = self.d.xpath(f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]//*[starts-with(@text,"¥")]').text
  3180. print(f'列表获取到价格:{price}')
  3181. #店铺名称的xpath
  3182. shop_name_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.FrameLayout[last()]/android.widget.TextView[1]'
  3183. shop_name_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.FrameLayout[last()]/android.widget.TextView[1]'
  3184. if self.d.xpath(shop_name_xpath).exists:
  3185. shop_name = self.d.xpath(shop_name_xpath).text
  3186. print(f"shop_name_xpath列表当前商品店铺名称:{shop_name}")
  3187. elif self.d.xpath(shop_name_xpath2).exists:
  3188. shop_name = self.d.xpath(shop_name_xpath2).text
  3189. print(f"shop_name_xpath2列表当前商品店铺名称:{shop_name}")
  3190. else:
  3191. print(f"列表当前商品店铺名称不存在")
  3192. #如果商品的名称、价格和生产厂家都不存在则直接下一条数据。 跳过一些不是商品的数据。
  3193. if product_title == '' and price == '' and shop_name == '':
  3194. continue
  3195. scrape_date = self.get_current_date()
  3196. if product_title and price and shop_name:
  3197. #判断数据表中是否存在
  3198. dup_data = {'product': product_title, 'min_price': price, 'shop': shop_name, 'scrape_date': scrape_date,'platform': '美团'}
  3199. if self.data_is_exists(dup_data):
  3200. print('列表存在相同数据不入库')
  3201. continue
  3202. self.safe_exec(drug_one.click)
  3203. print('点击目标药品完毕')
  3204. time.sleep(2)
  3205. # 采集药品信息
  3206. try:
  3207. # self.integrate_data()
  3208. self.safe_exec(self.integrate_data)
  3209. # 检测下是否回退到列表页
  3210. if self.distinct_target():
  3211. print('回退到列表页', True)
  3212. else:
  3213. if self.d.xpath('//*[@text="搜索"]').exists:
  3214. print("检测到搜索按钮,重新开始采集流程")
  3215. if retry_count < MAX_RETRY:
  3216. # 停止当前监控线程
  3217. self.monitor.stop()
  3218. self.monitor.join()
  3219. # 递归重启采集
  3220. return self.main(device_id, retry_count+1)
  3221. else:
  3222. print("超过最大重试次数,终止程序")
  3223. return
  3224. else:
  3225. print("无法恢复页面,终止采集")
  3226. return
  3227. # print('回退到列表页失败,终止采集')
  3228. # return
  3229. time.sleep(self.get_sleep_time())
  3230. spider_no += 1
  3231. except Exception as e:
  3232. print(f'采集药品详情数据出错:{e}')
  3233. #增加阻塞的方法:
  3234. if not self.distinct_target():
  3235. for i in range(1):
  3236. self.swipe_back(1)
  3237. # 最外部有个定位按钮
  3238. if self.distinct_target():
  3239. break
  3240. if i == 0 and not self.distinct_target():
  3241. print('页面出错,退出采集')
  3242. return
  3243. else:
  3244. continue
  3245. if self.d.xpath('//*[@text="已经到底啦"]').exists:
  3246. print('已经到达列表页最底部')
  3247. return
  3248. search_list = self.d.xpath('//android.support.v7.widget.RecyclerView').info
  3249. bounds = search_list['bounds']
  3250. #print('搜索列表高度', 1400 + bounds['top'] - bounds['bottom'])
  3251. # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'])
  3252. # 计算滑动距离
  3253. scroll_distance = bounds['bottom'] - bounds['top'] # 正数
  3254. start_y = 1600
  3255. end_y = start_y - scroll_distance # 向上滑动,y 坐标减小
  3256. # 确保 end_y 不小于 0
  3257. end_y = max(end_y, 304) # 留出一点边距,避免滑出屏幕
  3258. # print('滑动起点 y:', start_y, '终点 y:', end_y)
  3259. # self.d.swipe(200, start_y, 200, end_y, 0.4)
  3260. print('开始滑动')
  3261. self.d.drag(300, 1400, 300, 400, 1)
  3262. # self.safe_exec(self.d.drag, 300, 1400, 300, 400, 1)
  3263. print('滑动结束')
  3264. #print('搜索列表高度', 1400 + bounds['top'] - bounds['bottom'])
  3265. # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'])
  3266. # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'], 0.4)
  3267. time.sleep(self.get_sleep_time())
  3268. finally:
  3269. # 确保监控线程被停止
  3270. self.monitor.stop()
  3271. self.monitor.join()
  3272. def unitest(self):
  3273. """
  3274. 单元测试
  3275. :return:
  3276. """
  3277. save_data = {
  3278. 'product':"[昆中药]舒肝颗粒(低糖型)",
  3279. 'min_price': 14.0,
  3280. 'manufacture_date': '',
  3281. 'expiry_date': '36个月',
  3282. 'shop': '美团自营大药房(快递电商)',
  3283. 'business_license_company': '',
  3284. 'province': '',
  3285. 'city': '',
  3286. 'manufacturer': '昆明中药厂有限公司',
  3287. 'specification': '3g*16袋/盒',
  3288. 'approval_number': '国药准字Z53021161',
  3289. 'product_link': '',
  3290. 'scrape_date': '2025/07/09',
  3291. 'scrape_province': '广东',
  3292. 'availability': '',
  3293. 'credit_code': '',
  3294. 'platform': '美团'
  3295. }
  3296. self.save_to_database(save_data)
  3297. time.sleep(100000)
  3298. pass
  3299. # retrieve database
  3300. def get_retrieve_mysql():
  3301. """
  3302. 建立远端连接并返回一个到数据库的连接对象
  3303. """
  3304. import pymysql
  3305. return pymysql.connect(
  3306. host='39.108.116.125', # 修改后的主机
  3307. port=3306, # 添加端口号
  3308. user='drug_retrieve', # 修改后的用户名
  3309. password='Pem287cwM58jNpe2', # 修改后的密码
  3310. db='drug_retrieve', # 修改后的数据库名
  3311. charset='utf8mb4'
  3312. )
  3313. def main():
  3314. #从配置的系统里面读取采集用到的设备和搜索关键词
  3315. #1、数据库的连接,从数据库中获取采集品规和设备adb码,启动程序进行采集,如果配置了采集时间,还需要支持到了时间终止采集,如果配置了采集的页数,需要滑动到指定的页数后再进行采集
  3316. #2、代码要支持多线程(线程池)的管理,每个线程有自己的生命周期。
  3317. #获取未开始的美团平台的采集任务
  3318. retrieve_conn = get_retrieve_mysql()
  3319. cursor = retrieve_conn.cursor()
  3320. query = "SELECT id,collect_equipment_id,product_name,start_page,end_page FROM retrieve_collect_task_allocate WHERE status = 1 AND platform = 4"
  3321. cursor.execute(query)
  3322. result = cursor.fetchone()
  3323. if result:
  3324. collect_equipment_id = result [1]
  3325. product_name = result[2]
  3326. start_page = result[3]
  3327. end_page = result[4]
  3328. print(f"collect_equipment_id={collect_equipment_id}")
  3329. print(f"product_name={product_name}")
  3330. if collect_equipment_id == 0:
  3331. print("设备id不存在")
  3332. return
  3333. if product_name == '':
  3334. print("采集关键字获取失败")
  3335. return
  3336. #通过 collect_equipment_id 获取设别adb码
  3337. device_query = "SELECT device_id FROM retrieve_collect_equipment WHERE id = %s and status = 0"
  3338. cursor.execute(device_query, (collect_equipment_id))
  3339. device_result = cursor.fetchone()
  3340. if device_result:
  3341. device_id = device_result[0]
  3342. print(f"device_id={device_id}")
  3343. else:
  3344. # self.loggerMT.info("没有可用的设备进行数据采集")
  3345. print("没有可用的设备进行数据采集")
  3346. return
  3347. else:
  3348. # self.loggerMT.info("MT 没有要采集的品规")
  3349. print("MT 没有要采集的品规")
  3350. return
  3351. key = product_name
  3352. try:
  3353. mt = MT(key) # 用当前关键字实例化
  3354. mt.main(device_id,start_page,end_page) # 执行一次完整采集
  3355. logging.info(f'关键字 {key} 本轮采集完成')
  3356. except Exception as e:
  3357. # 发生异常直接跳过该关键字,继续下一轮
  3358. logging.exception(f'关键字 {key} 采集异常:{e}')
  3359. finally:
  3360. # 关闭当前 MT 实例资源(如有需要)
  3361. if hasattr(mt, 'close'):
  3362. mt.close()
  3363. # keys_list = [
  3364. # # '三九胃泰颗粒',
  3365. # # '999小柴胡颗粒',
  3366. # # '999强力枇杷露',
  3367. # # '[999]感冒清热颗粒',
  3368. # # '999抗病毒口服液',
  3369. # # '999皮炎平',
  3370. # # '999盐酸特比萘芬乳膏',
  3371. # # '999盐酸特比萘芬',
  3372. # # '999藿香正气合剂',
  3373. # # '999必无忧盐酸特比萘芬乳膏',
  3374. # # '999复方感冒灵颗粒',
  3375. # # '999糠酸莫米松凝胶',
  3376. # # '999铝碳酸镁咀嚼片',
  3377. # # '999阿奇霉素片',
  3378. # # '999选平硝酸咪康唑乳膏',
  3379. # # 按需继续添加,
  3380. # #暂时不需要
  3381. # # '999必无忧盐酸特比萘芬喷雾剂30'
  3382. # # '999冰连清咽'
  3383. # # '999复方金银花颗粒10g'
  3384. # # '999复方苦参肠炎康片12片'
  3385. # # '999强力枇杷露16袋'
  3386. # # '999三蛇胆川贝膏138'
  3387. # # '999维生素ec颗粒'
  3388. # # '三九胃泰养胃舒颗粒8袋'
  3389. # # '999止泻利颗粒15g*8'
  3390. # # '999阿奇霉素片'
  3391. # # '999可调式生理性海水鼻腔喷雾50'
  3392. # # '999小儿止咳糖浆120' #不低于19.8
  3393. # # '999小儿止咳糖浆225' #禁止挂网
  3394. # # '999小儿感冒颗粒6g*10' #不低于24.9
  3395. # # '999小儿感冒颗粒6g*24' #禁止挂网
  3396. # # '999小儿氨酚黄那敏颗粒6g*10袋' #不低于15.8
  3397. # # '999小儿氨酚黄那敏颗粒6g*20袋' #禁止挂网
  3398. # # '999小儿咽扁颗粒8g*10袋' #仅限999官旗店
  3399. # #2025-08-01最新 其中 藿香正气合剂两种规格 10支和6支 抗病毒口服液 12支和18支 蒲地蓝 24片 36片和44片 枇杷露225ml 小柴胡颗粒9袋和15袋 养胃舒 6袋 复方感冒灵颗粒15袋,
  3400. # #曲安奈德益康唑乳膏 30g 葡萄糖酸锌口服溶液 12支 18支 24支和30支,
  3401. # # 1、999止泻利颗粒15g*8 没有数据 2、999维生素ec颗粒 没有数据 3、999三蛇胆川贝膏138 没有数据 4、999强力枇杷露16袋 没有数据 5、999冰连清咽 没有数据
  3402. # # '999藿香正气合剂',
  3403. # # '999糠酸莫米松凝胶15',
  3404. # # '999抗病毒口服液'
  3405. # # '999抗病毒口服液10ml*10'
  3406. # # '999抗病毒口服液10ml*12',
  3407. # # '999蒲地蓝消炎片',
  3408. # # '999强力枇杷露225ml',
  3409. # # '999小柴胡颗粒',
  3410. # # '999养胃舒颗粒',
  3411. # # '999复方感冒灵颗粒',
  3412. # # '999黄芪精',
  3413. # # '999皮炎平曲安奈德益康唑乳膏30',
  3414. # # '999葡萄糖酸锌口服溶液',
  3415. # # '今维多赐多康牌蛋白粉',
  3416. # # '佳美舒阿奇霉素肠溶胶囊4'
  3417. # # '999必无忧盐酸特比萘芬乳膏15g'
  3418. # # '999复方板蓝根颗粒15g*15袋/盒'
  3419. # # '999速复康布洛芬缓释胶囊'
  3420. # # '999维生素C咀嚼片'
  3421. # # '999精氨酸布洛芬颗粒'
  3422. # # '999强力枇杷露120ml'
  3423. # #OTC
  3424. # # '999强力枇杷露150ml'
  3425. # # '999糠酸莫米松乳膏10g支'
  3426. # # '999选平硝酸咪康唑乳膏20g'
  3427. # '999感冒清热颗粒(无糖)6g'
  3428. # # '999银菊清咽颗粒' #只有一条数据
  3429. # # '999补脾益肠丸'
  3430. # # '999壮骨关节丸6g*20'
  3431. # # '999壮骨关节胶囊'
  3432. # # '999正天丸6g*15'
  3433. # # '999正天胶囊'
  3434. # # '三九胃泰胶囊'
  3435. # # '三九胃泰颗粒20g*10'
  3436. # # '三九胃泰颗粒(无糖)2.5g*6' # 没有数据
  3437. # #10.31 new add
  3438. # # '999感冒灵颗粒' #不低于15.5
  3439. # # '999皮炎平复方醋酸地塞米松乳膏20' #不低于12.5
  3440. # # '三九胃泰颗粒20g*6袋' #不低于13.5
  3441. # # '顺峰康王酮康他索乳膏' #包含10g和20g两个规格 10g 不低于7.5 20g 不低于12.5
  3442. # # '999糠酸莫米松凝胶10' #不低于26.9
  3443. # # '999板蓝根颗粒10g*20' #不低于26.9
  3444. # # '999复方氨酚烷胺胶囊12粒' #不低于17.9
  3445. # # '999复方氨酚烷胺胶囊10粒' #禁止挂网
  3446. # # '999复方氨酚烷胺胶囊6粒' #禁止挂网
  3447. # # '999复方氨酚烷胺胶囊'
  3448. # # '999咽炎片0.26g*12片*2板' #不低于13.5
  3449. # # '999感冒灵胶囊' #仅限999官旗店
  3450. # # '999荆防颗粒' #美团没有数据 #禁止挂网
  3451. # # '999小儿感冒宁颗粒2.5g*10袋' #禁止挂网
  3452. # # '999磷酸奥司他韦胶囊75mg*10' #仅限999官旗店
  3453. # # '史达功右美沙芬愈创甘油醚糖浆120' #仅限999官旗店
  3454. # # '999感冒清热颗粒12g*18'
  3455. # ]
  3456. # #美团手机号:
  3457. # # device_id = '21885f5' # 设备序列号
  3458. # # device_id = '2e58510' # 设备序列号
  3459. # # device_id = 'KNNNEMNVWCJZQOLZ'
  3460. # # device_id = 'B6JVE6AYSWU4LRLZ'
  3461. # # device_id = '656DTOPRZDEALZ5X'
  3462. # # device_id = 'GIOFIBRKZTUGJJAE'
  3463. # # device_id = 'fcb3c749'
  3464. # # device_id = 'UCQGF6CQFMU8WKHI'
  3465. # # device_id = '4TZDUGTOAIFMJVGU'
  3466. # # device_id = '95b2c764'
  3467. # # device_id = 'ZDQWUSSWBEDI896T'
  3468. # # device_id = 'R4SKMJPNBQAENRAM'
  3469. # # device_id = '1462a51f'
  3470. # # device_id = '97ae80e0' # 设备序列号
  3471. # # device_id = 'IZTOWWDQT45D49BU'
  3472. # # device_id = 'N7ZXBITOSOGMYXQS'
  3473. # # device_id = '369dcf96'
  3474. # # device_id = 'GQIRKB7LVOONM7VW'
  3475. # device_id = 'T4UCEQGQEEYP65ZL'
  3476. # # device_id = '49L7GMPRVS85LJHE'
  3477. # # device_id = 'WWRO9LTGG6KFGQCM'
  3478. # # device_id = 'DYF67TM7KJ4POJLF'
  3479. # # device_id = 'ea4e4eb8'
  3480. # # device_id = 'U8ONIJJJS4CELVD6'
  3481. # cycle_no = 0 # 轮次计数
  3482. # while True:
  3483. # cycle_no += 1
  3484. # logging.info(f'========== 第 {cycle_no} 轮采集开始 ==========')
  3485. # for idx, key in enumerate(keys_list, 1):
  3486. # logging.info(f'[{idx}/{len(keys_list)}] 开始采集关键字:{key}')
  3487. # try:
  3488. # mt = MT(key) # 用当前关键字实例化
  3489. # mt.main(device_id) # 执行一次完整采集
  3490. # logging.info(f'关键字 {key} 本轮采集完成')
  3491. # except Exception as e:
  3492. # # 发生异常直接跳过该关键字,继续下一轮
  3493. # logging.exception(f'关键字 {key} 采集异常:{e}')
  3494. # finally:
  3495. # # 关闭当前 MT 实例资源(如有需要)
  3496. # if hasattr(mt, 'close'):
  3497. # mt.close()
  3498. # logging.info('本轮全部关键字采集完成,等待 2 小时后下一轮...')
  3499. # time.sleep(1 * 3600) # 2 小时 = 7200 秒
  3500. # keys = '小柴胡颗粒' # 参苓健脾胃颗粒 舒肝颗粒 清肺化痰丸 香砂平胃颗粒 小柴胡颗粒
  3501. # mt = MT(keys) # 参苓健脾胃颗粒 舒肝颗粒 清肺化痰丸 香砂平胃颗粒
  3502. # # mt.main('95b2c764')
  3503. # mt.main('fcb3c749')
  3504. class TimeoutException(Exception):
  3505. pass
  3506. @contextmanager
  3507. def time_limit(seconds):
  3508. """超时上下文管理器"""
  3509. def signal_handler(signum, frame):
  3510. raise TimeoutException("任务执行超时")
  3511. signal.signal(signal.SIGALRM, signal_handler)
  3512. signal.alarm(seconds)
  3513. try:
  3514. yield
  3515. finally:
  3516. signal.alarm(0)
  3517. # 如果需要并行处理(提高效率),可以使用线程池:
  3518. def process_tasks_in_parallel(max_workers=10):
  3519. """使用线程池并行处理多个任务""" """使用线程池并行处理多个任务,每个任务最多执行30分钟"""
  3520. from concurrent.futures import ThreadPoolExecutor, as_completed
  3521. retrieve_conn = get_retrieve_mysql()
  3522. cursor = retrieve_conn.cursor()
  3523. query = """
  3524. SELECT id, collect_equipment_id, product_name, start_page, end_page
  3525. FROM retrieve_collect_task_allocate
  3526. WHERE status = 1 AND platform = 4
  3527. """
  3528. cursor.execute(query)
  3529. results = cursor.fetchall()
  3530. print(f"获取到的任务结果={results}")
  3531. if not results:
  3532. print("MT 没有要采集的品规")
  3533. return
  3534. # 准备任务列表
  3535. tasks = []
  3536. device_map = {}
  3537. for result in results:
  3538. task_id = result[0]
  3539. collect_equipment_id = result[1]
  3540. product_name = result[2]
  3541. start_page = result[3]
  3542. end_page = result[4]
  3543. if collect_equipment_id != 0 and product_name and product_name.strip():
  3544. # 缓存设备查询
  3545. if collect_equipment_id not in device_map:
  3546. device_query = "SELECT device_id FROM retrieve_collect_equipment WHERE id = %s AND status = 0"
  3547. cursor.execute(device_query, (collect_equipment_id,))
  3548. device_result = cursor.fetchone()
  3549. device_map[collect_equipment_id] = device_result[0] if device_result else None
  3550. if device_map[collect_equipment_id]:
  3551. tasks.append({
  3552. 'task_id': task_id,
  3553. 'device_id': device_map[collect_equipment_id],
  3554. 'key': product_name.strip(),
  3555. 'start_page': start_page,
  3556. 'end_page': end_page
  3557. })
  3558. cursor.close()
  3559. retrieve_conn.close()
  3560. if not tasks:
  3561. print("没有有效的采集任务")
  3562. return
  3563. print(f"准备并行处理 {len(tasks)} 个任务")
  3564. def process_single_task(task):
  3565. """处理单个任务的函数""""""处理单个任务的函数,带有30分钟超时限制"""
  3566. start_time = time.time()
  3567. timeout_seconds = 30 * 60 # 30分钟
  3568. try:
  3569. # 检查任务是否已经超时
  3570. elapsed_time = time.time() - start_time
  3571. if elapsed_time >= timeout_seconds:
  3572. raise TimeoutException(f"任务执行已超过 {elapsed_time//60} 分钟")
  3573. mt = MT(task['key'])
  3574. # 使用超时机制执行主任务
  3575. with time_limit(timeout_seconds):
  3576. mt.main(task['device_id'], task['start_page'], task['end_page'])
  3577. # mt.main(task['device_id'], task['start_page'], task['end_page'])
  3578. # logging.info(f"任务 {task['task_id']}: 关键字 {task['key']} 采集完成")
  3579. execution_time = time.time() - start_time
  3580. logging.info(f"任务 {task['task_id']}: 关键字 {task['key']} 采集完成,耗时 {execution_time//60} 分钟 {execution_time%60:.1f} 秒")
  3581. return task['task_id'], True, None, execution_time
  3582. except TimeoutException as e:
  3583. execution_time = time.time() - start_time
  3584. logging.warning(f"任务 {task['task_id']}: 执行超时,已执行 {execution_time//60} 分钟")
  3585. if 'mt' in locals() and hasattr(mt, 'close'):
  3586. mt.close()
  3587. return task['task_id'], False, str(e), execution_time
  3588. except Exception as e:
  3589. execution_time = time.time() - start_time
  3590. logging.exception(f"任务 {task['task_id']}: 采集异常:{e},已执行 {execution_time//60} 分钟")
  3591. if 'mt' in locals() and hasattr(mt, 'close'):
  3592. mt.close()
  3593. return task['task_id'], False, str(e), execution_time
  3594. finally:
  3595. if 'mt' in locals() and hasattr(mt, 'close'):
  3596. mt.close()
  3597. # 使用线程池并行执行
  3598. successful_tasks = 0
  3599. failed_tasks = 0
  3600. with ThreadPoolExecutor(max_workers=max_workers) as executor:
  3601. # 提交所有任务,每个任务最多30分钟
  3602. future_to_task = {
  3603. executor.submit(process_single_task, task): task
  3604. for task in tasks
  3605. }
  3606. # 处理完成的任务
  3607. for future in as_completed(future_to_task):
  3608. task = future_to_task[future]
  3609. try:
  3610. # 设置future的超时时间为30分钟
  3611. task_id, success, error, execution_time = future.result(timeout=30*60)
  3612. total_execution_time += execution_time
  3613. # task_id, success, error = future.result()
  3614. if success:
  3615. successful_tasks += 1
  3616. # print(f"任务 {task_id}: 完成")
  3617. print(f"任务 {task_id}: 完成,耗时 {execution_time//60} 分钟 {execution_time%60:.1f} 秒")
  3618. else:
  3619. failed_tasks += 1
  3620. # print(f"任务 {task_id}: 失败")
  3621. if "超时" in error or "Timeout" in error:
  3622. timeout_tasks += 1
  3623. print(f"任务 {task_id}: 超时失败,已执行 {execution_time//60} 分钟")
  3624. else:
  3625. print(f"任务 {task_id}: 失败,耗时 {execution_time//60} 分钟 {execution_time%60:.1f} 秒,错误:{error}")
  3626. except TimeoutException as e:
  3627. failed_tasks += 1
  3628. timeout_tasks += 1
  3629. print(f"任务 {task['task_id']}: 执行超时 (30分钟限制)")
  3630. except Exception as e:
  3631. failed_tasks += 1
  3632. print(f"任务 {task['task_id']}: 执行异常 {e}")
  3633. avg_time = total_execution_time / (successful_tasks + failed_tasks) if (successful_tasks + failed_tasks) > 0 else 0
  3634. # print(f"\n并行采集完成: 成功 {successful_tasks} 个, 失败 {failed_tasks} 个")
  3635. print(f"\n并行采集完成:")
  3636. print(f"成功: {successful_tasks} 个")
  3637. print(f"失败: {failed_tasks} 个 (其中超时: {timeout_tasks} 个)")
  3638. print(f"平均执行时间: {avg_time//60} 分钟 {avg_time%60:.1f} 秒")
  3639. print(f"总计执行时间: {total_execution_time//60} 分钟 {total_execution_time%60:.1f} 秒")
  3640. if __name__ == '__main__':
  3641. # main()
  3642. process_tasks_in_parallel(max_workers=10) # 可以同时处理3个任务
  3643. # scheduler = BlockingScheduler()
  3644. # scheduler.add_job(main, 'cron', hour=21, minute=30, misfire_grace_time=120)
  3645. # try:
  3646. # scheduler.start()
  3647. # except (KeyboardInterrupt, SystemExit):
  3648. # pass