| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702
70370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761
27712781279128012811282128312841285128612871288128912901291129212931294129512961297129812991300130113021303130413051306130713081309131013111312131313141315131613171318131913201321132213231324132513261327132813291330133113321333133413351336133713381339134013411342134313441345134613471348134913501351135213531354135513561357135813591360136113621363136413651366136713681369137013711372137313741375137613771378137913801381138213831384138513861387138813891390139113921393139413951396139713981399140014011402140314041405140614071408140914101411141214131414141514161417141814191420142114221423142414251426142714281429143014311432143314341435143614371438143914401441144214431444144514461447144814491450145114521453145414551456145714581459146014611462146314641465146614671468146914701471147214731474147514761477147814791480148114821483148414851486148714881489149014911492149314941495149614971498149915001501150215031504150515061507150815091510151115121513151415151516151715181519152015211522152315241525152615271528152915301531153215331534153515361537153815391540154115421543154415451546154715481549155015511552155315541555155615571558155915601561156215631564156515661567156815691570157115721573157415751576157715781579158015811582158315841585158615871588158915901591159215931594159515961597159815991600160116021603160416051606160716081609161016111612161316141615161616171618161916201621162216231624162516261627162816291630163116321633163416351636163716381639164016411642164316441645164616471648164916501651165216531654165516561657165816591660166116621663166416651666166716681669167016711672167316741675167616771678167916801681168216831684168516861687168816891690169116921693169416951696169716981699170017011702170317041705170617071708170917101711171217131714171517161717171817191720172117221723172417251726172717281729173017311732173317341735173617371738173917401741174217431744174517461747174817491750175117521753175417551756175717581759176017611762176317641765176617671768176917701771177217731774177517761
77717781779178017811782178317841785178617871788178917901791179217931794179517961797179817991800180118021803180418051806180718081809181018111812181318141815181618171818181918201821182218231824182518261827182818291830183118321833183418351836183718381839184018411842184318441845184618471848184918501851185218531854185518561857185818591860186118621863186418651866186718681869187018711872187318741875187618771878187918801881188218831884188518861887188818891890189118921893189418951896189718981899190019011902190319041905190619071908190919101911191219131914191519161917191819191920192119221923192419251926192719281929193019311932193319341935193619371938193919401941194219431944194519461947194819491950195119521953195419551956195719581959196019611962196319641965196619671968196919701971197219731974197519761977197819791980198119821983198419851986198719881989199019911992199319941995199619971998199920002001200220032004200520062007200820092010201120122013201420152016201720182019202020212022202320242025202620272028202920302031203220332034203520362037203820392040204120422043204420452046204720482049205020512052205320542055205620572058205920602061206220632064206520662067206820692070207120722073207420752076207720782079208020812082208320842085208620872088208920902091209220932094209520962097209820992100210121022103210421052106210721082109211021112112211321142115211621172118211921202121212221232124212521262127212821292130213121322133213421352136213721382139214021412142214321442145214621472148214921502151215221532154215521562157215821592160216121622163216421652166216721682169217021712172217321742175217621772178217921802181218221832184218521862187218821892190219121922193219421952196219721982199220022012202220322042205220622072208220922102211221222132214221522162217221822192220222122222223222422252226222722282229223022312232223322342235223622372238223922402241224222432244224522462247224822492250225122522253225422552256225722582259226022612262226322642265226622672268226922702271227222732274227522762
27722782279228022812282228322842285228622872288228922902291229222932294229522962297229822992300230123022303230423052306230723082309231023112312231323142315231623172318231923202321232223232324232523262327232823292330233123322333233423352336233723382339234023412342234323442345234623472348234923502351235223532354235523562357235823592360236123622363236423652366236723682369237023712372237323742375237623772378237923802381238223832384238523862387238823892390239123922393239423952396239723982399240024012402240324042405240624072408240924102411241224132414241524162417241824192420242124222423242424252426242724282429243024312432243324342435243624372438243924402441244224432444244524462447244824492450245124522453245424552456245724582459246024612462246324642465246624672468246924702471247224732474247524762477247824792480248124822483248424852486248724882489249024912492249324942495249624972498249925002501250225032504250525062507250825092510251125122513251425152516251725182519252025212522252325242525252625272528252925302531253225332534253525362537253825392540254125422543254425452546254725482549255025512552255325542555255625572558255925602561256225632564256525662567256825692570257125722573257425752576257725782579258025812582258325842585258625872588258925902591259225932594259525962597259825992600260126022603260426052606260726082609261026112612261326142615261626172618261926202621262226232624262526262627262826292630263126322633263426352636263726382639264026412642264326442645264626472648264926502651265226532654265526562657265826592660266126622663266426652666266726682669267026712672267326742675267626772678267926802681268226832684268526862687268826892690269126922693269426952696269726982699270027012702270327042705270627072708270927102711271227132714271527162717271827192720272127222723272427252726272727282729273027312732273327342735273627372738273927402741274227432744274527462747274827492750275127522753275427552756275727582759276027612762276327642765276627672768276927702771277227732774277527762
77727782779278027812782278327842785278627872788278927902791279227932794279527962797279827992800280128022803280428052806280728082809281028112812281328142815281628172818281928202821282228232824282528262827282828292830283128322833283428352836283728382839284028412842284328442845284628472848284928502851285228532854285528562857285828592860286128622863286428652866286728682869287028712872287328742875287628772878287928802881288228832884288528862887288828892890289128922893289428952896289728982899290029012902290329042905290629072908290929102911291229132914291529162917291829192920292129222923292429252926292729282929293029312932293329342935293629372938293929402941294229432944294529462947294829492950295129522953295429552956295729582959296029612962296329642965296629672968296929702971297229732974297529762977297829792980298129822983298429852986298729882989299029912992299329942995299629972998299930003001300230033004300530063007300830093010301130123013301430153016301730183019302030213022302330243025302630273028302930303031303230333034303530363037303830393040304130423043304430453046304730483049305030513052305330543055305630573058305930603061306230633064306530663067306830693070307130723073307430753076307730783079308030813082308330843085308630873088308930903091309230933094309530963097309830993100310131023103310431053106310731083109311031113112311331143115311631173118311931203121312231233124312531263127312831293130313131323133313431353136313731383139314031413142314331443145314631473148314931503151315231533154315531563157315831593160316131623163316431653166316731683169317031713172317331743175317631773178317931803181318231833184318531863187318831893190319131923193319431953196319731983199320032013202320332043205320632073208320932103211321232133214321532163217321832193220322132223223322432253226322732283229323032313232323332343235323632373238323932403241324232433244324532463247324832493250325132523253325432553256325732583259326032613262326332643265326632673268326932703271327232733274327532763
27732783279328032813282328332843285328632873288328932903291329232933294329532963297329832993300330133023303330433053306330733083309331033113312331333143315331633173318331933203321332233233324332533263327332833293330333133323333333433353336333733383339334033413342334333443345334633473348334933503351335233533354335533563357335833593360336133623363336433653366336733683369337033713372337333743375337633773378337933803381338233833384338533863387338833893390339133923393339433953396339733983399340034013402340334043405340634073408340934103411341234133414341534163417341834193420342134223423342434253426342734283429343034313432343334343435343634373438343934403441344234433444344534463447344834493450345134523453345434553456345734583459346034613462346334643465346634673468346934703471347234733474347534763477347834793480348134823483348434853486348734883489349034913492349334943495349634973498349935003501350235033504350535063507350835093510351135123513351435153516351735183519352035213522352335243525352635273528352935303531353235333534353535363537353835393540354135423543354435453546354735483549355035513552355335543555355635573558355935603561356235633564356535663567356835693570357135723573357435753576357735783579358035813582358335843585358635873588358935903591359235933594359535963597359835993600360136023603360436053606360736083609361036113612361336143615361636173618361936203621362236233624362536263627362836293630363136323633363436353636363736383639364036413642364336443645364636473648364936503651365236533654365536563657365836593660366136623663366436653666366736683669367036713672367336743675367636773678367936803681368236833684368536863687368836893690369136923693369436953696369736983699370037013702370337043705370637073708370937103711371237133714371537163717371837193720372137223723372437253726372737283729373037313732373337343735373637373738373937403741374237433744374537463747374837493750375137523753375437553756375737583759376037613762376337643765376637673768376937703771377237733774377537763
77737783779378037813782378337843785378637873788378937903791379237933794379537963797379837993800380138023803380438053806380738083809381038113812381338143815381638173818381938203821382238233824382538263827382838293830383138323833383438353836383738383839384038413842384338443845384638473848384938503851385238533854385538563857385838593860386138623863386438653866386738683869387038713872387338743875387638773878387938803881388238833884388538863887388838893890389138923893389438953896389738983899390039013902390339043905390639073908390939103911391239133914391539163917391839193920392139223923392439253926392739283929393039313932393339343935393639373938393939403941394239433944394539463947394839493950395139523953395439553956395739583959396039613962396339643965396639673968396939703971397239733974397539763977397839793980398139823983398439853986398739883989399039913992399339943995399639973998399940004001400240034004400540064007400840094010401140124013401440154016401740184019402040214022402340244025402640274028402940304031403240334034403540364037403840394040404140424043404440454046404740484049405040514052405340544055405640574058405940604061406240634064406540664067406840694070407140724073407440754076407740784079408040814082408340844085408640874088408940904091409240934094409540964097409840994100410141024103410441054106410741084109411041114112411341144115411641174118411941204121412241234124412541264127412841294130413141324133413441354136413741384139414041414142414341444145414641474148414941504151415241534154415541564157415841594160416141624163416441654166416741684169417041714172417341744175417641774178417941804181418241834184418541864187418841894190419141924193419441954196419741984199420042014202420342044205420642074208420942104211421242134214421542164217421842194220422142224223422442254226422742284229423042314232423342344235423642374238423942404241424242434244424542464247424842494250425142524253425442554256425742584259426042614262426342644265426642674268426942704271427242734274427542764
2774278427942804281428242834284428542864287428842894290429142924293429442954296429742984299430043014302430343044305430643074308430943104311431243134314431543164317431843194320432143224323432443254326432743284329433043314332433343344335433643374338433943404341434243434344434543464347434843494350435143524353435443554356435743584359436043614362436343644365436643674368436943704371437243734374437543764377437843794380438143824383438443854386438743884389439043914392439343944395439643974398439944004401440244034404440544064407440844094410441144124413441444154416441744184419442044214422442344244425442644274428442944304431443244334434443544364437443844394440444144424443444444454446444744484449445044514452445344544455445644574458445944604461446244634464446544664467446844694470447144724473447444754476447744784479448044814482448344844485448644874488 |
- import requests
- import base64
- import cv2
- import uiautomator2 as u2
- import time
- import subprocess
- import re
- import random
- import datetime
- import json
- from aip import AipOcr
- from apscheduler.schedulers.blocking import BlockingScheduler
- # from db_mysql import mysqlClient
- import threading
- from collections import deque
- import numpy as np
- import secrets
- import os
- import oss2
- import urllib.parse
- # import pyperclip
- from config import Config
- from logger import setup_logger
- import logging
- # from database import MySQLClient
- from PIL import Image
- from pathlib import Path
- from PIL import Image, ImageDraw, ImageFont
- # 配置日志
- # logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
- setup_logger("mt_spider") # 初始化日志
class SpiderMonitor(threading.Thread):
    """Global popup-monitoring thread (enhanced version).

    Runs as a daemon thread and repeatedly inspects the device UI through
    ``spider_instance.d``.  Three kinds of popups are handled:

    * simple popups - the matching element is clicked;
    * verification (captcha) popups - ``pausing`` is set so that the main
      thread can block, and the monitor waits until the element disappears;
    * ad popups - a tap is sent near the top-right corner of the screen.

    The spider object must expose ``d`` (the device handle), ``stop_all()``
    and ``restart_app()``; those are the only members used here.
    """

    def __init__(self, spider_instance, verification_timeout=None):
        """Create the monitor (the thread is not started here).

        Args:
            spider_instance: owner object providing ``d``, ``stop_all()`` and
                ``restart_app()``.
            verification_timeout: maximum number of seconds to wait for a
                verification popup to disappear before calling
                ``restart_app()``.  ``None`` (the default) waits without a
                time limit.
        """
        super().__init__(daemon=True)
        self.spider = spider_instance
        self.running = True
        # Set while a verification popup is being handled; the main thread
        # polls this event to know when it has to pause.
        self.pausing = threading.Event()
        self.last_verification_time = 0
        self.verification_count = 0
        self.MAX_VERIFICATION_RETRY = 10
        self.verification_timeout = verification_timeout
        # Keys of the most recent clicks, used to avoid duplicate clicks
        self.recent_clicks = deque(maxlen=10)
        self.logger = logging.getLogger("SpiderMonitor")
        # Configurable popup rules
        self.popup_rules = {
            # (xpath, description used in the log message)
            "simple": [
                ('//*[@text="确定"]', "点击确定"),
                ('//*[@text="允许"]', "点击允许"),
                ('//*[@text="关闭"]', "点击关闭"),
                ('//*[@resource-id="com.sankuai.meituan:id/close"]', "关闭按钮"),
                ('//*[@resource-id="com.sankuai.meituan:id/address_center_location_close"]', "关闭按钮"),
                ('//*[@resource-id="com.sankuai.meituan:id/location_close"]', "关闭按钮"),
                ('//*[@resource-id="com.sankuai.meituan:id/btn_close"]', "关闭按钮"),
            ],
            "verification": [
                '//*[contains(@text, "验证")]',
                '//*[contains(@text, "滑块")]',
                '//*[contains(@text, "依次点击")]',
                '//*[contains(@text, "请点击")]',
                # This one requires dragging the slider all the way to the
                # right before taking the screenshot
                '//*[contains(@text, "拖动滑块刚")]',
                '//*[contains(@text, "请输入图片中的内容")]',
                '//*[contains(@text, "用最短线连接")]',
                '//*[contains(@text, "请按语序依次点击")]',
                '//*[contains(@text, "请向右滑动滑块")]',
                '//*[contains(@text, "请拖动下方滑块完成拼图")]',
                '//*[contains(@resource-id, "captcha")]',
            ],
        }

    def run(self):
        """Poll for popups until ``stop()`` is called.

        Sleeps 2 s after a popup was handled, 1 s otherwise, and 3 s after an
        unexpected exception (which is logged, not propagated).
        """
        while self.running:
            try:
                handled = self.check_and_handle_popup()
                time.sleep(2 if handled else 1)
            except Exception as e:
                self.logger.exception("监控线程异常: %s", e)
                time.sleep(3)

    def _is_recent_click(self, xpath):
        """Return True if ``xpath`` was already recorded during this second.

        The key is recorded as a side effect when it is not yet present.

        NOTE(review): the key embeds ``int(time.time())``, so only repeats
        that happen within the same wall-clock second are suppressed.
        """
        key = f"{xpath}_{int(time.time())}"
        if key in self.recent_clicks:
            return True
        self.recent_clicks.append(key)
        return False

    def check_and_handle_popup(self):
        """Inspect the current screen once and handle the first popup found.

        Returns:
            True when a popup was handled (or a verification wait finished),
            False when nothing was found or a verification popup was seen
            again less than 30 s after the previous one.
        """
        d = self.spider.d
        # 1. Simple popups: click the matching element
        for xpath, desc in self.popup_rules["simple"]:
            if d.xpath(xpath).exists and not self._is_recent_click(xpath):
                self.logger.info("检测到弹窗: %s", desc)
                d.xpath(xpath).click()
                return True
        # 2. Verification (captcha) popups
        for xpath in self.popup_rules["verification"]:
            if d.xpath(xpath).exists:
                return self._handle_verification(d, xpath)
        # 3. Ad popups: tap near the top-right corner
        if d.xpath('//*[contains(@text, "广告")]').exists:
            screen_width = d.info['displayWidth']
            d.click(screen_width - 50, 50)
            self.logger.info("关闭广告弹窗")
            return True
        return False

    def _handle_verification(self, d, xpath):
        """Pause the main thread and wait for a verification popup to go away.

        ``pausing`` is always cleared on the way out (normal return, timeout,
        ``stop()`` or an exception raised by the device), so callers waiting
        on it cannot be blocked forever by a failure in this thread.
        """
        now = time.time()
        if now - self.last_verification_time < 30:
            return False  # do not trigger again within 30 seconds
        self.last_verification_time = now
        self.verification_count += 1
        self.logger.warning("验证码弹窗触发,等待人工处理...")
        if self.verification_count > self.MAX_VERIFICATION_RETRY:
            self.logger.error("验证码重试超限,终止任务")
            self.spider.stop_all()
            return True

        self.pausing.set()  # tell the main thread to pause
        try:
            timeout = self.verification_timeout
            started = time.monotonic()
            # Wait for manual handling; leave early if stop() was requested
            while self.running:
                if not d.xpath(xpath).exists:
                    self.logger.info("验证码已处理")
                    return True
                if timeout is not None and time.monotonic() - started >= timeout:
                    self.logger.warning("验证码超时,重启APP")
                    self.spider.restart_app()
                    return True
                time.sleep(5)
            return True
        finally:
            self.pausing.clear()  # release the main thread

    def stop(self):
        """Ask the polling loop (and any verification wait) to finish."""
        self.running = False
- class MTScreenshot:
def __init__(self, d, oss_config, search_key, scroll_times=4, compress_quality=7, resize_ratio=0.8):
    """Wire up the device handle, popup monitor, loggers, OSS bucket and screenshot settings.

    Args:
        d: u2 device instance that the caller has already connected.
        oss_config: mapping with the OSS settings read by ``_init_oss_bucket``
            and ``_upload_to_oss``.
        search_key: search keyword, stored unchanged on the instance.
        scroll_times: stored as ``self.scroll_times``.
        compress_quality: stored as ``self.compress_quality``.
        resize_ratio: stored as ``self.resize_ratio``.
    """
    # Device handle comes from outside, already connected
    self.d, self.search_key = d, search_key

    # Start the global popup monitor immediately
    popup_monitor = SpiderMonitor(self)
    self.monitor = popup_monitor
    popup_monitor.start()

    # Root logger plus the dedicated screenshot logger
    self.loggerMT = logging.getLogger()
    self.logger = self._init_logger()

    # OSS configuration and bucket handle (None when unavailable)
    self.oss_config = oss_config
    self.oss_bucket = self._init_oss_bucket()

    # Core screenshot parameters
    self.scroll_times = scroll_times
    self.compress_quality = compress_quality
    self.resize_ratio = resize_ratio
    # Legacy title locators, kept for reference only:
    # self.title_xpaths = [
    #     '//*[@resource-id="com.jd.lib.productdetail.feature:id/db"]',
    #     '//*[@resource-id="com.jd.lib.productdetail.feature:id/cx"]',
    #     '//*[@resource-id="com.jd.lib.productdetail.feature:id/cj"]'
    # ]
def _init_logger(self):
    """Return the ``mt_screenshot`` logger with exactly one stream handler.

    The logger level is set to INFO, any handlers already attached are
    removed, and a single ``StreamHandler`` with a
    ``time - level - message`` format is installed.
    """
    stream = logging.StreamHandler()
    stream.setFormatter(
        logging.Formatter("%(asctime)s - %(levelname)s - %(message)s")
    )

    shot_logger = logging.getLogger("mt_screenshot")
    shot_logger.setLevel(logging.INFO)
    # Drop previously attached handlers so repeated calls do not duplicate output
    shot_logger.handlers.clear()
    shot_logger.addHandler(stream)
    return shot_logger
def _init_oss_bucket(self):
    """Check the OSS settings and open the bucket.

    Returns:
        The ``oss2.Bucket`` object when ``access_key_id``,
        ``access_key_secret``, ``endpoint`` and ``bucket_name`` are all
        present (truthy) and ``get_bucket_info()`` succeeds; ``None``
        otherwise.  A warning is logged for incomplete settings and an error
        for a failed connection.
    """
    cfg = self.oss_config
    required_keys = ("access_key_id", "access_key_secret", "endpoint", "bucket_name")
    present = [cfg.get(key) for key in required_keys]
    if not all(present):
        self.logger.warning("OSS配置不完整,无法上传")
        return None

    try:
        credentials = oss2.Auth(cfg["access_key_id"], cfg["access_key_secret"])
        handle = oss2.Bucket(credentials, cfg["endpoint"], cfg["bucket_name"])
        # Round-trip to the service to verify that the connection works
        handle.get_bucket_info()
    except Exception as e:
        self.logger.error(f"OSS Bucket连接失败: {e}")
        return None

    self.logger.info("OSS Bucket连接成功")
    return handle
def _upload_to_oss(self, local_path):
    """Upload ``local_path`` to the configured OSS bucket.

    The object key is ``oss_prefix`` (default ``scrape_data/``) followed by
    the file's base name, with every character other than word characters,
    ``.`` and ``-`` replaced by ``_``.

    Args:
        local_path: path of the file to upload.

    Returns:
        The ``https://<bucket>.<endpoint>/<key>`` URL string on success;
        ``None`` when no bucket handle is available, the file does not
        exist, or the upload raises (the error is logged).
    """
    if not self.oss_bucket or not os.path.exists(local_path):
        return None
    file_name = os.path.basename(local_path)
    safe_name = re.sub(r'[^\w\.\-]', '_', file_name)
    oss_key = f"{self.oss_config.get('oss_prefix', 'scrape_data/')}{safe_name}"
    try:
        oss2.resumable_upload(self.oss_bucket, oss_key, local_path)
        # The endpoint setting may carry a scheme and/or trailing slash
        # (e.g. "https://oss-cn-hangzhou.aliyuncs.com/"); strip both so the
        # host part of the URL is well-formed.
        endpoint_host = re.sub(
            r'^[A-Za-z][A-Za-z0-9+.\-]*://', '', self.oss_config['endpoint']
        ).rstrip('/')
        # Build and return the full OSS URL
        oss_file_url = f"https://{self.oss_config['bucket_name']}.{endpoint_host}/{urllib.parse.quote(oss_key, safe='/')}"
        self.logger.info(f"OSS上传成功: {oss_file_url}")
        return oss_file_url
    except Exception as e:
        self.logger.error(f"OSS上传失败: {e}")
        return None
- # def _get_title(self):
- # # 仅提取标题,无冗余逻辑
- # for xpath in self.title_xpaths:
- # elem = self.d.xpath(xpath)
- # if elem.exists:
- # info = elem.info
- # title = (info.get("contentDescription") or info.get("content-desc") or info.get("text") or "").strip()
- # return title[:50] # 限制标题长度,避免文件名过长
- # return ""
def safe_exec(self, func, *args, **kwargs):
    """Run ``func`` once the popup monitor is not pausing the main thread.

    While ``self.monitor.pausing`` is set, this call sleeps in one-second
    steps.  As soon as the event is clear, ``func(*args, **kwargs)`` is
    invoked and its result is returned.
    """
    while True:
        if not self.monitor.pausing.is_set():
            break
        time.sleep(1)
    # Gate is open: perform the real work
    outcome = func(*args, **kwargs)
    return outcome
- def _get_title(self):
- # try:
- # title = self.d.xpath(
- # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
- # except:
- # title = self.d.xpath(
- # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView').text
- # title = self.d.xpath('//*[contains(@text, "舒肝颗粒")]').text
- def _inner():
- temp_search_key = self.search_key
- if "999" in self.search_key:
- if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = self.search_key.replace("999皮炎平", "")
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = self.search_key.replace("999必无忧", "")
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = self.search_key.replace("999必无忧", "")
- elif self.search_key == '999速复康布洛芬缓释胶囊':
- temp_search_key = self.search_key.replace("999速复康", "")
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = self.search_key.replace("999选平", "")
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = self.search_key.replace("999皮炎平", "")
- else:
- temp_search_key = self.search_key.replace("999", "")
- else:
- if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
- temp_search_key = self.search_key.replace("史达功", "")
- temp_search_key = temp_search_key.replace("120", "")
- elif self.search_key == '三九胃泰养胃舒颗粒8袋':
- temp_search_key = self.search_key.replace("三九胃泰", "")
- temp_search_key = temp_search_key.replace("8袋", "")
- elif self.search_key == '今维多赐多康牌蛋白粉':
- temp_search_key = self.search_key.replace("今维多", "")
- elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
- temp_search_key = self.search_key.replace("佳美舒", "")
- temp_search_key = temp_search_key.replace("4", "")
- elif self.search_key == '三九胃泰颗粒20g*10':
- temp_search_key = self.search_key.replace("20g*10", "")
- elif self.search_key == '三九胃泰颗粒20g*6袋':
- temp_search_key = self.search_key.replace("20g*6袋", "")
- elif self.search_key == '顺峰康王酮康他索乳膏':
- temp_search_key = self.search_key.replace("顺峰康王", "")
- if self.search_key == '999糠酸莫米松凝胶15':
- temp_search_key = temp_search_key.replace("15", "")
- elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = temp_search_key.replace("30", "")
- elif self.search_key == '999复方感冒灵颗粒15':
- temp_search_key = temp_search_key.replace("15", "")
- elif self.search_key == '999复方金银花颗粒10g':
- temp_search_key = temp_search_key.replace("10g", "")
- elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
- temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
- elif self.search_key == '999复方氨酚烷胺胶囊6粒':
- temp_search_key = temp_search_key.replace("6粒", "")
- elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
- temp_search_key = temp_search_key.replace("50", "")
- elif self.search_key == '999止泻利颗粒15g*8':
- temp_search_key = temp_search_key.replace("15g*8", "")
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = temp_search_key.replace("30", "")
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = temp_search_key.replace("15g", "")
- elif self.search_key == '999复方苦参肠炎康片12片':
- temp_search_key = temp_search_key.replace("12片", "")
- elif self.search_key == '999强力枇杷露16袋':
- temp_search_key = temp_search_key.replace("16袋", "")
- elif self.search_key == '999三蛇胆川贝膏138':
- temp_search_key = temp_search_key.replace("138", "")
- elif self.search_key == '999强力枇杷露120ml':
- temp_search_key = temp_search_key.replace("120ml", "")
- elif self.search_key == '999强力枇杷露150ml':
- temp_search_key = temp_search_key.replace("150ml", "")
- elif self.search_key == '999抗病毒口服液10ml*10':
- temp_search_key = temp_search_key.replace("10ml*10", "")
- elif self.search_key == '999抗病毒口服液10ml*12':
- temp_search_key = temp_search_key.replace("10ml*12", "")
- elif self.search_key == '999糠酸莫米松乳膏10g支':
- temp_search_key = temp_search_key.replace("10g支", "")
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = temp_search_key.replace("20g", "")
- elif self.search_key == '999感冒清热颗粒(无糖)6g':
- temp_search_key = temp_search_key.replace("6g", "")
- elif self.search_key == '999壮骨关节丸6g*20':
- temp_search_key = temp_search_key.replace("6g*20", "")
- elif self.search_key == '999正天丸6g*15':
- temp_search_key = temp_search_key.replace("6g*15", "")
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = temp_search_key.replace("20", "")
- elif self.search_key == '999糠酸莫米松凝胶10':
- temp_search_key = temp_search_key.replace("10", "")
- elif self.search_key == '999板蓝根颗粒10g*20':
- temp_search_key = temp_search_key.replace("10g*20", "")
- elif self.search_key == '999复方氨酚烷胺胶囊10粒':
- temp_search_key = temp_search_key.replace("10粒", "")
- elif self.search_key == '999复方氨酚烷胺胶囊12粒':
- temp_search_key = temp_search_key.replace("12粒", "")
- elif self.search_key == '999咽炎片0.26g*12片*2板':
- temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
- elif self.search_key == '999小儿止咳糖浆120':
- temp_search_key = temp_search_key.replace("120", "")
- elif self.search_key == '999小儿止咳糖浆225':
- temp_search_key = temp_search_key.replace("225", "")
- elif self.search_key == '999小儿感冒颗粒6g*10':
- temp_search_key = temp_search_key.replace("6g*10", "")
- elif self.search_key == '999小儿感冒颗粒6g*24':
- temp_search_key = temp_search_key.replace("6g*24", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
- temp_search_key = temp_search_key.replace("6g*10袋", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
- temp_search_key = temp_search_key.replace("6g*20袋", "")
- elif self.search_key == '999小儿咽扁颗粒8g*10袋':
- temp_search_key = temp_search_key.replace("8g*10袋", "")
- elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
- temp_search_key = temp_search_key.replace("2.5g*10袋", "")
- # elif self.search_key == '三九胃泰颗粒':
- # self.search_key = '三九胃泰' #兼容三九胃泰 温胃舒颗粒
- print(f'获取商品title时的搜索关键字:{temp_search_key}')
- # title = self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text
- # 初始化
- drugs_name = ''
- specifications = ''
- title = ''
- # 循环的获取title为了有时间来处理人机验证
- for m in range(1, 6000):
- if self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').exists:
- title = self.safe_exec(
- lambda: self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
- )
- self.loggerMT.info(f"第{m}次获取title成功")
- print(f"第{m}次获取title成功")
- break
- else:
- time.sleep(3)
- # return drugs_name, specifications
- # drugs_name = ''
- # specifications = ''
- # try:
- # title_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- # title_xpath_2 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- # if self.d.xpath(title_xpath).exists:
- # title = self.d.xpath(title_xpath).text
- # print(f"title_xpath获取的title={title}")
- # if temp_search_key not in title:
- # return drugs_name, specifications
- # elif self.d.xpath(title_xpath_2).exists:
- # title = self.d.xpath(title_xpath_2).text
- # print(f"title_xpath_2获取的title={title}")
- # if temp_search_key not in title:
- # return drugs_name, specifications
- # else:
- # print('title_xpath不存在,请确认')
- # return drugs_name, specifications
- # # title = self.d.xpath(f'//*[contains(@text, "{temp_search_key}")]').text
- # except Exception as e:
- # print(f"发生异常: {e}")
- # return drugs_name, specifications
- # 奇怪:有的时候title取出来的记过第一位会多一个0
- # title = self.safe_exec(self.d.xpath(f'//*[contains(@text, "{self.search_key}")]').text)
- # title = self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
- title = title[1:] if title.startswith('0') else title
- print(f'获取到药品标题:{title}')
- # 从里面匹配出药品名和规格
- # drugs_name
- # specifications
- # match = re.search(r'([^\d]+)([\d\D]+)', title)
- if self.search_key == '999赐多康大豆':
- return title, '1罐'
- if self.search_key == "999感冒清热颗粒":
- match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
- else:
- match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)
- if match:
- # drugs_name = match.group(1).strip() + match.group(2).strip()
- drugs_name = title
- specifications = match.group(3).strip()
- print("药品名:", drugs_name)
- print("规格:", specifications)
- # print('完整药名:', drugs_name + specifications)
- return drugs_name #, specifications
- else:
- if title == '999抗病毒口服液10ml*12' or title == '999抗病毒口服液':
- drugs_name = title
- specifications = '10ml*12支/盒'
- return drugs_name # , specifications
- elif title == '999抗病毒口服液10ml*10':
- drugs_name = title
- specifications = '10ml*10支/盒'
- return drugs_name #, specifications
- elif title == '999小柴胡颗粒':
- drugs_name = title
- specifications = '10g*9袋/盒'
- return drugs_name #, specifications
- elif title == '999养胃舒颗粒':
- drugs_name = title
- specifications = '10g*10袋/盒'
- return drugs_name #, specifications
- elif title == '三九胃泰胶囊':
- drugs_name = title
- specifications = '0.5g*24粒/盒'
- return drugs_name #, specifications
- elif title == '999补脾益肠丸':
- drugs_name = title
- specifications = '6g*15袋/盒'
- return drugs_name #, specifications
- elif title == '999感冒灵颗粒':
- drugs_name = title
- specifications = '10g*9袋/盒'
- return drugs_name #, specifications
- elif title == '999感冒灵胶囊':
- drugs_name = title
- specifications = '0.5g*12粒/盒'
- return drugs_name #, specifications
- else:
- print("没有匹配到预期格式")
- drugs_name = title
- specifications = ''
- return drugs_name #, specifications
- # 用 safe_exec 包装内部逻辑,确保验证码阻塞
- return self.safe_exec(_inner)
- def _merge_screenshots(self, screens):
- # 仅拼接截图,无额外功能
- if len(screens) == 1:
- return screens[0].convert('RGB')
- rgb_screens = [s.convert('RGB') for s in screens]
- total_width = rgb_screens[0].width
- total_height = sum(s.height for s in rgb_screens)
- merged_img = Image.new('RGB', (total_width, total_height))
- y_offset = 0
- for img in rgb_screens:
- merged_img.paste(img, (0, y_offset))
- y_offset += img.height
- return merged_img
def get_oss_url(self):
    """Take a scrolling screenshot, save it locally, upload it and return the URL.

    Steps: read the page title, build a timestamped file name under
    ``../scrape_data``, capture the screen before and after each of up to
    ``self.scroll_times`` swipes (stopping early once a node containing
    "商家服务" is visible), merge the captures, optionally downscale, save as
    JPEG and hand the file to ``self._upload_to_oss``.

    :return: whatever ``self._upload_to_oss`` returns, or ``None`` when any
        step raises (the error is logged).
    """
    try:
        # 1. Title (may legitimately be missing; the log line anticipates that).
        title = self._get_title()
        self.logger.info(f"获取标题: {title[:20]}..." if title else "未获取到标题")

        # 2. Local file path. A missing title must not abort the screenshot,
        #    so fall back to an empty title instead of passing None to re.sub.
        timestamp = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
        safe_title = re.sub(r'[\\/*?:"<>|]', '_', title or '')
        local_dir = "../scrape_data"
        os.makedirs(local_dir, exist_ok=True)
        local_file_path = os.path.join(local_dir, f"{timestamp}_{safe_title}.jpg")

        # 3. Scroll and capture.
        screen_list = [self.d.screenshot()]
        w, h = self.d.window_size()
        for _ in range(self.scroll_times):
            # Swipe from 85% to 15% of the screen height; a shorter swipe
            # could miss the shop name.
            self.d.swipe(w // 2, h * 0.85, w // 2, h * 0.15,
                         duration=random.uniform(0.8, 1.5))
            time.sleep(random.uniform(2.0, 4.0))
            screen_list.append(self.d.screenshot())
            if self.d(textContains='商家服务').exists:
                break

        # 4. Merge, optionally downscale, save.
        merged_img = self._merge_screenshots(screen_list)
        try:
            if 0.1 < self.resize_ratio < 1.0:
                new_size = (int(merged_img.width * self.resize_ratio),
                            int(merged_img.height * self.resize_ratio))
                # Pillow >= 9.1 exposes Image.Resampling; older versions do not.
                resample_mode = Image.Resampling.LANCZOS if hasattr(Image, 'Resampling') else Image.LANCZOS
                merged_img = merged_img.resize(new_size, resample_mode)
            merged_img.save(local_file_path, format='JPEG', quality=self.compress_quality)
        finally:
            # Release the long image even when saving fails.
            merged_img.close()
        self.logger.info(f"临时本地保存: {local_file_path}")

        # 5. Upload.
        oss_url = self._upload_to_oss(local_file_path)

        # 6. Deleting the local copy is intentionally disabled for now, so the
        #    log must not claim the file was removed.
        if oss_url is not None:
            self.logger.info(f"✅ OSS上传成功,本地临时文件暂时保留: {local_file_path}")
        return oss_url
    except Exception as e:
        self.logger.error(f"截图/上传失败: {e}")
        return None
def get_access_token():
    """Request a Baidu AIP OAuth token using the client-credentials grant.

    :return: the ``access_token`` field of the JSON response, or ``None`` when
        the response is not JSON or carries no such field.
    :raises requests.RequestException: on network failure or timeout.
    """
    # NOTE(review): these credentials are committed in source; they should be
    # moved to configuration and rotated.
    app_key = "tRK2RhyItCSh6BzyT4CNVXQa"
    app_secret = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
    token_url = 'https://aip.baidubce.com/oauth/2.0/token'
    query = {
        'grant_type': 'client_credentials',
        'client_id': app_key,
        'client_secret': app_secret,
    }
    headers = {
        'Content-Type': 'application/json',
        'Accept': 'application/json'
    }
    # Without a timeout a stalled connection would block the caller forever.
    response = requests.post(token_url, params=query, headers=headers, data="", timeout=10)
    try:
        return response.json()['access_token']
    except (ValueError, KeyError, TypeError):
        # Non-JSON body, missing field, or a JSON payload that is not an object.
        return None
-
def get_mysql():
    """Open and return a new pymysql connection built from the ``Config.DB_*`` settings."""
    import pymysql

    connection_settings = dict(
        host=Config.DB_HOST,
        port=Config.DB_PORT,
        user=Config.DB_USER,
        password=Config.DB_PASSWORD,
        db=Config.DB_NAME,
        charset='utf8mb4',
    )
    return pymysql.connect(**connection_settings)
- class MT:
def __init__(self, key):
    """Prepare API clients, table names and counters for one search keyword.

    Note that the device handle ``self.d`` is not created here; it is set by
    ``connect_devices``.

    :param key: search keyword stored as ``self.search_key``.
    """
    # Target app package.
    self.package_name = Config.PACKAGE_NAME

    # Remote token and the city -> province lookup (both loaded eagerly).
    self.access_token = get_access_token()
    self.city2province = self.get_city_info()

    # Baidu OCR SDK client.
    # NOTE(review): credentials are committed in source; move to configuration.
    self.APP_ID, self.API_KEY, self.SECRET_KEY = (
        '116857964',
        '1gAzACJOAr7BeILKqkqPOETh',
        'ZNArANb9GwJYgLKg4EfYhukKBfPdl1n3',
    )
    self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)

    # Destination tables for product rows and shop rows.
    self.table_name = Config.DB_TABLE
    self.shop_table_name = Config.DB_SHOP_TABLE

    # Root logger.
    self.loggerMT = logging.getLogger()

    self.search_key = key
    self.unrelated_data = 0  # count of unrelated records
    self.shop_data_num = 0   # count of shop records
def stop_app(self):
    """Stop the target app on the device, then pause for five seconds."""
    package = self.package_name
    self.d.app_stop(package)
    time.sleep(5)
def start_app(self):
    """Launch the target app on the device, then pause for five seconds."""
    package = self.package_name
    self.d.app_start(package)
    time.sleep(5)
def restart_app(self):
    """Restart the app by stopping it and then starting it again."""
    for step in (self.stop_app, self.start_app):
        step()
- @staticmethod
- def get_sleep_time():
- # return random.randint(5, 8)
- return random.randint(1, 3)
- @staticmethod
- def get_current_date():
- return datetime.datetime.now().strftime('%Y/%m/%d')
- @staticmethod
- def get_city_info():
- """
- 获取所有的省市数据
- :return:
- """
- file_path = '../kailin_city.json'
- with open(file_path, 'r', encoding='utf-8') as f:
- data = json.load(f)
- province = {province_one["id"]: province_one for province_one in data['province']}
- city2province = dict()
- city = data['city']
- for city_one in city:
- name = city_one['name']
- pid = city_one['pid']
- if len(str(pid)) > 2:
- pid = int(re.match('^\d{2}', str(pid)).group())
- city2province[name] = province[pid]['name']
- return city2province
def get_shop_name(self):
    """Read the shop name from the product page, falling back to the shop page.

    Two layout-dependent XPaths are tried in order (the name lives under the
    last or the second-to-last ViewGroup child). If both fail, the shop page
    is opened via ``enter_shop`` and the header text is read there, followed
    by one back-navigation.

    :return: the shop name, or ``None`` when no strategy finds it.
    """
    candidates = (
        ('获取到店铺名',
         '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView'),
        ('获取到店铺名2',
         '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.FrameLayout[1]/android.widget.TextView'),
    )
    last_error = None
    for label, name_xpath in candidates:
        try:
            shop_name = self.d.xpath(name_xpath).text
        except Exception as e:
            # Only ordinary lookup failures move on to the next strategy;
            # KeyboardInterrupt / SystemExit must still stop the run.
            last_error = e
            continue
        print(f'{label}:{shop_name}')
        return shop_name

    # Last resort: open the shop page and read the name from its header.
    print("点击店铺进入后获取店铺名称")
    self.enter_shop()
    shop_xpath = '//*[@resource-id="com.sankuai.meituan:id/layout_header_view"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]//android.widget.FrameLayout[2]/android.widget.FrameLayout[1]/android.widget.TextView'
    header = self.d.xpath(shop_xpath)
    if header.exists:
        shop_name = header.text
        self.swipe_back(1)
        return shop_name

    # NOTE(review): no back-navigation happens on this path, so the app may
    # still be on the shop page when None is returned - confirm with callers.
    print(f'获取店铺名出错:{last_error}')
    return None
def get_qualification_number(self):
    """Read the qualification number shown on the shop qualification page.

    The label prefix "资质编号" (with an optional ASCII or full-width colon) is
    removed from the start of the text; the rest is returned unchanged apart
    from surrounding whitespace.

    :return: the qualification number, or ``None`` when the node cannot be read.
    """
    try:
        raw_text = self.d.xpath(
            '//*[@resource-id="com.sankuai.meituan:id/mil_container"]/android.webkit.WebView[1]/android.webkit.WebView[1]/android.view.View[1]/android.view.View[1]/android.widget.TextView[2]').text
    except Exception:
        return None
    if raw_text is None:
        return None
    # str.strip('资质编号:') would treat the argument as a set of characters
    # and also eat matching characters from the END of the number, so remove
    # the prefix explicitly instead.
    return re.sub(r'^\s*资质编号\s*[::]?', '', raw_text).strip()
-
def get_shop_address(self):
    """Read the shop address from the shop-info page.

    The first text node is used unless it contains "发货时间" (delivery time),
    in which case the node in the second ViewGroup is read instead when it
    exists; otherwise the first text is kept.

    :return: the address text, ``''`` when the first node does not exist, or
        ``None`` when reading fails.
    """
    try:
        xpath = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.TextView'
        if not self.d.xpath(xpath).exists:
            shop_address = ''
            print(f'333-获取到店铺地址:{shop_address}')
            return shop_address

        shop_address = self.d.xpath(xpath).text
        print(f'111-获取到店铺地址:{shop_address}')
        if '发货时间' in shop_address:
            # The first node holds the delivery-time line; try the next group.
            print(f'店铺地址包含发货时间,再次获取店铺地址')
            xpath2 = '//*[@resource-id="com.sankuai.meituan:id/wm_sc_drug_shop_content_mrn_container_id_2"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.TextView'
            if self.d.xpath(xpath2).exists:
                shop_address = self.d.xpath(xpath2).text
                print(f'222-获取到店铺地址:{shop_address}')
            else:
                print(f'222-xpath2获取店铺地址失败')
        return shop_address
    except Exception:
        # A bare except would also swallow KeyboardInterrupt / SystemExit.
        print(f'获取店铺地址出错-get_shop_address')
        return None
def enter_detail(self):
    """Tap the first FrameLayout inside the ``recycler`` list, then wait a short random time."""
    first_item = self.d.xpath(
        '//*[@resource-id="com.sankuai.meituan:id/recycler"]/android.widget.FrameLayout[1]')
    first_item.click()
    pause = self.get_sleep_time()
    time.sleep(pause)
def save_to_database(self, data):
    """Insert one product record into ``self.table_name``.

    :param data: mapping that must contain every column listed below; each
        column is filled from the key of the same name.
    :raises KeyError: if ``data`` lacks a required key (checked before any
        connection is opened).
    """
    print(f'保存数据到数据库:{data}')
    columns = (
        'product', 'min_price', 'manufacture_date', 'expiry_date', 'shop',
        'business_license_company', 'province', 'city', 'manufacturer',
        'specification', 'approval_number', 'product_link', 'scrape_date',
        'scrape_province', 'availability', 'credit_code', 'platform',
        'search_key', 'sales', 'inventory', 'snapshot_url',
    )
    values = tuple(data[column] for column in columns)
    placeholders = ', '.join(['%s'] * len(columns))
    # The table name comes from configuration; the values are parameterized.
    add_sql = f"""
    INSERT INTO {self.table_name}
    ({', '.join(columns)})
    VALUES ({placeholders})
    """

    conn = get_mysql()
    try:
        with conn.cursor() as cur:
            cur.execute(add_sql, values)
        conn.commit()
    finally:
        # Every call opens a fresh connection, so it must always be released.
        conn.close()
    print(f"存入数据库成功")
def save_shop_info_to_database(self, data):
    """Insert one shop record into ``self.shop_table_name``.

    :param data: mapping that must contain every column listed below; each
        column is filled from the key of the same name.
    :raises KeyError: if ``data`` lacks a required key (checked before any
        connection is opened).
    """
    print(f'保存店铺数据到数据库:{data}')
    columns = (
        'shop', 'contact_address', 'qualification_number',
        'business_license_company', 'business_license_address',
        'scrape_date', 'platform',
    )
    values = tuple(data[column] for column in columns)
    placeholders = ', '.join(['%s'] * len(columns))
    # The table name comes from configuration; the values are parameterized.
    add_sql = f"""
    INSERT INTO {self.shop_table_name}
    ({', '.join(columns)})
    VALUES ({placeholders})
    """

    conn = get_mysql()
    try:
        with conn.cursor() as cur:
            cur.execute(add_sql, values)
        conn.commit()
    finally:
        # Every call opens a fresh connection, so it must always be released.
        conn.close()
    print(f'存入店铺信息到数据库成功')
def swipe_up(self):
    """Swipe the screen upwards once, then wait a short random time.

    The swipe runs along the vertical centre line from 100 px above the
    bottom edge to 100 px below the top edge. In roughly 15% of calls an
    extra small upward swipe follows, to nudge a page that got stuck.
    """
    center_x = self.d.info['displayWidth'] // 2
    start_y = self.d.info['displayHeight'] - 100
    swipe_duration = random.uniform(0, 0.3)
    self.d.swipe(center_x, start_y, center_x, 100, duration=swipe_duration)

    if random.uniform(0, 1) > 0.85:
        self.d.swipe_ext("up", 0.1)
    time.sleep(self.get_sleep_time())
def swipe_back(self, no):
    """Press the device back key ``no`` times, pausing briefly after each press.

    :param no: number of back presses.
    """
    for _ in range(no):
        self.d.press('back')
        pause = self.get_sleep_time()
        time.sleep(pause)
def drug_price(self):
    """Read the price from the first node whose text starts with "¥".

    :return: the first run of digits/dots in that text as a float, or
        ``None`` when the node or a number cannot be obtained.
    """
    try:
        raw_price = self.d.xpath('//*[starts-with(@text,"¥")]').text
        number = re.search(r'[\d\.]+', raw_price)
        price = float(number.group())
    except Exception as e:
        print(f'提取价格出错-->{e}')
        return None
    print(f'获取到价格:{price}')
    return price
-
def drug_sale_num(self):
    """Read the sold-count text from the node whose text starts with "已售".

    :return: that text with every "已售" removed and whitespace trimmed, or
        ``None`` when the node is absent or reading fails.
    """
    sold_xpath = '//*[starts-with(@text,"已售")]'
    try:
        if not self.d.xpath(sold_xpath).exists:
            return None
        sold_text = self.d.xpath(sold_xpath).text
        sold_count = sold_text.replace("已售", "").strip()
        print(f'获取到已售数量:{sold_count}')
        return sold_count
    except Exception as e:
        print(f'提取已售数量出错-->{e}')
        return None
def restart_uiautomator_services(self, device_id):
    """Restart the atx-agent server on a device through adb.

    Runs the agent's stop command, waits a short random time, runs its start
    command and waits again. Command output is captured and discarded, and
    exit codes are not checked.

    :param device_id: adb serial passed to ``adb -s``.
    """
    # NOTE(review): the commands go through the shell with device_id
    # interpolated, so device_id must come from a trusted source.
    agent_command = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d'
    commands = (f'{agent_command} --stop', agent_command)
    for command in commands:
        subprocess.run(command, capture_output=True, text=True, shell=True)
        time.sleep(self.get_sleep_time())
- # def connect_devices(self, device_id):
- # """
- # 连接设备
- # :return:
- # """
- # try:
- # self.d = u2.connect_usb(device_id)
- # # 设置隐形等待时间
- # # self.d.implicitly_wait(5)
- # self.restart_uiautomator_services(device_id)
- # print(f'连接到设备:{device_id}')
- # except Exception as e:
- # print(f'{device_id} 连接错误: {e}')
- # raise Exception(e)
-
def connect_devices(self, device_id):
    """Connect to a device over USB and prepare per-device settings.

    Sets ``self.d`` to the uiautomator2 device, restarts the on-device
    atx-agent service and stores the OSS upload settings in
    ``self.oss_config``.

    :param device_id: adb serial of the device.
    :raises Exception: when connecting or restarting the service fails; the
        original error is attached as ``__cause__``.
    """
    try:
        self.d = u2.connect_usb(device_id)
        self.restart_uiautomator_services(device_id)
        # NOTE(review): these credentials are committed in source; they
        # should be moved to configuration and rotated.
        self.oss_config = {
            "access_key_id": 'LTAI5tDwjfteBvivYN41r8sJ',
            "access_key_secret": 'yowuOGi2nYYnrqGpO3qcz94C4brcPp',
            "endpoint": "oss-cn-shenzhen.aliyuncs.com",
            "bucket_name": "zhijiayun-jiansuo",
            "oss_prefix": "scrape_data/"  # key prefix (virtual folder) for screenshots
        }
        print(f'连接到设备:{device_id}')
        self.loggerMT.info(f'连接到设备:{device_id}')
    except Exception as e:
        print(f'{device_id} 连接错误: {e}')
        # A failed connection is an error, not routine information.
        self.loggerMT.error(f'{device_id} 连接错误: {e}')
        # Keep raising a plain Exception for existing callers, but chain the
        # cause so the original traceback is not lost.
        raise Exception(e) from e
def get_ocr_res(self, img):
    """Run Baidu business-licence OCR on an image file.

    :param img: path of the image file.
    :return: dict mapping each field name in the response's ``words_result``
        to its ``words`` value, or ``None`` on any failure (unreadable file,
        HTTP error status, unexpected response shape, network error).
    """
    try:
        print(f'开始识别图片:{img}')
        request_url = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
        # Read through a context manager so the file handle is always closed.
        with open(img, 'rb') as image_file:
            encoded_image = base64.b64encode(image_file.read())
        params = {"image": encoded_image}
        request_url = request_url + "?access_token=" + self.access_token
        headers = {'content-type': 'application/x-www-form-urlencoded'}
        # Without a timeout a stalled request would block the scraper forever.
        response = requests.post(request_url, data=params, headers=headers, timeout=30)
        if not response:
            # A requests Response is falsy for 4xx/5xx status codes.
            return None
        res = response.json()
        new_dic = {field: entry['words'] for field, entry in res['words_result'].items()}
        print('资质数据信息', new_dic)
        return new_dic
    except Exception:
        # Best-effort OCR: callers treat None as "no result".
        return None
-
def remove_watermark(self, img_path):
    """Lighten an image so the watermark fades towards white, then re-encode it.

    Each pixel value is mapped through a fixed linear transform and clipped
    to 0..255; the result is encoded in the format given by the file's
    extension.

    :param img_path: path of the image file.
    :return: the encoded-image buffer produced by ``cv2.imencode``.
    """
    raw_bytes = np.fromfile(img_path, dtype=np.uint8)
    decoded = cv2.imdecode(raw_bytes, -1)
    extension = os.path.splitext(img_path)[1]
    scaled = 1.4057577998008846 * decoded - 38.33089999653017
    lightened = np.clip(scaled, 0, 255).astype(np.uint8)
    _, encoded = cv2.imencode(extension, lightened)
    return encoded
-
def get_ocr_res_image(self, img):
    """Run Baidu general OCR on an image after lightening its watermark.

    :param img: path of the image file.
    :return: the ``words_result`` value of the API response (``''`` when the
        key is absent), or ``None`` when preprocessing or the API call fails.
    """
    try:
        image = self.remove_watermark(img)
        res_image = self.client.basicGeneral(image)
        data = res_image.get('words_result', '')
        print(f'百度api返回结果:{data}')
        return data
    except Exception:
        # Best-effort OCR; a bare except would also swallow
        # KeyboardInterrupt / SystemExit.
        return None
def screenshot_the_business_license(self, qualification_number):
    """Screenshot the device and save the cropped business-licence region.

    The full screenshot is written to ``screenshot1.png``; the fixed region
    x 0..720, y 480..1420 is cropped and saved as
    ``screenshot/<qualification_number>.png``, or ``cropped_screenshot.png``
    when no qualification number is given.

    :param qualification_number: used as the file name when truthy.
    :return: path of the cropped image (a ``Path`` when a qualification
        number was given, otherwise a ``str``).
    """
    screenshot_path = 'screenshot1.png'
    self.d.screenshot(screenshot_path)
    img = cv2.imread(screenshot_path)

    # Crop region (left, top, right, bottom) in pixels.
    # NOTE(review): presumably tuned for a 720-px-wide screen - confirm.
    left, top, right, bottom = 0, 480, 720, 1420
    cropped_img = img[top:bottom, left:right]

    # Output directory, relative to the working directory.
    SCREENSHOT_DIR = Path('screenshot')
    SCREENSHOT_DIR.mkdir(parents=True, exist_ok=True)
    if qualification_number:
        cropped_screenshot_path = SCREENSHOT_DIR / f'{qualification_number}.png'
    else:
        cropped_screenshot_path = 'cropped_screenshot.png'

    # Older OpenCV builds reject pathlib.Path file names, so pass a str.
    cv2.imwrite(str(cropped_screenshot_path), cropped_img)
    return cropped_screenshot_path
-
def screenshot_instruction(self):
    """Take a device screenshot under a unique file name and return that name.

    The name is ``instructionscreenshot1-<HH-MM-SS>-<8 hex chars>.png``,
    built from the current local time and a random token.
    """
    time_str = datetime.datetime.now().strftime("%H-%M-%S")
    # Four random bytes rendered as eight hexadecimal characters.
    random_str = secrets.token_hex(4)
    print(time_str)
    screenshot_path = f'instructionscreenshot1-{time_str}-{random_str}.png'
    self.d.screenshot(screenshot_path)
    return screenshot_path
-
def extract_specification(self, text):
    """Return the part of ``text`` that precedes the first "【".

    The leading part is stripped of surrounding whitespace. When ``text`` is
    empty or starts with "【", it is returned unchanged.
    """
    leading_part = text.partition('【')[0]
    if not leading_part:
        return text
    return leading_part.strip()
# Get the product title.
def get_title(self):
    """Read the product title from the current page and derive its specification.

    The search keyword is first reduced to the text expected inside the
    on-page title: brand fragments and pack-size fragments are removed
    according to the per-keyword tables below. The page is then polled every
    three seconds (up to 5999 attempts, leaving time to solve a human
    verification) for a node whose text contains that reduced keyword. A
    single leading "0" is dropped from the title, and the specification is
    extracted with a regular expression or, failing that, looked up in a
    table of known titles.

    The whole procedure, and the text read itself, run through
    ``self.safe_exec``.

    :return: ``(drugs_name, specifications)``; ``drugs_name`` is the cleaned
        title and ``specifications`` is ``''`` when none could be determined.
    """
    # search_key -> fragments removed first (brand names and, for some keys
    # without "999", the pack size). Keys absent here lose every "999" when
    # they contain it and are otherwise left untouched.
    brand_fragments = {
        '999皮炎平曲安奈德益康唑乳膏30': ("999皮炎平",),
        '999必无忧盐酸特比萘芬喷雾剂30': ("999必无忧",),
        '999必无忧盐酸特比萘芬乳膏15g': ("999必无忧",),
        '999速复康布洛芬缓释胶囊': ("999速复康",),
        '999选平硝酸咪康唑乳膏20g': ("999选平",),
        '999皮炎平复方醋酸地塞米松乳膏20': ("999皮炎平",),
        '史达功右美沙芬愈创甘油醚糖浆120': ("史达功", "120"),
        '三九胃泰养胃舒颗粒8袋': ("三九胃泰", "8袋"),
        '今维多赐多康牌蛋白粉': ("今维多",),
        '佳美舒阿奇霉素肠溶胶囊4': ("佳美舒", "4"),
        '三九胃泰颗粒20g*10': ("20g*10",),
        '三九胃泰颗粒20g*6袋': ("20g*6袋",),
        '顺峰康王酮康他索乳膏': ("顺峰康王",),
    }
    # search_key -> pack-size fragment removed afterwards.
    size_fragment = {
        '999糠酸莫米松凝胶15': "15",
        '999皮炎平曲安奈德益康唑乳膏30': "30",
        '999复方金银花颗粒10g': "10g",
        '999复方板蓝根颗粒15g*15袋/盒': "15g*15袋/盒",
        '999复方氨酚烷胺胶囊6粒': "6粒",
        '999可调式生理性海水鼻腔喷雾50': "50",
        '999止泻利颗粒15g*8': "15g*8",
        '999必无忧盐酸特比萘芬喷雾剂30': "30",
        '999必无忧盐酸特比萘芬乳膏15g': "15g",
        '999复方苦参肠炎康片12片': "12片",
        '999强力枇杷露16袋': "16袋",
        '999三蛇胆川贝膏138': "138",
        '999强力枇杷露120ml': "120ml",
        '999强力枇杷露150ml': "150ml",
        '999抗病毒口服液10ml*10': "10ml*10",
        '999抗病毒口服液10ml*12': "10ml*12",
        '999糠酸莫米松乳膏10g支': "10g支",
        '999选平硝酸咪康唑乳膏20g': "20g",
        '999感冒清热颗粒(无糖)6g': "(无糖)6g",
        '999壮骨关节丸6g*20': "6g*20",
        '999正天丸6g*15': "6g*15",
        '999皮炎平复方醋酸地塞米松乳膏20': "20",
        '999糠酸莫米松凝胶10': "10",
        '999板蓝根颗粒10g*20': "10g*20",
        '999复方氨酚烷胺胶囊10粒': "10粒",
        '999复方氨酚烷胺胶囊12粒': "12粒",
        '999咽炎片0.26g*12片*2板': "0.26g*12片*2板",
        '999小儿止咳糖浆120': "120",
        '999小儿止咳糖浆225': "225",
        '999小儿感冒颗粒6g*10': "6g*10",
        '999小儿感冒颗粒6g*24': "6g*24",
        '999小儿氨酚黄那敏颗粒6g*10袋': "6g*10袋",
        '999小儿氨酚黄那敏颗粒6g*20袋': "6g*20袋",
        '999小儿咽扁颗粒8g*10袋': "8g*10袋",
        '999小儿感冒宁颗粒2.5g*10袋': "2.5g*10袋",
        '999感冒清热颗粒12g*18': "12g*18",
        '999小柴胡颗粒10g*15': "10g*15",
    }
    # Exact title -> specification, used when the title does not match the
    # expected "[...]name spec" pattern.
    known_specifications = {
        '999抗病毒口服液10ml*12': '10ml*12支/盒',
        '999抗病毒口服液': '10ml*12支/盒',
        '999抗病毒口服液10ml*10': '10ml*10支/盒',
        '999小柴胡颗粒': '10g*9袋/盒',
        '999养胃舒颗粒': '10g*10袋/盒',
        '三九胃泰胶囊': '0.5g*24粒/盒',
        '999补脾益肠丸': '6g*15袋/盒',
        '999复方感冒灵颗粒': '14g*9袋/盒',
    }

    def _inner():
        search_key = self.search_key

        # Reduce the keyword to the text expected inside the page title.
        fragments = brand_fragments.get(search_key)
        if fragments is None:
            fragments = ("999",) if "999" in search_key else ()
        temp_search_key = search_key
        for fragment in fragments:
            temp_search_key = temp_search_key.replace(fragment, "")
        if search_key in size_fragment:
            temp_search_key = temp_search_key.replace(size_fragment[search_key], "")
        print(f'获取商品title时的搜索关键字:{temp_search_key}')

        # Poll for the title node; the long loop leaves time to clear a
        # human-verification prompt.
        title_xpath = f'//*[contains(@text, "{temp_search_key}")]'
        title = ''
        for m in range(1, 6000):
            if self.d.xpath(title_xpath).exists:
                title = self.safe_exec(lambda: self.d.xpath(title_xpath).text)
                print(f"第{m}次获取title成功")
                break
            time.sleep(3)

        # Oddity: the title sometimes comes back with an extra leading "0".
        if title.startswith('0'):
            title = title[1:]
        print(f'获取到药品标题:{title}')

        if search_key == '999赐多康大豆':
            return title, '1罐'

        # Split "[brand]name spec" into its parts.
        if search_key == "999感冒清热颗粒":
            match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
        else:
            match = re.match(r'(\[[^\]]+\])(.*?)\s*((?:\d+\S*|\(.+))$', title)

        if match:
            specifications = match.group(3).strip()
            print("药品名:", title)
            print("规格:", specifications)
            # A specification mentioning an expiry ("到期") needs a second pass.
            if '到期' in specifications:
                specifications = self.extract_specification(specifications)
            return title, specifications

        if title in known_specifications:
            return title, known_specifications[title]

        print("没有匹配到预期格式")
        return title, ''

    # Run the whole procedure through safe_exec so a verification prompt blocks it.
    return self.safe_exec(_inner)
def enter_shop(self):
    """
    Open the shop tab so the qualification pages can be reached afterwards.

    Clicks the element whose text is "店铺" and then pauses for
    ``self.get_sleep_time()`` seconds.

    :return: None
    """
    shop_tab = self.d.xpath('//*[@text="店铺"]')
    shop_tab.click()
    pause_seconds = self.get_sleep_time()
    time.sleep(pause_seconds)
def enter_shoper(self):
    """
    Enter the merchant ("商家") tab.

    Polls up to 10 times for an element whose text is "商家", sleeping
    ``self.get_sleep_time()`` seconds after each miss. Once found, the
    element is clicked and the method pauses again.

    :return: True if the tab was found and clicked, False otherwise
    """
    merchant_xpath = '//*[@text="商家"]'
    for attempt in range(10):
        if self.d.xpath(merchant_xpath).exists:
            print(f'第{attempt}次商家存在')
            break
        print(f'第{attempt}次商家不存在')
        time.sleep(self.get_sleep_time())
    else:
        # All attempts exhausted without seeing the tab.
        return False

    self.d.xpath(merchant_xpath).click()
    time.sleep(self.get_sleep_time())
    return True
- #点击查看商家资质
def scan_shoper_license(self):
    """
    Click "查看商家资质" (view merchant qualifications) when it is on screen.

    Polls up to 10 times for the element, sleeping
    ``self.get_sleep_time()`` seconds after each miss. If it never shows
    up, ``self.swipe_back(1)`` is called instead of clicking.

    :return: None
    """
    license_xpath = '//*[@text="查看商家资质"]'
    for attempt in range(10):
        if self.d.xpath(license_xpath).exists:
            print(f'第{attempt}次查看商家资质存在')
            break
        print(f'第{attempt}次查看商家资质不存在')
        time.sleep(self.get_sleep_time())
    else:
        # Entry never appeared: go back one step instead.
        self.swipe_back(1)
        return

    self.d.xpath(license_xpath).click()
    time.sleep(self.get_sleep_time())
- #验证商品的信息是否在数据库中已存在
def data_is_exists(self, data):
    """
    Check whether a product row already exists in ``self.table_name``.

    Only existence is checked; no row content is returned.

    Args:
        data: mapping of column name to value. Must contain the keys
            'product', 'min_price', 'shop', 'scrape_date' and 'platform'.

    Returns:
        True:  a matching row exists
        False: no matching row exists
        None:  a required key is missing or the database call failed
    """
    required_keys = ('product', 'min_price', 'shop', 'scrape_date', 'platform')
    missing = [key for key in required_keys if key not in data]
    if missing:
        logging.error(f"缺少必要字段: {', '.join(missing)}")
        return None

    cur = None
    try:
        conn = get_mysql()
        cur = conn.cursor()
        # Values are bound as parameters; only the table name is formatted in.
        # SELECT 1 ... LIMIT 1 is enough for an existence check.
        query_sql = """
            SELECT 1 FROM {}
            WHERE product = %s
              AND min_price = %s
              AND shop = %s
              AND scrape_date = %s
              AND platform = %s
            LIMIT 1
        """.format(self.table_name)
        cur.execute(query_sql, tuple(data[key] for key in required_keys))
        return cur.fetchone() is not None
    except Exception as e:
        logging.exception("MySQL 错误: %s", e)
        return None
    finally:
        # NOTE(review): the connection from get_mysql() is left open on
        # purpose; whether it is shared/pooled is not visible here. Confirm
        # who owns its lifetime.
        if cur is not None:
            try:
                cur.close()
            except Exception:
                logging.debug("cursor close failed", exc_info=True)
-
- #验证店铺信息是否在数据库中已存在
def shop_is_exists_database(self, shop):
    """
    Check whether a shop already exists in ``self.shop_table_name``.

    Args:
        shop: value compared against the ``shop`` column.

    Returns:
        True:  a matching row exists
        False: no matching row exists
        None:  the database call failed
    """
    cur = None
    try:
        conn = get_mysql()
        cur = conn.cursor()
        query_sql = """
            SELECT 1 FROM {}
            WHERE shop = %s
            LIMIT 1
        """.format(self.shop_table_name)
        # The parameters must be a sequence: ``(shop)`` is just the string
        # itself, ``(shop,)`` is the one-element tuple DB-API drivers expect.
        cur.execute(query_sql, (shop,))
        return cur.fetchone() is not None
    except Exception as e:
        logging.exception("MySQL 错误: %s", e)
        return None
    finally:
        # NOTE(review): the connection from get_mysql() is left open on
        # purpose; its ownership is not visible here. Confirm.
        if cur is not None:
            try:
                cur.close()
            except Exception:
                logging.debug("cursor close failed", exc_info=True)
def wait_if_verifying(self, monitor, timeout=120):
    """Block the caller while ``monitor.pausing`` is set, for at most ``timeout`` seconds.

    The flag is re-checked once per second.
    """
    began = time.time()
    while True:
        if not monitor.pausing.is_set():
            break
        if time.time() - began >= timeout:
            break
        time.sleep(1)
- # def safe_xpath(self, xpath, timeout=10):
- # """线程安全 xpath 查找"""
- # self.wait_if_verifying(self.monitor)
- # return self.d.xpath(xpath).wait(timeout=timeout)
-
def wait_for_ready(self, monitor, timeout=86400):
    """Wait for the pause flag to clear before entering a page, then scan once more.

    Blocks while ``monitor.pausing`` is set (re-checked every second, at
    most ``timeout`` seconds), then always calls
    ``monitor.check_and_handle_popup()`` as an extra safeguard in case a
    popup appeared at the last moment.
    """
    began = time.time()
    while True:
        if not monitor.pausing.is_set():
            break
        if time.time() - began >= timeout:
            break
        time.sleep(1)
    monitor.check_and_handle_popup()
def safe_list(self, xpath, monitor):
    """Return every element matching ``xpath`` after waiting for the monitor to be ready.

    Calls ``self.wait_for_ready(monitor)`` first, then returns
    ``self.d.xpath(xpath).all()``.
    """
    self.wait_for_ready(monitor)
    selector = self.d.xpath(xpath)
    matches = selector.all()
    return matches
-
def safe_exec(self, func, *args, **kwargs):
    """
    Run ``func(*args, **kwargs)`` once the monitor is no longer pausing.

    While ``self.monitor.pausing`` is set, the call is held back and the
    flag is re-checked every second, with no upper time limit.

    :return: whatever ``func`` returns
    """
    while True:
        if not self.monitor.pausing.is_set():
            break
        time.sleep(1)
    outcome = func(*args, **kwargs)
    return outcome
-
def get_next_data(self, data, target):
    """Return the 'words' value of the entry that follows the entry equal to ``target``.

    ``data`` is a sequence of mappings that each have a 'words' key.
    Returns None when ``target`` is not found or when it is the last entry.
    """
    for position, entry in enumerate(data):
        if entry['words'] != target:
            continue
        following = position + 1
        if following >= len(data):
            # Match is the final entry: nothing follows it.
            continue
        return data[following]['words']
    return None
-
def delete_instruction_screenshot(self, screenshot_path):
    """Delete the screenshot at ``screenshot_path``, printing the outcome.

    A missing file or any other failure is reported on stdout and never
    raised to the caller.
    """
    try:
        os.remove(screenshot_path)
    except FileNotFoundError:
        print(f"文件未找到,无法删除:{screenshot_path}")
    except Exception as err:
        print(f"删除文件时出错:{err}")
    else:
        print(f"截图文件已删除:{screenshot_path}")
-
- '''
- def get_instructions_data(self):
- """
- 确定有说明书之后,提取所有的说明书数据
- :return:
- """
- self.d.xpath('//*[@text="说明"]').click()
- # time.sleep(random.randint(3, 5))
- time.sleep(0.5)
- self.d.xpath('//*[@text="查看详细说明"]').click()
- # time.sleep(random.randint(3, 5))
- time.sleep(0.5)
- self.d.xpath('//*[@text="加载更多"]').click_exists()
- loop_page = 5
- # new_list = list()
- new_list = []
- for i in range(loop_page):
- self.d.xpath('//*[@text="加载更多"]').click_exists()
- time.sleep(0.2)
- if i == 0:
- self.d.swipe(200, 1000, 200, 300, 0.4)
- else:
- self.d.swipe(200, 1000, 200, 62)
- time.sleep(0.2)
- if self.d.xpath('//*[@text="加载更多"]').exists:
- self.d.xpath('//*[@text="加载更多"]').click()
- time.sleep(0.2)
- all_tt = self.d.xpath(
- '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup').all()
- for idx in range(1, len(all_tt) + 1):
- all_tt1 = self.d.xpath(
- f'//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[{idx}]//android.widget.TextView').all()
- # print(f'当前说明书列表数据:{all_tt1}')
- for tt in all_tt1:
- if tt.text and tt.text != '展开全文':
- new_list.append(tt.text)
- if i == 0:
- height = 938
- else:
- drug_box = self.d.xpath(
- '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]').info
- bounds = drug_box['bounds']
- height = bounds['bottom'] - bounds['top']
- if height < 938:
- # print('说明书翻页到底部')
- break
- # 展开全文
- new_list = [item for item in new_list if item != '展开全文']
- print(f'当前说明书列表数据:{new_list}')
- # expiry_date_index = next(idx for idx, i in enumerate(new_list) if i == '有效期')
- # manufacturer_index = next(idx for idx, i in enumerate(new_list) if i == '生产单位')
- # approval_number_index = next(idx for idx, i in enumerate(new_list) if i == '批准文号')
- # res_data = {
- # "有效期": new_list[expiry_date_index + 1],
- # "生产单位": new_list[manufacturer_index + 1],
- # "批准文号": new_list[approval_number_index + 1]
- # }
- res_data = {
- "有效期": (new_list[new_list.index("有效期") + 1]) if "有效期" in new_list and new_list.index("有效期") + 1 < len(new_list) else "",
- "生产单位": (new_list[new_list.index("生产单位") + 1]) if "生产单位" in new_list and new_list.index("生产单位") + 1 < len(new_list) else "",
- "批准文号": (new_list[new_list.index("批准文号") + 1]) if "批准文号" in new_list and new_list.index("批准文号") + 1 < len(new_list) else ""
- }
- print(f'当前说明书字典数据:{res_data}')
- return res_data
- '''
-
- '''
- def get_instructions_data(self):
- """
- 确定有说明书之后,提取所有的说明书数据
- :return:
- """
- self.d.xpath('//*[@text="说明"]').click()
- # time.sleep(random.randint(3, 5))
- time.sleep(0.5)
- self.d.xpath('//*[@text="查看详细说明"]').click()
- # time.sleep(random.randint(3, 5))
- time.sleep(0.5)
- # 1) 先向上滑动一次,触发“加载更多”出现
- self.d.swipe(200, 1000, 200, 300, 0.4)
- time.sleep(0.3)
- # 2) 再进入“出现就点”的循环
- while self.d.xpath('//*[@text="加载更多"]').click_exists(timeout=1):
- time.sleep(0.2)
- self.d.swipe(200, 1000, 200, 300, 0.4)
- # self.d.swipe(200, 1000, 200, 62)
- time.sleep(0.2)
-
- # 一次性获取所有文本
- texts = [
- node.text.strip()
- # for node in self.d.xpath('//android.widget.TextView').all()
- for node in self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.TextView').all()
- if node.text and node.text.strip() and node.text != '加载更多'
- ]
- print(f'当前说明书列表数据:{texts}')
- # 提取关键字段
- def safe_get(key):
- # try:
- # idx = texts.index(key)
- # return texts[idx + 1] if idx + 1 < len(texts) else ""
- # except ValueError:
- # return ""
- try:
- idx = next(i for i, text in enumerate(texts) if text == key)
- return texts[idx + 1] if idx + 1 < len(texts) else ""
- except StopIteration:
- return ""
- res_data = {
- "有效期": safe_get("有效期"),
- "生产单位": safe_get("生产单位"),
- "批准文号": safe_get("批准文号")
- }
- print(f'当前说明书字典数据:{res_data}')
- return res_data
- '''
-
- '''
- def get_instructions_data(self):
- """
- 说明书键值对采集:连续两个 TextView 为一对,精确提取
- """
- # 1. 进入说明书
- self.d(text="说明").click()
- time.sleep(0.5)
- self.d(text="查看详细说明").click()
- time.sleep(0.5)
- # self.d(text="加载更多").click_exists(timeout=0.5)
- # 2. 找到说明书最外层 ScrollView(页面主体)
- scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.ScrollView")
- count = scroll_view.count
- print(f"找到的 ScrollView 数量: {count}")
- if not scroll_view.exists:
- return {"有效期": "", "生产单位": "", "批准文号": ""}
- # 3. 在 ScrollView 内再定位真正包含键值对的容器
- # 绝大多数美团说明书页面对应的是 ScrollView > ViewGroup > 若干 TextView
- kv_container = scroll_view.child(className="android.view.ViewGroup")
- if not kv_container.exists:
- kv_container = scroll_view # 降级:直接对 ScrollView 取子孙 TextView
- # 4. 滑动到底并收集所有 TextView(保留顺序)
- all_texts = []
- max_swipe = 5
- last_length = 0
- for _ in range(max_swipe):
- texts = kv_container.child(className="android.widget.TextView")
- #获取texts中的文本
- print(f'当前说明书列表数据:{texts}')
- current_texts = []
- self.loggerMT.info(f'说明书111')
- for tv in texts:
- try:
- txt = tv.get_text().strip()
- # txt = tv.info['text'].strip()
- except Exception:
- continue
- if txt and txt != "展开全文":
- current_texts.append(txt)
- self.loggerMT.info(f'说明书222')
- print(f'当前说明书列表数据:{current_texts}')
- # 去重
- if current_texts:
- current_texts = [t for t in current_texts if t not in all_texts]
- all_texts.extend(current_texts)
- # 判断是否到底
- # if not scroll_view.info.get("scrollable"):
- # break
-
- # 判断是否到底
- if len(all_texts) == last_length:
- break
- last_length = len(all_texts)
- # self.d.swipe_ext("up", scale=0.7)
- #向上滑动一次
- self.d.swipe(200, 1000, 200, 300, 0.2)
- time.sleep(0.2)
- if self.d.xpath('//*[@text="加载更多"]').exists:
- self.d.xpath('//*[@text="加载更多"]').click()
- # 5. 成对解析
- res_data = {"有效期": "", "生产单位": "", "批准文号": ""}
- for i in range(len(all_texts) - 1):
- key = all_texts[i]
- val = all_texts[i + 1]
- if key in res_data:
- res_data[key] = val
- print(f'说明书文本共 {len(all_texts)} 条,提取结果: {res_data}')
- # time.sleep(1000000)
- return res_data
- '''
def get_instructions_data(self):
    """
    Open the product's instruction sheet and read three fields from it via OCR.

    Steps:
      1. Click "说明", then "查看详细说明". If that is absent, scroll (up to
         8 times) looking for "查看全部" and click it; if neither entry is
         found, log it and return a result with empty strings.
      2. Click "加载更多" if it appears within 8 swipes.
      3. Swipe (up to 10 times) until both "生产单位" and "批准文号" are on
         screen.
      4. Take a screenshot, run OCR on it, and for each label take the OCR
         entry that follows it.

    The screenshot file is always deleted afterwards, including when OCR
    raises.

    :return: dict with keys "有效期", "生产单位", "批准文号". A value is ''
        when OCR returned nothing, and may be None when the label was not
        found or was the last OCR entry (see ``get_next_data``).
    """
    self.d.xpath('//*[@text="说明"]').click()
    time.sleep(0.5)

    if self.d.xpath('//*[@text="查看详细说明"]').exists:
        self.d.xpath('//*[@text="查看详细说明"]').click()
    else:
        view_all_xpath = '//*[@text="查看全部"]'
        for _ in range(8):
            if self.d.xpath(view_all_xpath).exists:
                print('开始点击查看全部')
                break
            self.d.swipe_ext('down', 0.3)
            time.sleep(1)
            if self.d.xpath(view_all_xpath).exists:
                print('开始点击查看全部2')
                break
        if not self.d.xpath(view_all_xpath).exists:
            self.loggerMT.info('获取到的说明书信息为空。')
            return {"有效期": '', "生产单位": '', "批准文号": ''}
        self.d.xpath(view_all_xpath).click()
    time.sleep(0.5)

    # Expand the sheet: click "加载更多" once if it shows up while scrolling.
    for _ in range(8):
        if self.d.xpath('//*[@text="加载更多"]').exists:
            self.d.xpath('//*[@text="加载更多"]').click()
            time.sleep(0.2)
            break
        self.d.swipe(200, 1000, 200, 300, 0.3)

    # Scroll until both labels are visible, so one screenshot captures them.
    for _ in range(10):
        if (self.d.xpath('//*[@text="生产单位"]').exists
                and self.d.xpath('//*[@text="批准文号"]').exists):
            break
        self.d.swipe(200, 1300, 200, 300, 0.3)

    instruction_path = self.screenshot_instruction()
    print(f"instruction_path= {instruction_path}")
    try:
        time.sleep(2)
        ocr_res = self.get_ocr_res_image(instruction_path)
        if ocr_res:
            # Each value is the OCR entry directly after its label.
            validity = self.get_next_data(ocr_res, '有效期')
            approval_number = self.get_next_data(ocr_res, '批准文号')
            manufacturer = self.get_next_data(ocr_res, '生产单位')
        else:
            validity = ''
            approval_number = ''
            manufacturer = ''
        res_data = {
            "有效期": validity,
            "生产单位": manufacturer,
            "批准文号": approval_number
        }
        print(f"res_data={res_data}")
        time.sleep(1)
    finally:
        # Remove the temporary screenshot even if OCR raised.
        self.delete_instruction_screenshot(instruction_path)
    return res_data
-
def has_instructions(self):
    """
    Tell whether the current product page shows an instruction-sheet entry ("说明").

    After an initial pause, checks for the element up to 8 times. Between
    checks the page is scrolled with ``swipe_ext('down', 0.3)`` and the
    method sleeps one second before checking again.

    :return: True as soon as the element is seen, otherwise False
    """
    time.sleep(self.get_sleep_time())
    label_xpath = '//*[@text="说明"]'
    for attempt in range(8):
        if self.d.xpath(label_xpath).exists:
            print(f"第{attempt}次有说明书1")
            return True
        self.d.swipe_ext('down', 0.3)
        time.sleep(1)
        if self.d.xpath(label_xpath).exists:
            print(f"第{attempt}次有说明书2")
            return True
    return False
def has_shop(self):
    """
    Tell whether an "进店" (enter shop) button is present.

    Pauses for ``self.get_sleep_time()`` seconds before checking.

    :return: the ``exists`` value of the "进店" selector
    """
    time.sleep(self.get_sleep_time())
    enter_button = self.d.xpath('//*[@text="进店"]')
    return enter_button.exists
- #获取商品对应的店铺信息
def get_license_info_ex(self):
    """
    Collect the shop details for the current product.

    Every navigation step runs through ``self.safe_exec``. Flow: enter the
    shop, enter the merchant tab, wait for "查看商家资质", read the shop
    address, open the qualifications page and read the qualification
    number. If a number is found, the business-license image is opened,
    screenshotted and OCR'd.

    :return: dict with keys 'contact_address', 'qualification_number',
        'business_license_company', 'business_license_address'. Fields
        that could not be obtained are ''.
    """
    self.safe_exec(self.enter_shop)
    entered_merchant = self.safe_exec(self.enter_shoper)
    # Kept as an equality test against False to match the original check.
    if entered_merchant == False:
        return {
            'contact_address': '',
            'qualification_number': '',
            'business_license_company': '',
            'business_license_address': '',
        }

    # Wait (up to 10 polls) for the qualifications entry to show up.
    for attempt in range(10):
        if self.d.xpath('//*[@text="查看商家资质"]').exists:
            print(f"第{attempt}次有商家资质")
            break
        print(f"第{attempt}次没有商家资质")
        time.sleep(self.get_sleep_time())

    contact_address = self.safe_exec(self.get_shop_address)
    self.safe_exec(self.scan_shoper_license)
    qualification_number = self.safe_exec(self.get_qualification_number)

    if not qualification_number:
        return {
            'contact_address': contact_address,
            'qualification_number': '',
            'business_license_company': '',
            'business_license_address': '',
        }

    # Tap the fixed screen position used to open the business-license image.
    self.d.click(0.603, 0.27)
    time.sleep(self.get_sleep_time())
    cropped_screenshot_path = self.screenshot_the_business_license(qualification_number)
    print(f'cropped_screenshot_path:{cropped_screenshot_path}')
    ocr_res = self.get_ocr_res(cropped_screenshot_path)
    print(f'ocr_res:{ocr_res}')

    company_name = ''
    company_address = ''
    if ocr_res:
        if '单位名称' in ocr_res.keys():
            company_name = ocr_res['单位名称']
        if '地址' in ocr_res.keys():
            company_address = ocr_res['地址']

    return {
        'contact_address': contact_address,
        'qualification_number': qualification_number,
        'business_license_company': company_name,
        'business_license_address': company_address,
    }
- """暂不用该功能
- def get_license_info(self):
- self.enter_shop()
- self.enter_shoper()
- self.scan_shoper_license()
- # 获取资质编码
- qualification_number = self.get_qualification_number()
- if qualification_number:
- table_license_info = self.get_table_license_info(qualification_number)
- if table_license_info:
- return {
- '单位名称': table_license_info[0],
- '地址': table_license_info[1],
- '社会信用代码': table_license_info[2]
- }
- else:
- # operate_no = random.randint(0, 1)
- self.d.click(0.603, 0.27)
- # if operate_no == 0:
- # self.d.xpath('//*[@text="营业执照"]').click()
- # else:
- # self.d.click(0.603, 0.27)
- time.sleep(self.get_sleep_time())
- self.screenshot_the_business_license()
- ocr_res = self.get_ocr_res('cropped_screenshot.png')
- return ocr_res
- # operate_no = random.randint(0, 1)
- self.d.click(0.603, 0.27)
- # if operate_no == 0:
- # self.d.xpath('//*[@text="营业执照"]').click()
- # else:
- # self.d.click(0.603, 0.27)
- time.sleep(self.get_sleep_time())
- self.screenshot_the_business_license()
- ocr_res = self.get_ocr_res('cropped_screenshot.png')
- return ocr_res
- """
def distinct_target(self):
    """
    Detect whether the app is back on the search-result list page.

    The page counts as the list page when any one of six known layout
    xpaths exists (two position containers and four filter-bar variants).
    The status message is printed after the check, so it matches the
    returned value.

    :return: True if any known list-page xpath exists, otherwise False
    """
    candidate_xpaths = (
        # Position containers (RelativeLayout[1] / RelativeLayout[2] variants).
        '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]',
        '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]',
        # Last item of the horizontal filter bar, four layout variants.
        '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
        '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
        '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
        '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]',
    )
    # Stop at the first layout that matches; each probe is a device query.
    result = any(self.d.xpath(candidate).exists for candidate in candidate_xpaths)

    # Report after the check; printing before it always said "not returned".
    if result:
        print("---检测回到了列表页---")
    else:
        print("---检测没有回到列表页---")
    return result
def enter_target_page(self):
    """
    Navigate from the home screen to the price-sorted search results for ``self.search_key``.

    Sequence: open "看病买药", click the search carousel, click the search
    input, type the keyword, click "搜索", call
    ``self.click_express_send()``, then click "价格" twice. Every step is
    followed by a pause of ``self.get_sleep_time()`` seconds.
    """
    def pause():
        time.sleep(self.get_sleep_time())

    def tap(selector):
        # Click the element matched by the xpath, then wait.
        self.d.xpath(selector).click()
        pause()

    tap('//*[@content-desc="看病买药"]')
    tap('//*[@resource-id="com.sankuai.meituan:id/vf_search_carousel_text"]')
    tap('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]')

    self.d.send_keys(self.search_key, clear=True)
    pause()
    tap('//*[@text="搜索"]')

    # Select the express-delivery filter before sorting.
    self.click_express_send()
    print("价格高到低")
    pause()

    # "价格" is clicked twice, as in the original flow.
    price_xpath = '//*[@text="价格"]'
    for _ in range(2):
        tap(price_xpath)
-
-
- def click_express_send(self):
- # xpath= '//*[@resource-id="com.sankuai.meituan:id/container"]//android.widget.HorizontalScrollView[last()]'
- slide_xpath = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
- slide_xpath2= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
- slide_xpath3= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
- slide_xpath4= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]'
- for i in range (1,3):
- if self.d.xpath(slide_xpath).exists:
- bounds = self.d.xpath(slide_xpath).info['bounds']
- top = bounds['top']
- bottom = bounds['bottom']
- print(f'top={top}')
- print(f'bottom={bottom}')
- y = (top + bottom) // 2
- print(f'y={y}')
- self.loggerMT.info('开始滑动1')
- self.d.swipe(500, y, 100, y, 0.5)
- time.sleep(self.get_sleep_time())
- break
- elif self.d.xpath(slide_xpath2).exists:
- bounds = self.d.xpath(slide_xpath2).info['bounds']
- top = bounds['top']
- bottom = bounds['bottom']
- print(f'top={top}')
- print(f'bottom={bottom}')
- y = (top + bottom) // 2
- print(f'y={y}')
- self.loggerMT.info('开始滑动2')
- self.d.swipe(500, y, 100, y, 0.5)
- time.sleep(self.get_sleep_time())
- break
- elif self.d.xpath(slide_xpath3).exists:
- bounds = self.d.xpath(slide_xpath3).info['bounds']
- top = bounds['top']
- bottom = bounds['bottom']
- print(f'top={top}')
- print(f'bottom={bottom}')
- y = (top + bottom) // 2
- print(f'y={y}')
- self.loggerMT.info('开始滑动3')
- self.d.swipe(500, y, 100, y, 0.5)
- time.sleep(self.get_sleep_time())
- break
- elif self.d.xpath(slide_xpath4).exists:
- bounds = self.d.xpath(slide_xpath4).info['bounds']
- top = bounds['top']
- bottom = bounds['bottom']
- print(f'top={top}')
- print(f'bottom={bottom}')
- y = (top + bottom) // 2
- print(f'y={y}')
- self.loggerMT.info('开始滑动4')
- self.d.swipe(500, y, 100, y, 0.5)
- time.sleep(self.get_sleep_time())
- break
- max_retry = 5 # 最多尝试次数
- for idx in range(1, max_retry + 1):
- # xpath= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()-1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]'
- xpath= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
- xpath2= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
- xpath3= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
- xpath4 = '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
- # print(f"xpath:{xpath}")
- # scroll_view = self.d(resourceId="com.sankuai.meituan:id/container") .child(className="android.widget.HorizontalScrollView")
- if self.d.xpath(xpath).exists:
- self.d.xpath(xpath).click()
- # time.sleep(self.get_sleep_time())
- print(f"第{idx}次点击xpath快递送成功")
- time.sleep(self.get_sleep_time())
- break
- elif self.d.xpath(xpath2).exists:
- self.d.xpath(xpath2).click()
- # time.sleep(self.get_sleep_time())
- print(f"第{idx}次点击xpath2快递送成功")
- time.sleep(self.get_sleep_time())
- break
- elif self.d.xpath(xpath3).exists:
- self.d.xpath(xpath3).click()
- # time.sleep(self.get_sleep_time())
- print(f"第{idx}次点击xpath3快递送成功")
- time.sleep(self.get_sleep_time())
- break
- elif self.d.xpath(xpath4).exists:
- self.d.xpath(xpath4).click()
- # time.sleep(self.get_sleep_time())
- print(f"第{idx}次点击xpath4快递送成功")
- time.sleep(self.get_sleep_time())
- break
- else:
- print(f"第{idx}次点击xpath或xpath2或xpath3快递送都失败")
- time.sleep(self.get_sleep_time())
- # xpath2= '//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.HorizontalScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
- # if self.d.xpath(xpath2).exists:
- # self.d.xpath(xpath2).click()
- # print(f"第{idx}次点击xpath2快递送成功")
- # time.sleep(self.get_sleep_time())
- # break
-
-
- """暂不用该功能
- def get_table_license_info(self, qualification_number):
- try:
- sql = f'select business_license_company,city,credit_code from mt_drug where credit_code = "{qualification_number}"'
- self.mysql_client.cur.execute(sql)
- res = self.mysql_client.cur.fetchone()
- return res
- except:
- return None
- """
-
- # def get_clipboard(self):
- # """通过ADB获取Android手机剪贴板内容"""
- # try:
- # result = subprocess.run(
- # ["adb", "shell", "am", "broadcast", "-a", "clipper.get"],
- # capture_output=True,
- # text=True,
- # timeout=5
- # )
- # print(f"获取剪贴板结果: {result.stdout}")
- # # 解析返回信息中的剪贴板内容
- # for line in result.stdout.splitlines():
- # if "data=" in line:
- # return line.split("data=")[1].strip()
- # return ""
- # except Exception as e:
- # print("获取剪贴板失败:", e)
- # return ""
-
- # def get_clipboard(self):
- # """读取 Android 剪贴板(系统自带命令)"""
- # try:
- # text = subprocess.check_output(
- # ["adb", "shell", "cmd", "clipboard", "get"],
- # text=True, timeout=5, stderr=subprocess.STDOUT
- # ).strip()
- # print(f"获取剪贴板结果: {text}")
- # return text if text else ""
- # except Exception as e:
- # print("获取剪贴板失败:", e)
- # return ""
def get_clipboard(self, settle_delay=1):
    """Return the device clipboard text with surrounding whitespace removed.

    Args:
        settle_delay: Seconds to sleep before reading the clipboard.
            Defaults to 1, the wait that used to be hard-coded here
            (presumably to let the app finish writing the clipboard --
            TODO confirm).

    Returns:
        The stripped clipboard text, or ``''`` when ``self.d.clipboard``
        is ``None``.
    """
    time.sleep(settle_delay)
    # Read the clipboard exactly once so that the value written to the log
    # is the very value that gets returned, and the device is queried only
    # one time per call.
    clipboard_content = self.d.clipboard
    self.loggerMT.info(f"Clipboard content:{clipboard_content}")  # debug trace
    if clipboard_content is None:
        return ''
    return clipboard_content.strip()
-
def clear_clipboard(self):
    """Overwrite the device clipboard with an empty string."""
    # Positional arguments handed to the device: the new (empty) text and
    # the literal "text/plain" as the second argument.
    payload = ("", "text/plain")
    self.d.set_clipboard(*payload)
-
- # def clear_clipboard(self):
- # """清空手机剪贴板:写入空字符串(subprocess 版)"""
- # try:
- # subprocess.run(
- # ["adb", "shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", " "],
- # check=True,
- # capture_output=True,
- # text=True,
- # timeout=5
- # )
- # except subprocess.CalledProcessError as e:
- # print("ADB 清空失败:", e.stderr)
-
- # def clear_clipboard():
- # """清空手机剪贴板:写入空字符串"""
- # try:
- # adb_shell(["shell", "am", "broadcast", "-a", "clipper.set", "-e", "text", ""])
- # except subprocess.CalledProcessError as e:
- # print("ADB 清空失败:", e.output)
-
- #获取一个商品的数据、商品对应的店铺的数据
def get_product_link(self):
    """Copy the share link of the product on screen and return it.

    Looks for the "···" (more) button at one of three known layout
    positions, taps it, taps "分享商品", and, if "复制链接" is present,
    taps that too and reads the result via ``self.get_clipboard()``.
    The sequence is attempted up to five times, sleeping 0.5 s between
    unsuccessful attempts (but not after the last one).

    Returns:
        The value obtained from ``self.get_clipboard()`` after the copy
        action, or ``''`` when no attempt produced a non-empty value.
    """
    # The three candidate buttons share the same ancestors and the same
    # tail; only the two ViewGroup indices in the middle differ.
    layout_root = (
        '//*[@resource-id="com.sankuai.meituan:id/container"]'
        '/android.widget.FrameLayout[1]'
        '/android.widget.RelativeLayout[1]'
        '/android.widget.FrameLayout[1]'
        '/android.widget.LinearLayout[1]'
        '/android.widget.FrameLayout[1]'
        '/android.widget.FrameLayout[3]'
        '/android.widget.FrameLayout[1]'
    )
    button_tail = (
        '/android.view.ViewGroup[2]'
        '/android.view.ViewGroup[1]'
        '/android.view.ViewGroup[2]'
        '/android.view.ViewGroup[1]'
        '/android.view.ViewGroup[1]'
        '/android.widget.ImageView[1]'
    )
    middle_variants = (
        '/android.view.ViewGroup[3]/android.view.ViewGroup[1]',
        '/android.view.ViewGroup[3]/android.view.ViewGroup[2]',
        '/android.view.ViewGroup[2]/android.view.ViewGroup[1]',
    )
    more_button_xpaths = [
        layout_root + middle + button_tail for middle in middle_variants
    ]
    share_item_xpath = '//*[@text="分享商品"]'
    copy_item_xpath = '//*[@text="复制链接"]'

    attempts_allowed = 5  # upper bound on full passes over the candidates
    link = ''
    attempt = 1
    # Stop as soon as a link has been obtained or the attempts run out.
    while attempt <= attempts_allowed and not link:
        for button_xpath in more_button_xpaths:
            if not self.d.xpath(button_xpath).exists:
                continue

            entered_msg = f'{attempt}-进入分享点点点'
            print(entered_msg)
            self.loggerMT.info(entered_msg)
            self.d.xpath(button_xpath).click()
            time.sleep(0.2)
            self.d.xpath(share_item_xpath).click_exists()
            time.sleep(0.2)

            if not self.d.xpath(copy_item_xpath).exists:
                # Share menu did not offer "copy link": record the (empty)
                # state and move on to the next candidate button.
                missing_msg = f'{attempt}-商品链接:{link}'
                print(missing_msg)
                self.loggerMT.info(missing_msg)
                link = ''
                continue

            self.d.xpath(copy_item_xpath).click()
            time.sleep(1)
            link = self.get_clipboard()
            time.sleep(0.5)
            copied_msg = f'{attempt}-商品链接:{link}'
            print(copied_msg)
            self.loggerMT.info(copied_msg)
            break  # copy action performed; leave the candidate loop

        if not link and attempt < attempts_allowed:
            time.sleep(0.5)  # no pause needed after the final attempt
        attempt += 1

    return link
- def integrate_data(self):
- #测试说明书详情:
- # instructions_info = self.safe_exec(self.get_instructions_data)
- # time.sleep(1000000)
- #测试店铺信息
- # license_info = self.safe_exec(self.get_license_info_ex)
- # time.sleep(1000000)
- #测试定位地址
-
- #获取链接开始
- #self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ScrollView[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView').text
- #1、点击页面的... 先判断元素是否存在
- '''
- if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- print('1-进入分享点点点111')
- self.loggerMT.info('1-进入分享点点点111')
- self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
- #点击分享商品
- # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- time.sleep(0.2)
- self.d.xpath('//*[@text="分享商品"]').click_exists()
- time.sleep(0.2)
- self.d.xpath('//*[@text="复制链接"]').click_exists()
- time.sleep(1)
- #获取剪切板的数据
- product_link = self.get_clipboard()
- time.sleep(0.5)
- print(f'1-商品链接:{product_link}')
- self.loggerMT.info(f'1-商品链接:{product_link}')
- #清空剪切板
- # self.clear_clipboard()
- # if self.d.xpath('//*[@text="加载更多"]').click_exists():
- # self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
- # if self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').exists:
- # self.d.xpath('//android.support.v7.widget.RecyclerView/android.view.ViewGroup[3]/android.widget.ImageView[1]').click()
- # #获取剪切板的数据
- # product_link = self.get_clipboard()
- # time.sleep(0.5)
- # print(f'商品链接:{product_link}')
- # #清空剪切板
- # self.clear_clipboard()
- # else:
- # print('未找到分享按钮111')
- elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- print('1-进入分享点点点222')
- self.loggerMT.info('1-进入分享点点点222')
- self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
- time.sleep(0.2)
- self.d.xpath('//*[@text="分享商品"]').click_exists()
- time.sleep(0.2)
- self.d.xpath('//*[@text="复制链接"]').click_exists()
- time.sleep(1)
- #获取剪切板的数据
- product_link = self.get_clipboard()
- time.sleep(0.5)
- print(f'1-商品链接:{product_link}')
- self.loggerMT.info(f'1-商品链接:{product_link}')
- #如果为获取到product_link 则等待0.5秒再获取
- if not product_link:
- time.sleep(0.5)
- if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- print('2-进入分享点点点111')
- self.loggerMT.info('2-进入分享点点点111')
- self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
- #点击分享商品
- # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- time.sleep(0.2)
- self.d.xpath('//*[@text="分享商品"]').click_exists()
- time.sleep(0.2)
- self.d.xpath('//*[@text="复制链接"]').click_exists()
- time.sleep(1)
- #获取剪切板的数据
- product_link = self.get_clipboard()
- time.sleep(0.5)
- print(f'2-商品链接:{product_link}')
- self.loggerMT.info(f'2-商品链接:{product_link}')
- elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- print('2-进入分享点点点222')
- self.loggerMT.info('2-进入分享点点点222')
- self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
- time.sleep(0.2)
- self.d.xpath('//*[@text="分享商品"]').click_exists()
- time.sleep(0.2)
- self.d.xpath('//*[@text="复制链接"]').click_exists()
- time.sleep(1)
- #获取剪切板的数据
- product_link = self.get_clipboard()
- time.sleep(0.5)
- print(f'2-商品链接:{product_link}')
- self.loggerMT.info(f'2-商品链接:{product_link}')
-
- #如果为获取到product_link 则等待0.5秒再获取
- if not product_link:
- time.sleep(0.5)
- if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- print('3-进入分享点点点111')
- self.loggerMT.info('3-进入分享点点点111')
- self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
- #点击分享商品
- # if self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[3]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- time.sleep(0.2)
- self.d.xpath('//*[@text="分享商品"]').click_exists()
- time.sleep(0.2)
- self.d.xpath('//*[@text="复制链接"]').click_exists()
- time.sleep(1)
- #获取剪切板的数据
- product_link = self.get_clipboard()
- time.sleep(0.5)
- print(f'3-商品链接:{product_link}')
- self.loggerMT.info(f'3-商品链接:{product_link}')
- elif self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').exists:
- print('3-进入分享点点点222')
- self.loggerMT.info('3-进入分享点点点222')
- self.d.xpath('//*[@resource-id="com.sankuai.meituan:id/container"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[3]/android.widget.FrameLayout[1]/android.view.ViewGroup[3]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.ImageView[1]').click()
- time.sleep(0.2)
- self.d.xpath('//*[@text="分享商品"]').click_exists()
- time.sleep(0.2)
- self.d.xpath('//*[@text="复制链接"]').click_exists()
- time.sleep(1)
- #获取剪切板的数据
- product_link = self.get_clipboard()
- time.sleep(0.5)
- print(f'3-商品链接:{product_link}')
- self.loggerMT.info(f'3-商品链接:{product_link}')
- '''
- #获取链接结束
- """
- 整合数据
- :return:
- """
- # title_info = self.get_title() # 药品,规格
- # title_info = self.safe_exec(self.get_title) # 药品,规格
- product, specifications = self.safe_exec(self.get_title) # 药品,规格
- if product:
- # product, specifications = title_info
- #如果关键字包含999 则 product必须包含999 和 999后面的那段字符串 ps 999感冒灵颗粒必须包含:"999"和"感冒灵颗粒"
- if '999' in self.search_key:
- if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = self.search_key.replace('999皮炎平', '')
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = self.search_key.replace('999必无忧', '')
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = self.search_key.replace('999必无忧', '')
- elif self.search_key == '999速复康布洛芬缓释胶囊':
- temp_search_key = self.search_key.replace('999速复康', '')
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = self.search_key.replace('999选平', '')
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = self.search_key.replace('999皮炎平', '')
- else:
- temp_search_key = self.search_key.replace('999', '')
- if self.search_key == '999糠酸莫米松凝胶15':
- temp_search_key = temp_search_key.replace('15', '')
- elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = temp_search_key.replace('30', '')
- elif self.search_key == '999抗病毒口服液10ml*6支/盒':
- temp_search_key = temp_search_key.replace("10ml*6支/盒", "")
- elif self.search_key == '999复方金银花颗粒10g':
- temp_search_key = temp_search_key.replace("10g", "")
- elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
- temp_search_key = temp_search_key.replace("15g*15袋/盒", "")
- elif self.search_key == '999复方氨酚烷胺胶囊6粒':
- temp_search_key = temp_search_key.replace("6粒", "")
- elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
- temp_search_key = temp_search_key.replace("50", "")
- elif self.search_key == '999止泻利颗粒15g*8':
- temp_search_key = temp_search_key.replace("15g*8", "")
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = temp_search_key.replace("30", "")
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = temp_search_key.replace("15g", "")
- elif self.search_key == '999复方苦参肠炎康片12片':
- temp_search_key = temp_search_key.replace("12片", "")
- elif self.search_key == '999强力枇杷露16袋':
- temp_search_key = temp_search_key.replace("16袋", "")
- elif self.search_key == '999三蛇胆川贝膏138':
- temp_search_key = temp_search_key.replace("138", "")
- elif self.search_key == '999抗病毒口服液10ml*12':
- temp_search_key = temp_search_key.replace("10ml*12", "")
- elif self.search_key == '999抗病毒口服液10ml*10':
- temp_search_key = temp_search_key.replace("10ml*10", "")
- elif self.search_key == '999强力枇杷露120ml':
- temp_search_key = temp_search_key.replace("120ml", "")
- elif self.search_key == '999强力枇杷露150ml':
- temp_search_key = temp_search_key.replace("150ml", "")
- elif self.search_key == '999糠酸莫米松乳膏10g支':
- temp_search_key = temp_search_key.replace("10g支", "")
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = temp_search_key.replace("20g", "")
- elif self.search_key == '999感冒清热颗粒(无糖)6g':
- temp_search_key = temp_search_key.replace("(无糖)6g", "")
- elif self.search_key == '999壮骨关节丸6g*20':
- temp_search_key = temp_search_key.replace("6g*20", "")
- elif self.search_key == '999正天丸6g*15':
- temp_search_key = temp_search_key.replace("6g*15", "")
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = temp_search_key.replace("20", "")
- elif self.search_key == '999糠酸莫米松凝胶10':
- temp_search_key = temp_search_key.replace("10", "")
- elif self.search_key == '999板蓝根颗粒10g*20':
- temp_search_key = temp_search_key.replace("10g*20", "")
- elif self.search_key == '999复方氨酚烷胺胶囊10粒':
- temp_search_key = temp_search_key.replace("10粒", "")
- elif self.search_key == '999复方氨酚烷胺胶囊12粒':
- temp_search_key = temp_search_key.replace("12粒", "")
- elif self.search_key == '999咽炎片0.26g*12片*2板':
- temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
- elif self.search_key == '999小儿止咳糖浆120':
- temp_search_key = temp_search_key.replace("120", "")
- elif self.search_key == '999小儿止咳糖浆225':
- temp_search_key = temp_search_key.replace("225", "")
- elif self.search_key == '999小儿感冒颗粒6g*10':
- temp_search_key = temp_search_key.replace("6g*10", "")
- elif self.search_key == '999小儿感冒颗粒6g*24':
- temp_search_key = temp_search_key.replace("6g*24", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
- temp_search_key = temp_search_key.replace("6g*10袋", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
- temp_search_key = temp_search_key.replace("6g*20袋", "")
- elif self.search_key == '999小儿咽扁颗粒8g*10袋':
- temp_search_key = temp_search_key.replace("8g*10袋", "")
- elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
- temp_search_key = temp_search_key.replace("2.5g*10袋", "")
- elif self.search_key == '999感冒清热颗粒12g*18':
- temp_search_key = temp_search_key.replace("12g*18", "")
- elif self.search_key == '999小柴胡颗粒10g*15':
- temp_search_key = temp_search_key.replace("10g*15", "")
-
- if '999' not in product or temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
- else:
- if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
- temp_search_key = self.search_key.replace('史达功', '')
- temp_search_key = temp_search_key.replace('120', '')
- if '史达功' not in product or temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
- elif self.search_key == '三九胃泰养胃舒颗粒8袋':
- temp_search_key = self.search_key.replace('三九胃泰', '')
- temp_search_key = temp_search_key.replace('8袋', '')
- if '三九胃泰' not in product or temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
- elif self.search_key == '今维多赐多康牌蛋白粉':
- temp_search_key = self.search_key.replace('今维多', '')
- if '今维多' not in product or temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
- elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
- temp_search_key = self.search_key.replace('佳美舒', '')
- temp_search_key = temp_search_key.replace('4', '')
- if '佳美舒' not in product or temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
- elif self.search_key == '三九胃泰颗粒20g*10':
- temp_search_key = self.search_key.replace('20g*10', '')
- if temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
- elif self.search_key == '三九胃泰颗粒20g*6袋':
- temp_search_key = self.search_key.replace('20g*6袋', '')
- if temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
- elif self.search_key == '顺峰康王酮康他索乳膏':
- temp_search_key = self.search_key.replace('顺峰康王', '')
- if '顺峰康王' not in product or temp_search_key not in product:
- self.swipe_back(1)
- self.unrelated_data += 1
- return
-
- else:
- if self.search_key not in product.replace(' ', ''):
- self.swipe_back(1)
- self.unrelated_data += 1
- return
-
-
- # if self.search_key not in product.replace(' ', ''):
- # self.swipe_back(1)
- # self.unrelated_data += 1
- # return
- else:
- self.swipe_back(1)
- return
- min_price = self.drug_price() # 最低价格
- sales_num = self.drug_sale_num() #销售数量
- snapshot_url = '' #网页快照
- # 在这里截图存放到OSS;#采集图片存放的oss_url;
- # mt_screenshot = MTScreenshot(
- # d=self.d,
- # oss_config=self.oss_config,
- # search_key=self.search_key # 添加这行
- # )
-
- # snapshot_url = mt_screenshot.get_oss_url() #网页快照
-
- #判断是否有自营的文本,有的话不需要获取店铺的信息
- if self.d.xpath('//*[@text="自营"]').exists:
- shop = "美团自营大药房(快递电商)"
- # 爬取日期
- scrape_date = self.get_current_date()
- # scrape_date = "2025-07-18"
- dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
- 'platform': '美团'}
- print(f'当前数据:{dup_data}')
- if self.data_is_exists(dup_data):
- print('存在相同数据不入库')
- self.swipe_back(1)
- return
- else:
- for i in range(8):
- if self.d.xpath('//*[@text="进店"]').exists:
- print('开始获取店铺名1')
- break
- self.d.swipe_ext('up', 0.3)
- time.sleep(1)
- # detail_info = self.d.xpath(
- # '//android.widget.ScrollView/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[6]').info
- # bounds = detail_info['bounds']
- # height = bounds['bottom'] - bounds['top']
- # if self.d.xpath('//*[@text="进店"]').exists and height > 100:
- if self.d.xpath('//*[@text="进店"]').exists:
- print('开始获取店铺名2')
- break
- shop = self.get_shop_name()
- # 爬取日期
- scrape_date = self.get_current_date()
- # scrape_date = "2025-07-18"
- dup_data = {'product': product, 'min_price': min_price, 'shop': shop, 'scrape_date': scrape_date,
- 'platform': '美团'}
- print(f'当前数据:{dup_data}')
- #获取店铺信息开始
-
- #暂时不获取店铺信息 start
-
- is_has_enter_shop = self.has_shop()
- #需要判断shop是否已经在数据库中存在,如果存在,则不再进入店铺,直接进入下一个商品
- shop_is_exists = self.shop_is_exists_database(shop)
-
- #存在进店 并且店铺的名称不包含美团官方的字样
- print(f"已采集{self.shop_data_num}家店铺数据")
- if is_has_enter_shop and '美团官方' not in shop and '美团自营' not in shop and not shop_is_exists and self.shop_data_num < 500:
- # license_info = self.get_license_info_ex()
- license_info = self.safe_exec(self.get_license_info_ex)
- contact_address = license_info['contact_address']
- qualification_number = license_info['qualification_number']
- business_license_company = license_info['business_license_company']
- business_license_address = license_info['business_license_address']
- save_shop_data = {
- 'shop': shop,
- 'contact_address': contact_address,
- 'qualification_number': qualification_number,
- 'scrape_date': scrape_date,
- 'business_license_company':business_license_company,
- 'business_license_address':business_license_address,
- 'platform': '美团'
- }
- self.save_shop_info_to_database(save_shop_data)
- self.shop_data_num += 1 # 店铺数据数量+1
-
- self.swipe_back(2)
- else:
- print('不采集店铺信息')
-
-
- #获取店铺信息结束
-
- #暂时不获取店铺信息 end
-
- if self.data_is_exists(dup_data):
- print('存在相同数据不入库')
- self.swipe_back(1)
- return
-
- # 商品链接
- product_link = self.get_product_link()
-
- if not shop:
- print('未获取到店铺名:开始回退')
- self.swipe_back(1)
- return
- if not shop or '自营' in shop:
- self.swipe_back(1)
- return
- time.sleep(self.get_sleep_time())
-
-
-
-
- # 生产日期为空
- manufacture_date = ''
- # 执政信息
- # if is_has_enter_shop:
- # license_info = self.get_license_info()
- # business_license_company = license_info["单位名称"]
- # credit_code = license_info['社会信用代码']
- # city_str = license_info['地址']
- # # 先把省份啥的替换掉
- # city_sub_str = re.sub(r'[u4e00-\u9fa5]+省', '', city_str)
- # try:
- # city = re.search(r'[\u4e00-\u9fa5]+?(市|区|县)', city_sub_str).group(0)
- # except:
- # city = city_sub_str
- # try:
- # province = self.city2province[city]
- # except:
- # province = ''
- # self.swipe_back(2)
- # else:
- # business_license_company = ''
- # credit_code = ''
- # city = ''
- # province = ''
- business_license_company = ''
- credit_code = ''
- city = ''
- province = ''
- expiry_date = ''
- manufacturer = ''
- approval_number = ''
- #暂时不获取说明书信息 start
-
- #是否存在说明书
- # is_has_instructions = self.has_instructions()
- #有的药品没有说明书,直接默认
- if self.search_key == '今维多赐多康牌蛋白粉':
- expiry_date = '18个月'
- manufacturer = '华润圣海健康科技有限公司'
- approval_number = '食健备G202437001992'
- elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
- expiry_date = '24个月'
- manufacturer = '浙江华润三九众益制药有限公司'
- approval_number = '国药准字H20090152'
- elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
- expiry_date = '3年'
- manufacturer = '江苏萨瑞斯医疗科技有限公司'
- approval_number = '苏械注准20212140025'
- elif self.search_key == '999蒲地蓝消炎片':
- expiry_date = '24个月'
- manufacturer = '特一药业集团股份有限公司'
- approval_number = '国药准字Z20063596'
- elif self.search_key == '999养胃舒颗粒':
- expiry_date = '36个月'
- manufacturer = '合肥华润神鹿药业有限公司'
- approval_number = '国药准字Z34020289'
- elif self.search_key == '999糠酸莫米松凝胶15':
- expiry_date = '36个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字H20080010'
- elif self.search_key == '999黄芪精':
- expiry_date = '36个月'
- manufacturer = '台州南峰药业有限公司'
- approval_number = '国药准字Z33020783'
- elif self.search_key == '999复方感冒灵颗粒':
- expiry_date = '24个月'
- manufacturer = '华润三九(郴州)制药有限公司'
- approval_number = '国药准字Z43020334'
- elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- expiry_date = '36个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字H20074155'
- elif self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
- expiry_date = '暂定24个月,具体有效期以实物说明书为准'
- manufacturer = '史达德药业(北京)有限公司'
- approval_number = '国药准字H11021837'
- elif self.search_key == '999速复康布洛芬缓释胶囊':
- expiry_date = '24个月'
- manufacturer = '北京红林制药有限公司'
- approval_number = '国药准字H20074172'
- elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
- expiry_date = '24个月'
- manufacturer = '重庆科瑞东和制药有限责任公司'
- approval_number = '国药准字Z50020420'
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- expiry_date = '24个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字H20073954'
- elif self.search_key == '999维生素C咀嚼片':
- expiry_date = '24个月'
- manufacturer = '甘肃成纪生物药业有限公司'
- approval_number = '国药准字H62021166'
- elif self.search_key == '999强力枇杷露120ml':
- expiry_date = '36个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字Z36021533'
- elif self.search_key == '999强力枇杷露150ml':
- expiry_date = '36个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字Z36021533'
- elif self.search_key == '999抗病毒口服液10ml*10' or self.search_key == '999抗病毒口服液10ml*12':
- expiry_date = '24个月'
- manufacturer = '杭州华润老桐君药业有限公司'
- approval_number = '国药准字Z33020518'
- elif self.search_key == '999精氨酸布洛芬颗粒':
- expiry_date = '暂定36个月'
- manufacturer = '华润三九(唐山)药业有限公司'
- approval_number = '国药准字H20070139'
- elif self.search_key == '999糠酸莫米松乳膏10g支':
- expiry_date = '36个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字H20074090'
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- expiry_date = '24个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字H20074079'
- elif self.search_key == '999感冒清热颗粒(无糖)6g':
- expiry_date = '36个月'
- manufacturer = '合肥华润神鹿药业有限公司'
- approval_number = '国药准字Z20055023'
- elif self.search_key == '999银菊清咽颗粒':
- expiry_date = '30个月'
- manufacturer = '合肥华润神鹿药业有限公司'
- approval_number = '国药准字Z20026680'
- elif self.search_key == '999阿奇霉素片':
- expiry_date = '48个月'
- manufacturer = '浙江华润三九众益制药有限公司'
- approval_number = '国药准字H20084458'
- elif self.search_key == '999补脾益肠丸':
- expiry_date = '24个月'
- manufacturer = '惠州市九惠制药股份有限公司'
- approval_number = '国药准字Z44023376'
- elif self.search_key == '999壮骨关节丸6g*20':
- expiry_date = '24个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z44023377'
- elif self.search_key == '999壮骨关节胶囊':
- expiry_date = '24个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z20080055'
- elif self.search_key == '999正天丸6g*15':
- expiry_date = '30个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z44020711'
- elif self.search_key == '999正天胶囊':
- expiry_date = '24个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z20010142'
- elif self.search_key == '三九胃泰胶囊':
- expiry_date = '24个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z44020704'
- elif self.search_key == '三九胃泰颗粒20g*10':
- expiry_date = '24个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z44020705'
- elif self.search_key == '999感冒灵颗粒':
- expiry_date = '24个月'
- manufacturer = '华润三九(枣庄)药业有限公司'
- approval_number = '国药准字Z44021940'
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- expiry_date = '36个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字H44024170'
- elif self.search_key == '三九胃泰颗粒20g*6袋':
- expiry_date = '24个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z44020705'
- elif self.search_key == '顺峰康王酮康他索乳膏':
- expiry_date = '24个月'
- manufacturer = '广东华润顺峰药业有限公司'
- approval_number = '国药准字H10980204'
- elif self.search_key == '999糠酸莫米松凝胶10':
- expiry_date = '36个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字H20080010'
- elif self.search_key == '999板蓝根颗粒10g*20':
- expiry_date = '36个月'
- manufacturer = '广东恒诚制药股份有限公司'
- approval_number = '国药准字Z44021520'
- elif self.search_key == '999复方氨酚烷胺胶囊' or self.search_key == '999复方氨酚烷胺胶囊12粒' or self.search_key == '999复方氨酚烷胺胶囊10粒' or self.search_key == '999复方氨酚烷胺胶囊6粒' :
- expiry_date = '36个月'
- manufacturer = '华润三九(唐山)药业有限公司'
- approval_number = '国药准字H13021912'
- elif self.search_key == '999咽炎片0.26g*12片*2板':
- expiry_date = '24个月'
- manufacturer = '华润三九(黄石)药业有限公司'
- approval_number = '国药准字Z42021062'
- elif self.search_key == '999小儿止咳糖浆120' or self.search_key == '999小儿止咳糖浆225':
- expiry_date = '24个月'
- manufacturer = '华润三九(雅安)药业有限公司'
- approval_number = '国药准字Z51020675'
- elif self.search_key == '999小儿感冒颗粒6g*10' or self.search_key == '999小儿感冒颗粒6g*24':
- expiry_date = '36个月'
- manufacturer = '华润三九(枣庄)药业有限公司'
- approval_number = '国药准字Z37021392'
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋' or self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
- expiry_date = '36个月'
- manufacturer = '华润三九(黄石)药业有限公司'
- approval_number = '国药准字H42022510'
- elif self.search_key == '999感冒灵胶囊':
- expiry_date = '24个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z44021939'
- elif self.search_key == '999小儿咽扁颗粒8g*10袋':
- expiry_date = '24个月'
- manufacturer = '华润三九(黄石)药业有限公司'
- approval_number = '国药准字Z42021105'
- elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
- expiry_date = '18个月'
- manufacturer = '华润三九医药股份有限公司'
- approval_number = '国药准字Z20100067'
- elif self.search_key == '999感冒清热颗粒12g*18':
- expiry_date = '36个月'
- manufacturer = '山东新大陆制药有限公司'
- approval_number = '国药准字Z37020066'
- elif self.search_key == '999小柴胡颗粒10g*15':
- expiry_date = '24个月'
- manufacturer = '广东一力罗定制药有限公司'
- approval_number = '国药准字Z44020709'
- elif self.search_key == '999布洛芬混悬液':
- expiry_date = '24个月'
- manufacturer = '华润三九(南昌)药业有限公司'
- approval_number = '国药准字H20223755'
-
- else:
- is_has_instructions = self.safe_exec(self.has_instructions)
- # 说明书等信息
- if is_has_instructions:
- print('开始获取说明书信息')
- # instructions_info = self.get_instructions_data()
- instructions_info = self.safe_exec(self.get_instructions_data)
- if instructions_info['有效期'] is not None:
- expiry_date = instructions_info['有效期'].strip('。')
- if instructions_info['生产单位'] is not None:
- manufacturer = instructions_info['生产单位'].strip('。')
- if instructions_info['批准文号'] is not None:
- approval_number = instructions_info['批准文号'].strip('。')
- else:
- # 没有说明书不入库
- print('没有获取到说明书信息')
- self.swipe_back(1)
- return
-
-
-
- #暂时不获取说明书信息 end
- self.unrelated_data = 0
- if self.search_key == '999小柴胡颗粒10g*15':
- save_search_key = '999小柴胡颗粒'
- else:
- save_search_key = self.search_key
-
- # 爬取省份
- scrape_province = '广东' # 这里先默认广东
- # 是否有货
- availability = ''
- save_data = {
- 'product': product,
- 'min_price': min_price,
- 'manufacture_date': manufacture_date,
- 'expiry_date': expiry_date,
- 'shop': shop,
- 'business_license_company': business_license_company,
- 'province': province,
- 'city': city,
- 'manufacturer': manufacturer,
- 'specification': specifications,
- 'approval_number': approval_number,
- 'product_link': product_link,
- 'scrape_date': scrape_date,
- 'scrape_province': scrape_province,
- 'availability': availability,
- 'credit_code': credit_code,
- 'platform': '美团',
- 'search_key': save_search_key,
- 'sales': sales_num,
- 'inventory':'',
- 'snapshot_url':snapshot_url
- }
- self.save_to_database(save_data)
- # time.sleep(100000)
- time.sleep(self.get_sleep_time())
- if self.distinct_target():
- print('已到达搜索列表页')
- else:
- for i in range(1):
- print('在详情页')
- self.swipe_back(1)
- time.sleep(self.get_sleep_time())
- # 最外部有个定位按钮
- if self.distinct_target():
- break
-
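- # Main entry point: connects to the device, starts the global popup monitor, then iterates over the search result pages.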
- def main(self, device_id, retry_count=0):
- MAX_RETRY = 3 # 最大重试次数
- spider_no = 0
- self.connect_devices(device_id)
- time.sleep(self.get_sleep_time())
- # self.d.toast.show("测试toast", 20)
- # 启动全局弹窗监控
- self.monitor = SpiderMonitor(self)
- self.monitor.start()
- try:
- # 重新开启美团应用
- self.restart_app()
- # 搜索关键字
- # self.enter_target_page()
- self.safe_exec(self.enter_target_page)
- # print('开始滑动')
- # self.d.drag(300, 1400, 300, 400, 1)
- # time.sleep(100000)
- for idx in range(300):
- print(f'第{idx + 1}页')
- if spider_no > 30:
- time.sleep(60)
- spider_no = 0
- print('目前无关数据量: ', self.unrelated_data)
- # 检查是否需要暂停(验证码过多)
- if self.monitor.verification_count >= self.monitor.MAX_VERIFICATION_RETRY:
- print("频繁遇到验证码,暂停程序")
- # self.d.toast("请处理验证码后点击继续", 30)
- # 等待用户点击屏幕继续
- self.d.click(0, 0) # 无效点击,等待用户操作
- self.monitor.verification_count = 0
- if self.unrelated_data > 15:
- # 连续超过15个不达标的数据则停止采集
- print("连续超过15个不达标的数据则停止采集")
- return
-
- # 线程安全获取商品列表
- # drug_lis = self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').all()
- # drug_lis = self.safe_list('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout', self.monitor)
-
- while True:
- if self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').exists:
- break
- time.sleep(1)
- drug_lis = self.safe_exec(self.d.xpath('//android.support.v7.widget.RecyclerView/android.widget.FrameLayout').all)
- lis_len = len(drug_lis)
- print(f'当前页面共有{lis_len}个商品')
- for idxx,drug_one in enumerate(drug_lis,start = 1):
- bounds = drug_one.info['bounds']
- top = bounds['top']
- bottom = bounds['bottom']
- # height = bottom - top
- print(f'当前商品bottom:{bottom}')
- print(f'当前商品top:{top}')
- # if 304 <= top and bottom <= 1475: # 默认高度241的才行
- if 304 <= top and bottom <= 1475: # 默认高度241的才行 1559
- # print('目标-->', drug_one.info)
- # drug_one.click()
- #获取当前元素中的属性来判断是否要点击进入采集
- print(f"这页的第几个商品:{idxx}")
- product_title = ''
- price = ''
- shop_name = ''
- #商品名称的xpath
- product_tittle_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- product_tittle_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- if self.d.xpath(product_tittle_xpath).exists:
- product_title = self.d.xpath(product_tittle_xpath).text
- product_title = product_title[1:] if product_title.startswith('0') else product_title
- print(f"product_tittle_xpath列表当前商品名称:{product_title}")
- if '999' in self.search_key:
- if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = self.search_key.replace('999皮炎平', '')
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = self.search_key.replace('999必无忧', '')
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = self.search_key.replace('999必无忧', '')
- elif self.search_key == '999速复康布洛芬缓释胶囊':
- temp_search_key = self.search_key.replace('999速复康', '')
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = self.search_key.replace('999选平', '')
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = self.search_key.replace('999皮炎平', '')
- else:
- temp_search_key = self.search_key.replace('999', '')
- if self.search_key == '999糠酸莫米松凝胶15':
- temp_search_key = temp_search_key.replace('15', '')
- elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = temp_search_key.replace('30', '')
- elif self.search_key == '999复方金银花颗粒10g':
- temp_search_key = temp_search_key.replace('10g', '')
- elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
- temp_search_key = temp_search_key.replace('15g*15袋/盒', '')
- elif self.search_key == '999复方氨酚烷胺胶囊6粒':
- temp_search_key = temp_search_key.replace('6粒', '')
- elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
- temp_search_key = temp_search_key.replace('50', '')
- elif self.search_key == '999止泻利颗粒15g*8':
- temp_search_key = temp_search_key.replace('15g*8', '')
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = temp_search_key.replace('30', '')
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = temp_search_key.replace('15g', '')
- elif self.search_key == '999复方苦参肠炎康片12片':
- temp_search_key = temp_search_key.replace("12片", "")
- elif self.search_key == '999强力枇杷露16袋':
- temp_search_key = temp_search_key.replace("16袋", "")
- elif self.search_key == '999三蛇胆川贝膏138':
- temp_search_key = temp_search_key.replace("138", "")
- elif self.search_key == '999抗病毒口服液10ml*12':
- temp_search_key = temp_search_key.replace("10ml*12", "")
- elif self.search_key == '999抗病毒口服液10ml*10':
- temp_search_key = temp_search_key.replace("10ml*10", "")
- elif self.search_key == '999强力枇杷露120ml':
- temp_search_key = temp_search_key.replace("120ml", "")
- elif self.search_key == '999强力枇杷露150ml':
- temp_search_key = temp_search_key.replace("150ml", "")
- elif self.search_key == '999糠酸莫米松乳膏10g支':
- temp_search_key = temp_search_key.replace("10g支", "")
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = temp_search_key.replace("20g", "")
- elif self.search_key == '999感冒清热颗粒(无糖)6g':
- temp_search_key = temp_search_key.replace("(无糖)6g", "")
- elif self.search_key == '999壮骨关节丸6g*20':
- temp_search_key = temp_search_key.replace("6g*20", "")
- elif self.search_key == '999正天丸6g*15':
- temp_search_key = temp_search_key.replace("6g*15", "")
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = temp_search_key.replace("20", "")
- elif self.search_key == '999糠酸莫米松凝胶10':
- temp_search_key = temp_search_key.replace("10", "")
- elif self.search_key == '999板蓝根颗粒10g*20':
- temp_search_key = temp_search_key.replace("10g*20", "")
- elif self.search_key == '999复方氨酚烷胺胶囊10粒':
- temp_search_key = temp_search_key.replace("10粒", "")
- elif self.search_key == '999复方氨酚烷胺胶囊12粒':
- temp_search_key = temp_search_key.replace("12粒", "")
- elif self.search_key == '999咽炎片0.26g*12片*2板':
- temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
- elif self.search_key == '999小儿止咳糖浆120':
- temp_search_key = temp_search_key.replace("120", "")
- elif self.search_key == '999小儿止咳糖浆225':
- temp_search_key = temp_search_key.replace("225", "")
- elif self.search_key == '999小儿感冒颗粒6g*10':
- temp_search_key = temp_search_key.replace("6g*10", "")
- elif self.search_key == '999小儿感冒颗粒6g*24':
- temp_search_key = temp_search_key.replace("6g*24", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
- temp_search_key = temp_search_key.replace("6g*10袋", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
- temp_search_key = temp_search_key.replace("6g*20袋", "")
- elif self.search_key == '999小儿咽扁颗粒8g*10袋':
- temp_search_key = temp_search_key.replace("8g*10袋", "")
- elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
- temp_search_key = temp_search_key.replace("2.5g*10袋", "")
- elif self.search_key == '999感冒清热颗粒12g*18':
- temp_search_key = temp_search_key.replace("12g*18", "")
- elif self.search_key == '999小柴胡颗粒10g*15':
- temp_search_key = temp_search_key.replace("10g*15", "")
-
-
- if self.search_key == '999抗病毒口服液':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10ml*12' not in product_title and '10ml*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*12和10ml*10品规")
- continue
- elif self.search_key == '999抗病毒口服液10ml*12':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10ml*12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*12品规")
- continue
- elif self.search_key == '999抗病毒口服液10ml*10':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10ml*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*10品规")
- continue
- elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '30' not in product_title:
- print(f"当前商品名称:{product_title} 不包含30品规")
- continue
- elif self.search_key == '999复方感冒灵颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- # elif '14g*15' not in product_title and '14g*9' not in product_title:
- elif '14g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含14g*15品规")
- continue
- elif self.search_key == '999养胃舒颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*10品规")
- continue
- elif self.search_key == '999糠酸莫米松凝胶15':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15品规")
- continue
- elif self.search_key == '999复方金银花颗粒10g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g品规")
- continue
- elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '15g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15g*15品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊6粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6品规")
- continue
- elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '50' not in product_title:
- print(f"当前商品名称:{product_title} 不包含50品规")
- continue
- elif self.search_key == '999止泻利颗粒15g*8':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '15g*8' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15g*8品规")
- continue
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '30' not in product_title:
- print(f"当前商品名称:{product_title} 不包含30品规")
- continue
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15品规")
- continue
- elif self.search_key == '999复方苦参肠炎康片12片':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含12品规")
- continue
- elif self.search_key == '999强力枇杷露16袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '16' not in product_title:
- print(f"当前商品名称:{product_title} 不包含16品规")
- continue
- elif self.search_key == '999三蛇胆川贝膏138':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '138' not in product_title:
- print(f"当前商品名称:{product_title} 不包含16品规")
- continue
- elif self.search_key == '999速复康布洛芬缓释胶囊':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif self.search_key == '999维生素C咀嚼片':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '80' not in product_title:
- print(f"当前商品名称:{product_title} 不包含80品规")
- elif self.search_key == '999精氨酸布洛芬颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '9' not in product_title:
- print(f"当前商品名称:{product_title} 不包含9品规")
- continue
- elif self.search_key == '999强力枇杷露120ml':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '120' not in product_title:
- print(f"当前商品名称:{product_title} 不包含120品规")
- continue
- elif self.search_key == '999强力枇杷露150ml':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '150' not in product_title:
- print(f"当前商品名称:{product_title} 不包含120品规")
- continue
- elif self.search_key == '999糠酸莫米松乳膏10g支':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10品规")
- continue
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20品规")
- continue
- elif self.search_key == '999感冒清热颗粒(无糖)6g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6品规")
- continue
- elif self.search_key == '999阿奇霉素片':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '0.25g*6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含0.25g*6品规")
- continue
- elif self.search_key == '999补脾益肠丸':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*15品规")
- continue
- elif self.search_key == '999壮骨关节丸6g*20':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*20品规")
- continue
- elif self.search_key == '999正天丸6g*15':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*15品规")
- continue
- elif self.search_key == '999感冒灵颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g*9' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*9品规")
- continue
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20品规")
- continue
- elif self.search_key == '999糠酸莫米松凝胶10':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10品规")
- continue
- elif self.search_key == '999板蓝根颗粒10g*20':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g*20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*20品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊10粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊12粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含12品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif self.search_key == '999咽炎片0.26g*12片*2板':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '0.26g*12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含0.26g*12品规")
- continue
- elif self.search_key == '999小儿止咳糖浆120':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '120' not in product_title:
- print(f"当前商品名称:{product_title} 不包含120品规")
- continue
- elif self.search_key == '999小儿止咳糖浆225':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '225' not in product_title:
- print(f"当前商品名称:{product_title} 不包含225品规")
- continue
- elif self.search_key == '999小儿感冒颗粒6g*10':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*10品规")
- continue
- elif self.search_key == '999小儿感冒颗粒6g*24':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*24' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*24品规")
- continue
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*10品规")
- continue
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*20品规")
- continue
- elif self.search_key == '999感冒灵胶囊':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '0.5g*12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含0.5g*12品规")
- continue
- elif self.search_key == '999小儿咽扁颗粒8g*10袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '8g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含8g*10品规")
- continue
- elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '2.5g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含2.5g*10品规")
- continue
- elif self.search_key == '999感冒清热颗粒12g*18':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '12g*18' not in product_title:
- print(f"当前商品名称:{product_title} 不包含12g*18品规")
- continue
- elif self.search_key == '999小柴胡颗粒10g*15':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*15品规")
- continue
- elif self.search_key == '999藿香正气合剂':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10ml*6' not in product_title and '10ml*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*6 和 10ml*10品规")
- continue
- else:
- if '999' not in product_title or temp_search_key not in product_title:
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- else:
- if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
- temp_search_key = self.search_key.replace('史达功', '')
- temp_search_key = temp_search_key.replace('120', '')
- if '史达功' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '120' not in product_title:
- print(f"当前商品名称:{product_title} 不包含120品规")
- continue
- elif self.search_key == '三九胃泰养胃舒颗粒8袋':
- temp_search_key = self.search_key.replace('三九胃泰', '')
- temp_search_key = temp_search_key.replace('8袋', '')
- if '三九胃泰' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '8' not in product_title:
- print(f"当前商品名称:{product_title} 不包含8品规")
- continue
- elif self.search_key == '今维多赐多康牌蛋白粉':
- temp_search_key = self.search_key.replace('今维多', '')
- if '今维多' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
- temp_search_key = self.search_key.replace('佳美舒', '')
- temp_search_key = temp_search_key.replace('4', '')
- if '佳美舒' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '4' not in product_title and '8' not in product_title:
- print(f"当前商品名称:{product_title} 不包含4和或8品规")
- continue
- elif self.search_key == '三九胃泰颗粒20g*10':
- temp_search_key = self.search_key.replace('20g*10', '')
- if temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20g*10品规")
- continue
- elif self.search_key == '三九胃泰颗粒20g*6袋':
- temp_search_key = self.search_key.replace('20g*6袋', '')
- if temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20g*6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20g*6品规")
- continue
- elif self.search_key == '顺峰康王酮康他索乳膏':
- temp_search_key = self.search_key.replace('顺峰康王', '')
- if '顺峰康王' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- else:
- if self.search_key not in product_title.replace(' ', ''):
- continue
- elif self.d.xpath(product_tittle_xpath2).exists:
- product_title = self.d.xpath(product_tittle_xpath2).text
- product_title = product_title[1:] if product_title.startswith('0') else product_title
- print(f"product_tittle_xpath2列表当前商品名称:{product_title}")
-
- if '999' in self.search_key:
- if self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = self.search_key.replace('999皮炎平', '')
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = self.search_key.replace('999必无忧', '')
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = self.search_key.replace('999必无忧', '')
- elif self.search_key == '999速复康布洛芬缓释胶囊':
- temp_search_key = self.search_key.replace('999速复康', '')
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = self.search_key.replace('999选平', '')
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = self.search_key.replace('999皮炎平', '')
- else:
- temp_search_key = self.search_key.replace('999', '')
- if self.search_key == '999糠酸莫米松凝胶15':
- temp_search_key = temp_search_key.replace('15', '')
- elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- temp_search_key = temp_search_key.replace('30', '')
- elif self.search_key == '999复方金银花颗粒10g':
- temp_search_key = temp_search_key.replace('10g', '')
- elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
- temp_search_key = temp_search_key.replace('15g*15袋/盒', '')
- elif self.search_key == '999复方氨酚烷胺胶囊6粒':
- temp_search_key = temp_search_key.replace('6粒', '')
- elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
- temp_search_key = temp_search_key.replace('50', '')
- elif self.search_key == '999止泻利颗粒15g*8':
- temp_search_key = temp_search_key.replace('15g*8', '')
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- temp_search_key = temp_search_key.replace('30', '')
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- temp_search_key = temp_search_key.replace('15g', '')
- elif self.search_key == '999复方苦参肠炎康片12片':
- temp_search_key = temp_search_key.replace("12片", "")
- elif self.search_key == '999强力枇杷露16袋':
- temp_search_key = temp_search_key.replace("16袋", "")
- elif self.search_key == '999三蛇胆川贝膏138':
- temp_search_key = temp_search_key.replace("138", "")
- elif self.search_key == '999抗病毒口服液10ml*12':
- temp_search_key = temp_search_key.replace("10ml*12", "")
- elif self.search_key == '999抗病毒口服液10ml*10':
- temp_search_key = temp_search_key.replace("10ml*10", "")
- elif self.search_key == '999强力枇杷露120ml':
- temp_search_key = temp_search_key.replace("120ml", "")
- elif self.search_key == '999强力枇杷露150ml':
- temp_search_key = temp_search_key.replace("150ml", "")
- elif self.search_key == '999糠酸莫米松乳膏10g支':
- temp_search_key = temp_search_key.replace("10g支", "")
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- temp_search_key = temp_search_key.replace("20g", "")
- elif self.search_key == '999感冒清热颗粒(无糖)6g':
- temp_search_key = temp_search_key.replace("(无糖)6g", "")
- elif self.search_key == '999壮骨关节丸6g*20':
- temp_search_key = temp_search_key.replace("6g*20", "")
- elif self.search_key == '999正天丸6g*15':
- temp_search_key = temp_search_key.replace("6g*15", "")
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- temp_search_key = temp_search_key.replace("20", "")
- elif self.search_key == '999糠酸莫米松凝胶10':
- temp_search_key = temp_search_key.replace("10", "")
- elif self.search_key == '999板蓝根颗粒10g*20':
- temp_search_key = temp_search_key.replace("10g*20", "")
- elif self.search_key == '999复方氨酚烷胺胶囊10粒':
- temp_search_key = temp_search_key.replace("10粒", "")
- elif self.search_key == '999复方氨酚烷胺胶囊12粒':
- temp_search_key = temp_search_key.replace("12粒", "")
- elif self.search_key == '999咽炎片0.26g*12片*2板':
- temp_search_key = temp_search_key.replace("0.26g*12片*2板", "")
- elif self.search_key == '999小儿止咳糖浆120':
- temp_search_key = temp_search_key.replace("120", "")
- elif self.search_key == '999小儿止咳糖浆225':
- temp_search_key = temp_search_key.replace("225", "")
- elif self.search_key == '999小儿感冒颗粒6g*10':
- temp_search_key = temp_search_key.replace("6g*10", "")
- elif self.search_key == '999小儿感冒颗粒6g*24':
- temp_search_key = temp_search_key.replace("6g*24", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
- temp_search_key = temp_search_key.replace("6g*10袋", "")
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
- temp_search_key = temp_search_key.replace("6g*20袋", "")
- elif self.search_key == '999小儿咽扁颗粒8g*10袋':
- temp_search_key = temp_search_key.replace("8g*10袋", "")
- elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
- temp_search_key = temp_search_key.replace("2.5g*10袋", "")
- elif self.search_key == '999感冒清热颗粒12g*18':
- temp_search_key = temp_search_key.replace("12g*18", "")
- elif self.search_key == '999小柴胡颗粒10g*15':
- temp_search_key = temp_search_key.replace("10g*15", "")
-
- if self.search_key == '999抗病毒口服液':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '10ml*12' not in product_title and '10ml*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*12和10ml*10品规")
- continue
- elif self.search_key == '999抗病毒口服液10ml*12':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '10ml*12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*12品规")
- continue
- elif self.search_key == '999抗病毒口服液10ml*10':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10ml*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*10品规")
- continue
- elif self.search_key == '999皮炎平曲安奈德益康唑乳膏30':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '30' not in product_title:
- print(f"当前商品名称:{product_title} 不包含30品规")
- continue
- elif self.search_key == '999复方感冒灵颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- # elif '14g*15' not in product_title and '14g*9' not in product_title:
- elif '14g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含14g*15品规")
- continue
- elif self.search_key == '999养胃舒颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '10g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*10品规")
- continue
- elif self.search_key == '999糠酸莫米松凝胶15':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15品规")
- continue
- elif self.search_key == '999复方金银花颗粒10g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '10g' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g品规")
- continue
- elif self.search_key == '999复方板蓝根颗粒15g*15袋/盒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '15g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15g*15品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊6粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6品规")
- continue
- elif self.search_key == '999可调式生理性海水鼻腔喷雾50':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '50' not in product_title:
- print(f"当前商品名称:{product_title} 不包含50品规")
- continue
- elif self.search_key == '999止泻利颗粒15g*8':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '15g*8' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15g*8品规")
- continue
- elif self.search_key == '999必无忧盐酸特比萘芬喷雾剂30':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '30' not in product_title:
- print(f"当前商品名称:{product_title} 不包含30品规")
- continue
- elif self.search_key == '999必无忧盐酸特比萘芬乳膏15g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含15品规")
- continue
- elif self.search_key == '999复方苦参肠炎康片12片':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含12品规")
- continue
- elif self.search_key == '999强力枇杷露16袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '16' not in product_title:
- print(f"当前商品名称:{product_title} 不包含16品规")
- continue
- elif self.search_key == '999三蛇胆川贝膏138':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif '138' not in product_title:
- print(f"当前商品名称:{product_title} 不包含138品规")
- elif self.search_key == '999速复康布洛芬缓释胶囊':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- elif self.search_key == '999维生素C咀嚼片':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '80' not in product_title:
- print(f"当前商品名称:{product_title} 不包含80品规")
- continue
- elif self.search_key == '999精氨酸布洛芬颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '9' not in product_title:
- print(f"当前商品名称:{product_title} 不包含9品规")
- continue
- elif self.search_key == '999强力枇杷露120ml':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '120' not in product_title:
- print(f"当前商品名称:{product_title} 不包含120品规")
- continue
- elif self.search_key == '999强力枇杷露150ml':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '150' not in product_title:
- print(f"当前商品名称:{product_title} 不包含150品规")
- continue
- elif self.search_key == '999糠酸莫米松乳膏10g支':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10品规")
- continue
- elif self.search_key == '999选平硝酸咪康唑乳膏20g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20品规")
- continue
- elif self.search_key == '999感冒清热颗粒(无糖)6g':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6品规")
- continue
- elif self.search_key == '999阿奇霉素片':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '0.25g*6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含0.25g*6品规")
- continue
- elif self.search_key == '999补脾益肠丸':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*15品规")
- continue
- elif self.search_key == '999壮骨关节丸6g*20':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*20品规")
- continue
- elif self.search_key == '999正天丸6g*15':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*15品规")
- continue
- elif self.search_key == '999感冒灵颗粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g*9' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*9品规")
- continue
- elif self.search_key == '999皮炎平复方醋酸地塞米松乳膏20':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20品规")
- continue
- elif self.search_key == '999糠酸莫米松凝胶10':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10品规")
- continue
- elif self.search_key == '999板蓝根颗粒10g*20':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g*20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*20品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊10粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊12粒':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含12品规")
- continue
- elif self.search_key == '999复方氨酚烷胺胶囊':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif self.search_key == '999咽炎片0.26g*12片*2板':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '0.26g*12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含0.26g*12品规")
- continue
- elif self.search_key == '999小儿止咳糖浆120':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '120' not in product_title:
- print(f"当前商品名称:{product_title} 不包含120品规")
- continue
- elif self.search_key == '999小儿止咳糖浆225':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '225' not in product_title:
- print(f"当前商品名称:{product_title} 不包含225品规")
- continue
- elif self.search_key == '999小儿感冒颗粒6g*10':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*10品规")
- continue
- elif self.search_key == '999小儿感冒颗粒6g*24':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*24' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*24品规")
- continue
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*10袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*10品规")
- continue
- elif self.search_key == '999小儿氨酚黄那敏颗粒6g*20袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '6g*20' not in product_title:
- print(f"当前商品名称:{product_title} 不包含6g*20品规")
- continue
- elif self.search_key == '999感冒灵胶囊':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '0.5g*12' not in product_title:
- print(f"当前商品名称:{product_title} 不包含0.5g*12品规")
- continue
- elif self.search_key == '999小儿咽扁颗粒8g*10袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '8g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含8g*10品规")
- continue
- elif self.search_key == '999小儿感冒宁颗粒2.5g*10袋':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '2.5g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含2.5g*10品规")
- continue
- elif self.search_key == '999感冒清热颗粒12g*18':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '12g*18' not in product_title:
- print(f"当前商品名称:{product_title} 不包含12g*18品规")
- continue
- elif self.search_key == '999小柴胡颗粒10g*15':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10g*15' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10g*15品规")
- continue
- elif self.search_key == '999藿香正气合剂':
- if '999' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '10ml*6' not in product_title and '10ml*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含10ml*6 和 10ml*10品规")
- continue
- else:
- if '999' not in product_title or temp_search_key not in product_title:
- print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- continue
- # if '999' not in product_title or temp_search_key not in product_title:
- # print(f"当前商品名称:{product_title} 不包含关键字:{self.search_key}")
- # continue
- else:
- if self.search_key == '史达功右美沙芬愈创甘油醚糖浆120':
- temp_search_key = self.search_key.replace('史达功', '')
- temp_search_key = temp_search_key.replace('120', '')
- if '史达功' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '120' not in product_title:
- print(f"当前商品名称:{product_title} 不包含120品规")
- continue
- elif self.search_key == '三九胃泰养胃舒颗粒8袋':
- temp_search_key = self.search_key.replace('三九胃泰', '')
- temp_search_key = temp_search_key.replace('8袋', '')
- if '三九胃泰' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '8' not in product_title:
- print(f"当前商品名称:{product_title} 不包含8品规")
- continue
- elif self.search_key == '今维多赐多康牌蛋白粉':
- temp_search_key = self.search_key.replace('今维多', '')
- if '今维多' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif self.search_key == '佳美舒阿奇霉素肠溶胶囊4':
- temp_search_key = self.search_key.replace('佳美舒', '')
- temp_search_key = temp_search_key.replace('4', '')
- if '佳美舒' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '4' not in product_title and '8' not in product_title:
- print(f"当前商品名称:{product_title} 不包含4和或8品规")
- continue
- elif self.search_key == '三九胃泰颗粒20g*10':
- temp_search_key = self.search_key.replace('20g*10', '')
- if temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20g*10' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20g*10品规")
- continue
- elif self.search_key == '三九胃泰颗粒20g*6袋':
- temp_search_key = self.search_key.replace('20g*6袋', '')
- if temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- elif '20g*6' not in product_title:
- print(f"当前商品名称:{product_title} 不包含20g*6品规")
- continue
- elif self.search_key == '顺峰康王酮康他索乳膏':
- temp_search_key = self.search_key.replace('顺峰康王', '')
- if '顺峰康王' not in product_title or temp_search_key not in product_title :
- print(f"当前商品名称:{product_title} 不包含关键字:{temp_search_key}")
- continue
- else:
- if self.search_key not in product_title.replace(' ', ''):
- continue
- else:
- print(f"列表当前商品名称不存在")
- #价格
- price_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- price_xpath3 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- price_xpath1 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- if self.d.xpath(price_xpath).exists:
- price_str = self.d.xpath(price_xpath).text
- print(f"price_xpath列表当前商品价格:{price_str}")
- if price_str:
- price = float(re.search(r'[\d\.]+', price_str).group())
- elif self.d.xpath(price_xpath3).exists:
- price_str = self.d.xpath(price_xpath3).text
- print(f"price_xpath3列表当前商品价格:{price_str}")
- if price_str:
- price = float(re.search(r'[\d\.]+', price_str).group())
- elif self.d.xpath(price_xpath1).exists:
- price_str = self.d.xpath(price_xpath1).text
- print(f"price_xpath1列表当前商品价格:{price_str}")
- if price_str:
- price = float(re.search(r'[\d\.]+', price_str).group())
- else:
- price_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.widget.FrameLayout[1]/android.widget.TextView'
- if self.d.xpath(price_xpath2).exists:
- price_str = self.d.xpath(price_xpath2).text
- print(f"price_xpath2列表当前商品价格:{price_str}")
- if price_str:
- price = float(re.search(r'[\d\.]+', price_str).group())
- else:
- print(f"列表当前商品价格不存在")
- # price_str = self.d.xpath(f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]//*[starts-with(@text,"¥")]').text
-
- print(f'列表获取到价格:{price}')
- #店铺名称的xpath
- shop_name_xpath = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.FrameLayout[last()]/android.widget.TextView[1]'
- shop_name_xpath2 = f'//android.support.v7.widget.RecyclerView/android.widget.FrameLayout[{idxx}]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[1]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[2]/android.view.ViewGroup[1]/android.widget.FrameLayout[last()]/android.widget.TextView[1]'
- if self.d.xpath(shop_name_xpath).exists:
- shop_name = self.d.xpath(shop_name_xpath).text
- print(f"shop_name_xpath列表当前商品店铺名称:{shop_name}")
- elif self.d.xpath(shop_name_xpath2).exists:
- shop_name = self.d.xpath(shop_name_xpath2).text
- print(f"shop_name_xpath2列表当前商品店铺名称:{shop_name}")
- else:
- print(f"列表当前商品店铺名称不存在")
- #如果商品的名称、价格和生产厂家都不存在则直接下一条数据。 跳过一些不是商品的数据。
- if product_title == '' and price == '' and shop_name == '':
- continue
- scrape_date = self.get_current_date()
- if product_title and price and shop_name:
- #判断数据表中是否存在
- dup_data = {'product': product_title, 'min_price': price, 'shop': shop_name, 'scrape_date': scrape_date,'platform': '美团'}
- if self.data_is_exists(dup_data):
- print('列表存在相同数据不入库')
- continue
-
- self.safe_exec(drug_one.click)
- print('点击目标药品完毕')
- time.sleep(2)
- # 采集药品信息
- try:
- # self.integrate_data()
- self.safe_exec(self.integrate_data)
- # 检测下是否回退到列表页
- if self.distinct_target():
- print('回退到列表页', True)
- else:
- if self.d.xpath('//*[@text="搜索"]').exists:
- print("检测到搜索按钮,重新开始采集流程")
- if retry_count < MAX_RETRY:
- # 停止当前监控线程
- self.monitor.stop()
- self.monitor.join()
- # 递归重启采集
- return self.main(device_id, retry_count+1)
- else:
- print("超过最大重试次数,终止程序")
- return
- else:
- print("无法恢复页面,终止采集")
- return
- # print('回退到列表页失败,终止采集')
- # return
- time.sleep(self.get_sleep_time())
- spider_no += 1
- except Exception as e:
- print(f'采集药品详情数据出错:{e}')
- #增加阻塞的方法:
- if not self.distinct_target():
- for i in range(1):
- self.swipe_back(1)
- # 最外部有个定位按钮
- if self.distinct_target():
- break
- if i == 0 and not self.distinct_target():
- print('页面出错,退出采集')
- return
- else:
- continue
- if self.d.xpath('//*[@text="已经到底啦"]').exists:
- print('已经到达列表页最底部')
- return
- search_list = self.d.xpath('//android.support.v7.widget.RecyclerView').info
- bounds = search_list['bounds']
- #print('搜索列表高度', 1400 + bounds['top'] - bounds['bottom'])
- # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'])
-
- # # 计算滑动距离
- scroll_distance = bounds['bottom'] - bounds['top'] # 正数
- start_y = 1600
- end_y = start_y - scroll_distance # 向上滑动,y 坐标减小
- # # 确保 end_y 不小于 0
- end_y = max(end_y, 304) # 留出一点边距,避免滑出屏幕
- # # print('滑动起点 y:', start_y, '终点 y:', end_y)
- # # self.d.swipe(200, start_y, 200, end_y, 0.4)
- print('开始滑动')
- self.d.drag(300, 1400, 300, 400, 1)
- # self.safe_exec(self.d.drag, 300, 1400, 300, 400, 1)
- print('滑动结束')
- # #print('搜索列表高度', 1400 + bounds['top'] - bounds['bottom'])
- # # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'])
- # # self.d.swipe(200, 1400, 200, 1400 + bounds['top'] - bounds['bottom'], 0.4)
- # time.sleep(self.get_sleep_time())
- finally:
- # 确保监控线程被停止
- self.monitor.stop()
- self.monitor.join()
def unitest(self) -> None:
    """Ad-hoc manual check: write one hard-coded sample record to the database.

    The method sleeps for ``hold_seconds`` seconds, hands a fixed sample
    record to ``self.save_to_database``, and then sleeps for the same
    duration again.

    :return: None
    """
    # NOTE(review): the leading sleep delays the save by 100000 seconds
    # (~27.8 hours). It looks like a debugging hold left in place -- confirm
    # whether the save below is meant to be reached promptly.
    hold_seconds = 100000
    time.sleep(hold_seconds)

    # Fixed sample row; the keys are whatever ``save_to_database`` is given
    # here -- presumably they mirror the storage columns, verify against it.
    sample_record = {
        'product': "[昆中药]舒肝颗粒(低糖型)",
        'min_price': 14.0,
        'manufacture_date': '',
        'expiry_date': '36个月',
        'shop': '美团自营大药房(快递电商)',
        'business_license_company': '',
        'province': '',
        'city': '',
        'manufacturer': '昆明中药厂有限公司',
        'specification': '3g*16袋/盒',
        'approval_number': '国药准字Z53021161',
        'product_link': '',
        'scrape_date': '2025/07/09',
        'scrape_province': '广东',
        'availability': '',
        'credit_code': '',
        'platform': '美团',
        'search_key': '',
        'sales': '',
        'inventory': '',
        'snapshot_url': '',
    }
    self.save_to_database(sample_record)

    # Hold again after the write, exactly as the original did.
    time.sleep(hold_seconds)
def main():
    """Run the keyword collection loop forever.

    Each cycle walks ``keys_list`` in order. For every keyword a fresh
    ``MT`` instance is created and ``mt.main(device_id)`` is invoked. Any
    ``Exception`` raised for a keyword is logged with its traceback and the
    loop moves on to the next keyword. If the instance exposes a ``close``
    method it is called afterwards, whether or not collection succeeded.

    The function never returns normally; stop it by interrupting the process.
    """
    # Search keywords to collect. Only un-commented entries are active.
    # Every entry (including commented ones) ends with a comma so that
    # re-enabling neighbouring lines cannot silently concatenate two adjacent
    # string literals into one keyword.
    keys_list = [
        # '三九胃泰颗粒',
        # '999小柴胡颗粒',
        # '999强力枇杷露',
        # '[999]感冒清热颗粒',
        # '999抗病毒口服液',
        # '999皮炎平',
        # '999盐酸特比萘芬乳膏',
        # '999盐酸特比萘芬',
        # '999藿香正气合剂',
        # '999必无忧盐酸特比萘芬乳膏',
        # '999复方感冒灵颗粒',
        # '999糠酸莫米松凝胶',
        # '999铝碳酸镁咀嚼片',
        # '999阿奇霉素片',
        # '999选平硝酸咪康唑乳膏',
        # Add more as needed.
        # --- Not needed for now ---
        # '999必无忧盐酸特比萘芬喷雾剂30',
        # '999冰连清咽',
        # '999复方金银花颗粒10g',
        # '999复方苦参肠炎康片12片',
        # '999强力枇杷露16袋',
        # '999三蛇胆川贝膏138',
        # '999维生素ec颗粒',
        # '三九胃泰养胃舒颗粒8袋',
        # '999止泻利颗粒15g*8',
        # '999阿奇霉素片',
        # '999可调式生理性海水鼻腔喷雾50',
        # '999小儿止咳糖浆120',  # price floor 19.8
        # '999小儿止咳糖浆225',  # online listing prohibited
        # '999小儿感冒颗粒6g*10',  # price floor 24.9
        # '999小儿感冒颗粒6g*24',  # online listing prohibited
        # '999小儿氨酚黄那敏颗粒6g*10袋',  # price floor 15.8
        # '999小儿氨酚黄那敏颗粒6g*20袋',  # online listing prohibited
        # '999小儿咽扁颗粒8g*10袋',  # 999 official flagship store only
        # '佳美舒阿奇霉素肠溶胶囊4',
        # '999维生素C咀嚼片',
        # '999黄芪精',
        # '999皮炎平曲安奈德益康唑乳膏30',
        # '999葡萄糖酸锌口服溶液',
        # '今维多赐多康牌蛋白粉',
        # --- OTC ---
        # '999强力枇杷露150ml',
        # '999糠酸莫米松乳膏10g支',
        # '999选平硝酸咪康唑乳膏20g',
        # '999感冒清热颗粒(无糖)6g',
        # '999银菊清咽颗粒',  # only one record found
        # '999补脾益肠丸',
        # '999壮骨关节丸6g*20',
        # '999壮骨关节胶囊',
        # '999正天丸6g*15',
        # '999正天胶囊',
        # '三九胃泰胶囊',
        # '三九胃泰颗粒20g*10',
        # '三九胃泰颗粒(无糖)2.5g*6',  # no data
        # '999复方氨酚烷胺胶囊12粒',  # price floor 17.9
        # '999复方氨酚烷胺胶囊10粒',  # online listing prohibited
        # '999复方氨酚烷胺胶囊6粒',  # online listing prohibited
        # '顺峰康王酮康他索乳膏',  # covers both 10g and 20g: 10g floor 7.5, 20g floor 12.5
        # '999咽炎片0.26g*12片*2板',  # price floor 13.5
        # '999板蓝根颗粒10g*20',  # price floor 26.9
        # '999小儿感冒宁颗粒2.5g*10袋',  # online listing prohibited
        # '999抗病毒口服液',
        # '999蒲地蓝消炎片',
        # '999小柴胡颗粒10g*15',
        # '999必无忧盐酸特比萘芬乳膏15g',
        # '999复方板蓝根颗粒15g*15袋/盒',
        # '999速复康布洛芬缓释胶囊',
        # '999精氨酸布洛芬颗粒',
        # '999强力枇杷露120ml',
        # '999小儿感冒颗粒6g*10',  # price floor 24.9
        # '999感冒灵颗粒',  # price floor 15.5
        # '999皮炎平复方醋酸地塞米松乳膏20',  # price floor 12.5
        # '三九胃泰颗粒20g*6袋',  # price floor 13.5
        # '999复方氨酚烷胺胶囊',
        # '999感冒灵胶囊',  # 999 official flagship store only
        # '999荆防颗粒',  # no data on Meituan; online listing prohibited
        # '史达功右美沙芬愈创甘油醚糖浆120',  # 999 official flagship store only
        # '999磷酸奥司他韦胶囊75mg*10',  # 999 official flagship store only

        # Notes, latest as of 2025-08-01. Pack sizes seen per product:
        #   藿香正气合剂: 10 and 6 vials; 抗病毒口服液: 12 and 18 vials;
        #   蒲地蓝: 24, 36 and 44 tablets; 枇杷露: 225ml;
        #   小柴胡颗粒: 9 and 15 sachets; 养胃舒: 6 sachets;
        #   复方感冒灵颗粒: 15 sachets; 曲安奈德益康唑乳膏: 30g;
        #   葡萄糖酸锌口服溶液: 12, 18, 24 and 30 vials.
        # Keywords that returned no data:
        #   1. 999止泻利颗粒15g*8  2. 999维生素ec颗粒  3. 999三蛇胆川贝膏138
        #   4. 999强力枇杷露16袋  5. 999冰连清咽

        # ===== Daily-collection SKUs: start =====
        # '999养胃舒颗粒',
        # '999复方感冒灵颗粒',
        # '999感冒清热颗粒12g*18',  # online listing prohibited
        # '999小儿感冒宁颗粒2.5g*10袋',  # online listing prohibited; keyword to be shortened to 999小儿感冒宁颗粒
        # '999小儿咽扁颗粒8g*10袋',  # 999 official flagship store only
        # '999布洛芬混悬液',
        # ===== Spring Festival 2026 SKUs: start =====
        # '999藿香正气合剂',
        # '999糠酸莫米松凝胶15',
        # '999抗病毒口服液10ml*10',
        # '999抗病毒口服液10ml*12',
        # '999强力枇杷露225ml',
        '999糠酸莫米松凝胶10',  # price floor 26.9
        # ===== Spring Festival 2026 SKUs: end =====
        # ===== Daily-collection SKUs: closing marker (labelled "start" in the original) =====
    ]

    # Meituan account phone number: (left blank in the original)
    # Serial number of the device passed to MT.main(); exactly one line
    # should be active. The commented lines are alternate devices.
    # device_id = '21885f5'
    # device_id = '2e58510'
    # device_id = 'KNNNEMNVWCJZQOLZ'
    # device_id = 'B6JVE6AYSWU4LRLZ'
    # device_id = '656DTOPRZDEALZ5X'
    # device_id = 'GIOFIBRKZTUGJJAE'
    # device_id = 'fcb3c749'
    # device_id = 'UCQGF6CQFMU8WKHI'
    # device_id = '4TZDUGTOAIFMJVGU'
    device_id = '95b2c764'
    # device_id = 'ZDQWUSSWBEDI896T'
    # device_id = 'R4SKMJPNBQAENRAM'
    # device_id = '1462a51f'
    # device_id = '97ae80e0'
    # device_id = 'IZTOWWDQT45D49BU'
    # device_id = 'N7ZXBITOSOGMYXQS'
    # device_id = '369dcf96'
    # device_id = 'GQIRKB7LVOONM7VW'
    # device_id = 'T4UCEQGQEEYP65ZL'
    # device_id = '49L7GMPRVS85LJHE'
    # device_id = 'WWRO9LTGG6KFGQCM'
    # device_id = 'DYF67TM7KJ4POJLF'
    # device_id = 'OVFETO8PCY45E6A6'
    # device_id = 'U8ONIJJJS4CELVD6'
    # device_id = 'LJ9PN7A6K7HQ9DPF'
    cycle_no = 0  # number of collection cycles started so far

    while True:
        cycle_no += 1
        logging.info(f'========== 第 {cycle_no} 轮采集开始 ==========')
        for idx, key in enumerate(keys_list, 1):
            logging.info(f'[{idx}/{len(keys_list)}] 开始采集关键字:{key}')
            # Reset per keyword. Without this, a failing MT(key) leaves `mt`
            # unbound on the first keyword (UnboundLocalError in `finally`,
            # which escapes the handler and stops the loop) or still pointing
            # at the previous keyword's instance, which would be closed twice.
            mt = None
            try:
                mt = MT(key)  # instantiate for the current keyword
                # mt.unitest()
                mt.main(device_id)  # run one full collection pass
                logging.info(f'关键字 {key} 本轮采集完成')
            except Exception as e:
                # Skip this keyword on failure and carry on with the next one.
                logging.exception(f'关键字 {key} 采集异常:{e}')
            finally:
                # Release the instance's resources when it offers close().
                if mt is not None and hasattr(mt, 'close'):
                    mt.close()

        # Pause between cycles is currently disabled, so the next cycle starts
        # immediately. NOTE(review): the original remark said "2 hours" while
        # the disabled call sleeps 1 * 3600 seconds; confirm before enabling.
        # logging.info('本轮全部关键字采集完成,等待 2 小时后下一轮...')
        # time.sleep(1 * 3600)

    # Disabled one-off run for a single keyword (other keywords tried:
    # 参苓健脾胃颗粒, 舒肝颗粒, 清肺化痰丸, 香砂平胃颗粒, 小柴胡颗粒):
    # keys = '小柴胡颗粒'
    # mt = MT(keys)
    # # mt.main('95b2c764')
    # mt.main('fcb3c749')
# Script entry point: when this file is executed directly, start the
# collection loop right away.
if __name__ == "__main__":
    main()
    # Disabled alternative: run main() from a blocking cron-style scheduler
    # at 21:30 instead of starting immediately.
    # scheduler = BlockingScheduler()
    # scheduler.add_job(main, 'cron', hour=21, minute=30, misfire_grace_time=120)
    # try:
    #     scheduler.start()
    # except (KeyboardInterrupt, SystemExit):
    #     pass
|