"""Playwright crawler for ybm100.com search result pages.

Flow: reuse saved cookies -> verify login -> run each keyword search ->
walk the paginated result list while scrolling like a human visitor.
"""
import json
import os
import random
import re
import time

from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError

# Local imports keep their original relative order: names defined below
# this point still override anything the wildcard import brings in.
from logger_config import logger
from config import *  # noqa: F401,F403 - supplies ELEMENT_TIMEOUT, SEARCH_BTN_TIMEOUT

COOKIE_FILE_PATH = "ybm_cookies.json"  # where cookies are persisted
LOGIN_VALIDATE_URL = "https://www.ybm100.com/new/"
TARGET_LOGIN_URL = "https://www.ybm100.com/new/login"

# ---------- scrolling configuration ----------
# SCROLL_STEP used to be defined twice (200, then 50). The second assignment
# ran at import time, so 50 is the value every function has always seen.
SCROLL_STEP = 50  # pixels per scroll step
SCROLL_DELAY = 0.15  # seconds between steps in slow_scroll_to_bottom
MAX_SCROLL_ATTEMPTS = 50  # upper bound on scroll steps (prevents endless loops)
SCROLL_INTERVAL = 0.05  # base pause (seconds) between wheel ticks
SCROLL_OFFSET_RANGE = 50  # random +/- offset (pixels) applied to the scroll distance
MIN_CLICK_DELAY = 0.5
MAX_CLICK_DELAY = 1.0

# ---------- selectors ----------
SEARCH_INPUT_SELECTOR = "input[placeholder*='药品名称/厂家名称']"
SEARCH_BTN_SELECTOR = 'div.home-search-container-search-head-btn[data-scmd="text-搜索"]'
_GUIDE_BUTTON_XPATH = (
    "xpath=//button[normalize-space()='下一步' or normalize-space()='完成' "
    "or normalize-space()='我知道了' or normalize-space()='关闭']"
)
_CLOSE_ICON_CSS = (
    "css=.el-dialog__headerbtn, .el-message-box__headerbtn, .close, .icon-close, .el-icon-close"
)
_LIST_READY_SELECTOR = ".product-list-container, div.filter-panel-container-empty-text"

# ---------- in-page scripts ----------
_POPUP_GUARD_JS = r"""
() => {
    // Step 1: remove the known Element UI mask / dialog wrappers.
    const selectors = [
        '.v-modal', '.el-overlay', '.el-overlay-dialog',
        '.el-dialog__wrapper', '.el-message-box__wrapper', '.el-loading-mask'
    ];
    selectors.forEach(sel => document.querySelectorAll(sel).forEach(e => e.remove()));

    // Step 2: generic fallback - hide near-fullscreen layers with a high z-index.
    const all = Array.from(document.querySelectorAll('body *'));
    for (const el of all) {
        const s = getComputedStyle(el);
        const z = parseInt(s.zIndex || '0', 10);
        if ((s.position === 'fixed' || s.position === 'absolute') && z >= 1000 && s.pointerEvents !== 'none') {
            const r = el.getBoundingClientRect();
            const nearFull = r.width >= innerWidth * 0.8 && r.height >= innerHeight * 0.8;
            if (nearFull) {
                el.style.pointerEvents = 'none';
                el.style.display = 'none';
            }
        }
    }

    // Step 3: give the page its scrolling back.
    document.documentElement.style.overflow = 'auto';
    document.body.style.overflow = 'auto';
    document.body.classList.remove('el-popup-parent--hidden');
}
"""

# NOTE(review): the original wrote `el.style.x = '... !important'`. Browsers
# discard such assignments, so none of the "forced" declarations ever applied.
# They are now set through setProperty(..., 'important'); verify that the list
# layout still looks right with the overrides actually in effect.
_FORCE_CLOSE_JS = r"""
() => {
    const force = (el, prop, value) => el.style.setProperty(prop, value, 'important');

    // 1. Remove mask layers.
    const maskSelectors = [
        '.v-modal', '.el-overlay', '.el-overlay-dialog',
        '.el-dialog__wrapper', '.el-message-box__wrapper', '.el-loading-mask',
        '[class*="mask"]', '[class*="overlay"]',
        '[style*="position: fixed"][style*="z-index: 9999"]',
        '[style*="position: absolute"][style*="z-index: 9999"]'
    ];
    maskSelectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => el.remove());
    });

    // 2. Restore interaction on <body>.
    document.body.style.overflow = 'auto';
    document.body.style.pointerEvents = 'auto';

    // 3. Lift style restrictions on the list containers.
    document.querySelectorAll('.product-list-container, .list-container, .el-table__body-wrapper').forEach(el => {
        force(el, 'overflow', 'auto');
        force(el, 'height', 'auto');
        force(el, 'max-height', 'calc(100vh - 200px)');
        force(el, 'pointer-events', 'auto');
    });
}
"""

_KILL_MASKS_JS = r"""
() => {
    const force = (el, prop, value) => el.style.setProperty(prop, value, 'important');

    // 1. Remove known mask elements. '.el-popup-parent--hidden' is NOT in this
    //    list: it is a class carried by <body> (removed in step 3), so matching
    //    it here would delete <body> itself.
    const knownSelectors = [
        '.v-modal', '.el-overlay', '.el-overlay-dialog',
        '.el-dialog__wrapper', '.el-message-box__wrapper', '.el-loading-mask'
    ];
    knownSelectors.forEach(sel => {
        document.querySelectorAll(sel).forEach(el => el.remove());
    });

    // 2. Remove every positioned layer with a high z-index.
    // NOTE(review): this also deletes legitimate sticky headers/dropdowns with
    // z-index >= 1000 - confirm that is acceptable for the target pages.
    const all = Array.from(document.querySelectorAll('body *'));
    for (const el of all) {
        const s = getComputedStyle(el);
        if (!s) continue;
        const z = parseInt(s.zIndex || '0', 10);
        if ((s.position === 'fixed' || s.position === 'absolute') && z >= 1000) {
            el.remove();
        }
    }

    // 3. Restore scrolling and interaction on <html>/<body>.
    force(document.documentElement, 'overflow', 'auto');
    force(document.body, 'overflow', 'auto');
    force(document.body, 'position', 'static');
    force(document.body, 'width', 'auto');
    force(document.body, 'padding-right', '0px');
    force(document.body, 'pointer-events', 'auto');
    document.body.classList.remove('el-popup-parent--hidden');

    // 4. Restore the product list container.
    document.querySelectorAll('.product-list-container').forEach(el => {
        force(el, 'overflow', 'auto');
        force(el, 'height', 'auto');
        force(el, 'max-height', 'calc(100vh - 200px)');
        force(el, 'display', 'block');
        force(el, 'visibility', 'visible');
        force(el, 'pointer-events', 'auto');
    });
    return { success: true };
}
"""

_REFRESH_LIST_CONTAINER_JS = r"""
() => {
    const container = document.querySelector('.product-list-container');
    if (container) {
        container.style.setProperty('overflow', 'auto', 'important');
        container.offsetHeight;  // reading offsetHeight forces a reflow
    }
    document.documentElement.offsetHeight;
}
"""

_SCROLL_STATE_JS = r"""
() => {
    // Prefer the product list container, fall back to the document.
    const container = document.querySelector('.product-list-container') || document.documentElement;
    return {
        isGlobal: container === document.documentElement,
        tag: container.tagName,
        className: container.className,
        scrollHeight: container.scrollHeight,
        scrollTop: container.scrollTop,
        clientHeight: container.clientHeight
    };
}
"""

_UNLOCK_PAGE_SCROLL_JS = r"""
() => {
    const force = (el, prop, value) => el.style.setProperty(prop, value, 'important');
    for (const el of [document.documentElement, document.body]) {
        force(el, 'overflow', 'auto');
        force(el, 'pointer-events', 'auto');
        force(el, 'position', 'static');
        el.classList.remove('el-popup-parent--hidden', 'no-scroll');
    }
    document.documentElement.offsetHeight;  // force a reflow
}
"""

_STEALTH_INIT_JS = r"""
Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });
Object.defineProperty(navigator, 'mimeTypes', { get: () => [1, 2, 3] });
window.chrome = { runtime: {}, loadTimes: () => ({}) };
// `languages` is an accessor on the prototype; plain assignment is ignored,
// so it has to be redefined.
Object.defineProperty(navigator, 'languages', { get: () => ['zh-CN', 'zh'] });

(() => {
    const add = EventTarget.prototype.addEventListener;
    const remove = EventTarget.prototype.removeEventListener;
    const wrappers = new WeakMap();

    EventTarget.prototype.addEventListener = function (type, listener, ...rest) {
        // Forward the options/capture argument untouched; dropping it breaks
        // `once`, `passive` and capture listeners page-wide.
        if (type !== 'mousemove' || typeof listener !== 'function') {
            return add.call(this, type, listener, ...rest);
        }
        let wrapped = wrappers.get(listener);
        if (!wrapped) {
            wrapped = function (e) {
                e._automation = undefined;
                return listener.call(this, e);
            };
            wrappers.set(listener, wrapped);
        }
        return add.call(this, type, wrapped, ...rest);
    };

    // Without this, pages could never detach their mousemove handlers.
    EventTarget.prototype.removeEventListener = function (type, listener, ...rest) {
        const target = (type === 'mousemove' && wrappers.get(listener)) || listener;
        return remove.call(this, type, target, ...rest);
    };
})();
"""


def load_cookies(context, cookie_path=COOKIE_FILE_PATH):
    """Load cookies from a local JSON file into the browser context.

    Returns True when the cookies were added, False when the file is missing
    or could not be read/applied.
    """
    if not os.path.exists(cookie_path):
        logger.warning(f" Cookie文件不存在:{cookie_path}")
        return False
    try:
        with open(cookie_path, "r", encoding="utf-8") as f:
            cookies = json.load(f)
        context.add_cookies(cookies)
        return True
    except Exception as e:
        # Best effort: a broken cookie file just means a fresh login is needed.
        logger.error(f" 加载Cookie失败:{e}")
        return False


def is_login(page):
    """Return True when the current session is still logged in.

    Opens LOGIN_VALIDATE_URL and treats a final URL containing "login" as
    "not logged in". Any navigation error also counts as not logged in.
    """
    try:
        page.goto(LOGIN_VALIDATE_URL, timeout=5000)
        page.wait_for_load_state("networkidle")
        if "login" in page.url.lower():
            logger.warning(" Cookie失效,需要重新登录")
            return False
        return True
    except Exception as e:
        logger.error(f" 验证登录状态失败:{e}")
        return False


def _click_if_visible(page, selector, click_timeout, settle_ms):
    """Click the first match of *selector* if it is visible; return whether it clicked."""
    target = page.locator(selector).first
    if target.count() > 0 and target.is_visible():
        target.click(timeout=click_timeout)
        page.wait_for_timeout(settle_ms)
        return True
    return False


def popup_guard(page, tag=""):
    """Dismiss onboarding popups and masks, then restore page scrolling.

    Clicks through guide buttons and close icons (up to 6 rounds), then runs
    an in-page cleanup script. Never raises; *tag* only labels the debug log
    so the call site can be told apart.
    """
    try:
        page.wait_for_timeout(300)  # give popups a moment to appear
        for _ in range(6):
            if _click_if_visible(page, _GUIDE_BUTTON_XPATH, 1500, 250):
                continue
            if _click_if_visible(page, _CLOSE_ICON_CSS, 1200, 250):
                continue
            break
        page.evaluate(_POPUP_GUARD_JS)
    except Exception as exc:
        # Deliberately best-effort, but leave a trace instead of a bare pass.
        logger.debug(f"popup_guard[{tag}] 清理弹窗时异常(已忽略):{exc}")


def pick_search_input(page):
    """Return a visible, enabled search input.

    Probes the first two matches of SEARCH_INPUT_SELECTOR with a short
    timeout, then falls back to the first ``:visible`` match (which may raise
    PlaywrightTimeoutError after 5 s).
    """
    inputs = page.locator(SEARCH_INPUT_SELECTOR)
    for index in range(min(inputs.count(), 2)):
        candidate = inputs.nth(index)
        try:
            candidate.wait_for(state="visible", timeout=1500)
        except PlaywrightTimeoutError:
            continue
        if candidate.is_enabled():
            return candidate
    # Fallback: any visible match (avoids hitting a hidden template input).
    fallback = page.locator(f"{SEARCH_INPUT_SELECTOR}:visible").first
    fallback.wait_for(state="visible", timeout=5000)
    return fallback


def type_slow(locator, text: str, min_delay=0.06, max_delay=0.18):
    """Type *text* one character at a time with a random per-key delay (seconds)."""
    for ch in text:
        delay_ms = int(random.uniform(min_delay, max_delay) * 1000)
        locator.type(ch, delay=delay_ms)


def force_close_popup(page):
    """Remove mask layers first, then click through guide buttons / close icons.

    Never raises; failures are logged as warnings.
    """
    try:
        page.evaluate(_FORCE_CLOSE_JS)
        page.wait_for_timeout(500)
        for _ in range(5):
            if _click_if_visible(page, _GUIDE_BUTTON_XPATH, 1500, 300):
                continue
            # The previous XPath also matched every <svg>/<i> on the page and
            # therefore clicked arbitrary icons; only real close controls
            # (same selector popup_guard uses) are targeted now.
            if _click_if_visible(page, _CLOSE_ICON_CSS, 1000, 300):
                continue
            break
    except Exception as e:
        logger.warning(f"⚠️ 强制清理弹窗时异常:{e}")


def kill_masks(page):
    """Delete blocking overlay layers and force scrolling/interaction back on.

    Unlike popup_guard/force_close_popup this does not catch exceptions from
    ``page.evaluate``.
    """
    page.evaluate(_KILL_MASKS_JS)


def slow_scroll_to_bottom(page):
    """Scroll step by step to the bottom of the list container (or the page).

    Stops when the bottom is reached (50 px tolerance), when a step no longer
    moves the scroll position, or after MAX_SCROLL_ATTEMPTS steps; finally
    jumps to the very end. Never raises.
    """
    try:
        logger.info("📜 开始慢速滚动到页面底部...")
        page.evaluate(_REFRESH_LIST_CONTAINER_JS)
        page.wait_for_timeout(500)

        state = page.evaluate(_SCROLL_STATE_JS)
        container_debug = {
            key: state[key]
            for key in ("tag", "className", "scrollHeight", "clientHeight", "scrollTop")
        }
        logger.info(f"滚动容器信息:{container_debug}")

        previous_top = None
        for _ in range(MAX_SCROLL_ATTEMPTS):
            # Re-read every round: lazy loading may grow the container.
            state = page.evaluate(_SCROLL_STATE_JS)
            top = state["scrollTop"]

            if top + state["clientHeight"] >= state["scrollHeight"] - 50:
                logger.info("✅ 已滚动到容器底部")
                break
            # An unchanged scrollHeight says nothing about position (static
            # pages never change it), so detect a stalled scrollTop instead.
            if previous_top is not None and top == previous_top:
                logger.info("✅ 滚动位置无变化,判定已到底部")
                break

            if state["isGlobal"]:
                page.evaluate("(step) => window.scrollBy(0, step)", SCROLL_STEP)
            else:
                page.evaluate(
                    """
                    (step) => {
                        const container = document.querySelector('.product-list-container');
                        if (container) {
                            container.scrollTop += step;
                            container.offsetHeight;
                        }
                    }
                    """,
                    SCROLL_STEP,
                )
            step_delay = random.uniform(SCROLL_DELAY - 0.05, SCROLL_DELAY + 0.05)
            page.wait_for_timeout(int(step_delay * 1000))
            previous_top = top

        # Final jump so a capped loop still ends at the bottom.
        page.evaluate(
            """
            () => {
                const container = document.querySelector('.product-list-container') || document.documentElement;
                container.scrollTop = container.scrollHeight;
            }
            """
        )
        page.wait_for_timeout(500)
    except Exception as e:
        logger.warning(f"⚠️ 慢速滚动到底部时出现异常:{e}")


def search_operation(page, keyword, is_first_search: bool = True):
    """Type *keyword* into the search box, submit, and return the result page.

    :param page: page holding the search box
    :param keyword: text to search for
    :param is_first_search: True when the search opens a new tab (home page);
        False when the result page navigates in place
    :return: ``(result_page, True)`` on success, ``(None, False)`` on failure
    """
    try:
        force_close_popup(page)
        kill_masks(page)

        # Several inputs can match the selector; a bare locator would trip
        # Playwright's strict mode, so wait for one to exist and then pick a
        # usable one.
        page.locator(SEARCH_INPUT_SELECTOR).first.wait_for(
            state="attached", timeout=ELEMENT_TIMEOUT
        )
        search_locator = pick_search_input(page)

        # Clear the box, then type like a human.
        search_locator.click(force=True)
        search_locator.fill("")
        page.keyboard.press("Control+a")
        page.keyboard.press("Backspace")
        type_slow(search_locator, keyword, min_delay=0.06, max_delay=0.18)
        logger.info(f"📝 已输入搜索关键词:{keyword}")

        force_close_popup(page)  # popups may have reappeared while typing

        btn = page.locator(SEARCH_BTN_SELECTOR)
        btn.wait_for(state="visible", timeout=SEARCH_BTN_TIMEOUT)
        page.wait_for_timeout(3000)

        if is_first_search:
            try:
                with page.context.expect_page(timeout=60000) as new_page_info:
                    btn.click()
                detail_page = new_page_info.value
                detail_page.wait_for_load_state("domcontentloaded", timeout=20000)
                force_close_popup(detail_page)
                kill_masks(detail_page)
                detail_page.wait_for_load_state("networkidle", timeout=20000)
            except PlaywrightTimeoutError:
                logger.warning(" 未检测到新标签页")
                return None, False
            except Exception as e:
                logger.warning(f" 等待新标签页异常:{e}")
                return None, False
        else:
            btn.click()
            page.wait_for_load_state("domcontentloaded", timeout=20000)
            force_close_popup(page)
            kill_masks(page)
            page.wait_for_load_state("networkidle", timeout=20000)
            detail_page = page
            logger.info("✅ 后续搜索:已在原页面完成跳转加载")

        # First-visit guide button on the result page.
        test_btn = detail_page.locator(
            "div[data-v-c65c36bc].first-time-highlight-message-btn button"
        )
        btn_count = test_btn.count()
        logger.info(f"✅ 匹配到的引导按钮数量:{btn_count}")
        if btn_count > 0:
            first_guide = test_btn.first  # strict-mode safe with several matches
            first_guide.wait_for(state="attached", timeout=5000)
            first_guide.click()

        force_close_popup(detail_page)
        kill_masks(detail_page)
        logger.info("✅ 已触发搜索并清理弹窗")

        # Wait for either the list or the "no results" marker, so an empty
        # result is reported to the caller as a successful search.
        detail_page.wait_for_selector(_LIST_READY_SELECTOR, timeout=5000)
        detail_page.evaluate(_REFRESH_LIST_CONTAINER_JS)
        detail_page.wait_for_timeout(1000)
        return detail_page, True
    except PlaywrightTimeoutError as e:
        logger.error(f" 搜索失败:元素定位超时 - {e}")
        return None, False
    except Exception as e:
        logger.error(f" 搜索异常:{e}")
        return None, False


def goto_next_page(page) -> bool:
    """Click the "next page" button when it is enabled.

    :param page: search result page
    :return: True when the page was turned; False when ``aria-disabled`` is
        "true", the button never attached, or the click failed
    """
    try:
        next_btn = page.locator("button.btn-next").first
        next_btn.wait_for(state="attached", timeout=3000)

        aria_disabled = next_btn.get_attribute("aria-disabled")
        logger.info(f"下一页按钮 aria-disabled 属性值:{aria_disabled}")
        if aria_disabled == "true":
            logger.warning("⚠️ 下一页按钮 aria-disabled=true,已无更多页面")
            return False

        page.evaluate("window.scrollTo(0, 0);")
        page.wait_for_timeout(500)

        # Normal click when possible, forced click as a fallback.
        clickable = next_btn.is_visible() and next_btn.is_enabled()
        if clickable:
            next_btn.click(timeout=5000)
        else:
            next_btn.click(force=True, timeout=5000)

        page.wait_for_load_state("networkidle", timeout=15000)
        force_close_popup(page)
        kill_masks(page)
        logger.info("✅ 翻页成功,下一页按钮 aria-disabled=false")
        return True
    except PlaywrightTimeoutError:
        logger.warning("⚠️ 下一页按钮加载超时,判定无更多页面")
        return False
    except Exception as e:
        logger.warning(f"⚠️ 翻页操作异常:{e},判定无更多页面")
        return False


def random_delay(min_delay, max_delay):
    """Sleep for a random duration between *min_delay* and *max_delay* seconds."""
    time.sleep(random.uniform(min_delay, max_delay))


def slow_scroll_400px(page, scroll_distance1=400):
    """Scroll down about *scroll_distance1* px (+/- SCROLL_OFFSET_RANGE) with wheel events.

    Whole 100 px chunks are sent as mouse-wheel ticks; the remainder is applied
    with ``window.scrollTo``.

    :param page: page to scroll
    :param scroll_distance1: nominal scroll distance in pixels
    :return: True on success, False if any step raised
    """
    try:
        page.evaluate(_UNLOCK_PAGE_SCROLL_JS)
        page.wait_for_timeout(300)

        scroll_distance = random.randint(
            scroll_distance1 - SCROLL_OFFSET_RANGE,
            scroll_distance1 + SCROLL_OFFSET_RANGE,
        )
        wheel_steps, remaining_pixels = divmod(scroll_distance, 100)
        logger.info(
            f"📜 开始模拟鼠标滚轮滚动(目标距离:{scroll_distance}px,滚轮步数:{wheel_steps}步 + {remaining_pixels}px)"
        )

        # Park the pointer over the list area so the wheel events hit content.
        page.mouse.move(random.randint(300, 800), random.randint(400, 600))
        for _ in range(wheel_steps):
            page.mouse.wheel(delta_x=0, delta_y=100)
            time.sleep(random.uniform(SCROLL_INTERVAL * 2, SCROLL_INTERVAL * 5))

        if remaining_pixels > 0:
            current_scroll_top = page.evaluate(
                "window.scrollY || document.documentElement.scrollTop"
            )
            page.evaluate(
                "(y) => window.scrollTo(0, y)", current_scroll_top + remaining_pixels
            )
            time.sleep(SCROLL_INTERVAL)

        final_scroll_top = page.evaluate(
            "window.scrollY || document.documentElement.scrollTop"
        )
        logger.info(f" 滚动完成,当前全局滚动位置:{final_scroll_top}px")

        # Let lazy-loaded content arrive, then pause like a reader would.
        page.wait_for_load_state("networkidle", timeout=8000)
        random_delay(2.0, 3.0)
        return True
    except Exception as e:
        logger.warning(f" 慢速滚动失败:{e}")
        return False


def main():
    """Launch Chrome, restore the session and crawl every configured keyword."""
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,
            channel="chrome",
            slow_mo=random.randint(100, 300),
            args=[
                "--disable-blink-features=AutomationControlled",
                "--enable-automation=false",
                "--disable-infobars",
                "--remote-debugging-port=0",
                "--start-maximized",
                "--disable-extensions",
                "--disable-plugins-discovery",
                "--no-sandbox",
                "--disable-dev-shm-usage",
                # NOTE(review): the context-level user_agent below presumably
                # takes precedence over this flag - confirm which one is wanted.
                f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36",
            ],
        )
        context = browser.new_context(
            locale="zh-CN",
            timezone_id="Asia/Shanghai",
            geolocation={"latitude": 31.230416, "longitude": 121.473701},
            permissions=["geolocation"],
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            no_viewport=True,
            java_script_enabled=True,
            bypass_csp=True,
        )
        # Registered on the context (not a single page) so the result tab
        # opened by the first search receives the same patches.
        context.add_init_script(_STEALTH_INIT_JS)
        page = context.new_page()

        try:
            load_cookies(context)
            if not is_login(page):
                page.goto(TARGET_LOGIN_URL)
                page.wait_for_load_state("networkidle")
                # TODO: call the login routine and persist cookies here; no
                # login_operation/save_cookies helper is defined in this file.
                logger.warning("⚠️ 未登录:自动登录流程尚未实现,后续搜索可能失败")

            keywords = ['999皮炎平 糠酸莫米松凝胶']

            detail_page = None  # result tab, reused once the first search succeeded
            for kw in keywords:
                page_no = 1
                popup_guard(page, "before_search")

                first_search = detail_page is None
                source_page = page if first_search else detail_page
                popup_guard(source_page, "before_search")
                result_page, search_success = search_operation(
                    source_page, kw, is_first_search=first_search
                )
                if not search_success or result_page is None:
                    # Keep the previous result tab: overwriting it with None
                    # used to make every remaining keyword get skipped.
                    print(f"❌ 搜索失败:{kw}")
                    continue
                detail_page = result_page

                popup_guard(detail_page, "after_search")

                not_found_keywords = detail_page.locator(
                    "div.filter-panel-container-empty-text"
                )
                if not_found_keywords.count() > 0:
                    logger.warning(f"⚠️ 关键词「{kw}」无匹配商品,直接跳过整个关键词采集")
                    continue

                while True:
                    detail_page.wait_for_load_state("domcontentloaded")
                    detail_page.wait_for_load_state("networkidle")
                    detail_page.wait_for_timeout(500)  # let rendering settle
                    goods_item = detail_page.locator("div.product-list-item").count()
                    print(f"这页商品有{goods_item}个")
                    slow_scroll_400px(detail_page)

                    if not goto_next_page(detail_page):
                        logger.info(f" 「{kw}」已无下一页,关键词采集结束")
                        break
                    logger.info(f"「{kw}」还有下一页")
                    page_no += 1
        except Exception as e:
            print(f" 程序异常:{e}")
        finally:
            browser.close()
            print(" 浏览器已关闭,程序结束")


if __name__ == '__main__':
    main()