|
|
@@ -25,7 +25,6 @@ chrome_path = r"C:\Program Files\Google\Chrome\Application\chrome.exe"
|
|
|
# Project root and its spiders/yaoex directory, resolved from this file's location (independent of where the script is run from).
|
|
|
PROJECT_ROOT = Path(__file__).resolve().parents[2]
|
|
|
YAOEX_SPIDER_DIR = PROJECT_ROOT / "spiders" / "yaoex"
|
|
|
-BROWSER_PROFILE_SUBDIR = "chrome_profile"
|
|
|
SLIDER_OFFSET_FIX = 10
|
|
|
DETAIL_GET_TIMEOUT = 15
|
|
|
DETAIL_URL_WAIT = 10
|
|
|
@@ -136,22 +135,20 @@ class YaoexSnapshotCrawl:
|
|
|
return s.getsockname()[1]
|
|
|
|
|
|
def _resolve_browser_profile_dir(self):
    """Return the browser profile directory to use for this account.

    The canonical location is ``YAOEX_SPIDER_DIR / <account_name>``. Two
    historical locations are also probed so that existing browser data found
    there keeps being used. The first candidate that contains a ``Default``
    directory or a ``Local State`` file wins; if none does, the canonical
    directory is created and returned.

    Returns:
        The path of the selected (or newly created) profile directory.
    """
    account = self.account_name
    target = YAOEX_SPIDER_DIR / account

    # The legacy paths are only honoured when they already hold browser data;
    # a new profile is never created under them.
    search_order = (
        target,
        YAOEX_SPIDER_DIR / "spiders" / "yaoex" / account,
        YAOEX_SPIDER_DIR / "chrome_profile" / account,
    )

    for path in search_order:
        has_default_dir = (path / "Default").is_dir()
        if has_default_dir or (path / "Local State").is_file():
            logger.info("使用已有浏览器配置目录: %s", path)
            return path

    target.mkdir(parents=True, exist_ok=True)
    logger.info("新建浏览器配置目录: %s", target)
    return target
|
|
|
|
|
|
def init_browser(self):
|
|
|
co = ChromiumOptions().set_browser_path(chrome_path)
|
|
|
@@ -384,7 +381,12 @@ class YaoexSnapshotCrawl:
|
|
|
def fetch_list_page(self, keyword, page):
    """Fetch one page of search results and return its product list.

    Posts the payload built by ``self._list_payload(keyword, page)`` to the
    search endpoint via ``self._post_with_retry`` and inspects the decoded
    JSON body. Products are returned only when ``data.recallStatus``
    converts to the integer 1; in every other case the result is empty.

    Args:
        keyword: Search keyword forwarded to ``_list_payload``.
        page: Page index forwarded to ``_list_payload``.

    Returns:
        The ``data.shopProducts`` value when ``recallStatus`` is 1 and the
        value is truthy, otherwise an empty list.
    """
    list_url = "https://gateway-b2b.fangkuaiyi.com/home/search/homeSearchList"
    resp = self._post_with_retry(list_url, self._list_payload(keyword, page))

    body = resp.json()
    # "data" may be absent or explicitly null; treat both as "no results"
    # instead of failing on a chained .get() against None.
    data = body.get("data") if isinstance(body, dict) else None
    if not isinstance(data, dict):
        return []

    try:
        recalled = int(data.get("recallStatus", 0)) == 1
    except (TypeError, ValueError):
        # A null or non-numeric recallStatus cannot equal 1.
        recalled = False

    if not recalled:
        return []
    return data.get("shopProducts") or []
|
|
|
|
|
|
def fetch_shop(self, seller_code):
|
|
|
detail_url = "https://gateway-b2b.fangkuaiyi.com/ycapp/shop/enterpriseQualification"
|
|
|
@@ -457,7 +459,11 @@ class YaoexSnapshotCrawl:
|
|
|
time.sleep(0.5)
|
|
|
self.driver.refresh()
|
|
|
time.sleep(2)
|
|
|
- return True
|
|
|
+ ele = self.driver.ele("xpath=//div[@class='yaoex-product-detail__product-detail']")
|
|
|
+ if ele:
|
|
|
+ return True
|
|
|
+ else:
|
|
|
+ continue
|
|
|
except Exception as e:
|
|
|
logger.warning(
|
|
|
"跳转详情异常 spu=%s seller=%s attempt=%s: %s",
|