Explorar o código

修复壹药城爬取

zhuoyuncheng hai 1 semana
pai
achega
a9a01a2eb2
Modificouse 1 ficheiro con 1 adición e 5 borrados
  1. 1 5
      spiders/yaoex/yaoex_snapshot_crawl.py

+ 1 - 5
spiders/yaoex/yaoex_snapshot_crawl.py

@@ -93,10 +93,7 @@ class YaoexSnapshotCrawl:
         self.collect_region_id = self.task_dict.get("collect_region_id", "")
         self.collect_round = self.task_dict.get("collect_round", 1)
         self.start_page = self._parse_page(self.task_dict.get("start_page"), 1)
-        self.end_page = max(
-            self.start_page,
-            self._parse_page(self.task_dict.get("end_page"), self.start_page),
-        )
+        self.end_page = self._parse_page(self.task_dict.get("end_page"), 100)
 
     @staticmethod
     def _parse_page(value, default=1):
@@ -596,7 +593,6 @@ class YaoexSnapshotCrawl:
 
             for item in page_items:
                 item, detail_url, spu_code, seller_code = self._build_detail_url(item)
-
                 name_part = (item.get("productName") or "").strip()
                 short_part = (item.get("shortName") or "").strip()
                 product_name = f"{name_part} {short_part}".strip()