2 mesi fa · 4436e0a15f
--- a/README.md
+++ b/README.md
--- a/auto.py
+++ b/auto.py
--- a/city.json
+++ b/city.json
@@ -0,0 +1,1929 @@
 
				+[
			
 
				+    {
			
 
				+        "id": 1,
			
 
				+        "name": "北京",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 2,
			
 
				+                "name": "北京市",
			
 
				+                "pid": 1
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 368,
			
 
				+        "name": "天津",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 369,
			
 
				+                "name": "天津市",
			
 
				+                "pid": 368
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 685,
			
 
				+        "name": "河北省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 686,
			
 
				+                "name": "石家庄市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 991,
			
 
				+                "name": "唐山市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 1261,
			
 
				+                "name": "秦皇岛市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 1373,
			
 
				+                "name": "邯郸市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 1640,
			
 
				+                "name": "邢台市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 1861,
			
 
				+                "name": "保定市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 2247,
			
 
				+                "name": "张家口市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 2513,
			
 
				+                "name": "承德市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 2745,
			
 
				+                "name": "沧州市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 2974,
			
 
				+                "name": "廊坊市",
			
 
				+                "pid": 685
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 3109,
			
 
				+                "name": "衡水市",
			
 
				+                "pid": 685
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 3252,
			
 
				+        "name": "山西省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 3253,
			
 
				+                "name": "太原市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 3373,
			
 
				+                "name": "大同市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 3512,
			
 
				+                "name": "阳泉市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 3561,
			
 
				+                "name": "长治市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 3723,
			
 
				+                "name": "晋城市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 3807,
			
 
				+                "name": "朔州市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 3887,
			
 
				+                "name": "晋中市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 4029,
			
 
				+                "name": "运城市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 4204,
			
 
				+                "name": "忻州市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 4393,
			
 
				+                "name": "临汾市",
			
 
				+                "pid": 3252
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 4569,
			
 
				+                "name": "吕梁市",
			
 
				+                "pid": 3252
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 4737,
			
 
				+        "name": "内蒙古自治区",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 4738,
			
 
				+                "name": "呼和浩特市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 4848,
			
 
				+                "name": "包头市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 4950,
			
 
				+                "name": "乌海市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 4976,
			
 
				+                "name": "赤峰市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 5186,
			
 
				+                "name": "通辽市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 5339,
			
 
				+                "name": "鄂尔多斯市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 5437,
			
 
				+                "name": "呼伦贝尔市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 5610,
			
 
				+                "name": "巴彦淖尔市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 5707,
			
 
				+                "name": "乌兰察布市",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 5829,
			
 
				+                "name": "兴安盟",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 5926,
			
 
				+                "name": "锡林郭勒盟",
			
 
				+                "pid": 4737
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6039,
			
 
				+                "name": "阿拉善盟",
			
 
				+                "pid": 4737
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 6083,
			
 
				+        "name": "辽宁省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 6084,
			
 
				+                "name": "沈阳市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6288,
			
 
				+                "name": "大连市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6451,
			
 
				+                "name": "鞍山市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6555,
			
 
				+                "name": "抚顺市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6635,
			
 
				+                "name": "本溪市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6691,
			
 
				+                "name": "丹东市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6787,
			
 
				+                "name": "锦州市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6899,
			
 
				+                "name": "营口市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 6976,
			
 
				+                "name": "阜新市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 7067,
			
 
				+                "name": "辽阳市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 7125,
			
 
				+                "name": "盘锦市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 7180,
			
 
				+                "name": "铁岭市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 7294,
			
 
				+                "name": "朝阳市",
			
 
				+                "pid": 6083
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 7465,
			
 
				+                "name": "葫芦岛市",
			
 
				+                "pid": 6083
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 7604,
			
 
				+        "name": "吉林省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 7605,
			
 
				+                "name": "长春市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 7852,
			
 
				+                "name": "吉林市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 8015,
			
 
				+                "name": "四平市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 8100,
			
 
				+                "name": "辽源市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 8153,
			
 
				+                "name": "通化市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 8270,
			
 
				+                "name": "白山市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 8344,
			
 
				+                "name": "松原市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 8498,
			
 
				+                "name": "白城市",
			
 
				+                "pid": 7604
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 8631,
			
 
				+                "name": "延边朝鲜族自治州",
			
 
				+                "pid": 7604
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 8758,
			
 
				+        "name": "黑龙江省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 8759,
			
 
				+                "name": "哈尔滨市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9106,
			
 
				+                "name": "齐齐哈尔市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9332,
			
 
				+                "name": "鸡西市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9437,
			
 
				+                "name": "鹤岗市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9515,
			
 
				+                "name": "双鸭山市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9634,
			
 
				+                "name": "大庆市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9763,
			
 
				+                "name": "伊春市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9824,
			
 
				+                "name": "佳木斯市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 9973,
			
 
				+                "name": "七台河市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 10022,
			
 
				+                "name": "牡丹江市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 10125,
			
 
				+                "name": "黑河市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 10292,
			
 
				+                "name": "绥化市",
			
 
				+                "pid": 8758
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 10550,
			
 
				+                "name": "大兴安岭地区",
			
 
				+                "pid": 8758
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 10601,
			
 
				+        "name": "上海",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 10602,
			
 
				+                "name": "上海市",
			
 
				+                "pid": 10601
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 10853,
			
 
				+        "name": "江苏省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 10854,
			
 
				+                "name": "南京市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11006,
			
 
				+                "name": "无锡市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11102,
			
 
				+                "name": "徐州市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11300,
			
 
				+                "name": "常州市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11374,
			
 
				+                "name": "苏州市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11503,
			
 
				+                "name": "南通市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11648,
			
 
				+                "name": "连云港市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11765,
			
 
				+                "name": "淮安市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 11887,
			
 
				+                "name": "盐城市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 12052,
			
 
				+                "name": "扬州市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 12158,
			
 
				+                "name": "镇江市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 12226,
			
 
				+                "name": "泰州市",
			
 
				+                "pid": 10853
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 12342,
			
 
				+                "name": "宿迁市",
			
 
				+                "pid": 10853
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 12458,
			
 
				+        "name": "浙江省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 12459,
			
 
				+                "name": "杭州市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 12669,
			
 
				+                "name": "宁波市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 12842,
			
 
				+                "name": "温州市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13046,
			
 
				+                "name": "嘉兴市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13126,
			
 
				+                "name": "湖州市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13204,
			
 
				+                "name": "绍兴市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13314,
			
 
				+                "name": "金华市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13470,
			
 
				+                "name": "衢州市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13577,
			
 
				+                "name": "舟山市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13618,
			
 
				+                "name": "台州市",
			
 
				+                "pid": 12458
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 13764,
			
 
				+                "name": "丽水市",
			
 
				+                "pid": 12458
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 13947,
			
 
				+        "name": "安徽省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 13948,
			
 
				+                "name": "合肥市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14121,
			
 
				+                "name": "芜湖市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14210,
			
 
				+                "name": "蚌埠市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14307,
			
 
				+                "name": "淮南市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14410,
			
 
				+                "name": "马鞍山市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14476,
			
 
				+                "name": "淮北市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14518,
			
 
				+                "name": "铜陵市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14583,
			
 
				+                "name": "安庆市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14763,
			
 
				+                "name": "黄山市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 14880,
			
 
				+                "name": "滁州市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 15015,
			
 
				+                "name": "阜阳市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 15196,
			
 
				+                "name": "宿州市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 15326,
			
 
				+                "name": "六安市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 15480,
			
 
				+                "name": "亳州市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 15581,
			
 
				+                "name": "池州市",
			
 
				+                "pid": 13947
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 15652,
			
 
				+                "name": "宣城市",
			
 
				+                "pid": 13947
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 15768,
			
 
				+        "name": "福建省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 15769,
			
 
				+                "name": "福州市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 15969,
			
 
				+                "name": "厦门市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 16030,
			
 
				+                "name": "莆田市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 16093,
			
 
				+                "name": "三明市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 16252,
			
 
				+                "name": "泉州市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 16440,
			
 
				+                "name": "漳州市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 16598,
			
 
				+                "name": "南平市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 16751,
			
 
				+                "name": "龙岩市",
			
 
				+                "pid": 15768
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 16892,
			
 
				+                "name": "宁德市",
			
 
				+                "pid": 15768
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 17032,
			
 
				+        "name": "江西省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 17033,
			
 
				+                "name": "南昌市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 17182,
			
 
				+                "name": "景德镇市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 17246,
			
 
				+                "name": "萍乡市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 17310,
			
 
				+                "name": "九江市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 17551,
			
 
				+                "name": "新余市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 17588,
			
 
				+                "name": "鹰潭市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 17651,
			
 
				+                "name": "赣州市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 17980,
			
 
				+                "name": "吉安市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 18246,
			
 
				+                "name": "宜春市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 18476,
			
 
				+                "name": "抚州市",
			
 
				+                "pid": 17032
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 18669,
			
 
				+                "name": "上饶市",
			
 
				+                "pid": 17032
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 18911,
			
 
				+        "name": "山东省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 18912,
			
 
				+                "name": "济南市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19090,
			
 
				+                "name": "青岛市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19242,
			
 
				+                "name": "淄博市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19339,
			
 
				+                "name": "枣庄市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19412,
			
 
				+                "name": "东营市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19472,
			
 
				+                "name": "烟台市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19645,
			
 
				+                "name": "潍坊市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19781,
			
 
				+                "name": "济宁市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 19952,
			
 
				+                "name": "泰安市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 20047,
			
 
				+                "name": "威海市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 20128,
			
 
				+                "name": "日照市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 20190,
			
 
				+                "name": "临沂市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 20360,
			
 
				+                "name": "德州市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 20508,
			
 
				+                "name": "聊城市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 20653,
			
 
				+                "name": "滨州市",
			
 
				+                "pid": 18911
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 20753,
			
 
				+                "name": "菏泽市",
			
 
				+                "pid": 18911
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 20932,
			
 
				+        "name": "河南省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 20933,
			
 
				+                "name": "郑州市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 21157,
			
 
				+                "name": "开封市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 21291,
			
 
				+                "name": "洛阳市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 21501,
			
 
				+                "name": "平顶山市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 21662,
			
 
				+                "name": "安阳市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 21810,
			
 
				+                "name": "鹤壁市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 21865,
			
 
				+                "name": "新乡市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 22051,
			
 
				+                "name": "焦作市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 22175,
			
 
				+                "name": "濮阳市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 22273,
			
 
				+                "name": "许昌市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 22386,
			
 
				+                "name": "漯河市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 22451,
			
 
				+                "name": "三门峡市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 22540,
			
 
				+                "name": "南阳市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 22812,
			
 
				+                "name": "商丘市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 23023,
			
 
				+                "name": "信阳市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 23259,
			
 
				+                "name": "周口市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 23480,
			
 
				+                "name": "驻马店市",
			
 
				+                "pid": 20932
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 23700,
			
 
				+                "name": "省直辖县级行政区划",
			
 
				+                "pid": 20932
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 23718,
			
 
				+        "name": "湖北省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 23719,
			
 
				+                "name": "武汉市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 23926,
			
 
				+                "name": "黄石市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 23990,
			
 
				+                "name": "十堰市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24128,
			
 
				+                "name": "宜昌市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24255,
			
 
				+                "name": "襄阳市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24395,
			
 
				+                "name": "鄂州市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24427,
			
 
				+                "name": "荆门市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24517,
			
 
				+                "name": "孝感市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24653,
			
 
				+                "name": "荆州市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24792,
			
 
				+                "name": "黄冈市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 24969,
			
 
				+                "name": "咸宁市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 25065,
			
 
				+                "name": "随州市",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 25120,
			
 
				+                "name": "恩施土家族苗族自治州",
			
 
				+                "pid": 23718
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 25223,
			
 
				+                "name": "省直辖县级行政区划",
			
 
				+                "pid": 23718
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 25313,
			
 
				+        "name": "湖南省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 25314,
			
 
				+                "name": "长沙市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 25496,
			
 
				+                "name": "株洲市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 25618,
			
 
				+                "name": "湘潭市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 25697,
			
 
				+                "name": "衡阳市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 25903,
			
 
				+                "name": "邵阳市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 26122,
			
 
				+                "name": "岳阳市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 26271,
			
 
				+                "name": "常德市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 26460,
			
 
				+                "name": "张家界市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 26539,
			
 
				+                "name": "益阳市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 26646,
			
 
				+                "name": "郴州市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 26820,
			
 
				+                "name": "永州市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27029,
			
 
				+                "name": "怀化市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27250,
			
 
				+                "name": "娄底市",
			
 
				+                "pid": 25313
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27345,
			
 
				+                "name": "湘西土家族苗族自治州",
			
 
				+                "pid": 25313
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 27470,
			
 
				+        "name": "广东省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 27471,
			
 
				+                "name": "广州市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27661,
			
 
				+                "name": "韶关市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27784,
			
 
				+                "name": "深圳市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27873,
			
 
				+                "name": "珠海市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27908,
			
 
				+                "name": "汕头市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 27983,
			
 
				+                "name": "佛山市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28030,
			
 
				+                "name": "江门市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28120,
			
 
				+                "name": "湛江市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28255,
			
 
				+                "name": "茂名市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28388,
			
 
				+                "name": "肇庆市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28503,
			
 
				+                "name": "惠州市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28593,
			
 
				+                "name": "梅州市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28719,
			
 
				+                "name": "汕尾市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28787,
			
 
				+                "name": "河源市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28895,
			
 
				+                "name": "阳江市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 28963,
			
 
				+                "name": "清远市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29064,
			
 
				+                "name": "东莞市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29102,
			
 
				+                "name": "中山市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29127,
			
 
				+                "name": "潮州市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29182,
			
 
				+                "name": "揭阳市",
			
 
				+                "pid": 27470
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29296,
			
 
				+                "name": "云浮市",
			
 
				+                "pid": 27470
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 29373,
			
 
				+        "name": "广西壮族自治区",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 29374,
			
 
				+                "name": "南宁市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29525,
			
 
				+                "name": "柳州市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29654,
			
 
				+                "name": "桂林市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29820,
			
 
				+                "name": "梧州市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29894,
			
 
				+                "name": "北海市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29929,
			
 
				+                "name": "防城港市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 29965,
			
 
				+                "name": "钦州市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30041,
			
 
				+                "name": "贵港市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30121,
			
 
				+                "name": "玉林市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30239,
			
 
				+                "name": "百色市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30389,
			
 
				+                "name": "贺州市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30456,
			
 
				+                "name": "河池市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30609,
			
 
				+                "name": "来宾市",
			
 
				+                "pid": 29373
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30687,
			
 
				+                "name": "崇左市",
			
 
				+                "pid": 29373
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 30783,
			
 
				+        "name": "海南省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 30784,
			
 
				+                "name": "海口市",
			
 
				+                "pid": 30783
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30835,
			
 
				+                "name": "三亚市",
			
 
				+                "pid": 30783
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30844,
			
 
				+                "name": "三沙市",
			
 
				+                "pid": 30783
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30853,
			
 
				+                "name": "儋州市",
			
 
				+                "pid": 30783
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 30873,
			
 
				+                "name": "省直辖县级行政区划",
			
 
				+                "pid": 30783
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 31059,
			
 
				+        "name": "重庆",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 31060,
			
 
				+                "name": "重庆市",
			
 
				+                "pid": 31059
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 31737,
			
 
				+                "name": "自治县",
			
 
				+                "pid": 31059
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 32131,
			
 
				+        "name": "四川省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 32132,
			
 
				+                "name": "成都市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 32414,
			
 
				+                "name": "自贡市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 32511,
			
 
				+                "name": "攀枝花市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 32566,
			
 
				+                "name": "泸州市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 32700,
			
 
				+                "name": "德阳市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 32791,
			
 
				+                "name": "绵阳市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 32968,
			
 
				+                "name": "广元市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 33122,
			
 
				+                "name": "遂宁市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 33223,
			
 
				+                "name": "内江市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 33312,
			
 
				+                "name": "乐山市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 33456,
			
 
				+                "name": "南充市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 33708,
			
 
				+                "name": "眉山市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 33795,
			
 
				+                "name": "宜宾市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 33942,
			
 
				+                "name": "广安市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 34073,
			
 
				+                "name": "达州市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 34281,
			
 
				+                "name": "雅安市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 34386,
			
 
				+                "name": "巴中市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 34531,
			
 
				+                "name": "资阳市",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 34625,
			
 
				+                "name": "阿坝藏族羌族自治州",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 34817,
			
 
				+                "name": "甘孜藏族自治州",
			
 
				+                "pid": 32131
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 35125,
			
 
				+                "name": "凉山彝族自治州",
			
 
				+                "pid": 32131
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 35447,
			
 
				+        "name": "贵州省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 35448,
			
 
				+                "name": "贵阳市",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 35605,
			
 
				+                "name": "六盘水市",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 35702,
			
 
				+                "name": "遵义市",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 35971,
			
 
				+                "name": "安顺市",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 36070,
			
 
				+                "name": "毕节市",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 36358,
			
 
				+                "name": "铜仁市",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 36550,
			
 
				+                "name": "黔西南布依族苗族自治州",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 36699,
			
 
				+                "name": "黔东南苗族侗族自治州",
			
 
				+                "pid": 35447
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 36933,
			
 
				+                "name": "黔南布依族苗族自治州",
			
 
				+                "pid": 35447
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 37055,
			
 
				+        "name": "云南省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 37056,
			
 
				+                "name": "昆明市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37221,
			
 
				+                "name": "曲靖市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37368,
			
 
				+                "name": "玉溪市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37454,
			
 
				+                "name": "保山市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37536,
			
 
				+                "name": "昭通市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37698,
			
 
				+                "name": "丽江市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37770,
			
 
				+                "name": "普洱市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37884,
			
 
				+                "name": "临沧市",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 37977,
			
 
				+                "name": "楚雄彝族自治州",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38091,
			
 
				+                "name": "红河哈尼族彝族自治州",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38244,
			
 
				+                "name": "文山壮族苗族自治州",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38359,
			
 
				+                "name": "西双版纳傣族自治州",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38405,
			
 
				+                "name": "大理白族自治州",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38530,
			
 
				+                "name": "德宏傣族景颇族自治州",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38592,
			
 
				+                "name": "怒江傈僳族自治州",
			
 
				+                "pid": 37055
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38628,
			
 
				+                "name": "迪庆藏族自治州",
			
 
				+                "pid": 37055
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 38662,
			
 
				+        "name": "西藏自治区",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 38663,
			
 
				+                "name": "拉萨市",
			
 
				+                "pid": 38662
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38745,
			
 
				+                "name": "日喀则市",
			
 
				+                "pid": 38662
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 38970,
			
 
				+                "name": "昌都市",
			
 
				+                "pid": 38662
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39120,
			
 
				+                "name": "林芝市",
			
 
				+                "pid": 38662
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39184,
			
 
				+                "name": "山南市",
			
 
				+                "pid": 38662
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39282,
			
 
				+                "name": "那曲市",
			
 
				+                "pid": 38662
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39408,
			
 
				+                "name": "阿里地区",
			
 
				+                "pid": 38662
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 39453,
			
 
				+        "name": "陕西省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 39454,
			
 
				+                "name": "西安市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39640,
			
 
				+                "name": "铜川市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39683,
			
 
				+                "name": "宝鸡市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39816,
			
 
				+                "name": "咸阳市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 39973,
			
 
				+                "name": "渭南市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 40121,
			
 
				+                "name": "延安市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 40252,
			
 
				+                "name": "汉中市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 40441,
			
 
				+                "name": "榆林市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 40638,
			
 
				+                "name": "安康市",
			
 
				+                "pid": 39453
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 40788,
			
 
				+                "name": "商洛市",
			
 
				+                "pid": 39453
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 40906,
			
 
				+        "name": "甘肃省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 40907,
			
 
				+                "name": "兰州市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41033,
			
 
				+                "name": "嘉峪关市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41040,
			
 
				+                "name": "金昌市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41061,
			
 
				+                "name": "白银市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41145,
			
 
				+                "name": "天水市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41283,
			
 
				+                "name": "武威市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41396,
			
 
				+                "name": "张掖市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41481,
			
 
				+                "name": "平凉市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41602,
			
 
				+                "name": "酒泉市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41694,
			
 
				+                "name": "庆阳市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41822,
			
 
				+                "name": "定西市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 41952,
			
 
				+                "name": "陇南市",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42161,
			
 
				+                "name": "临夏回族自治州",
			
 
				+                "pid": 40906
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42301,
			
 
				+                "name": "甘南藏族自治州",
			
 
				+                "pid": 40906
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 42412,
			
 
				+        "name": "青海省",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 42413,
			
 
				+                "name": "西宁市",
			
 
				+                "pid": 42412
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42501,
			
 
				+                "name": "海东市",
			
 
				+                "pid": 42412
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42606,
			
 
				+                "name": "海北藏族自治州",
			
 
				+                "pid": 42412
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42643,
			
 
				+                "name": "黄南藏族自治州",
			
 
				+                "pid": 42412
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42681,
			
 
				+                "name": "海南藏族自治州",
			
 
				+                "pid": 42412
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42727,
			
 
				+                "name": "果洛藏族自治州",
			
 
				+                "pid": 42412
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42778,
			
 
				+                "name": "玉树藏族自治州",
			
 
				+                "pid": 42412
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42834,
			
 
				+                "name": "海西蒙古族藏族自治州",
			
 
				+                "pid": 42412
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 42888,
			
 
				+        "name": "宁夏回族自治区",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 42889,
			
 
				+                "name": "银川市",
			
 
				+                "pid": 42888
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42958,
			
 
				+                "name": "石嘴山市",
			
 
				+                "pid": 42888
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 42998,
			
 
				+                "name": "吴忠市",
			
 
				+                "pid": 42888
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43054,
			
 
				+                "name": "固原市",
			
 
				+                "pid": 42888
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43125,
			
 
				+                "name": "中卫市",
			
 
				+                "pid": 42888
			
 
				+            }
			
 
				+        ]
			
 
				+    },
			
 
				+    {
			
 
				+        "id": 43175,
			
 
				+        "name": "新疆维吾尔自治区",
			
 
				+        "pid": 0,
			
 
				+        "sons": [
			
 
				+            {
			
 
				+                "id": 43176,
			
 
				+                "name": "乌鲁木齐市",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43315,
			
 
				+                "name": "克拉玛依市",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43339,
			
 
				+                "name": "吐鲁番市",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43378,
			
 
				+                "name": "哈密市",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43440,
			
 
				+                "name": "昌吉回族自治州",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43554,
			
 
				+                "name": "博尔塔拉蒙古自治州",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43593,
			
 
				+                "name": "巴音郭楞蒙古自治州",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43712,
			
 
				+                "name": "阿克苏地区",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43836,
			
 
				+                "name": "克孜勒苏柯尔克孜自治州",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 43887,
			
 
				+                "name": "喀什地区",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 44109,
			
 
				+                "name": "和田地区",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 44224,
			
 
				+                "name": "伊犁哈萨克自治州",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 44386,
			
 
				+                "name": "塔城地区",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 44514,
			
 
				+                "name": "阿勒泰地区",
			
 
				+                "pid": 43175
			
 
				+            },
			
 
				+            {
			
 
				+                "id": 44592,
			
 
				+                "name": "自治区直辖县级行政区划",
			
 
				+                "pid": 43175
			
 
				+            }
			
 
				+        ]
			
 
				+    }
			
 
				+]
			
--- a/config.py
+++ b/config.py
@@ -0,0 +1,524 @@
 
				+# config.py - 药帮忙采集配置
			
 
				+from datetime import datetime
			
 
				+
			
 
				+import pymysql
			
 
				+from dotenv import load_dotenv
			
 
				+import os
			
 
				+import oss2
			
 
				+from PIL import Image
			
 
				+from logger_config import logger
			
 
				+
			
 
				+
			
 
				+# 步骤：加载 .env 文件（如需）
			
 
				+# load_dotenv() 默认读取当前目录下的 .env；如在其他路径可手动指定
			
 
				+# load_dotenv()
			
 
				+
			
 
				+# MySQL 配置（与 MYSQL_CONFIG 结构一致）
			
 
				+# MYSQL_CONFIG = {
			
 
				+#     "host": "47.119.164.65",       # MySQL 地址
			
 
				+#     "port": 3306,                  # 端口
			
 
				+#     "user": "test_c",             # 用户名
			
 
				+#     "password": "Dfwy@2025",      # 密码
			
 
				+#     "database": "test2",          # 数据库名
			
 
				+#     "charset": "utf8mb4"          # 字符集
			
 
				+# }
			
 
				+
			
 
				+# 测试环境
			
 
				+# MYSQL_CONFIG = {
			
 
				+#     "host": "39.108.116.125",     # MySQL 地址
			
 
				+#     "port": 3306,                  # 端口
			
 
				+#     "user": "drug_retrieve",      # 用户名
			
 
				+#     "password": "Pem287...",      # 密码
			
 
				+#     "database": "drug_retrieve",  # 数据库名
			
 
				+#     "charset": "utf8mb4"          # 字符集
			
 
				+# }
			
 
				+
			
 
				+
			
 
				+# 线上环境
			
 
				+MYSQL_CONFIG = {
			
 
				+    "host": "120.24.49.2",       # MySQL 地址
			
 
				+    "port": 3306,              # 端口
			
 
				+    "user": "drug_retrieve",            # 用户名
			
 
				+    "password": "ksCt3xm6chzdkafj",    # 密码
			
 
				+    "database": "drug_retrieve",   # 数据库名
			
 
				+    "charset": "utf8mb4"       # 字符集
			
 
				+}
			
 
				+
			
 
				+
			
 
				+# MYSQL_CONFIG = {
			
 
				+#     "host": os.getenv("MYSQL_HOST"),  # 从 .env 读取 MYSQL_HOST
			
 
				+#     "user": os.getenv("MYSQL_USER"),
			
 
				+#     "password": os.getenv("MYSQL_PASSWORD"),  # 敏感信息建议放在 .env
			
 
				+#     "database": os.getenv("MYSQL_DATABASE"),
			
 
				+#     "port": int(os.getenv("MYSQL_PORT", 3306)),  # 默认端口 3306
			
 
				+#     "charset": "utf8mb4"
			
 
				+# }
			
 
				+
			
 
				+# 模糊匹配 URL
			
 
				+def fuzzy_match_product_url_in_db_mysql(product_url):
			
 
				+    # 先做非空校验
			
 
				+    if not product_url:
			
 
				+        logger.warning("鈿狅笍 寰呭尮閰嶇殑 product_url 涓虹┖锛岃烦杩囨暟鎹烘煡")
			
 
				+        return None
			
 
				+
			
 
				+    # 如有需要可转义 % / _，避免 LIKE 通配符影响
			
 
				+    # escaped_product_url = product_url.replace("%", "\%").replace("_", "\_")
			
 
				+
			
 
				+    try:
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+
			
 
				+        # 使用 LIKE 做模糊匹配
			
 
				+        # 例如：%product_url%
			
 
				+        sql = "SELECT * FROM ybm_drug_middle WHERE product_link LIKE %s"
			
 
				+        match_value = f"%{product_url}%"
			
 
				+        cursor.execute(sql, (match_value,))
			
 
				+
			
 
				+        # 取第一条匹配记录并格式化为字典
			
 
				+        result = cursor.fetchone()  # 返回元组，如 (id, product_url, price, ...)
			
 
				+        if result:
			
 
				+            # 将查询结果转换为字典，便于后续按字段名取值
			
 
				+            column_names = [desc[0] for desc in cursor.description]
			
 
				+            result_dict = dict(zip(column_names, result))
			
 
				+            return result_dict  # 匹配成功
			
 
				+        else:
			
 
				+            return None  # 未匹配到记录
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"MySQL 模糊匹配失败：{str(e)}")
			
 
				+        return None
			
 
				+    finally:
			
 
				+        # 无论成功与否都关闭连接，避免连接泄漏
			
 
				+        if 'conn' in locals() and conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 从数据库提取任务 ====================
			
 
				+def get_search_keywords_from_db(platform: int = 9):
			
 
				+    """读取待执行任务（status=1），返回 [(task_id, brand, keyword, company_id), ...]。"""
			
 
				+    keywords = []
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    try:
			
 
				+        required_configs = ['host', 'user', 'password', 'database']
			
 
				+        for cfg in required_configs:
			
 
				+            if cfg not in MYSQL_CONFIG:
			
 
				+                raise ValueError(f"MYSQL_CONFIG 缺少必要字段: {cfg}")
			
 
				+
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+        sql = (
			
 
				+            'SELECT id, product_brand, product_name, product_specs, company_id '
			
 
				+            'FROM retrieve_collect_task_allocate '
			
 
				+            'WHERE status = 1 AND platform = %s'
			
 
				+        )
			
 
				+        cursor.execute(sql, (platform,))
			
 
				+
			
 
				+        results = cursor.fetchall()
			
 
				+        for row in results:
			
 
				+            task_id = row[0]
			
 
				+            brand = (row[1] or '').strip()
			
 
				+            name = (row[2] or '').strip()
			
 
				+            company_id = row[4] if row[4] is not None else 0
			
 
				+
			
 
				+            parts = [p for p in [brand, name] if p]
			
 
				+            if parts:
			
 
				+                keyword = ''.join(parts)
			
 
				+                keywords.append((task_id, brand, keyword, company_id))
			
 
				+
			
 
				+        logger.debug(f"读取待执行任务完成，platform={platform}，数量={len(keywords)}")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"读取待执行任务失败，platform={platform}，错误：{str(e)}")
			
 
				+        keywords = []
			
 
				+    finally:
			
 
				+        if cursor:
			
 
				+            try:
			
 
				+                cursor.close()
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+        if conn:
			
 
				+            try:
			
 
				+                conn.close()
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+
			
 
				+    return keywords
			
 
				+
			
 
				+
			
 
				+def has_running_task(platform: int = 9) -> bool:
			
 
				+    """检查指定 platform 在当天是否存在执行中任务（status=2）。"""
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    try:
			
 
				+        required_configs = ['host', 'user', 'password', 'database']
			
 
				+        for cfg in required_configs:
			
 
				+            if cfg not in MYSQL_CONFIG:
			
 
				+                raise ValueError(f"MYSQL_CONFIG 缺少必要字段: {cfg}")
			
 
				+
			
 
				+        day_start_ts = int(datetime.now().replace(hour=0, minute=0, second=0, microsecond=0).timestamp())
			
 
				+        next_day_ts = day_start_ts + 24 * 60 * 60
			
 
				+
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+        sql = (
			
 
				+            'SELECT 1 FROM retrieve_collect_task_allocate '
			
 
				+            'WHERE status = 2 AND platform = %s '
			
 
				+            'AND update_time >= %s AND update_time < %s LIMIT 1'
			
 
				+        )
			
 
				+        cursor.execute(sql, (platform, day_start_ts, next_day_ts))
			
 
				+        return cursor.fetchone() is not None
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"检查执行中任务失败，platform={platform}，错误：{str(e)}")
			
 
				+        return False
			
 
				+    finally:
			
 
				+        if cursor:
			
 
				+            try:
			
 
				+                cursor.close()
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+        if conn:
			
 
				+            try:
			
 
				+                conn.close()
			
 
				+            except Exception:
			
 
				+                pass
			
 
				+
			
 
				+# 以下历史示例注释已保留为空，避免乱码干扰
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 2. 反爬配置 ====================
			
 
				+# 随机延迟范围（模拟真人操作）
			
 
				+MIN_CLICK_DELAY = 1.5  # 点击最小延迟（秒）
			
 
				+MAX_CLICK_DELAY = 3.5  # 点击最大延迟（秒）
			
 
				+MIN_INPUT_DELAY = 0.1  # 输入最小延迟（秒）
			
 
				+MAX_INPUT_DELAY = 0.3  # 输入最大延迟（秒）
			
 
				+MIN_PAGE_DELAY = 2.0   # 页面最小等待（秒）
			
 
				+MAX_PAGE_DELAY = 4.0   # 页面最大等待（秒）
			
 
				+
			
 
				+# 关键词之间的随机延迟（秒）
			
 
				+MIN_KEYWORD_DELAY = 8.0
			
 
				+MAX_KEYWORD_DELAY = 15.0
			
 
				+
			
 
				+# 滚动配置（目标 400px，含随机偏移）
			
 
				+SCROLL_TARGET_DISTANCE = 400  # 目标滚动距离
			
 
				+SCROLL_OFFSET_RANGE = 50       # 随机偏移范围
			
 
				+SCROLL_STEP = 50               # 每次滚动步长
			
 
				+SCROLL_INTERVAL = 0.05         # 步长间隔（秒）
			
 
				+
			
 
				+# ==================== 3. Cookie & 登录配置 ====================
			
 
				+COOKIE_FILE_PATH = "ybm_cookies.json"  # Cookie 保存路径
			
 
				+# Cookie 有效性验证页面
			
 
				+LOGIN_VALIDATE_URL = "https://www.ybm100.com/new/"
			
 
				+
			
 
				+# 登录账号密码
			
 
				+USERNAME = "18008650300"
			
 
				+PASSWORD = "12345678"
			
 
				+
			
 
				+
			
 
				+# USERNAME = "yjj112031"
			
 
				+# PASSWORD = "123456"
			
 
				+
			
 
				+# 登录URL
			
 
				+TARGET_LOGIN_URL = "https://www.ybm100.com/new/login"
			
 
				+# "https://www.yyjzt.com/login?redirect=%2FgoodDetail%3FladderNum%26itemStoreId%3D124250306%26sourceProdetail%3D%252Fsearch%26is_store%3D0"
			
 
				+
			
 
				+# ==================== 4. 元素选择器配置 ====================
			
 
				+# 基础选择器
			
 
				+USERNAME_SELECTOR = "input[placeholder*=请输入账号]"
			
 
				+PASSWORD_SELECTOR = "input[placeholder*=请输入密码]"
			
 
				+LOGIN_BTN_SELECTOR = "button:has(span:text('登录'))"
			
 
				+SEARCH_INPUT_SELECTOR = "input[placeholder*='药品名称/厂家名称']"
			
 
				+SEARCH_INPUT_SELECTOR2 = "div.home-search-container-search-head"
			
 
				+SEARCH_BTN_SELECTOR = "div.home-search-container-search-head-btn[data-scmd=\"text-搜索\"]"
			
 
				+
			
 
				+# 采集元素选择器可根据页面实际情况调整
			
 
				+# 如页面结构变化，请优先更新以上选择器
			
 
				+PRODUCT_ITEM_SELECTOR = "div.product-list-item"         # 商品项容器
			
 
				+
			
 
				+PRODUCT_TITLE_SELECTOR = "div.product-name"        # 商品标题
			
 
				+PRODUCT_PRICE_SELECTOR = "div.main-price"       # 商品价格
			
 
				+PRODUCT_STORE_SELECTOR = 'div.prduct-shop-name div.shop-name'  # 店铺名称
			
 
				+PRODUCT_COMPANY_SELECTOR = "div.product-manufacturer"            # 公司名称
			
 
				+PRODUCT_VALIDITY_SELECTOR = "div.product-period"     # 有效期
			
 
				+# div.shop-info-container-left-info-name span
			
 
				+# ==================== 5. 等待时间配置（毫秒） ====================
			
 
				+ELEMENT_TIMEOUT = 10000
			
 
				+LOGIN_AFTER_CLICK = 5000
			
 
				+SEARCH_BTN_TIMEOUT = 5000
			
 
				+COLLECT_DELAY = 3000
			
 
				+DETAIL_LOAD_TIMEOUT = 5000  # 点击商品后等待详情加载时间
			
 
				+
			
 
				+# ==================== 6. 浏览器配置 ====================
			
 
				+BROWSER_HEADLESS = False
			
 
				+BROWSER_CHANNEL = "chrome"
			
 
				+SLOW_MO_MIN = 50
			
 
				+SLOW_MO_MAX = 100
			
 
				+
			
 
				+# ==================== 7. CSV 配置 ====================
			
 
				+CSV_HEADERS = [
			
 
				+    "商品标题", "商品采购价格", "商品折扣价格", "规格", "盒数",
			
 
				+    "店铺名称", "公司名称",
			
 
				+    "有效日期", "生产日期", "批准文号", "采集时间"
			
 
				+]  # CSV 表头
			
 
				+
			
 
				+# 存放营业执照截图路径（如需）
			
 
				+# cropped_screenshot_path =
			
 
				+
			
 
				+
			
 
				+# 百度 OCR 配置
			
 
				+request_url_config = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
			
 
				+
			
 
				+AppKey_config = "tRK2RhyItCSh6BzyT4CNVXQa"
			
 
				+AppSecret_config = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
			
 
				+token_url_config = 'https://aip.baidubce.com/oauth/2.0/token'
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ---------------------- OSS 配置 ----------------------
			
 
				+OSS_ACCESS_KEY_ID = 'LTAI5tDwjfteBvivYN41r8sJ'
			
 
				+OSS_ACCESS_KEY_SECRET = 'yowuOGi2nYYnrqGpO3qcz94C4brcPp'
			
 
				+OSS_ENDPOINT = "oss-cn-shenzhen.aliyuncs.com"
			
 
				+OSS_BUCKET_NAME = "zhijiayun-jiansuo"
			
 
				+OSS_PREFIX = "scrape_data/"
			
 
				+
			
 
				+
			
 
				+
			
 
				+# 本地截图配置
			
 
				+LOCAL_SCREENSHOT_DIR = "local_screenshots"  # 本地截图目录
			
 
				+LOCAL_SCREENSHOT_NAME = None  # 自动生成文件名
			
 
				+LOCAL_CROPPED_DIR = "./local_cropped_screenshots"  # 裁剪后目录
			
 
				+
			
 
				+
			
 
				+# 图片压缩配置
			
 
				+IMAGE_COMPRESS_ENABLE = True  # 是否启用压缩（True/False）
			
 
				+IMAGE_COMPRESS_QUALITY = 30  # JPG 质量（1-95）
			
 
				+IMAGE_COMPRESS_PNG_LEVEL = 9  # PNG 压缩级别（0-9）
			
 
				+
			
 
				+
			
 
				+# ---------------------- 工具函数 ----------------------
			
 
				+def init_local_screenshot_dir():
			
 
				+    """初始化本地截图目录（不存在则创建）。"""
			
 
				+    if not os.path.exists(LOCAL_SCREENSHOT_DIR):
			
 
				+        os.makedirs(LOCAL_SCREENSHOT_DIR)
			
 
				+        logger.info(f"本地截图目录已创建: {LOCAL_SCREENSHOT_DIR}")
			
 
				+    else:
			
 
				+        logger.debug(f"本地截图目录已存在: {LOCAL_SCREENSHOT_DIR}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+def init_oss_bucket():
			
 
				+    """初始化 OSS Bucket 对象。"""
			
 
				+    try:
			
 
				+        auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
			
 
				+        bucket = oss2.Bucket(auth, OSS_ENDPOINT, OSS_BUCKET_NAME)
			
 
				+        bucket.get_bucket_info()
			
 
				+        logger.info("OSS Bucket 初始化成功")
			
 
				+        return bucket
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"OSS Bucket 初始化失败: {str(e)}")
			
 
				+        raise
			
 
				+
			
 
				+
			
 
				+
			
 
				+def upload_local_screenshot_to_oss(bucket, local_file_path, oss_file_path=None):
			
 
				+    """将本地截图上传到 OSS，返回公网访问链接。"""
			
 
				+    if not os.path.exists(local_file_path):
			
 
				+        raise FileNotFoundError(f"本地截图文件不存在: {local_file_path}")
			
 
				+
			
 
				+    if not oss_file_path:
			
 
				+        local_file_name = os.path.basename(local_file_path)
			
 
				+        oss_file_path = f"screenshots/{local_file_name}"
			
 
				+
			
 
				+    try:
			
 
				+        bucket.put_object_from_file(oss_file_path, local_file_path)
			
 
				+        oss_file_url = f"https://{OSS_BUCKET_NAME}.{OSS_ENDPOINT}/{oss_file_path}"
			
 
				+        logger.info(f"截图上传 OSS 成功: {oss_file_url}")
			
 
				+        return oss_file_url
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"截图上传 OSS 失败: {str(e)}")
			
 
				+        raise
			
 
				+
			
 
				+# ---------------------- 图片裁剪与压缩 ----------------------
			
 
				+def crop_local_screenshot(local_file_path, cropped_file_path=None, crop_region=None):
			
 
				+    """裁剪本地截图并保存，成功后删除原图，返回裁剪后文件路径。"""
			
 
				+    if not os.path.exists(local_file_path):
			
 
				+        raise FileNotFoundError(f"原始截图文件不存在: {local_file_path}")
			
 
				+
			
 
				+    os.makedirs(LOCAL_CROPPED_DIR, exist_ok=True)
			
 
				+
			
 
				+    if not cropped_file_path:
			
 
				+        file_name = os.path.basename(local_file_path)
			
 
				+        file_name_no_ext, file_ext = os.path.splitext(file_name)
			
 
				+        cropped_file_name = f"{file_name_no_ext}_cropped{file_ext}"
			
 
				+        cropped_file_path = os.path.join(LOCAL_CROPPED_DIR, cropped_file_name)
			
 
				+
			
 
				+    with Image.open(local_file_path) as img:
			
 
				+        img_width, img_height = img.size
			
 
				+        logger.debug(f"原图尺寸: width={img_width}, height={img_height}")
			
 
				+
			
 
				+        if not crop_region:
			
 
				+            left = 0
			
 
				+            upper = 0
			
 
				+            right = int(img_width)
			
 
				+            lower = int(img_height * 0.3)
			
 
				+            crop_region = (left, upper, right, lower)
			
 
				+            logger.debug(f"未指定裁剪区域，使用默认区域: {crop_region}")
			
 
				+
			
 
				+        c_left, c_upper, c_right, c_lower = crop_region
			
 
				+        if c_right > img_width or c_lower > img_height or c_left < 0 or c_upper < 0:
			
 
				+            raise ValueError(
			
 
				+                f"裁剪区域超出图片范围，图片尺寸=({img_width}, {img_height})，裁剪区域={crop_region}"
			
 
				+            )
			
 
				+
			
 
				+        cropped_img = img.crop(crop_region)
			
 
				+
			
 
				+        file_ext = os.path.splitext(cropped_file_path)[1].lower()
			
 
				+        try:
			
 
				+            if IMAGE_COMPRESS_ENABLE:
			
 
				+                if file_ext in ['.jpg', '.jpeg']:
			
 
				+                    cropped_img.save(
			
 
				+                        cropped_file_path,
			
 
				+                        format='JPEG',
			
 
				+                        quality=IMAGE_COMPRESS_QUALITY,
			
 
				+                        optimize=True,
			
 
				+                        progressive=True,
			
 
				+                    )
			
 
				+                else:
			
 
				+                    cropped_img.save(cropped_file_path)
			
 
				+                logger.info(f"裁剪图片已保存（压缩开启）: {cropped_file_path}")
			
 
				+            else:
			
 
				+                cropped_img.save(cropped_file_path, format='JPEG')
			
 
				+                logger.info(f"裁剪图片已保存（压缩关闭）: {cropped_file_path}")
			
 
				+        except Exception as e:
			
 
				+            cropped_img.save(cropped_file_path, format='JPEG')
			
 
				+            logger.warning(f"图片压缩失败，已按普通 JPEG 保存: {str(e)}")
			
 
				+
			
 
				+    try:
			
 
				+        if os.path.exists(cropped_file_path):
			
 
				+            os.remove(local_file_path)
			
 
				+            logger.debug(f"已删除原始截图: {local_file_path}")
			
 
				+        else:
			
 
				+            logger.warning(f"裁剪文件不存在，跳过删除原图: {cropped_file_path}")
			
 
				+    except OSError as e:
			
 
				+        logger.warning(f"删除原始截图失败: {str(e)}")
			
 
				+
			
 
				+    return cropped_file_path
			
 
				+
			
 
				+
			
 
				+def screenshot_target_page_to_local_then_oss(target_page, local_file_path=None, oss_file_path=None, full_page=True, crop_region=None):
			
 
				+    """页面截图到本地后裁剪，再上传 OSS，返回(裁剪路径, OSS链接)。"""
			
 
				+    os.makedirs(LOCAL_SCREENSHOT_DIR, exist_ok=True)
			
 
				+
			
 
				+    if not local_file_path:
			
 
				+        current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
			
 
				+        local_file_name = f"{current_time}_target_page.jpg"
			
 
				+        local_file_path = os.path.join(LOCAL_SCREENSHOT_DIR, local_file_name)
			
 
				+
			
 
				+    logger.info(f"开始页面截图: {local_file_path}")
			
 
				+    target_page.screenshot(
			
 
				+        path=local_file_path,
			
 
				+        full_page=full_page,
			
 
				+        omit_background=False,
			
 
				+        timeout=10000,
			
 
				+    )
			
 
				+    logger.debug("页面截图完成")
			
 
				+
			
 
				+    cropped_file_path = crop_local_screenshot(
			
 
				+        local_file_path=local_file_path,
			
 
				+        crop_region=crop_region,
			
 
				+    )
			
 
				+
			
 
				+    bucket = init_oss_bucket()
			
 
				+    oss_file_url = upload_local_screenshot_to_oss(bucket, cropped_file_path, oss_file_path)
			
 
				+
			
 
				+    return cropped_file_path, oss_file_url
			
--- a/count_nums.py
+++ b/count_nums.py
@@ -0,0 +1,524 @@
 
				+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
			
 
				+import os
			
 
				+import json
			
 
				+import random
			
 
				+from logger_config import logger
			
 
				+from config import *
			
 
				+import re
			
 
				+COOKIE_FILE_PATH = "ybm_cookies.json"  # Cookie保存路径
			
 
				+LOGIN_VALIDATE_URL = "https://www.ybm100.com/new/"
			
 
				+TARGET_LOGIN_URL = "https://www.ybm100.com/new/login"
			
 
				+
			
 
				+
			
 
				+def load_cookies(context, cookie_path=COOKIE_FILE_PATH):
			
 
				+    """从本地JSON文件加载Cookie到浏览器上下文"""
			
 
				+    if not os.path.exists(cookie_path):
			
 
				+        # logger.warning(f" Cookie文件不存在：{cookie_path}")
			
 
				+        return False
			
 
				+    try:
			
 
				+        with open(cookie_path, "r", encoding="utf-8") as f:
			
 
				+            cookies = json.load(f)
			
 
				+        context.add_cookies(cookies)
			
 
				+        # logger.info(f"✅ 已从{cookie_path}加载Cookie")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        # logger.error(f" 加载Cookie失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def is_login(page):
			
 
				+    """验证是否已登录（核心：检测登录态）"""
			
 
				+    try:
			
 
				+        # 访问需要登录的页面
			
 
				+        page.goto(LOGIN_VALIDATE_URL, timeout=5000)
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+        # 检测是否跳转到登录页（URL包含login则未登录）
			
 
				+        if "login" in page.url.lower():
			
 
				+            # logger.warning(" Cookie失效，需要重新登录")
			
 
				+            return False
			
 
				+
			
 
				+        # 可选：检测登录后的专属元素（比如用户名、个人中心等）
			
 
				+        # if page.locator("用户中心选择器").count() > 0:
			
 
				+        #     return True
			
 
				+        # logger.info(" Cookie有效，已保持登录状态")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        # logger.error(f" 验证登录状态失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
def popup_guard(page, tag=""):
    """
    Dismiss popups and overlays on *page*, then restore scrolling.

    Steps: click through guide/confirm buttons, click common close icons,
    remove or hide overlay elements via injected JavaScript, and reset the
    page's overflow styles.

    :param page: page object (uses wait_for_timeout, locator, evaluate)
    :param tag: described as a log label for the call site; not referenced
        anywhere in the body
    :return: None. Every exception is swallowed, so this is best-effort.
    """
    try:
        # Give a popup a moment to appear.
        page.wait_for_timeout(300)

        # 1) Repeatedly click "next / done / got it / close" buttons
        #    (at most 6 rounds, enough for a multi-step guide).
        for _ in range(6):
            btn = page.locator(
                "xpath=//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
            ).first
            if btn.count() > 0 and btn.is_visible():
                btn.click(timeout=1500)
                page.wait_for_timeout(250)
                continue

            # 2) Common close icons.
            close_btn = page.locator(
                "css=.el-dialog__headerbtn, .el-message-box__headerbtn, .close, .icon-close, .el-icon-close"
            ).first
            if close_btn.count() > 0 and close_btn.is_visible():
                close_btn.click(timeout=1200)
                page.wait_for_timeout(250)
                continue

            # Nothing left to click in this round: stop looping.
            break

        # 3) Clear overlays and restore scrolling/interaction. The script
        #    removes known overlay classes, hides any fixed/absolute element
        #    with z-index >= 1000 covering >= 80% of the viewport, and resets
        #    overflow on <html> and <body>.
        page.evaluate(r"""
        () => {
          // 第一步：精准清理已知的遮罩/弹窗类名（Element UI框架常用）
          const selectors = [
            '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
            '.el-message-box__wrapper', '.el-loading-mask'
          ];
          selectors.forEach(sel => document.querySelectorAll(sel).forEach(e => e.remove()));

          // 泛化兜底：近似全屏 + 高 z-index 的层直接屏蔽
          const all = Array.from(document.querySelectorAll('body *'));
          for (const el of all) {
            const s = getComputedStyle(el); // 获取元素的实际样式（含CSS生效的样式）
            const z = parseInt(s.zIndex || '0', 10);    // 取元素的层级（z-index），默认0
            // 条件1：元素是固定/绝对定位（弹窗/遮罩常见定位方式）+ 层级≥1000（高优先级遮挡）+ 能拦截鼠标事件
            if ((s.position === 'fixed' || s.position === 'absolute') && z >= 1000 && s.pointerEvents !== 'none') {
              const r = el.getBoundingClientRect();     // 获取元素的尺寸和位置
                // 条件2：元素宽度/高度≥屏幕80%（近似全屏遮罩）
              const nearFull = r.width >= innerWidth * 0.8 && r.height >= innerHeight * 0.8;
              if (nearFull) {
                el.style.pointerEvents = 'none';    // 让元素不拦截鼠标点击
                el.style.display = 'none';          // 隐藏元素
              }
            }
          }
        // 第三步：恢复页面滚动功能（弹窗常把页面设为不可滚动）
          document.documentElement.style.overflow = 'auto';     // html标签恢复滚动
          document.body.style.overflow = 'auto';    // body标签恢复滚动
          document.body.classList.remove('el-popup-parent--hidden');  // 移除Element UI的滚动禁用类
        }
        """)

        # logger.info("杀除弹窗成功")
    except Exception:
        # Best-effort: a failure to dismiss popups must not abort the caller.
        pass
			
 
				+
			
 
				+SEARCH_INPUT_SELECTOR = "input[placeholder*='药品名称/厂家名称']"
			
 
				+
			
 
				+def pick_search_input(page):
			
 
				+    """优先选可见且可用的搜索输入框；第一个不行就尝试第二个"""
			
 
				+    inputs = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+    cnt = inputs.count()
			
 
				+
			
 
				+    # 优先检查前两个（你说只有两个）
			
 
				+    for i in range(min(cnt, 2)):
			
 
				+        candidate = inputs.nth(i)
			
 
				+        try:
			
 
				+            candidate.wait_for(state="visible", timeout=1500)  # 小超时快速试探
			
 
				+            if candidate.is_enabled():
			
 
				+                return candidate
			
 
				+        except PlaywrightTimeoutError:
			
 
				+            continue
			
 
				+
			
 
				+    # 兜底：直接找任意可见的（避免命中 hidden 模板）
			
 
				+    candidate = page.locator(f"{SEARCH_INPUT_SELECTOR}:visible").first
			
 
				+    candidate.wait_for(state="visible", timeout=5000)
			
 
				+    return candidate
			
 
				+
			
 
				+
			
 
				+def type_slow(locator, text: str, min_delay=0.06, max_delay=0.18):
			
 
				+    """逐字输入，模拟真人打字"""
			
 
				+    for ch in text:
			
 
				+        locator.type(ch, delay=int(random.uniform(min_delay, max_delay) * 1000))
			
 
				+
			
 
				+SEARCH_BTN_SELECTOR = 'div.home-search-container-search-head-btn[data-scmd="text-搜索"]'
			
 
				+
			
 
				+
			
 
				+
			
 
def force_close_popup(page):
    """Close onboarding guides/overlays on *page*, then remove leftover overlay layers.

    Clicks "next / done / got it / close" buttons and close icons for up to
    five rounds, then runs a script that deletes overlay-like elements.
    Every exception is swallowed, so the call is best-effort and returns None.
    """
    try:
        # 1) Repeatedly click "next / done / got it / close".
        for _ in range(5):  # at most 5 rounds, enough for a multi-step guide
            btn = page.locator(
                "//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
            ).first

            if btn.count() > 0 and btn.is_visible():
                btn.click(timeout=1500)
                page.wait_for_timeout(300)
                continue

            # Some guides only offer an "X" icon; click it when present.
            # NOTE(review): this XPath also matches ANY <svg> or <i> element,
            # not just close icons, so an unrelated icon that comes first in
            # the document may be clicked — confirm this breadth is intended.
            close_icon = page.locator(
                "xpath=//*[contains(@class,'close') or contains(@class,'el-icon-close') or name()='svg' or name()='i'][1]"
            ).first
            if close_icon.count() > 0 and close_icon.is_visible():
                close_icon.click(timeout=1000)
                page.wait_for_timeout(300)
                continue

            # Nothing left to click in this round: stop looping.
            break

        # 2) Fallback: delete common overlay layers (element-ui and generic
        #    mask/overlay classes). Only elements that are fixed/absolute with
        #    z-index >= 1000 are removed.
        # NOTE(review): '[style*="z-index"]' has no size check, so any small
        # high-z-index positioned element with an inline z-index is removed too.
        page.evaluate("""
        const selectors = [
          '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
          '[class*="mask"]', '[class*="overlay"]', '[style*="z-index"]'
        ];
        for (const sel of selectors) {
          document.querySelectorAll(sel).forEach(el => {
            const s = window.getComputedStyle(el);
            // 只移除“覆盖层”倾向的元素：fixed/absolute 且 z-index 很高
            if ((s.position === 'fixed' || s.position === 'absolute') && parseInt(s.zIndex || '0', 10) >= 1000) {
              el.remove();
            }
          });
        }
        """)
    except Exception:
        # Best-effort: failing to close a popup must not abort the caller.
        pass
			
 
				+
			
 
				+
			
 
				+def kill_masks(page):
			
 
				+    """
			
 
				+    强制清理残留遮罩层/覆盖层，并恢复 body 可滚动、可点击状态
			
 
				+    """
			
 
				+    page.evaluate(r"""
			
 
				+    () => {
			
 
				+      const removed = [];
			
 
				+      const hidden = [];
			
 
				+
			
 
				+      // 1) 先处理已知常见遮罩
			
 
				+      const knownSelectors = [
			
 
				+        '.v-modal',
			
 
				+        '.el-overlay',
			
 
				+        '.el-overlay-dialog',
			
 
				+        '.el-dialog__wrapper',
			
 
				+        '.el-message-box__wrapper',
			
 
				+        '.el-loading-mask',
			
 
				+        '.el-popup-parent--hidden'
			
 
				+      ];
			
 
				+
			
 
				+      for (const sel of knownSelectors) {
			
 
				+        document.querySelectorAll(sel).forEach(el => {
			
 
				+          // v-modal / overlay 直接 remove 最省事
			
 
				+          removed.push(sel);
			
 
				+          el.remove();
			
 
				+        });
			
 
				+      }
			
 
				+
			
 
				+      // 2) 再做一次“泛化兜底”：全屏 fixed/absolute + 高 z-index 的覆盖层
			
 
				+      //    注意：不要误删页面正常的固定导航，所以加上“近似全屏”的判断
			
 
				+      const all = Array.from(document.querySelectorAll('body *'));
			
 
				+      for (const el of all) {
			
 
				+        const s = window.getComputedStyle(el);
			
 
				+        if (!s) continue;
			
 
				+
			
 
				+        const z = parseInt(s.zIndex || '0', 10);
			
 
				+        const pos = s.position;
			
 
				+        const pe = s.pointerEvents;
			
 
				+
			
 
				+        if ((pos === 'fixed' || pos === 'absolute') && z >= 1000 && pe !== 'none') {
			
 
				+          const r = el.getBoundingClientRect();
			
 
				+          const nearFullScreen =
			
 
				+            r.width >= window.innerWidth * 0.8 &&
			
 
				+            r.height >= window.innerHeight * 0.8 &&
			
 
				+            r.left <= window.innerWidth * 0.1 &&
			
 
				+            r.top <= window.innerHeight * 0.1;
			
 
				+
			
 
				+          // 常见遮罩是半透明背景色，或者透明但拦截点击
			
 
				+          const bg = s.backgroundColor || '';
			
 
				+          const looksLikeMask =
			
 
				+            nearFullScreen && (bg.includes('rgba') || bg.includes('rgb') || s.opacity !== '1');
			
 
				+
			
 
				+          if (nearFullScreen) {
			
 
				+            // 不管透明不透明，只要近似全屏且高 z-index，就先让它不拦截点击
			
 
				+            el.style.pointerEvents = 'none';
			
 
				+            el.style.display = 'none';
			
 
				+            hidden.push(el.tagName + '.' + (el.className || ''));
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      // 3) 恢复 body / html 的滚动与交互（很多弹窗会锁滚动）
			
 
				+      document.documentElement.style.overflow = 'auto';
			
 
				+      document.body.style.overflow = 'auto';
			
 
				+      document.body.style.position = 'static';
			
 
				+      document.body.style.width = 'auto';
			
 
				+      document.body.style.paddingRight = '0px';
			
 
				+
			
 
				+      // 4) 去掉 Element-UI 常见的锁定 class
			
 
				+      document.body.classList.remove('el-popup-parent--hidden');
			
 
				+
			
 
				+      return { removed, hiddenCount: hidden.length, hidden };
			
 
				+    }
			
 
				+    """)
			
 
				+
			
 
				+
			
 
				+# ==================== 搜索操作函数 ====================
			
 
				+def search_operation(page, keyword, is_first_search: bool = True):
			
 
				+    """
			
 
				+    搜索框填充+提交搜索
			
 
				+    :param page: 页面对象
			
 
				+    :param keyword: 搜索关键词
			
 
				+    :param is_first_search: 是否是首次搜索（首次开新页面，后续原页面跳转）
			
 
				+    :return: (detail_page, 是否成功)
			
 
				+    """
			
 
				+    try:
			
 
				+        # 1) 找到“可用”的搜索框（第一个不行就用第二个）
			
 
				+        search_locator = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+
			
 
				+        # 清空并填充搜索框
			
 
				+        search_locator.wait_for(timeout=ELEMENT_TIMEOUT)
			
 
				+
			
 
				+        # 2. 清空搜索框（双重保障：先调用locator的clear，再手动全选删除）
			
 
				+        search_locator.click(force=True)  # 聚焦
			
 
				+        search_locator.fill("")
			
 
				+        page.keyboard.down("Control")  # 按住Control键
			
 
				+        page.keyboard.press("a")       # 按a键
			
 
				+        page.keyboard.up("Control")    # 松开Control键
			
 
				+
			
 
				+        page.keyboard.press("Backspace")  # 删除选中内容
			
 
				+
			
 
				+        # 3) 逐字输入
			
 
				+        type_slow(search_locator, keyword, min_delay=0.06, max_delay=0.18)
			
 
				+
			
 
				+        # 3. 输入搜索关键词
			
 
				+        # search_locator.fill(keyword)
			
 
				+        logger.info(f"📝 已输入搜索关键词：{keyword}")
			
 
				+
			
 
				+        # 3) 搜索按钮也建议点可见的那个
			
 
				+        btn = page.locator(f"{SEARCH_BTN_SELECTOR}")
			
 
				+        btn.wait_for(state="visible", timeout=SEARCH_BTN_TIMEOUT)
			
 
				+        # btn.click()
			
 
				+        page.wait_for_timeout(3000)
			
 
				+
			
 
				+        detail_page = page
			
 
				+        if is_first_search:
			
 
				+            #获取新页面对象
			
 
				+            try:
			
 
				+                # 先开始监听新页面事件（在点击前）
			
 
				+                with page.context.expect_page(timeout=60000) as new_page_info:
			
 
				+                    # 再执行点击操作
			
 
				+                    btn.click()
			
 
				+                # 点击后获取新页面
			
 
				+                detail_page = new_page_info.value
			
 
				+                detail_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+
			
 
				+                # #点击出现的按钮
			
 
				+                # test_btn = detail_page.locator("div[data-v-c65c36bc].first-time-highlight-message-btn button")
			
 
				+                # btn_count = test_btn.count()
			
 
				+                # logger.info(f"✅ 匹配到的元素数量：{btn_count}")
			
 
				+                # test_btn.wait_for(state="attached", timeout=5000)
			
 
				+                # test_btn.click()
			
 
				+            except PlaywrightTimeoutError:
			
 
				+                logger.warning(f"   未检测到新标签页")
			
 
				+                return None, False
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                    logger.warning(f"   等待新标签页异常：{e}")
			
 
				+                    return None, False
			
 
				+        else:
			
 
				+            btn.click()
			
 
				+            # 等待原页面跳转并加载完成（替代新页面监听）
			
 
				+            page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+            # 详情页就是原页面，无需新建
			
 
				+            detail_page = page
			
 
				+            logger.info("✅ 后续搜索：已在原页面完成跳转加载")
			
 
				+
			
 
				+
			
 
				+        test_btn = detail_page.locator("div[data-v-c65c36bc].first-time-highlight-message-btn button")
			
 
				+        btn_count = test_btn.count()
			
 
				+        logger.info(f"✅ 匹配到的元素数量：{btn_count}")
			
 
				+
			
 
				+        if btn_count > 0:
			
 
				+            test_btn.wait_for(state="attached", timeout=5000)
			
 
				+            test_btn.click()
			
 
				+
			
 
				+        force_close_popup(detail_page)
			
 
				+        kill_masks(detail_page)
			
 
				+        logger.info("✅ 已触发搜索")
			
 
				+
			
 
				+        return detail_page, True
			
 
				+
			
 
				+
			
 
				+            # 搜索后等待结果加载
			
 
				+            # page.wait_for_timeout(COLLECT_DELAY)
			
 
				+            # return True
			
 
				+
			
 
				+    except PlaywrightTimeoutError as e:
			
 
				+        logger.error(f" 搜索失败：元素定位超时 - {str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 搜索异常：{str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    with sync_playwright() as p:
			
 
				+        browser = p.chromium.launch(
			
 
				+            headless=False,  # 不要用无头模式（反爬：无头模式易被识别）
			
 
				+            channel="chrome",  # 使用真实Chrome内核
			
 
				+            slow_mo=random.randint(100, 300),  # 全局操作延迟（模拟真人慢速操作）
			
 
				+            args=[
			
 
				+                "--disable-blink-features=AutomationControlled",  # 禁用webdriver特征（核心！）
			
 
				+                "--enable-automation=false",  # 新增：禁用自动化标识
			
 
				+                "--disable-infobars",  # 新增：禁用信息栏
			
 
				+                "--remote-debugging-port=0",  # 新增：随机调试端口
			
 
				+                "--start-maximized",  # 最大化窗口（模拟真人使用）
			
 
				+                "--disable-extensions",  # 禁用扩展（避免特征）
			
 
				+                "--disable-plugins-discovery",  # 禁用插件发现
			
 
				+                "--no-sandbox",  # 避免沙箱模式特征
			
 
				+                "--disable-dev-shm-usage",  # 避免内存限制导致的异常
			
 
				+                f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36"  # 随机Chrome版本的UA
			
 
				+            ]
			
 
				+        )
			
 
				+        # 创建页面时伪装指纹
			
 
				+        context = browser.new_context(
			
 
				+            locale="zh-CN",  # 中文环境
			
 
				+            timezone_id="Asia/Shanghai",  # 上海时区
			
 
				+            geolocation={"latitude": 31.230416, "longitude": 121.473701},  # 模拟上海地理位置（可选）
			
 
				+            permissions=["geolocation"],  # 授予定位权限（模拟真人）
			
 
				+            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
			
 
				+            no_viewport=True,
			
 
				+            # 关键：隐藏自动化特征
			
 
				+            java_script_enabled=True,
			
 
				+            bypass_csp=True,
			
 
				+            # user_data_dir="./temp_user_data"  # 模拟真实用户数据目录
			
 
				+        )
			
 
				+        input("...")
			
 
				+        page = context.new_page()
			
 
				+
			
 
				+
			
 
				+        # 关键：移除navigator.webdriver标识（反爬核心）
			
 
				+        page.add_init_script("""
			
 
				+            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
			
 
				+            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });  // 新增：模拟插件
			
 
				+            Object.defineProperty(navigator, 'mimeTypes', { get: () => [1, 2, 3] });  // 新增：模拟MIME类型
			
 
				+            window.chrome = { runtime: {}, loadTimes: () => ({}) };  // 增强Chrome模拟
			
 
				+            delete window.navigator.languages;
			
 
				+            window.navigator.languages = ['zh-CN', 'zh'];
			
 
				+            // 新增：模拟真实鼠标移动特征
			
 
				+            (() => {
			
 
				+                const originalAddEventListener = EventTarget.prototype.addEventListener;
			
 
				+                EventTarget.prototype.addEventListener = function(type, listener) {
			
 
				+                    if (type === 'mousemove') {
			
 
				+                        return originalAddEventListener.call(this, type, (e) => {
			
 
				+                            e._automation = undefined;
			
 
				+                            listener(e);
			
 
				+                        });
			
 
				+                    }
			
 
				+                    return originalAddEventListener.call(this, type, listener);
			
 
				+                };
			
 
				+            })();
			
 
				+""")
			
 
				+
			
 
				+
			
 
				+        try:
			
 
				+            # ========== 核心：Cookie复用逻辑 ==========
			
 
				+            # 1. 加载本地Cookie
			
 
				+            load_cookies(context)
			
 
				+
			
 
				+            # 2. 验证登录状态
			
 
				+            if not is_login(page):
			
 
				+                # 3. Cookie失效/不存在，执行登录
			
 
				+                page.goto(TARGET_LOGIN_URL)
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+                # logger.info("🔑 开始执行登录流程")
			
 
				+
			
 
				+                # 执行登录操作
			
 
				+                # login_success = login_operation(page, USERNAME, PASSWORD)
			
 
				+                # if not login_success:
			
 
				+                #     logger.error(" 登录失败，程序终止")
			
 
				+                #     return
			
 
				+
			
 
				+                # # 4. 登录成功后保存Cookie
			
 
				+                # save_cookies(context)
			
 
				+                # logger.info(" 登录并保存Cookie成功！")
			
 
				+
			
 
				+            KEYWORDS = get_search_keywords_from_db()
			
 
				+            # get_search_keywords_from_db()
			
 
				+            # 执行搜索
			
 
				+            total_num = 0
			
 
				+            # current_page = page
			
 
				+            detail_page = None
			
 
				+            nums = 0
			
 
				+            for kw in KEYWORDS:
			
 
				+                popup_guard(page, "before_search")
			
 
				+                if nums == 0:
			
 
				+                    popup_guard(detail_page if detail_page else page, "before_search")  # page是你的初始页面对象，需提前定义
			
 
				+                    detail_page, search_success = search_operation(page, kw, is_first_search=True)
			
 
				+                    nums += 1
			
 
				+                else:
			
 
				+                    if detail_page is None:
			
 
				+                        logger.error(f" ❌ 无可用的搜索页面，跳过「{kw}」")
			
 
				+                        continue
			
 
				+                    popup_guard(detail_page, "before_search")
			
 
				+                    detail_page, search_success = search_operation(detail_page, kw, is_first_search=False)
			
 
				+
			
 
				+                if not search_success:
			
 
				+                    print(f"❌ 搜索失败：{kw}")
			
 
				+                    continue
			
 
				+
			
 
				+                if detail_page is None:
			
 
				+                    break
			
 
				+
			
 
				+                popup_guard(detail_page, "after_search")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #找不到数据跳过判断和出现杂数据跳过
			
 
				+                not_found_keywords = detail_page.locator("div.filter-panel-container-empty-text")
			
 
				+                if not_found_keywords.count() > 0:
			
 
				+                    logger.warning(f"⚠️ 关键词「{kw}」无匹配商品，直接跳过整个关键词采集")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+
			
 
				+                TARGET_SELECTOR = detail_page.locator(
			
 
				+                    'span.el-pagination__total',  # 匹配class为el-pagination_total和is-first的span
			
 
				+                )
			
 
				+                total_count = 0  # ⚠️ 每一轮关键词都重置
			
 
				+                if TARGET_SELECTOR.count() > 0:
			
 
				+                    nums = TARGET_SELECTOR.inner_text(timeout=5000).strip()
			
 
				+                    print(nums)
			
 
				+                    match = re.search(r'\d+', nums)
			
 
				+                    if match:
			
 
				+                        total_count = int(match.group())
			
 
				+                        print(total_count)
			
 
				+                else:
			
 
				+                    itme_boxes = detail_page.locator("div.product-list-item")
			
 
				+                    total_count = itme_boxes.count()
			
 
				+
			
 
				+                    #
			
 
				+                    print(f"【{kw}】无分页，当前页盒子数：{total_count}")
			
 
				+
			
 
				+                total_num += total_count
			
 
				+                print(f"截止到这个{kw}关键词有{total_num}条数据")
			
 
				+                page.wait_for_timeout(10000)
			
 
				+            print(f"✅ 本次采集总数据量：{total_num}")
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f" 程序异常：{str(e)}")
			
 
				+        finally:
			
 
				+            browser.close()
			
 
				+            print(" 浏览器已关闭，程序结束")
			
 
				+
			
 
				+# ==================== 程序入口 ====================
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/get_url_test.py
+++ b/get_url_test.py
@@ -0,0 +1,621 @@
 
				+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
			
 
				+import os
			
 
				+import json
			
 
				+import random
			
 
				+from logger_config import logger
			
 
				+from config import *
			
 
				+import re
			
 
				+import pymysql
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+COOKIE_FILE_PATH = "ybm_cookies.json"  # Cookie保存路径
			
 
				+LOGIN_VALIDATE_URL = "https://www.ybm100.com/new/"
			
 
				+TARGET_LOGIN_URL = "https://www.ybm100.com/new/login"
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def load_cookies(context, cookie_path=COOKIE_FILE_PATH):
			
 
				+    """从本地JSON文件加载Cookie到浏览器上下文"""
			
 
				+    if not os.path.exists(cookie_path):
			
 
				+        # logger.warning(f" Cookie文件不存在：{cookie_path}")
			
 
				+        return False
			
 
				+    try:
			
 
				+        with open(cookie_path, "r", encoding="utf-8") as f:
			
 
				+            cookies = json.load(f)
			
 
				+        context.add_cookies(cookies)
			
 
				+        # logger.info(f"✅ 已从{cookie_path}加载Cookie")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        # logger.error(f" 加载Cookie失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def is_login(page):
			
 
				+    """验证是否已登录（核心：检测登录态）"""
			
 
				+    try:
			
 
				+        # 访问需要登录的页面
			
 
				+        page.goto(LOGIN_VALIDATE_URL, timeout=5000)
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+        # 检测是否跳转到登录页（URL包含login则未登录）
			
 
				+        if "login" in page.url.lower():
			
 
				+            # logger.warning(" Cookie失效，需要重新登录")
			
 
				+            return False
			
 
				+
			
 
				+        # 可选：检测登录后的专属元素（比如用户名、个人中心等）
			
 
				+        # if page.locator("用户中心选择器").count() > 0:
			
 
				+        #     return True
			
 
				+        # logger.info(" Cookie有效，已保持登录状态")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        # logger.error(f" 验证登录状态失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def popup_guard(page, tag=""):
			
 
				+    """
			
 
				+    全局弹窗/遮罩守卫：多步引导 + 关闭按钮 + 遮罩清理 + 恢复滚动
			
 
				+    tag 仅用于日志区分调用位置
			
 
				+    """
			
 
				+    try:
			
 
				+        # 给弹窗一点出现时间
			
 
				+        page.wait_for_timeout(300)
			
 
				+
			
 
				+        # 1) 连续点“下一步/完成/我知道了/关闭”
			
 
				+        for _ in range(6):
			
 
				+            btn = page.locator(
			
 
				+                "xpath=//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            # 2) 常见的 close icon
			
 
				+            close_btn = page.locator(
			
 
				+                "css=.el-dialog__headerbtn, .el-message-box__headerbtn, .close, .icon-close, .el-icon-close"
			
 
				+            ).first
			
 
				+            if close_btn.count() > 0 and close_btn.is_visible():
			
 
				+                close_btn.click(timeout=1200)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            break
			
 
				+
			
 
				+        # 3) 清遮罩 + 恢复滚动/交互
			
 
				+        page.evaluate(r"""
			
 
				+        () => {
			
 
				+          // 第一步：精准清理已知的遮罩/弹窗类名（Element UI框架常用）
			
 
				+          const selectors = [
			
 
				+            '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+            '.el-message-box__wrapper', '.el-loading-mask'
			
 
				+          ];
			
 
				+          selectors.forEach(sel => document.querySelectorAll(sel).forEach(e => e.remove()));
			
 
				+
			
 
				+          // 泛化兜底：近似全屏 + 高 z-index 的层直接屏蔽
			
 
				+          const all = Array.from(document.querySelectorAll('body *'));
			
 
				+          for (const el of all) {
			
 
				+            const s = getComputedStyle(el); // 获取元素的实际样式（含CSS生效的样式）
			
 
				+            const z = parseInt(s.zIndex || '0', 10);    // 取元素的层级（z-index），默认0
			
 
				+            // 条件1：元素是固定/绝对定位（弹窗/遮罩常见定位方式）+ 层级≥1000（高优先级遮挡）+ 能拦截鼠标事件
			
 
				+            if ((s.position === 'fixed' || s.position === 'absolute') && z >= 1000 && s.pointerEvents !== 'none') {
			
 
				+              const r = el.getBoundingClientRect();     // 获取元素的尺寸和位置
			
 
				+                // 条件2：元素宽度/高度≥屏幕80%（近似全屏遮罩）
			
 
				+              const nearFull = r.width >= innerWidth * 0.8 && r.height >= innerHeight * 0.8;
			
 
				+              if (nearFull) {
			
 
				+                el.style.pointerEvents = 'none';    // 让元素不拦截鼠标点击
			
 
				+                el.style.display = 'none';          // 隐藏元素
			
 
				+              }
			
 
				+            }
			
 
				+          }
			
 
				+        // 第三步：恢复页面滚动功能（弹窗常把页面设为不可滚动）
			
 
				+          document.documentElement.style.overflow = 'auto';     // html标签恢复滚动
			
 
				+          document.body.style.overflow = 'auto';    // body标签恢复滚动
			
 
				+          document.body.classList.remove('el-popup-parent--hidden');  // 移除Element UI的滚动禁用类
			
 
				+        }
			
 
				+        """)
			
 
				+
			
 
				+        # logger.info("杀除弹窗成功")
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+SEARCH_INPUT_SELECTOR = "input[placeholder*='药品名称/厂家名称']"
			
 
				+
			
 
				+def pick_search_input(page):
			
 
				+    """优先选可见且可用的搜索输入框；第一个不行就尝试第二个"""
			
 
				+    inputs = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+    cnt = inputs.count()
			
 
				+
			
 
				+    # 优先检查前两个（你说只有两个）
			
 
				+    for i in range(min(cnt, 2)):
			
 
				+        candidate = inputs.nth(i)
			
 
				+        try:
			
 
				+            candidate.wait_for(state="visible", timeout=1500)  # 小超时快速试探
			
 
				+            if candidate.is_enabled():
			
 
				+                return candidate
			
 
				+        except PlaywrightTimeoutError:
			
 
				+            continue
			
 
				+
			
 
				+    # 兜底：直接找任意可见的（避免命中 hidden 模板）
			
 
				+    candidate = page.locator(f"{SEARCH_INPUT_SELECTOR}:visible").first
			
 
				+    candidate.wait_for(state="visible", timeout=5000)
			
 
				+    return candidate
			
 
				+
			
 
				+
			
 
				+def type_slow(locator, text: str, min_delay=0.06, max_delay=0.18):
			
 
				+    """逐字输入，模拟真人打字"""
			
 
				+    for ch in text:
			
 
				+        locator.type(ch, delay=int(random.uniform(min_delay, max_delay) * 1000))
			
 
				+
			
 
				+SEARCH_BTN_SELECTOR = 'div.home-search-container-search-head-btn[data-scmd="text-搜索"]'
			
 
				+
			
 
				+
			
 
				+
			
 
				+def force_close_popup(page):
			
 
				+    """关闭新手引导/遮罩（多步：下一步/完成/我知道了），并兜底移除遮罩层"""
			
 
				+    try:
			
 
				+        # 1) 尝试连续点“下一步/完成/我知道了/关闭”
			
 
				+        for _ in range(5):  # 最多点5次，足够覆盖多步引导
			
 
				+            btn = page.locator(
			
 
				+                "//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+
			
 
				+            # 有些引导是右上角 X（如果存在就点）
			
 
				+            close_icon = page.locator(
			
 
				+                "xpath=//*[contains(@class,'close') or contains(@class,'el-icon-close') or name()='svg' or name()='i'][1]"
			
 
				+            ).first
			
 
				+            if close_icon.count() > 0 and close_icon.is_visible():
			
 
				+                close_icon.click(timeout=1000)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+
			
 
				+            break
			
 
				+
			
 
				+        # 2) 兜底：移除常见遮罩层（element-ui / 通用 mask/overlay）
			
 
				+        page.evaluate("""
			
 
				+        const selectors = [
			
 
				+          '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+          '[class*="mask"]', '[class*="overlay"]', '[style*="z-index"]'
			
 
				+        ];
			
 
				+        for (const sel of selectors) {
			
 
				+          document.querySelectorAll(sel).forEach(el => {
			
 
				+            const s = window.getComputedStyle(el);
			
 
				+            // 只移除“覆盖层”倾向的元素：fixed/absolute 且 z-index 很高
			
 
				+            if ((s.position === 'fixed' || s.position === 'absolute') && parseInt(s.zIndex || '0', 10) >= 1000) {
			
 
				+              el.remove();
			
 
				+            }
			
 
				+          });
			
 
				+        }
			
 
				+        """)
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+def kill_masks(page):
			
 
				+    """
			
 
				+    强制清理残留遮罩层/覆盖层，并恢复 body 可滚动、可点击状态
			
 
				+    """
			
 
				+    page.evaluate(r"""
			
 
				+    () => {
			
 
				+      const removed = [];
			
 
				+      const hidden = [];
			
 
				+
			
 
				+      // 1) 先处理已知常见遮罩
			
 
				+      const knownSelectors = [
			
 
				+        '.v-modal',
			
 
				+        '.el-overlay',
			
 
				+        '.el-overlay-dialog',
			
 
				+        '.el-dialog__wrapper',
			
 
				+        '.el-message-box__wrapper',
			
 
				+        '.el-loading-mask',
			
 
				+        '.el-popup-parent--hidden'
			
 
				+      ];
			
 
				+
			
 
				+      for (const sel of knownSelectors) {
			
 
				+        document.querySelectorAll(sel).forEach(el => {
			
 
				+          // v-modal / overlay 直接 remove 最省事
			
 
				+          removed.push(sel);
			
 
				+          el.remove();
			
 
				+        });
			
 
				+      }
			
 
				+
			
 
				+      // 2) 再做一次“泛化兜底”：全屏 fixed/absolute + 高 z-index 的覆盖层
			
 
				+      //    注意：不要误删页面正常的固定导航，所以加上“近似全屏”的判断
			
 
				+      const all = Array.from(document.querySelectorAll('body *'));
			
 
				+      for (const el of all) {
			
 
				+        const s = window.getComputedStyle(el);
			
 
				+        if (!s) continue;
			
 
				+
			
 
				+        const z = parseInt(s.zIndex || '0', 10);
			
 
				+        const pos = s.position;
			
 
				+        const pe = s.pointerEvents;
			
 
				+
			
 
				+        if ((pos === 'fixed' || pos === 'absolute') && z >= 1000 && pe !== 'none') {
			
 
				+          const r = el.getBoundingClientRect();
			
 
				+          const nearFullScreen =
			
 
				+            r.width >= window.innerWidth * 0.8 &&
			
 
				+            r.height >= window.innerHeight * 0.8 &&
			
 
				+            r.left <= window.innerWidth * 0.1 &&
			
 
				+            r.top <= window.innerHeight * 0.1;
			
 
				+
			
 
				+          // 常见遮罩是半透明背景色，或者透明但拦截点击
			
 
				+          const bg = s.backgroundColor || '';
			
 
				+          const looksLikeMask =
			
 
				+            nearFullScreen && (bg.includes('rgba') || bg.includes('rgb') || s.opacity !== '1');
			
 
				+
			
 
				+          if (nearFullScreen) {
			
 
				+            // 不管透明不透明，只要近似全屏且高 z-index，就先让它不拦截点击
			
 
				+            el.style.pointerEvents = 'none';
			
 
				+            el.style.display = 'none';
			
 
				+            hidden.push(el.tagName + '.' + (el.className || ''));
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      // 3) 恢复 body / html 的滚动与交互（很多弹窗会锁滚动）
			
 
				+      document.documentElement.style.overflow = 'auto';
			
 
				+      document.body.style.overflow = 'auto';
			
 
				+      document.body.style.position = 'static';
			
 
				+      document.body.style.width = 'auto';
			
 
				+      document.body.style.paddingRight = '0px';
			
 
				+
			
 
				+      // 4) 去掉 Element-UI 常见的锁定 class
			
 
				+      document.body.classList.remove('el-popup-parent--hidden');
			
 
				+
			
 
				+      return { removed, hiddenCount: hidden.length, hidden };
			
 
				+    }
			
 
				+    """)
			
 
				+
			
 
				+
			
 
				+# ==================== 搜索操作函数 ====================
			
 
				+def search_operation(page, keyword):
			
 
				+    """搜索框填充+提交搜索"""
			
 
				+    try:
			
 
				+        # 1) 找到“可用”的搜索框（第一个不行就用第二个）
			
 
				+        search_locator = pick_search_input(page)
			
 
				+
			
 
				+        # 清空并填充搜索框
			
 
				+        search_locator.wait_for(timeout=5000)
			
 
				+
			
 
				+        # 2. 清空搜索框（双重保障：先调用locator的clear，再手动全选删除）
			
 
				+        search_locator.click()  # 聚焦
			
 
				+        search_locator.fill("")
			
 
				+        page.keyboard.down("Control")  # 按住Control键
			
 
				+        page.keyboard.press("a")       # 按a键
			
 
				+        page.keyboard.up("Control")    # 松开Control键
			
 
				+
			
 
				+        page.keyboard.press("Backspace")  # 删除选中内容
			
 
				+
			
 
				+        # 3) 逐字输入
			
 
				+        type_slow(search_locator, keyword, min_delay=0.25, max_delay=0.50)
			
 
				+
			
 
				+        # 3. 输入搜索关键词
			
 
				+        # search_locator.fill(keyword)
			
 
				+        logger.info(f"📝 已输入搜索关键词：{keyword}")
			
 
				+
			
 
				+        # 3) 搜索按钮也建议点可见的那个
			
 
				+        btn = page.locator(f"{SEARCH_BTN_SELECTOR}:visible").first
			
 
				+        btn.wait_for(state="visible", timeout=5000)
			
 
				+        # btn.click()
			
 
				+        page.wait_for_timeout(600)
			
 
				+        #获取新页面对象
			
 
				+        try:
			
 
				+            # 先开始监听新页面事件（在点击前）
			
 
				+            with page.context.expect_page(timeout=60000) as new_page_info:
			
 
				+                # 再执行点击操作
			
 
				+                btn.click()
			
 
				+                # 点击后获取新页面
			
 
				+                detail_page = new_page_info.value
			
 
				+                detail_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+        except PlaywrightTimeoutError:
			
 
				+            logger.warning(f"未检测到新标签页")
			
 
				+            return False
			
 
				+        # force_close_popup(page)
			
 
				+        # kill_masks(page)
			
 
				+        logger.info("✅ 已触发搜索")
			
 
				+        detail_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+
			
 
				+        test_btn = detail_page.locator("div[data-v-c65c36bc].first-time-highlight-message-btn button")
			
 
				+        btn_count = test_btn.count()
			
 
				+        logger.info(f"✅ 匹配到的元素数量：{btn_count}")
			
 
				+        test_btn.wait_for(state="attached", timeout=5000)
			
 
				+        test_btn.click()
			
 
				+        input("....")
			
 
				+
			
 
				+
			
 
				+        return detail_page, True
			
 
				+
			
 
				+
			
 
				+        # 搜索后等待结果加载
			
 
				+        # page.wait_for_timeout(COLLECT_DELAY)
			
 
				+        # return True
			
 
				+
			
 
				+    except PlaywrightTimeoutError as e:
			
 
				+        logger.error(f" 搜索失败：元素定位超时 - {str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 搜索异常：{str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+
			
 
				+
			
 
				+
			
 
				+def goto_next_page(page) -> bool:
			
 
				+    """
			
 
				+    尝试翻到下一页；成功返回True，没下一页/翻页失败返回False
			
 
				+    适配常见 ElementUI: .el-pagination .btn-next / .el-pagination__next
			
 
				+    """
			
 
				+    # 多写几个候选，哪个能用就用哪个
			
 
				+    candidates = [
			
 
				+        ".el-pagination button.btn-next:not(.is-disabled)",
			
 
				+        ".el-pagination__next:not(.is-disabled)",
			
 
				+        "button:has-text('下一页'):not([disabled])",
			
 
				+        "a:has-text('下一页')",
			
 
				+    ]
			
 
				+
			
 
				+    next_btn = None
			
 
				+    for sel in candidates:
			
 
				+        loc = page.locator(sel).first
			
 
				+        if loc.count() > 0:
			
 
				+            next_btn = loc
			
 
				+            break
			
 
				+
			
 
				+    if not next_btn:
			
 
				+        return False
			
 
				+
			
 
				+    # 用“当前页第一个商品标题”做翻页完成的判据（比只等networkidle更稳）
			
 
				+    first_title = page.locator(PRODUCT_TITLE_SELECTOR).first
			
 
				+    before = ""
			
 
				+    try:
			
 
				+        if first_title.count() > 0:
			
 
				+            before = first_title.inner_text(timeout=2000).strip()
			
 
				+    except:
			
 
				+        pass
			
 
				+
			
 
				+    try:
			
 
				+        page.evaluate("window.scrollTo(0, 0);")
			
 
				+        next_btn.click(timeout=5000)
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+        # 等列表发生变化（标题变了 / 或者至少第一个标题重新出现）
			
 
				+        if before:
			
 
				+            page.wait_for_function(
			
 
				+                """(sel, oldText) => {
			
 
				+                    const el = document.querySelector(sel);
			
 
				+                    return el && el.innerText && el.innerText.trim() !== oldText;
			
 
				+                }""",
			
 
				+                arg=(PRODUCT_TITLE_SELECTOR, before),
			
 
				+                timeout=5000
			
 
				+            )
			
 
				+        else:
			
 
				+            first_title.wait_for(timeout=1000)
			
 
				+
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f" 翻页失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+PRODUCT_ITEM_SELECTOR = "div.product-list-item"
			
 
				+def collect_data(page, keyword):
			
 
				+    collect_result = []
			
 
				+    collected_count = 0  # ✅ 初始化计数变量
			
 
				+    logger.info(f"📊 开始采集「{keyword}」的商品数据")
			
 
				+    page.wait_for_load_state("networkidle")
			
 
				+    page_no = 1
			
 
				+    while True:
			
 
				+        logger.info(f"\n📄 「{keyword}」开始采集第 {page_no} 页")
			
 
				+
			
 
				+
			
 
				+        # ✅ 先获取当前页商品个数
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+        total_limit = page.locator(PRODUCT_ITEM_SELECTOR).count()
			
 
				+        logger.info(f"📌 「{keyword}」第{page_no}页 初始商品个数（count）：{total_limit}")
			
 
				+
			
 
				+        for idx in range(total_limit):
			
 
				+            detail_page = None
			
 
				+            # total_limit += 1
			
 
				+            try:
			
 
				+                item = page.locator(PRODUCT_ITEM_SELECTOR).nth(idx)
			
 
				+
			
 
				+                collected_count += 1  # 实际采集计数（用于日志）
			
 
				+                # ========= 反爬随机延迟（保留你的原逻辑也行） =========
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+                # delay = random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                logger.info(f"📌 「{keyword}」第{page_no}页 第{collected_count}/{total_limit}个商品")
			
 
				+
			
 
				+
			
 
				+                #获取product_id
			
 
				+                product_id = None
			
 
				+
			
 
				+                #这里还得改
			
 
				+                child_item = item.locator("> [data-product-id]")
			
 
				+
			
 
				+                product_id = child_item.get_attribute("data-product-id")
			
 
				+                if product_id:
			
 
				+                    product_id = product_id.strip()
			
 
				+                    logger.info(f"✅ 「{keyword}」第{collected_count}个商品 - 提取到product_id：{product_id}")
			
 
				+                else:
			
 
				+                    logger.warning(f"没提取到{product_id}")
			
 
				+
			
 
				+                product_url = f"https://www.ybm100.com/new/base/skuDetail?id={product_id}"
			
 
				+                print(product_url)
			
 
				+                db_match_result = fuzzy_match_product_url_in_db_mysql(product_url)
			
 
				+
			
 
				+                if db_match_result:
			
 
				+                    logger.info(f"✅ 「{keyword}」第{collected_count}个商品 - MySQL 匹配到URL，直接返回结果：{db_match_result}")
			
 
				+                    print(db_match_result)
			
 
				+                else:
			
 
				+                    # 4. 匹配不存在：准备执行后续点击提取流程
			
 
				+                    logger.info(f"ℹ️ 「{keyword}」第{collected_count}个商品 - MySQL 未匹配到URL，执行点击提取")
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                logger.info("该商品链接没有在数据库，进行点击提取。")
			
 
				+                continue
			
 
				+                # ====== 当前页采集完毕，尝试翻页 ======
			
 
				+        delay = page.wait_for_timeout(5000)
			
 
				+        logger.info(f"⏳ 翻页前随机等待 {delay:.2f}s（反爬）")
			
 
				+
			
 
				+        if goto_next_page(page):
			
 
				+            page_no += 1
			
 
				+            continue
			
 
				+        else:
			
 
				+            logger.info(f" 「{keyword}」已无下一页，关键词采集结束")
			
 
				+            break
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
def main():
    """Launch Chrome through Playwright, restore cookies, and collect data per keyword.

    Flow: start a headed Chrome with anti-detection flags, create a context
    with a zh-CN / Asia/Shanghai profile, install an init script that masks
    automation markers, load saved cookies, check the login state, then run
    a search and ``collect_data`` for every keyword returned by
    ``get_search_keywords_from_db``. The browser is always closed on exit.
    """
    with sync_playwright() as p:
        browser = p.chromium.launch(
            headless=False,  # headed mode (headless is easier to detect)
            channel="chrome",  # use the installed Chrome build
            slow_mo=random.randint(100, 300),  # global per-action delay to mimic a slow human
            args=[
                "--disable-blink-features=AutomationControlled",  # hide the webdriver feature flag (key setting)
                "--enable-automation=false",  # disable the automation marker
                "--disable-infobars",  # disable info bars
                "--remote-debugging-port=0",  # random debugging port
                "--start-maximized",  # maximized window (mimics real usage)
                "--disable-extensions",  # no extensions (avoids fingerprintable traits)
                "--disable-plugins-discovery",  # disable plugin discovery
                "--no-sandbox",  # avoid sandbox-mode traits
                "--disable-dev-shm-usage",  # avoid failures caused by shared-memory limits
                # NOTE(review): this flag randomizes the Chrome version while the
                # context below pins user_agent to Chrome/120 — confirm which wins.
                f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36"  # UA with a random Chrome version
            ]
        )
        # Disguise the fingerprint when creating the context
        context = browser.new_context(
            locale="zh-CN",  # Chinese locale
            timezone_id="Asia/Shanghai",  # Shanghai time zone
            geolocation={"latitude": 31.230416, "longitude": 121.473701},  # simulated Shanghai location (optional)
            permissions=["geolocation"],  # grant geolocation permission (mimics a real user)
            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
            viewport={"width": 1600, "height": 1400},
            # Key settings intended to hide automation traits
            java_script_enabled=True,
            bypass_csp=True,
            # user_data_dir="./temp_user_data"  # simulate a real user data directory
        )

        page = context.new_page()


        # Key step: remove the navigator.webdriver marker (core anti-detection measure).
        # The script text below is runtime data and is kept verbatim.
        page.add_init_script("""
            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });  // 新增：模拟插件
            Object.defineProperty(navigator, 'mimeTypes', { get: () => [1, 2, 3] });  // 新增：模拟MIME类型
            window.chrome = { runtime: {}, loadTimes: () => ({}) };  // 增强Chrome模拟
            delete window.navigator.languages;
            window.navigator.languages = ['zh-CN', 'zh'];
            // 新增：模拟真实鼠标移动特征
            (() => {
                const originalAddEventListener = EventTarget.prototype.addEventListener;
                EventTarget.prototype.addEventListener = function(type, listener) {
                    if (type === 'mousemove') {
                        return originalAddEventListener.call(this, type, (e) => {
                            e._automation = undefined;
                            listener(e);
                        });
                    }
                    return originalAddEventListener.call(this, type, listener);
                };
            })();
""")


        try:
            # ========== Core: cookie reuse ==========
            # 1. Load cookies saved on disk
            load_cookies(context)

            # 2. Check the login state
            if not is_login(page):
                # 3. Cookies missing/expired: open the login page.
                # NOTE(review): the automated login below is disabled, so the run
                # continues to the search step without logging in — confirm intent.
                page.goto(TARGET_LOGIN_URL)
                page.wait_for_load_state("networkidle")
                # logger.info("🔑 开始执行登录流程")

                # Disabled: perform the login
                # login_success = login_operation(page, USERNAME, PASSWORD)
                # if not login_success:
                #     logger.error(" 登录失败，程序终止")
                #     return

                # # 4. Disabled: save cookies after a successful login
                # save_cookies(context)
                # logger.info(" 登录并保存Cookie成功！")

            KEYWORDS = get_search_keywords_from_db()
            # get_search_keywords_from_db()
            # Run the searches
            total_num = 0  # NOTE(review): only used by the disabled statistics code below

            for kw in KEYWORDS:
                popup_guard(page, "before_search")
                detail_page, search_success = search_operation(page, kw)
                if not search_success:
                    print(f"❌ 搜索失败：{kw}")
                    continue

                popup_guard(page, "after_search")


                detail_page.wait_for_load_state('networkidle')

                # NOTE(review): the returned value is not used afterwards in this function
                data_list = collect_data(detail_page, kw)


                # Disabled: skip the keyword when no data / noise data is found
                # not_found_keywords = page.locator("span:has-text('新品登记')")
                # if not_found_keywords.count() > 0:
                #     logger.warning(f"⚠️ 关键词「{kw}」无匹配商品，直接跳过整个关键词采集")
                #     continue



                # Disabled: per-keyword result counting
                # TARGET_SELECTOR = page.locator('div[data-v-4c22c8c9].sr-page_turner-pagination-total')
                # total_count = 0  # ⚠️ 每一轮关键词都重置
                # if TARGET_SELECTOR.count() > 0:
                #     nums = TARGET_SELECTOR.inner_text(timeout=5000).strip()
                #     print(nums)
                #     match = re.search(r'\d+', nums)
                #     if match:
                #         total_count = int(match.group())
                #         print(total_count)
                # else:
                #     itme_boxes = page.locator("div[data-v-4c22c8c9].sr-list-item[data-item_loc]")
                #     total_count = itme_boxes.count()
                #     print(f"【{kw}】无分页，当前页盒子数：{total_count}")

            #     total_num += total_count
            #     print(f"截止到这个{kw}关键词有{total_num}条数据")
            #     page.wait_for_timeout(10000)
            # print(f"✅ 本次采集总数据量：{total_num}")

        except Exception as e:
            # Top-level boundary: report the error and fall through to cleanup
            print(f" 程序异常：{str(e)}")
        finally:
            # Always release the browser, even after a failure
            browser.close()
            print(" 浏览器已关闭，程序结束")
			
 
				+
			
 
# ==================== Program entry point ====================
if __name__ == '__main__':
    main()
			
--- a/local_screenshots/20260202_100312_target_page.jpg
+++ b/local_screenshots/20260202_100312_target_page.jpg
--- a/logger_config.py
+++ b/logger_config.py
@@ -0,0 +1,99 @@
 
				+# logger_config.py
			
 
				+import logging
			
 
				+import os
			
 
				+from logging import handlers
			
 
				+from datetime import datetime
			
 
				+import sys
			
 
				+
			
 
				+
			
 
				+class ColorFormatter(logging.Formatter):
			
 
				+    """自定义彩色日志格式化器（仅控制台输出带颜色，文件日志无颜色）"""
			
 
				+    # 终端颜色编码
			
 
				+    COLORS = {
			
 
				+        'DEBUG': '\033[36m',    # 青色
			
 
				+        'INFO': '\033[32m',     # 绿色
			
 
				+        'WARNING': '\033[33m',  # 黄色
			
 
				+        'ERROR': '\033[31m',    # 红色
			
 
				+        'CRITICAL': '\033[41m', # 红底白字
			
 
				+        'RESET': '\033[0m'      # 重置颜色
			
 
				+    }
			
 
				+
			
 
				+    # 日志级别对应图标
			
 
				+    ICONS = {
			
 
				+        'DEBUG': '🐞',
			
 
				+        'INFO': '✅',
			
 
				+        'WARNING': '⚠️',
			
 
				+        'ERROR': '❌',
			
 
				+        'CRITICAL': '💥'
			
 
				+    }
			
 
				+
			
 
				+    def format(self, record):
			
 
				+        # 为控制台添加颜色和图标，文件日志保持纯文本
			
 
				+        if hasattr(sys.stderr, 'isatty') and sys.stderr.isatty():
			
 
				+            # 彩色格式：[时间] [图标 级别] [模块:行号] 信息
			
 
				+            color = self.COLORS.get(record.levelname, self.COLORS['RESET'])
			
 
				+            icon = self.ICONS.get(record.levelname, '')
			
 
				+            record.levelname = f"{color}{icon} {record.levelname}{self.COLORS['RESET']}"
			
 
				+            record.msg = f"{color}{record.msg}{self.COLORS['RESET']}"
			
 
				+
			
 
				+        # 调用父类格式化方法
			
 
				+        return super().format(record)
			
 
				+
			
 
				+def setup_logger(level=logging.DEBUG):
			
 
				+    """
			
 
				+    配置日志器：
			
 
				+    1. 自动创建logs文件夹
			
 
				+    2. 日志文件带时间戳，按大小分割（10MB/个，保留5个备份）
			
 
				+    3. 控制台输出带颜色/图标，文件日志纯文本（更规范）
			
 
				+    4. 支持自定义日志级别（默认DEBUG，可传INFO/WARNING等）
			
 
				+    5. 日志格式包含：时间、级别、进程ID、模块、行号、信息
			
 
				+    """
			
 
				+    # 1. 自动创建logs文件夹（不存在则创建）
			
 
				+    log_dir = "logs"
			
 
				+    os.makedirs(log_dir, exist_ok=True)  # exist_ok=True：文件夹已存在时不报错
			
 
				+
			
 
				+    # 2. 生成时间戳（格式：年月日_时分秒，如20260112_164530）
			
 
				+    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")  # 生成时间戳
			
 
				+    # 3. 定义带时间戳的日志文件路径
			
 
				+    log_file = os.path.join(log_dir, f"yjj_crawl_log_{timestamp}.txt")
			
 
				+
			
 
				+    # 4. 创建日志器并设置级别
			
 
				+    logger = logging.getLogger("yjj_crawler")
			
 
				+    logger.setLevel(level)
			
 
				+    logger.propagate = False  # 避免重复输出
			
 
				+
			
 
				+    # 清空已有处理器（防止重复添加）
			
 
				+    if logger.handlers:
			
 
				+        logger.handlers.clear()
			
 
				+
			
 
				+    # 5. 配置文件处理器（美化格式，保存到logs文件夹）
			
 
				+    file_handler = handlers.RotatingFileHandler(
			
 
				+        log_file,
			
 
				+        encoding="utf-8",
			
 
				+        maxBytes=10*1024*1024,  # 单个日志文件最大10MB
			
 
				+        backupCount=5,          # 最多保留5个备份日志文件
			
 
				+        mode="a"                # 追加模式
			
 
				+    )
			
 
				+    # 文件日志美化格式：时间戳 | 级别 | 信息（带图标/模块标注）
			
 
				+    file_formatter = logging.Formatter(
			
 
				+        "[%(asctime)s] [%(levelname)-8s] [PID:%(process)d] [%(module)s:%(lineno)d] %(message)s",
			
 
				+        datefmt="%Y-%m-%d %H:%M:%S"  # 精确到微秒
			
 
				+    )
			
 
				+    file_handler.setFormatter(file_formatter)
			
 
				+    logger.addHandler(file_handler)
			
 
				+    # 5. 配置控制台处理器（简洁格式，仅输出信息）
			
 
				+    console_handler = logging.StreamHandler()
			
 
				+    console_formatter = ColorFormatter(
			
 
				+        "[%(asctime)s] %(levelname)-8s %(message)s",
			
 
				+        datefmt="%H:%M:%S"  # 控制台仅显示时分秒，更简洁
			
 
				+    )
			
 
				+    console_handler.setFormatter(console_formatter)
			
 
				+
			
 
				+    # 6. 添加处理器到日志器
			
 
				+    console_handler.setFormatter(console_formatter)
			
 
				+    logger.addHandler(console_handler)
			
 
				+
			
 
				+    return logger
			
 
				+
			
 
# Module-level logger object, created at import time with INFO level
logger = setup_logger(level=logging.INFO)
			
--- a/main.py
+++ b/main.py
@@ -0,0 +1,2396 @@
 
				+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
			
 
				+from logger_config import logger
			
 
				+from datetime import datetime
			
 
				+import random
			
 
				+import csv
			
 
				+import os
			
 
				+import time
			
 
				+import json
			
 
				+import pymysql
			
 
				+from pymysql.err import OperationalError, ProgrammingError, DataError
			
 
				+from config import *
			
 
				+import re
			
 
				+import uuid
			
 
				+import requests
			
 
				+import base64
			
 
				+from io import BytesIO
			
 
				+from PIL import Image
			
 
				+import traceback
			
 
				+
			
 
				+
			
 
# City JSON data file (intended to be loaded once, globally)
CITY_JSON_PATH = "city.json"

# Global lookup dictionaries
PROVINCE_ID_MAP = {}           # province name -> province ID
CITY_ID_MAP = {}               # (province name, city name) -> city ID
CITY_TO_PROVINCES_MAP = {}     # city name -> set of province names (used to infer the province from a city)
# Full names of the municipalities
DIRECT_MUNICIPALITIES = {"北京市", "上海市", "天津市", "重庆市"}
# The same municipalities without the trailing "市"
DIRECT_MUNICIPALITY_BASE_NAMES = {"北京", "上海", "天津", "重庆"}
# Short municipality name -> full municipality name
DIRECT_MUNICIPALITY_ALIAS = {
    "北京": "北京市",
    "上海": "上海市",
    "天津": "天津市",
    "重庆": "重庆市",
}
			
 
				+
			
 
				+
			
 
				+def load_city_mapping():
			
 
				+    """加载 city.json 并构建快速查找字典"""
			
 
				+    global PROVINCE_ID_MAP, CITY_ID_MAP, CITY_TO_PROVINCES_MAP
			
 
				+    PROVINCE_ID_MAP.clear()
			
 
				+    CITY_ID_MAP.clear()
			
 
				+    CITY_TO_PROVINCES_MAP.clear()
			
 
				+
			
 
				+    if not os.path.exists(CITY_JSON_PATH):
			
 
				+        logger.error(f"❌ 城市JSON文件不存在：{CITY_JSON_PATH}")
			
 
				+        return
			
 
				+    try:
			
 
				+        with open(CITY_JSON_PATH, "r", encoding="utf-8") as f:
			
 
				+            data = json.load(f)
			
 
				+        for province_item in data:
			
 
				+            p_name = province_item['name']
			
 
				+            p_id = province_item['id']
			
 
				+            PROVINCE_ID_MAP[p_name] = p_id
			
 
				+
			
 
				+            for city_item in province_item.get('sons', []):
			
 
				+                c_name = city_item['name']
			
 
				+                c_id = city_item['id']
			
 
				+                CITY_ID_MAP[(p_name, c_name)] = c_id
			
 
				+                CITY_TO_PROVINCES_MAP.setdefault(c_name, set()).add(p_name)
			
 
				+        logger.info(f"✅ 城市映射加载完成，共 {len(PROVINCE_ID_MAP)} 个省份，{len(CITY_ID_MAP)} 个城市")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"❌ 加载城市JSON失败：{str(e)}")
			
 
				+
			
 
				+
			
 
				+def _clean_province_name(name: str) -> str:
			
 
				+    return (name or "").replace("省", "").replace("市", "").replace("自治区", "").replace("特别行政区", "").strip()
			
 
				+
			
 
				+
			
 
				+def _clean_city_name(name: str) -> str:
			
 
				+    return (name or "").replace("市", "").replace("自治州", "").replace("地区", "").replace("盟", "").strip()
			
 
				+
			
 
				+
			
 
				+def normalize_province_city_names(province_name: str, city_name: str):
			
 
				+    """标准化省市名称，并在“省份缺失+城市唯一”时反推省份。"""
			
 
				+    province = (province_name or "").strip()
			
 
				+    city = (city_name or "").strip()
			
 
				+
			
 
				+    # 标准化省份名称（保留直辖市的“北京市/上海市”等完整写法）
			
 
				+    if province and province not in DIRECT_MUNICIPALITIES and province not in PROVINCE_ID_MAP:
			
 
				+        clean_p = _clean_province_name(province)
			
 
				+        for standard_name in PROVINCE_ID_MAP.keys():
			
 
				+            if clean_p and clean_p == _clean_province_name(standard_name):
			
 
				+                province = standard_name
			
 
				+                break
			
 
				+
			
 
				+    # 省份为空时，按“城市唯一”精确反推省份
			
 
				+    if not province and city:
			
 
				+        matched_provinces = CITY_TO_PROVINCES_MAP.get(city, set())
			
 
				+        if not matched_provinces:
			
 
				+            clean_c = _clean_city_name(city)
			
 
				+            if clean_c:
			
 
				+                matched_provinces = {
			
 
				+                    p_name
			
 
				+                    for (p_name, c_name) in CITY_ID_MAP.keys()
			
 
				+                    if _clean_city_name(c_name) == clean_c
			
 
				+                }
			
 
				+
			
 
				+        if len(matched_provinces) == 1:
			
 
				+            province = next(iter(matched_provinces))
			
 
				+        elif len(matched_provinces) > 1:
			
 
				+            logger.warning(
			
 
				+                f"⚠️ 城市名存在跨省重名，无法唯一反推省份: city={city}, candidates={sorted(matched_provinces)}"
			
 
				+            )
			
 
				+
			
 
				+    if province in DIRECT_MUNICIPALITY_BASE_NAMES:
			
 
				+        province = DIRECT_MUNICIPALITY_ALIAS[province]
			
 
				+
			
 
				+    # 标准化城市名称（仅在省份确定后做同省映射，避免跨省误匹配）
			
 
				+    if province and city and (province, city) not in CITY_ID_MAP:
			
 
				+        clean_c = _clean_city_name(city)
			
 
				+        for (p_name, c_name), _ in CITY_ID_MAP.items():
			
 
				+            if _clean_province_name(p_name) == _clean_province_name(province) and clean_c and _clean_city_name(c_name) == clean_c:
			
 
				+                city = c_name
			
 
				+                break
			
 
				+
			
 
				+    # 直辖市兜底
			
 
				+    if province in DIRECT_MUNICIPALITIES and not city:
			
 
				+        city = province
			
 
				+
			
 
				+    return province, city
			
 
				+
			
 
				+
			
 
				+def get_province_city_ids(province_name, city_name):
			
 
				+    """
			
 
				+    根据省份名称和城市名称返回对应的ID
			
 
				+    :return: (province_id, city_id) 若找不到返回 (0, 0)
			
 
				+    """
			
 
				+    province_name, city_name = normalize_province_city_names(province_name, city_name)
			
 
				+
			
 
				+    # ---- 查找省份ID ----
			
 
				+    province_id = PROVINCE_ID_MAP.get(province_name) if province_name else None
			
 
				+    if province_name and province_id is None:
			
 
				+        # 尝试去掉"省"、"自治区"、"市"后缀再匹配
			
 
				+        clean_p = _clean_province_name(province_name)
			
 
				+        for name, pid in PROVINCE_ID_MAP.items():
			
 
				+            if clean_p and clean_p == _clean_province_name(name):
			
 
				+                province_id = pid
			
 
				+                province_name = name   # 更新为标准名称，方便后续查城市
			
 
				+                break
			
 
				+        if province_id is None:
			
 
				+            logger.warning(f"⚠️ 未找到省份ID: {province_name}")
			
 
				+            province_id = 0
			
 
				+    elif province_id is None:
			
 
				+        province_id = 0
			
 
				+
			
 
				+    # 直辖市兜底：省份有值但城市为空时，城市按省份补齐
			
 
				+    if _clean_province_name(province_name) in DIRECT_MUNICIPALITY_BASE_NAMES and not city_name:
			
 
				+        city_name = f"{_clean_province_name(province_name)}市"
			
 
				+
			
 
				+    # ---- 查找城市ID ----
			
 
				+    city_id = CITY_ID_MAP.get((province_name, city_name)) if province_name and city_name else None
			
 
				+    if province_name and city_name and city_id is None:
			
 
				+        # 尝试去掉"市"、"自治州"等后缀
			
 
				+        clean_c = _clean_city_name(city_name)
			
 
				+        for (p_name, c_name), cid in CITY_ID_MAP.items():
			
 
				+            if p_name == province_name:
			
 
				+                if clean_c and clean_c == _clean_city_name(c_name):
			
 
				+                    city_id = cid
			
 
				+                    city_name = c_name
			
 
				+                    break
			
 
				+
			
 
				+        if city_id is None:
			
 
				+            # 直辖市特殊处理：城市ID与省份ID相同（或取第一个下属城市）
			
 
				+            if _clean_province_name(province_name) in DIRECT_MUNICIPALITY_BASE_NAMES and province_id:
			
 
				+                city_id = province_id
			
 
				+            else:
			
 
				+                logger.warning(f"⚠️ 未找到城市ID: {province_name} - {city_name}")
			
 
				+                city_id = 0
			
 
				+    elif city_id is None:
			
 
				+        city_id = 0
			
 
				+
			
 
				+    return province_id, city_id
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ===================== 工具函数：获取当前时间字符串 =====================
			
 
				+def get_current_time():
			
 
				+    """统一日志时间格式"""
			
 
				+    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def report_start(task_id: int, keyword: str):
			
 
				+    """第一次上报：status=2 和 task_id"""
			
 
				+    payload = {
			
 
				+        "status": 2,
			
 
				+        "collect_task_allocate_id": task_id
			
 
				+    }
			
 
				+    print(f"🔔 [上报参数-开始] task_id={task_id} keyword={keyword} payload={payload}")
			
 
				+    _send_report(payload, f"开始上报（status=2）关键词：{keyword}")
			
 
				+
			
 
				+def report_end(task_id: int, keyword: str, company_id: int, success: bool,real_count: int, start_ts: int):
			
 
				+    """
			
 
				+    第二次上报：采集结束时调用，上传所有字段
			
 
				+    :param keyword: 关键词（用于日志）
			
 
				+    :param success: 采集是否成功
			
 
				+    :param company_id: 企业ID（从任务表获取）
			
 
				+    :param real_count: 实际采集到的商品数量
			
 
				+    :param start_ts: 开始采集时的Unix时间戳（秒）
			
 
				+    """
			
 
				+    end_ts = int(time.time())
			
 
				+    payload = {
			
 
				+        "collect_task_allocate_id": task_id,    #任务ID
			
 
				+        "status": 3 if success else 4,# 3=已完成，4=失败
			
 
				+        'finish_status': 1 if success else 0, # 1=是（采集最后的数据）0=否
			
 
				+        'real_count': real_count,
			
 
				+        "start_time": start_ts,
			
 
				+        "end_time": end_ts,
			
 
				+        # "enterprise_id": company_id
			
 
				+    }
			
 
				+    print(f"🔔 [上报参数-结束] task_id={task_id} keyword={keyword} payload={payload}")
			
 
				+    _send_report(payload, f"结束上报（全部字段）关键词：{keyword}")
			
 
				+
			
 
				+def _send_report(params: dict, log_msg: str):
			
 
				+    print("上传接口前的参数：", params)
			
 
				+    """通用上报请求，失败不中断主流程"""
			
 
				+    REPORT_URL = "https://scheduleapi.findit.ltd/api/collect_equipment_execute/result_report"
			
 
				+    try:
			
 
				+        resp = requests.get(REPORT_URL, params=params, timeout=10)
			
 
				+        if resp.status_code == 200:
			
 
				+            # 尝试解析 JSON
			
 
				+            try:
			
 
				+                data = resp.json()
			
 
				+                if data.get("code") == "success":
			
 
				+                    logger.info(f"✅ 上报成功：{log_msg}")
			
 
				+                else:
			
 
				+                    logger.warning(f"⚠️ 上报接口返回错误：code={data.get('code')}, msg={data.get('msg')}，参数：{params}")
			
 
				+            except ValueError:
			
 
				+                # 响应不是 JSON 格式
			
 
				+                logger.warning(f"⚠️ 上报响应非 JSON：{resp.text[:200]}，参数：{params}")
			
 
				+        else:
			
 
				+            logger.warning(f"⚠️ 上报 HTTP {resp.status_code}，参数：{params}")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"❌ 上报失败：{log_msg}，错误：{str(e)}")
			
 
				+
			
 
				+
			
 
				+
			
 
# Proxy IP pool
PROXY_POOL_URL =""
PROXY_VALIDATION_URL = ""  # URL requested to check that a proxy works
PROXY_TIMEOUT = 10  # proxy validation timeout (seconds)
			
 
				+
			
 
				+def get_random_proxy():
			
 
				+    """从代理池获取随机代理IP"""
			
 
				+    try:
			
 
				+        response = requests.get(PROXY_POOL_URL, timeout=10)
			
 
				+        if response.status_code == 200:
			
 
				+            proxy = response.text.strip()
			
 
				+            if validate_proxy(proxy):
			
 
				+                logger.info(f"获取到有效代理: {proxy}")
			
 
				+                return proxy
			
 
				+            logger.warning(f"代理无效: {proxy}")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"获取代理失败: {str(e)}")
			
 
				+    return None
			
 
				+
			
 
				+def validate_proxy(proxy):
			
 
				+    """验证代理IP有效性"""
			
 
				+    try:
			
 
				+        proxies = {
			
 
				+            "http": f"http://{proxy}",
			
 
				+            "https": f"https://{proxy}"
			
 
				+        }
			
 
				+        response = requests.get(
			
 
				+            PROXY_VALIDATION_URL,
			
 
				+            proxies=proxies,
			
 
				+            timeout=PROXY_TIMEOUT
			
 
				+        )
			
 
				+        return response.status_code == 200
			
 
				+    except:
			
 
				+        return False
			
 
				+
			
 
				+def init_browser_with_proxy(playwright):
			
 
				+    proxy = get_random_proxy()
			
 
				+    proxy_config = None
			
 
				+    if proxy:
			
 
				+        proxy_server, proxy_port = proxy.split(":")
			
 
				+        proxy_config = {
			
 
				+        "server": f"http://{proxy_server}:{proxy_port}",
			
 
				+        # "username": "your_proxy_username",
			
 
				+        # "password": "your_proxy_password"
			
 
				+        }
			
 
				+        logger.info(f"使用代理: {proxy_server}:{proxy_port}")
			
 
				+    else:
			
 
				+        logger.warning("未获取到有效代理，将使用本地IP")
			
 
				+
			
 
				+    # 启动浏览器（保留原有反爬配置）
			
 
				+    return playwright.chromium.launch(
			
 
				+        headless=False,  # 非无头模式
			
 
				+        channel="chrome",  # 使用Chrome内核
			
 
				+        slow_mo=random.randint(100, 300),  # 随机操作延迟
			
 
				+        proxy=proxy_config,  # 代理配置（None则不使用代理）
			
 
				+        args=[
			
 
				+            "--disable-blink-features=AutomationControlled",  # 核心反检测
			
 
				+            "--enable-automation=false",
			
 
				+            "--disable-infobars",
			
 
				+            "--remote-debugging-port=0",
			
 
				+            "--start-maximized",
			
 
				+            "--disable-extensions",
			
 
				+            "--disable-plugins-discovery",
			
 
				+            "--no-sandbox",
			
 
				+            "--disable-dev-shm-usage",
			
 
				+            # 随机Chrome版本UA
			
 
				+            f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36"
			
 
				+        ]
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 2. 反爬工具函数 ====================
			
 
				+def random_delay(min_seconds, max_seconds):
			
 
				+    """生成随机延迟（核心反爬：避免固定间隔）"""
			
 
				+    delay = random.uniform(min_seconds, max_seconds)
			
 
				+    time.sleep(delay)
			
 
				+    return delay
			
 
				+
			
 
				+
			
 
				+
			
 
				+def simulate_human_typing(page, locator, text):
			
 
				+    """模拟真人打字（逐个字符输入，带随机间隔）"""
			
 
				+    try:
			
 
				+        locator.click()
			
 
				+        locator.clear()
			
 
				+        for char in text:
			
 
				+            locator.type(char, delay=random.uniform(MIN_INPUT_DELAY, MAX_INPUT_DELAY))
			
 
				+            random_delay(0.05, 0.1)  # 字符间额外小延迟
			
 
				+        logger.info(f" 模拟真人输入完成：{text}")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"模拟打字失败：{e}")
			
 
				+        locator.fill(text)  # 兜底：直接填充
			
 
				+
			
 
				+
			
 
				+
			
 
				+def save_cookies(context, cookie_path=COOKIE_FILE_PATH):
			
 
				+    """保存Cookie到本地JSON文件"""
			
 
				+    try:
			
 
				+        cookies = context.cookies()
			
 
				+        with open(cookie_path, "w", encoding="utf-8") as f:
			
 
				+            json.dump(cookies, f, ensure_ascii=False, indent=2)
			
 
				+        logger.info(f"Cookie已保存到：{cookie_path}")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 保存Cookie失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def load_cookies(context, cookie_path=COOKIE_FILE_PATH):
			
 
				+    """从本地JSON文件加载Cookie到浏览器上下文"""
			
 
				+    if not os.path.exists(cookie_path):
			
 
				+        logger.warning(f" Cookie文件不存在：{cookie_path}")
			
 
				+        return False
			
 
				+    try:
			
 
				+        with open(cookie_path, "r", encoding="utf-8") as f:
			
 
				+            cookies = json.load(f)
			
 
				+        context.add_cookies(cookies)
			
 
				+        logger.info(f"✅ 已从{cookie_path}加载Cookie")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 加载Cookie失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def is_login(page):
			
 
				+    """验证是否已登录（核心：检测登录态）"""
			
 
				+    try:
			
 
				+        # 访问需要登录的页面
			
 
				+        page.goto(LOGIN_VALIDATE_URL, timeout=300000)
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+        # 检测是否跳转到登录页（URL包含login则未登录）
			
 
				+        if "login" in page.url.lower():
			
 
				+            logger.warning(" Cookie失效，需要重新登录")
			
 
				+            return False
			
 
				+
			
 
				+        # 可选：检测登录后的专属元素（比如用户名、个人中心等）
			
 
				+        # if page.locator("用户中心选择器").count() > 0:
			
 
				+        #     return True
			
 
				+        logger.info(" Cookie有效，已保持登录状态")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 验证登录状态失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 滚动函数重构（核心修改） ====================
			
 
				+def slow_scroll_400px(page,scroll_distance1=400):
			
 
				+    """
			
 
				+    慢速滚动400px±50px（模拟真人滑动）
			
 
				+    :param page: 页面对象
			
 
				+    :return: 滚动是否成功
			
 
				+    """
			
 
				+    try:
			
 
				+        # 生成400±50px的随机滚动距离
			
 
				+        scroll_distance = random.randint(
			
 
				+            scroll_distance1 - SCROLL_OFFSET_RANGE,
			
 
				+            scroll_distance1 + SCROLL_OFFSET_RANGE
			
 
				+        )
			
 
				+        remaining_distance = scroll_distance
			
 
				+        total_steps = int(scroll_distance / SCROLL_STEP)
			
 
				+
			
 
				+        logger.info(
			
 
				+            f"📜 开始慢速滚动（目标距离：{scroll_distance}px，总步数：{total_steps}，总时长约{total_steps*SCROLL_INTERVAL:.2f}秒）"
			
 
				+        )
			
 
				+
			
 
				+        # 渐进式滚动（每步50px，间隔0.05秒）
			
 
				+        for _ in range(total_steps):
			
 
				+            step = min(SCROLL_STEP, remaining_distance)
			
 
				+            page.evaluate(f"window.scrollBy(0, {step});")
			
 
				+            remaining_distance -= step
			
 
				+            time.sleep(SCROLL_INTERVAL)
			
 
				+
			
 
				+        # 处理剩余不足一步的距离
			
 
				+        if remaining_distance > 0:
			
 
				+            page.evaluate(f"window.scrollBy(0, {remaining_distance});")
			
 
				+            time.sleep(SCROLL_INTERVAL)
			
 
				+
			
 
				+        # 滚动后等待懒加载完成
			
 
				+        page.wait_for_load_state("networkidle", timeout=8000)
			
 
				+        random_delay(2.0, 3.0)  # 滚动后额外停顿，模拟真人
			
 
				+        logger.info(f" 慢速滚动完成，实际滚动距离：{scroll_distance - remaining_distance}px")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f" 慢速滚动失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# def check_anti_crawl(page):
			
 
				+#     """检测反爬弹窗/验证码（核心：提前识别反爬）"""
			
 
				+#     anti_crawl_selectors = [
			
 
				+#         "//div[contains(text(), '验证')]",
			
 
				+#         "//div[contains(text(), '人机验证')]",
			
 
				+#         "//div[contains(text(), '访问过于频繁')]",
			
 
				+#         "//button[contains(text(), '验证')]"
			
 
				+#     ]
			
 
				+#     for selector in anti_crawl_selectors:
			
 
				+#         if page.locator(selector).count() > 0:
			
 
				+#             logger.error("❌ 检测到反爬验证弹窗！请手动完成验证后按回车继续...")
			
 
				+#             input()  # 暂停等待手动验证
			
 
				+#             return True
			
 
				+#     return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
# CSV configuration
CSV_FILE_PATH = f"ybm_collect_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" # CSV output path (timestamped at import time)
CSV_HEADERS = [
    "商品标题", "商品采购价格", "商品折扣价格", "规格", "盒数",
    "店铺名称", "公司名称",
    "有效日期", "生产日期", "批准文号", "采集时间"
]    # header row
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 登录函数 ====================
			
 
				+def login_operation(page, username, password):
			
 
				+    """登录操作函数"""
			
 
				+    try:
			
 
				+        # 输入手机号（直接用单个变量）
			
 
				+        page.wait_for_selector(USERNAME_SELECTOR, timeout=ELEMENT_TIMEOUT, state="visible")
			
 
				+        page.wait_for_timeout(timeout=3000)
			
 
				+        page.fill(USERNAME_SELECTOR, username)
			
 
				+        logger.info(" 已输入登录账号")
			
 
				+
			
 
				+        # 输入密码
			
 
				+        page.wait_for_selector(PASSWORD_SELECTOR, timeout=ELEMENT_TIMEOUT, state="visible")
			
 
				+        page.wait_for_timeout(timeout=3000)
			
 
				+        page.fill(PASSWORD_SELECTOR, password)
			
 
				+        logger.info(" 已输入登录密码")
			
 
				+
			
 
				+        random_delay(1, 2)
			
 
				+        agree_btn = page.locator('span.el-checkbox__inner')
			
 
				+        agree_btn.click()
			
 
				+
			
 
				+        # 点击登录按钮
			
 
				+        page.wait_for_selector(LOGIN_BTN_SELECTOR, timeout=ELEMENT_TIMEOUT)
			
 
				+        page.wait_for_timeout(timeout=3000)
			
 
				+        page.click(LOGIN_BTN_SELECTOR)
			
 
				+        logger.info(" 已点击登录按钮")
			
 
				+
			
 
				+        page.wait_for_timeout(LOGIN_AFTER_CLICK)
			
 
				+        return True
			
 
				+
			
 
				+    except PlaywrightTimeoutError as e:
			
 
				+        logger.error(f" 登录失败：元素定位超时 - {str(e)}")
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 登录异常：{str(e)}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def kill_masks(page):
			
 
				+    """
			
 
				+    强制清理残留遮罩层/覆盖层，并恢复 body 可滚动、可点击状态
			
 
				+    """
			
 
				+    page.evaluate(r"""
			
 
				+    () => {
			
 
				+      const removed = [];
			
 
				+      const hidden = [];
			
 
				+
			
 
				+      // 1) 先处理已知常见遮罩
			
 
				+      const knownSelectors = [
			
 
				+        '.v-modal',
			
 
				+        '.el-overlay',
			
 
				+        '.el-overlay-dialog',
			
 
				+        '.el-dialog__wrapper',
			
 
				+        '.el-message-box__wrapper',
			
 
				+        '.el-loading-mask',
			
 
				+        '.el-popup-parent--hidden'
			
 
				+      ];
			
 
				+
			
 
				+      for (const sel of knownSelectors) {
			
 
				+        document.querySelectorAll(sel).forEach(el => {
			
 
				+          // v-modal / overlay 直接 remove 最省事
			
 
				+          removed.push(sel);
			
 
				+          el.remove();
			
 
				+        });
			
 
				+      }
			
 
				+
			
 
				+      // 2) 再做一次“泛化兜底”：全屏 fixed/absolute + 高 z-index 的覆盖层
			
 
				+      //    注意：不要误删页面正常的固定导航，所以加上“近似全屏”的判断
			
 
				+      const all = Array.from(document.querySelectorAll('body *'));
			
 
				+      for (const el of all) {
			
 
				+        const s = window.getComputedStyle(el);
			
 
				+        if (!s) continue;
			
 
				+
			
 
				+        const z = parseInt(s.zIndex || '0', 10);
			
 
				+        const pos = s.position;
			
 
				+        const pe = s.pointerEvents;
			
 
				+
			
 
				+        if ((pos === 'fixed' || pos === 'absolute') && z >= 1000 && pe !== 'none') {
			
 
				+          const r = el.getBoundingClientRect();
			
 
				+          const nearFullScreen =
			
 
				+            r.width >= window.innerWidth * 0.8 &&
			
 
				+            r.height >= window.innerHeight * 0.8 &&
			
 
				+            r.left <= window.innerWidth * 0.1 &&
			
 
				+            r.top <= window.innerHeight * 0.1;
			
 
				+
			
 
				+          // 常见遮罩是半透明背景色，或者透明但拦截点击
			
 
				+          const bg = s.backgroundColor || '';
			
 
				+          const looksLikeMask =
			
 
				+            nearFullScreen && (bg.includes('rgba') || bg.includes('rgb') || s.opacity !== '1');
			
 
				+
			
 
				+          if (nearFullScreen) {
			
 
				+            // 不管透明不透明，只要近似全屏且高 z-index，就先让它不拦截点击
			
 
				+            el.style.pointerEvents = 'none';
			
 
				+            el.style.display = 'none';
			
 
				+            hidden.push(el.tagName + '.' + (el.className || ''));
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      // 3) 恢复 body / html 的滚动与交互（很多弹窗会锁滚动）
			
 
				+      document.documentElement.style.overflow = 'auto';
			
 
				+      document.body.style.overflow = 'auto';
			
 
				+      document.body.style.position = 'static';
			
 
				+      document.body.style.width = 'auto';
			
 
				+      document.body.style.paddingRight = '0px';
			
 
				+
			
 
				+      // 4) 去掉 Element-UI 常见的锁定 class
			
 
				+      document.body.classList.remove('el-popup-parent--hidden');
			
 
				+
			
 
				+      return { removed, hiddenCount: hidden.length, hidden };
			
 
				+    }
			
 
				+    """)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def force_close_popup(page):
			
 
				+    """关闭新手引导/遮罩（多步：下一步/完成/我知道了），并兜底移除遮罩层"""
			
 
				+    try:
			
 
				+        # 1) 尝试连续点“下一步/完成/我知道了/关闭”
			
 
				+        for _ in range(5):  # 最多点5次，足够覆盖多步引导
			
 
				+            btn = page.locator(
			
 
				+                "//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+
			
 
				+            # 有些引导是右上角 X（如果存在就点）
			
 
				+            close_icon = page.locator(
			
 
				+                "xpath=//*[contains(@class,'close') or contains(@class,'el-icon-close') or name()='svg' or name()='i'][1]"
			
 
				+            ).first
			
 
				+            if close_icon.count() > 0 and close_icon.is_visible():
			
 
				+                close_icon.click(timeout=1000)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+
			
 
				+            break
			
 
				+
			
 
				+        # 2) 兜底：移除常见遮罩层（element-ui / 通用 mask/overlay）
			
 
				+        page.evaluate("""
			
 
				+        const selectors = [
			
 
				+          '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+          '[class*="mask"]', '[class*="overlay"]', '[style*="z-index"]'
			
 
				+        ];
			
 
				+        for (const sel of selectors) {
			
 
				+          document.querySelectorAll(sel).forEach(el => {
			
 
				+            const s = window.getComputedStyle(el);
			
 
				+            // 只移除“覆盖层”倾向的元素：fixed/absolute 且 z-index 很高
			
 
				+            if ((s.position === 'fixed' || s.position === 'absolute') && parseInt(s.zIndex || '0', 10) >= 1000) {
			
 
				+              el.remove();
			
 
				+            }
			
 
				+          });
			
 
				+        }
			
 
				+        """)
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+# 调用方式和方案1一致：在搜索后、采集前执行
			
 
				+# force_close_popup(page)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def pick_search_input(page):
			
 
				+    """优先选可见且可用的搜索输入框；第一个不行就尝试第二个"""
			
 
				+    inputs = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+    cnt = inputs.count()
			
 
				+
			
 
				+    # 优先检查前两个（你说只有两个）
			
 
				+    for i in range(min(cnt, 2)):
			
 
				+        candidate = inputs.nth(i)
			
 
				+        try:
			
 
				+            candidate.wait_for(state="visible", timeout=1500)  # 小超时快速试探
			
 
				+            if candidate.is_enabled():
			
 
				+                return candidate
			
 
				+        except PlaywrightTimeoutError:
			
 
				+            continue
			
 
				+
			
 
				+    # 兜底：直接找任意可见的（避免命中 hidden 模板）
			
 
				+    candidate = page.locator(f"{SEARCH_INPUT_SELECTOR}:visible").first
			
 
				+    candidate.wait_for(state="visible", timeout=ELEMENT_TIMEOUT)
			
 
				+    return candidate
			
 
				+
			
 
				+
			
 
				+
			
 
				+def type_slow(locator, text: str, min_delay=0.06, max_delay=0.18):
			
 
				+    """逐字输入，模拟真人打字"""
			
 
				+    for ch in text:
			
 
				+        locator.type(ch, delay=int(random.uniform(min_delay, max_delay) * 1000))
			
 
				+
			
 
				+
			
 
				+# ==================== 搜索操作函数 ====================
			
 
				+def search_operation(page, keyword, is_first_search: bool = True):
			
 
				+    """
			
 
				+    搜索框填充+提交搜索
			
 
				+    :param page: 页面对象
			
 
				+    :param keyword: 搜索关键词
			
 
				+    :param is_first_search: 是否是首次搜索（首次开新页面，后续原页面跳转）
			
 
				+    :return: (detail_page, 是否成功)
			
 
				+    """
			
 
				+    try:
			
 
				+        # 1) 找到“可用”的搜索框（第一个不行就用第二个）
			
 
				+        search_locator = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+
			
 
				+        # 清空并填充搜索框
			
 
				+        search_locator.wait_for(timeout=ELEMENT_TIMEOUT)
			
 
				+
			
 
				+        # 2. 清空搜索框（双重保障：先调用locator的clear，再手动全选删除）
			
 
				+        search_locator.click(force=True)  # 聚焦
			
 
				+        search_locator.fill("")
			
 
				+        page.keyboard.down("Control")  # 按住Control键
			
 
				+        page.keyboard.press("a")       # 按a键
			
 
				+        page.keyboard.up("Control")    # 松开Control键
			
 
				+
			
 
				+        page.keyboard.press("Backspace")  # 删除选中内容
			
 
				+
			
 
				+        # 3) 逐字输入
			
 
				+        type_slow(search_locator, keyword, min_delay=0.06, max_delay=0.18)
			
 
				+
			
 
				+        # 3. 输入搜索关键词
			
 
				+        # search_locator.fill(keyword)
			
 
				+        logger.info(f"📝 已输入搜索关键词：{keyword}")
			
 
				+
			
 
				+        # 3) 搜索按钮也建议点可见的那个
			
 
				+        btn = page.locator(f"{SEARCH_BTN_SELECTOR}")
			
 
				+        btn.wait_for(state="visible", timeout=SEARCH_BTN_TIMEOUT)
			
 
				+        # btn.click()
			
 
				+        page.wait_for_timeout(3000)
			
 
				+
			
 
				+        detail_page = page
			
 
				+        if is_first_search:
			
 
				+            #获取新页面对象
			
 
				+            try:
			
 
				+                # 先开始监听新页面事件（在点击前）
			
 
				+                with page.context.expect_page(timeout=60000) as new_page_info:
			
 
				+                    # 再执行点击操作
			
 
				+                    btn.click()
			
 
				+                # 点击后获取新页面
			
 
				+                detail_page = new_page_info.value
			
 
				+                detail_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+
			
 
				+                # #点击出现的按钮
			
 
				+                # test_btn = detail_page.locator("div[data-v-c65c36bc].first-time-highlight-message-btn button")
			
 
				+                # btn_count = test_btn.count()
			
 
				+                # logger.info(f"✅ 匹配到的元素数量：{btn_count}")
			
 
				+                # test_btn.wait_for(state="attached", timeout=5000)
			
 
				+                # test_btn.click()
			
 
				+            except PlaywrightTimeoutError:
			
 
				+                logger.warning(f"{get_current_time()}   未检测到新标签页")
			
 
				+                return None, False
			
 
				+
			
 
				+            except Exception as e:
			
 
				+                    logger.warning(f"{get_current_time()}   等待新标签页异常：{e}")
			
 
				+                    return None, False
			
 
				+        else:
			
 
				+            btn.click()
			
 
				+            # 等待原页面跳转并加载完成（替代新页面监听）
			
 
				+            page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+            # 详情页就是原页面，无需新建
			
 
				+            detail_page = page
			
 
				+            logger.info("✅ 后续搜索：已在原页面完成跳转加载")
			
 
				+
			
 
				+
			
 
				+        test_btn = detail_page.locator("div[data-v-c65c36bc].first-time-highlight-message-btn button")
			
 
				+        btn_count = test_btn.count()
			
 
				+        logger.info(f"✅ 匹配到的元素数量：{btn_count}")
			
 
				+
			
 
				+        if btn_count > 0:
			
 
				+            test_btn.wait_for(state="attached", timeout=5000)
			
 
				+            test_btn.click()
			
 
				+
			
 
				+        force_close_popup(detail_page)
			
 
				+        kill_masks(detail_page)
			
 
				+        logger.info("✅ 已触发搜索")
			
 
				+
			
 
				+        return detail_page, True
			
 
				+
			
 
				+
			
 
				+            # 搜索后等待结果加载
			
 
				+            # page.wait_for_timeout(COLLECT_DELAY)
			
 
				+            # return True
			
 
				+
			
 
				+    except PlaywrightTimeoutError as e:
			
 
				+        logger.error(f" 搜索失败：元素定位超时 - {str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 搜索异常：{str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#翻下一页
			
 
				+def goto_next_page(page) -> bool:
			
 
				+    """
			
 
				+    基于 button.btn-next 的 aria-disabled 属性判断是否有下一页
			
 
				+    :param page: 搜索结果页面对象（detail_page）
			
 
				+    :return: True=翻页成功，False=无下一页/翻页失败
			
 
				+    """
			
 
				+    try:
			
 
				+        next_btn = page.locator("button.btn-next").first
			
 
				+
			
 
				+        # 2. 先等待按钮加载（确保元素存在）
			
 
				+        next_btn.wait_for(state="attached", timeout=3000)
			
 
				+
			
 
				+        # 3. 获取 aria-disabled 属性值（核心判断依据）
			
 
				+        aria_disabled = next_btn.get_attribute("aria-disabled")
			
 
				+        logger.info(f"下一页按钮 aria-disabled 属性值：{aria_disabled}")
			
 
				+
			
 
				+        # 4. 判断是否有下一页：aria-disabled="true" 表示无下一页
			
 
				+        if aria_disabled == "true":
			
 
				+            logger.warning("⚠️ 下一页按钮 aria-disabled=true，已无更多页面")
			
 
				+            return False
			
 
				+
			
 
				+        page.wait_for_timeout(500)
			
 
				+
			
 
				+         # 6. 确保按钮可见且可点击（强制点击兜底）
			
 
				+        if next_btn.is_visible() and next_btn.is_enabled():
			
 
				+            next_btn.click(timeout=5000)
			
 
				+        else:
			
 
				+            # 兜底：强制点击（避免元素不可见但实际可点击的情况）
			
 
				+            next_btn.click(force=True, timeout=5000)
			
 
				+
			
 
				+        logger.info("✅ 翻页成功，下一页按钮 aria-disabled=false")
			
 
				+        return True
			
 
				+
			
 
				+
			
 
				+    except PlaywrightTimeoutError:
			
 
				+        logger.warning("⚠️ 下一页按钮加载超时，判定无更多页面")
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f"⚠️ 翻页操作异常：{e}，判定无更多页面")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def popup_guard(page, tag=""):
			
 
				+    """
			
 
				+    全局弹窗/遮罩守卫：多步引导 + 关闭按钮 + 遮罩清理 + 恢复滚动
			
 
				+    tag 仅用于日志区分调用位置
			
 
				+    """
			
 
				+    try:
			
 
				+        # 给弹窗一点出现时间
			
 
				+        page.wait_for_timeout(300)
			
 
				+
			
 
				+        # 1) 连续点“下一步/完成/我知道了/关闭”
			
 
				+        for _ in range(6):
			
 
				+            btn = page.locator(
			
 
				+                "xpath=//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            # 2) 常见的 close icon
			
 
				+            close_btn = page.locator(
			
 
				+                "css=.el-dialog__headerbtn, .el-message-box__headerbtn, .close, .icon-close, .el-icon-close"
			
 
				+            ).first
			
 
				+            if close_btn.count() > 0 and close_btn.is_visible():
			
 
				+                close_btn.click(timeout=1200)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            break
			
 
				+
			
 
				+        # 3) 清遮罩 + 恢复滚动/交互
			
 
				+        page.evaluate(r"""
			
 
				+        () => {
			
 
				+          // 第一步：精准清理已知的遮罩/弹窗类名（Element UI框架常用）
			
 
				+          const selectors = [
			
 
				+            '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+            '.el-message-box__wrapper', '.el-loading-mask'
			
 
				+          ];
			
 
				+          selectors.forEach(sel => document.querySelectorAll(sel).forEach(e => e.remove()));
			
 
				+
			
 
				+          // 泛化兜底：近似全屏 + 高 z-index 的层直接屏蔽
			
 
				+          const all = Array.from(document.querySelectorAll('body *'));
			
 
				+          for (const el of all) {
			
 
				+            const s = getComputedStyle(el); // 获取元素的实际样式（含CSS生效的样式）
			
 
				+            const z = parseInt(s.zIndex || '0', 10);    // 取元素的层级（z-index），默认0
			
 
				+            // 条件1：元素是固定/绝对定位（弹窗/遮罩常见定位方式）+ 层级≥1000（高优先级遮挡）+ 能拦截鼠标事件
			
 
				+            if ((s.position === 'fixed' || s.position === 'absolute') && z >= 1000 && s.pointerEvents !== 'none') {
			
 
				+              const r = el.getBoundingClientRect();     // 获取元素的尺寸和位置
			
 
				+                // 条件2：元素宽度/高度≥屏幕80%（近似全屏遮罩）
			
 
				+              const nearFull = r.width >= innerWidth * 0.8 && r.height >= innerHeight * 0.8;
			
 
				+              if (nearFull) {
			
 
				+                el.style.pointerEvents = 'none';    // 让元素不拦截鼠标点击
			
 
				+                el.style.display = 'none';          // 隐藏元素
			
 
				+              }
			
 
				+            }
			
 
				+          }
			
 
				+        // 第三步：恢复页面滚动功能（弹窗常把页面设为不可滚动）
			
 
				+          document.documentElement.style.overflow = 'auto';     // html标签恢复滚动
			
 
				+          document.body.style.overflow = 'auto';    // body标签恢复滚动
			
 
				+          document.body.classList.remove('el-popup-parent--hidden');  // 移除Element UI的滚动禁用类
			
 
				+        }
			
 
				+        """)
			
 
				+
			
 
				+        logger.info("杀除弹窗成功")
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def open_detail_page(list_page, item, keyword, idx, *, timeout=15000):
			
 
				+    """
			
 
				+    点击商品进入详情页，兼容：
			
 
				+    1) 新开 tab（返回 detail_page != list_page, opened_new_tab=True）
			
 
				+    2) 同 tab 跳转（detail_page == list_page, opened_new_tab=False）
			
 
				+    """
			
 
				+    ctx = list_page.context
			
 
				+    list_url = list_page.url
			
 
				+    detail_page = None
			
 
				+    opened_new_tab = False
			
 
				+
			
 
				+    try:
			
 
				+        # 期望新开 tab（很多站点会这样）
			
 
				+        with ctx.expect_page(timeout=timeout) as p:
			
 
				+            item.click(delay=random.uniform(0.1, 0.3))
			
 
				+        detail_page = p.value
			
 
				+        opened_new_tab = True
			
 
				+        logger.info(f" 「{keyword}」第{idx}个商品 - 新开标签页进入详情")
			
 
				+    except PlaywrightTimeoutError:
			
 
				+        # 兜底：没新开 tab，大概率是同页跳转/弹层
			
 
				+        detail_page = list_page
			
 
				+        opened_new_tab = False
			
 
				+        logger.info(f" 「{keyword}」第{idx}个商品 - 未新开标签页，按同页进入详情处理")
			
 
				+
			
 
				+    return detail_page, opened_new_tab, list_url
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def return_to_list(list_page, detail_page, opened_new_tab, list_url, keyword, idx):
			
 
				+    """
			
 
				+    从详情页返回列表页：
			
 
				+    - 新 tab：关闭 tab，然后 bring_to_front 切回
			
 
				+    - 同 tab：尽量 go_back 回到 list_url；如果没跳转而是弹层，尝试 ESC
			
 
				+    """
			
 
				+    # 如果浏览器/页面已经被关了，直接退出，避免二次异常
			
 
				+    if list_page is None or list_page.is_closed():
			
 
				+        logger.warning(f" 「{keyword}」第{idx}个商品 - 列表页已关闭，无法切回")
			
 
				+        return
			
 
				+
			
 
				+    if opened_new_tab:
			
 
				+        # 只关“新开的详情 tab”，绝不关 list_page
			
 
				+        try:
			
 
				+            if detail_page and (detail_page is not list_page) and (not detail_page.is_closed()):
			
 
				+                detail_page.close()
			
 
				+                logger.info(f"📌 「{keyword}」第{idx}个商品 - 已关闭详情页标签页")
			
 
				+        except Exception as e:
			
 
				+            logger.warning(f" 「{keyword}」第{idx}个商品 - 关闭详情页失败：{e}")
			
 
				+
			
 
				+        # 切回列表页
			
 
				+        try:
			
 
				+            list_page.bring_to_front()
			
 
				+            list_page.mouse.move(random.randint(100, 300), random.randint(200, 400))
			
 
				+            random_delay(0.3, 0.8)
			
 
				+            list_page.wait_for_load_state("networkidle")
			
 
				+            logger.info(f" 「{keyword}」第{idx}个商品 - 已切回列表页（新tab模式）")
			
 
				+        except Exception as e:
			
 
				+            logger.warning(f" 「{keyword}」第{idx}个商品 - 切回列表页失败：{e}")
			
 
				+        return
			
 
				+
			
 
				+    # 同 tab：detail_page == list_page
			
 
				+    try:
			
 
				+        # 1) 如果 URL 变了，说明确实跳转了 → go_back 回去
			
 
				+        if list_page.url != list_url:
			
 
				+            for _ in range(3):  # 最多退 3 次，防止死循环
			
 
				+                list_page.go_back(timeout=15000)
			
 
				+                list_page.wait_for_load_state("domcontentloaded", timeout=15000)
			
 
				+                random_delay(0.2, 0.5)
			
 
				+                if list_page.url == list_url:
			
 
				+                    break
			
 
				+            logger.info(f" 「{keyword}」第{idx}个商品 - 已返回列表页（同tab跳转模式）")
			
 
				+        else:
			
 
				+            # 2) URL 没变：可能是弹层详情 → 尝试 ESC 关闭弹层
			
 
				+            list_page.keyboard.press("Escape")
			
 
				+            random_delay(0.2, 0.5)
			
 
				+            logger.info(f" 「{keyword}」第{idx}个商品 - 已尝试关闭弹层并留在列表页（同tab弹层模式）")
			
 
				+
			
 
				+        list_page.bring_to_front()
			
 
				+        list_page.wait_for_load_state("networkidle")
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f" 「{keyword}」第{idx}个商品 - 同tab返回列表页失败：{e}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+#判断店名是否已经在数据库
			
 
				+def shop_is_exists_database(shop):
			
 
				+    try:
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor(pymysql.cursors.DictCursor)     # 改为字典游标
			
 
				+        query_sql = """
			
 
				+            SELECT province, city, business_license_company, qualification_number, business_license_address FROM retrieve_ybm_shop_info_middle
			
 
				+            WHERE shop = %s
			
 
				+"""
			
 
				+        cursor.execute(query_sql, (shop,))
			
 
				+        result = cursor.fetchone()
			
 
				+
			
 
				+        # 正确的调试方式（替代cursor._last_executed）
			
 
				+        print(f"【调试】传入的店铺名：{repr(shop)}")  # repr能显示空格/隐藏字符
			
 
				+        print(f"【调试】查询参数：{shop}")
			
 
				+        print(f"【调试】查询结果：{result} → 函数返回：{bool(result)}")
			
 
				+
			
 
				+        is_exists = bool(result)
			
 
				+        if is_exists:
			
 
				+            logger.info(f"【店铺存在校验】店铺已存在 | 店铺名：{repr(shop)} | 结果：存在（True）不要执行采集店铺")
			
 
				+        else:
			
 
				+            logger.info(f"【店铺存在校验】店铺不存在 | 店铺名：{repr(shop)} | 结果：不存在（False）")
			
 
				+
			
 
				+        return is_exists, result
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"查询店铺失败：{e}")
			
 
				+        return False, None  # 异常时明确返回False，避免返回None
			
 
				+    finally:
			
 
				+        # 修复：关闭游标和连接，避免泄露
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+
			
 
				+def insert_shop_info_to_db(shop,contact_address, qualification_number, business_license_company, business_license_address, scrape_date, platform, province, city, create_time, update_time):
			
 
				+    """
			
 
				+    把字段插入到ybm_shop_info_middle表
			
 
				+    :param 各参数: 你要插入的字段值（空字符串也可）
			
 
				+    :return: bool - 插入成功返回True，失败返回False
			
 
				+    """
			
 
				+    # 1. 初始化数据库连接和游标
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    try:
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+        # 2. 构造INSERT SQL语句（参数化查询，防止SQL注入）
			
 
				+        # 注意：请确认ybm_shop_info_middle表的字段名和以下%s的顺序对应！
			
 
				+        # 若表字段名不同，修改INSERT后的字段列表（比如你的表字段是credit_code而非qualification_number，要对应改）
			
 
				+        sql = """
			
 
				+        INSERT INTO retrieve_ybm_shop_info_middle (
			
 
				+            shop,
			
 
				+            contact_address,
			
 
				+            qualification_number,
			
 
				+            business_license_company,
			
 
				+            business_license_address,
			
 
				+            scrape_date,
			
 
				+            platform,
			
 
				+            province,
			
 
				+            city,
			
 
				+            create_time,
			
 
				+            update_time
			
 
				+        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
			
 
				+        ON DUPLICATE KEY UPDATE
			
 
				+        contact_address = VALUES(contact_address),  # 重复时更新联系地址
			
 
				+        qualification_number = VALUES(qualification_number),  # 更新社会信用代码
			
 
				+        business_license_company = VALUES(business_license_company),  # 更新公司名
			
 
				+        business_license_address = VALUES(business_license_address),  # 更新地址
			
 
				+        scrape_date = VALUES(scrape_date),
			
 
				+        platform = VALUES(platform),
			
 
				+        province = VALUES(province),
			
 
				+        city = VALUES(city),
			
 
				+        update_time = VALUES(update_time)  # 重复时更新update_time
			
 
				+        """
			
 
				+
			
 
				+
			
 
				+
			
 
				+        # 3. 构造插入的参数（顺序必须和SQL中的%s一一对应）
			
 
				+        params = (
			
 
				+            shop,                          # 店铺名称
			
 
				+            contact_address,               # 联系地址
			
 
				+            qualification_number,          # 社会信用代码
			
 
				+            business_license_company,      # 营业执照公司名
			
 
				+            business_license_address,      # 营业执照地址
			
 
				+            scrape_date,                   # 爬取日期
			
 
				+            platform,                      # 平台名称（药九九）
			
 
				+            province,                      # 省份
			
 
				+            city,                          # 城市
			
 
				+            create_time,                  # create_time（当前时间）
			
 
				+            update_time
			
 
				+        )
			
 
				+
			
 
				+        # 4. 执行SQL并提交事务
			
 
				+        cursor.execute(sql, params)
			
 
				+        conn.commit()
			
 
				+        print(f"✅ 数据插入成功！店铺：{shop} | 公司：{business_license_company}")
			
 
				+        return True
			
 
				+
			
 
				+    except pymysql.MySQLError as e:
			
 
				+        # 数据库相关错误（连接失败、SQL语法错误、字段不匹配等）
			
 
				+        print(f"MySQL插入失败：{e}")
			
 
				+        print(f"详细异常信息：{traceback.format_exc()}")  # 打印详细堆栈，方便排查
			
 
				+        if conn:
			
 
				+            conn.rollback()  # 插入失败回滚事务
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        # 其他未知错误
			
 
				+        print(f"插入数据时发生未知错误：{e}")
			
 
				+        print(f"详细异常信息：{traceback.format_exc()}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    finally:
			
 
				+        # 5. 无论成功/失败，都关闭游标和连接（释放资源）
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+def insert_single_to_mysql(single_data):
			
 
				+    """
			
 
				+    逐条插入单条数据到MySQL数据库
			
 
				+    :param single_data: 单条商品数据元组
			
 
				+    :return: 插入是否成功
			
 
				+    """
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    try:
			
 
				+
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+
			
 
				+        # 2. 确保表存在（兼容表未创建的情况）
			
 
				+        # cursor.execute(CREATE_TABLE_SQL)
			
 
				+
			
 
				+        """这里数据库得改"""
			
 
				+
			
 
				+        insert_sql = """
			
 
				+        INSERT INTO retrieve_scrape_data(
			
 
				+        enterprise_id,
			
 
				+        platform_id,
			
 
				+        platform_item_id,
			
 
				+        province_id,
			
 
				+        city_id,
			
 
				+        province_name,
			
 
				+        city_name,
			
 
				+        area_info,
			
 
				+        product_brand,
			
 
				+        product_name,
			
 
				+        product_specs,
			
 
				+        one_box_price,
			
 
				+        manufacture_date,
			
 
				+        expiry_date,
			
 
				+        manufacturer,
			
 
				+        approval_number,
			
 
				+        is_sold_out,
			
 
				+        online_posting_count,
			
 
				+        continuous_listing_count,
			
 
				+        link_url,
			
 
				+        store_name,
			
 
				+        store_url,
			
 
				+        shipment_province_id,
			
 
				+        shipment_province_name,
			
 
				+        shipment_city_id,
			
 
				+        shipment_city_name,
			
 
				+        company_name,
			
 
				+        qualification_number,
			
 
				+        scrape_date,
			
 
				+        min_price,
			
 
				+        number,
			
 
				+        sales,
			
 
				+        inventory,
			
 
				+        snapshot_url,
			
 
				+        insert_time,
			
 
				+        update_time
			
 
				+) VALUES (
			
 
				+    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
			
 
				+    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
			
 
				+    %s, %s, %s, %s, %s, %s, %s, %s, %s, %s,
			
 
				+    %s, %s, %s, %s, %s,%s
			
 
				+)
			
 
				+        """
			
 
				+
			
 
				+
			
 
				+        # 字段值（与SQL占位符顺序严格对应）
			
 
				+        values = (
			
 
				+            single_data.get("enterprise_id", 0),
			
 
				+            single_data.get("platform_id", 9),
			
 
				+            single_data.get("platform_item_id", ""),
			
 
				+            single_data.get("province_id", ""),
			
 
				+            single_data.get("city_id", ""),
			
 
				+            single_data.get("province_name", ""),
			
 
				+            single_data.get("city_name", ""),
			
 
				+            single_data.get("area_info", ""),
			
 
				+            single_data.get("product_brand", ""),
			
 
				+            single_data.get("product_name", ""),
			
 
				+            single_data.get("product_specs", ""),
			
 
				+            single_data.get("one_box_price", 0.0),
			
 
				+            single_data.get("manufacture_date", ""),
			
 
				+            single_data.get("expiry_date", ""),
			
 
				+            single_data.get("manufacturer", ""),
			
 
				+            single_data.get("approval_number", ""),
			
 
				+            single_data.get("is_sold_out", 0),
			
 
				+            single_data.get("online_posting_count", ""),
			
 
				+            single_data.get("continuous_listing_count", ""),
			
 
				+            single_data.get("link_url", ""),
			
 
				+            single_data.get("store_name", ""),
			
 
				+            single_data.get("store_url", ""),
			
 
				+            single_data.get("shipment_province_id", 0),
			
 
				+            single_data.get("shipment_province_name", ""),
			
 
				+            single_data.get("shipment_city_id", 0),
			
 
				+            single_data.get("shipment_city_name", ""),
			
 
				+            single_data.get("company_name", ""),
			
 
				+            single_data.get("qualification_number", ""),
			
 
				+            single_data.get("scrape_date", ""),
			
 
				+            single_data.get("min_price", 0.0),
			
 
				+            single_data.get("number", 1),
			
 
				+            single_data.get("sales", ""),
			
 
				+            single_data.get("inventory", ""),
			
 
				+            single_data.get("snapshot_url", ""),
			
 
				+            single_data.get("insert_time", datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
			
 
				+            single_data.get("update_time", datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+        cursor.execute(insert_sql, values)
			
 
				+        conn.commit()
			
 
				+        logger.info(f" 单条数据插入成功：...")  # 仅打印标题前20字
			
 
				+        return True
			
 
				+    except OperationalError as e:
			
 
				+        logger.error(f" MySQL连接失败：{str(e)}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    except ProgrammingError as e:
			
 
				+        logger.error(f" SQL语法错误：{str(e)}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 单条数据插入失败：{str(e)}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    finally:
			
 
				+        # 关闭游标和连接
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+
			
 
				+def clean_shop_name(raw_shop_name):
			
 
				+    """
			
 
				+    清洗店铺名称：移除无关前缀（如【xx截单】）、多余空格/特殊符号，提取核心店名
			
 
				+    :param raw_shop_name: 原始采集的店铺名称字符串
			
 
				+    :return: 清洗后的纯店铺名称
			
 
				+    """
			
 
				+    if not raw_shop_name:   #处理空值
			
 
				+        return ''
			
 
				+
			
 
				+    # 步骤1：移除【】/()/[]包裹的所有内容（如【2月13日11点截单】）
			
 
				+    # 正则解释：匹配【任意字符】、(任意字符)、[任意字符]，并替换为空
			
 
				+    pattern = r'【.*?】|\(.*?\)|\[.*?\]'
			
 
				+    cleaned = re.sub(pattern, '', raw_shop_name)
			
 
				+
			
 
				+    # 步骤2：移除首尾空格、换行符，替换中间多余空格为单个空格
			
 
				+    cleaned = cleaned.strip().replace('\n', '').replace('\r', '')
			
 
				+    cleaned = re.sub(r'\s+', ' ', cleaned)
			
 
				+
			
 
				+    # 步骤3：兜底处理（若清洗后为空，返回原始值避免空字符串）
			
 
				+    return cleaned if cleaned else raw_shop_name
			
 
				+
			
 
				+def check_dup_in_biz_db(product_link, discount_price_val, scrape_date):
			
 
				+    """直接查询业务表是否存在该商品链接+价格"""
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    log_context = (
			
 
				+        f"【去重校验】商品链接：{product_link.strip()} | 价格：{discount_price_val} "
			
 
				+        f"采集日期：{scrape_date.strip()}"
			
 
				+    )
			
 
				+    try:
			
 
				+
			
 
				+        """这里数据库得改"""
			
 
				+
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+        sql = """
			
 
				+            SELECT * FROM retrieve_scrape_data
			
 
				+            WHERE link_url = %s AND min_price = %s AND scrape_date=%s
			
 
				+            """
			
 
				+        # 先执行查询
			
 
				+        cursor.execute(sql, (product_link.strip(), discount_price_val, scrape_date.strip()))
			
 
				+        # 再判断是否有结果
			
 
				+        # 如果 fetchone() 返回元组（比如(1,)）→ (1,) is not None → 结果为 True；
			
 
				+        # 如果 fetchone() 返回 None → None is not None → 结果为 False。
			
 
				+        is_dup = cursor.fetchone() is not None
			
 
				+
			
 
				+        if is_dup:
			
 
				+            logger.warning(f"{log_context} - 表中已存在重复记录，跳过本次采集")
			
 
				+        else:
			
 
				+            logger.info(f"{log_context} - 表中无重复记录，正常采集")
			
 
				+
			
 
				+        return is_dup
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"查询业务表去重失败：{str(e)}")
			
 
				+        return False
			
 
				+    finally:
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+
			
 
				+# 压缩图片函数
			
 
				+def compress_image(image_data, max_size=4*1024*1024):  # 4MB上限
			
 
				+    try:
			
 
				+        img = Image.open(BytesIO(image_data))
			
 
				+
			
 
				+        # 将RGBA模式转为RGB（兼容JPEG）
			
 
				+        if img.mode in ('RGBA', 'P'):  # P是PNG的调色板模式，也需转换
			
 
				+            # 新建白色背景的RGB图片，把透明图贴上去（避免透明区域变黑）
			
 
				+            bg_img = Image.new('RGB', img.size, (255, 255, 255))
			
 
				+            bg_img.paste(img, mask=img.split()[-1] if img.mode == 'RGBA' else None)
			
 
				+            img = bg_img
			
 
				+
			
 
				+        # 缩小分辨率（按比例缩到宽≤1000px）
			
 
				+        if img.width > 1000:
			
 
				+            ratio = 1000 / img.width
			
 
				+            new_size = (int(img.width*ratio), int(img.height*ratio))
			
 
				+            img = img.resize(new_size, Image.Resampling.LANCZOS)
			
 
				+
			
 
				+        # 降低质量（JPG）/压缩（PNG）
			
 
				+        output = BytesIO()
			
 
				+        img.save(output, format='JPEG', quality=80)  # quality越小体积越小
			
 
				+        compressed_data = output.getvalue()
			
 
				+
			
 
				+        # 若仍超限，继续降质量
			
 
				+        if len(compressed_data) > max_size:
			
 
				+            img.save(output, format='JPEG', quality=60)
			
 
				+            compressed_data = output.getvalue()
			
 
				+        return compressed_data
			
 
				+    except Exception as e:
			
 
				+        logger.debug(f"图片压缩失败：{e}")
			
 
				+        return image_data  # 压缩失败返回原始数据
			
 
				+
			
 
				+
			
 
				+def download_image_to_base64(image_url, save_dir = "./download_images"):
			
 
				+    """下载网络图片，返回图片二进制数据（BytesIO）"""
			
 
				+    try:
			
 
				+        if not os.path.exists(save_dir):
			
 
				+            os.makedirs(save_dir)  # 创建多级目录（比如a/b/c）
			
 
				+            print(f"创建本地保存目录：{save_dir}")
			
 
				+    except Exception as e:
			
 
				+        print(f"创建保存目录失败：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+    try:
			
 
				+        # 模拟浏览器请求头，避免被服务器拦截
			
 
				+        headers = {
			
 
				+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
			
 
				+        }
			
 
				+        response = requests.get(image_url, headers=headers, timeout=15)
			
 
				+        response.raise_for_status()
			
 
				+        compressed_data = compress_image(response.content)
			
 
				+        image_base64 = base64.b64encode(compressed_data).decode("utf-8")
			
 
				+        image_data = compressed_data
			
 
				+
			
 
				+        # 步骤3：提取图片文件名（从URL中截取，避免重复）
			
 
				+        # 示例URL：https://xxx.com/123.jpg → 文件名：123.jpg
			
 
				+        file_name = image_url.split("/")[-1]
			
 
				+        # 处理特殊字符（避免文件名非法）
			
 
				+        file_name = file_name.replace("?", "").replace("&", "").replace("=", "")
			
 
				+        save_path = os.path.join(save_dir, file_name)  # 完整保存路径
			
 
				+
			
 
				+        # 步骤4：保存图片到本地
			
 
				+        with open(save_path, "wb") as f:
			
 
				+            f.write(image_data)
			
 
				+        print(f"图片已保存到本地：{save_path}")
			
 
				+
			
 
				+
			
 
				+        return image_base64
			
 
				+    except requests.exceptions.Timeout:
			
 
				+        print(f"下载图片超时：{image_url}")
			
 
				+        return None
			
 
				+    except requests.exceptions.HTTPError as e:
			
 
				+        print(f"图片URL无效（状态码：{response.status_code}）：{image_url}")
			
 
				+        return None
			
 
				+    except Exception as e:
			
 
				+        print(f"下载图片失败：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+
			
 
				+def get_ocr_res(img):
			
 
				+    try:
			
 
				+        #img地址
			
 
				+        print(f'开始识别图片:{img}')
			
 
				+        request_url = request_url_config
			
 
				+
			
 
				+        img_base64 = download_image_to_base64(img)
			
 
				+        if not img_base64:
			
 
				+            print("图片下载/转Base64失败，终止OCR识别")
			
 
				+            return None
			
 
				+
			
 
				+        # 获取access_token
			
 
				+        access_token = get_access_token()
			
 
				+        if not access_token:
			
 
				+            print("获取access_token失败，无法调用OCR接口")
			
 
				+            return None
			
 
				+        params = {"image": img_base64}
			
 
				+        request_url = request_url + "?access_token=" + access_token
			
 
				+        headers = {'content-type': 'application/x-www-form-urlencoded'}
			
 
				+        response = requests.post(request_url, data=params, headers=headers)
			
 
				+
			
 
				+        if response:
			
 
				+            res = response.json()
			
 
				+            # 检查OCR返回是否有错误
			
 
				+            if "error_code" in res:
			
 
				+                print(f"百度OCR接口错误：{res['error_msg']}（错误码：{res['error_code']}）")
			
 
				+                return None
			
 
				+            # 解析识别结果
			
 
				+            new_dic = dict()
			
 
				+            for ite in res['words_result'].keys():
			
 
				+                new_dic[ite] = res['words_result'][ite]['words']
			
 
				+            print('资质数据信息', new_dic)
			
 
				+            return new_dic
			
 
				+        else:
			
 
				+            print("OCR接口返回空响应")
			
 
				+            return None
			
 
				+    except requests.exceptions.RequestException as e:
			
 
				+        print(f"网络错误（图片下载/OCR请求失败）：{str(e)}")
			
 
				+        return None
			
 
				+    except KeyError as e:
			
 
				+        print(f"OCR响应格式异常，缺失字段：{str(e)}")
			
 
				+        return None
			
 
				+    except Exception as e:
			
 
				+        print(f"OCR识别未知错误：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+def get_access_token():
			
 
				+    AppKey = AppKey_config
			
 
				+    AppSrcret = AppSecret_config
			
 
				+    token_url =token_url_config
			
 
				+    url = f"{token_url}?grant_type=client_credentials&client_id={AppKey}&client_secret={AppSrcret}"
			
 
				+
			
 
				+    payload = ""
			
 
				+    headers = {
			
 
				+        'Content-Type': 'application/json',
			
 
				+        'Accept': 'application/json'
			
 
				+    }
			
 
				+    try:
			
 
				+        response = requests.request("POST", url, headers=headers, data=payload)
			
 
				+        response.raise_for_status()  # 触发HTTP错误
			
 
				+        return response.json()['access_token']
			
 
				+    except Exception as e:
			
 
				+        print(f"获取access_token失败：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def extract_province_city(address):
			
 
				+    """
			
 
				+    从地址中提取省份和城市
			
 
				+    :param address: 营业执照地址（如"福建省福州市马尾区"）
			
 
				+    :return: (province, city) - 提取到的省份/城市，提取失败返回空字符串
			
 
				+    """
			
 
				+    if not address:  # 地址为空，直接返回空
			
 
				+        return "", ""
			
 
				+
			
 
				+    # 正则1：匹配省份（兼容省/自治区/直辖市/特别行政区）
			
 
				+    province_pattern = re.compile(r'([^省]+省|.+自治区|北京市|上海市|天津市|重庆市|.+特别行政区)')
			
 
				+    province_match = province_pattern.search(address)
			
 
				+    province = province_match.group(1) if province_match else ""
			
 
				+
			
 
				+    # 正则2：匹配城市（兼容市/自治州/地区/盟，且排除省份已匹配的部分）
			
 
				+    # 先去掉已匹配的省份，再匹配城市
			
 
				+    address_remain = address.replace(province, "").strip() if province else address.strip()
			
 
				+    city_pattern = re.compile(r'([^市]+市|.+自治州|.+地区|.+盟|^[^\d区县镇]+)')
			
 
				+    city_match = city_pattern.search(address_remain)
			
 
				+    city = city_match.group(1).strip() if city_match else ""
			
 
				+
			
 
				+    # 兼容直辖市（如"北京市朝阳区"→city=北京市）
			
 
				+    if province in ["北京市", "上海市", "天津市", "重庆市"]:
			
 
				+        city = province
			
 
				+
			
 
				+    # 兼容地址不规范的情况（如"福建福州马尾区"，无"省"/"市"字）
			
 
				+    if not province and not city:
			
 
				+        # 匹配前两个地名（如"福建福州"→province=福建，city=福州）
			
 
				+        simple_pattern = re.compile(r'^([^\d区县镇]+)')
			
 
				+        simple_match = simple_pattern.search(address)
			
 
				+        if simple_match:
			
 
				+            city = simple_match.group(1).strip()  # 只有城市，省份留空
			
 
				+
			
 
				+    if city and province and city != province and province in city:
			
 
				+        city = city.replace(province, "").strip()
			
 
				+
			
 
				+    province, city = normalize_province_city_names(province, city)
			
 
				+    return province.strip(), city.strip()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#采集数据核心
			
 
				+def collect_data(store_page, brand, keyword, company_id):
			
 
				+    """
			
 
				+    1) 先获取当前页商品个数（count）
			
 
				+    2) 按循环次数采集；每循环15次滚动一次 slow_scroll_1200px
			
 
				+    3) 当前页循环完 -> goto_next_page；有下一页继续；无下一页结束该关键词
			
 
				+    """
			
 
				+    collect_result = []
			
 
				+    # seen = set()
			
 
				+
			
 
				+    logger.info(f"📊 开始采集「{keyword}」的商品数据")
			
 
				+    store_page.wait_for_load_state("networkidle")
			
 
				+    #没有找到商品就跳过这个商品
			
 
				+
			
 
				+
			
 
				+    page_no = 1
			
 
				+    while True:
			
 
				+
			
 
				+        logger.info(f"\n📄 「{keyword}」开始采集第 {page_no} 页")
			
 
				+
			
 
				+        #  记录列表页URL（可用于你后续兜底）
			
 
				+        list_page_url = store_page.url
			
 
				+        logger.info(f"📌 已记录商品列表页URL：{list_page_url}")
			
 
				+
			
 
				+
			
 
				+        # ✅ 先获取当前页商品个数
			
 
				+        store_page.wait_for_load_state("domcontentloaded")  # 先等DOM加载
			
 
				+        store_page.wait_for_load_state("networkidle")
			
 
				+        store_page.wait_for_timeout(500)                   # 额外等待渲染稳定
			
 
				+        total_limit = store_page.locator(PRODUCT_ITEM_SELECTOR).count()
			
 
				+        logger.info(f"📌 「{keyword}」第{page_no}页 初始商品个数（count）：{total_limit}")
			
 
				+
			
 
				+        # 重置当前页的采集计数
			
 
				+        collected_count = 0
			
 
				+
			
 
				+
			
 
				+        # ========= 初始化无匹配计数器（记录标题不包含核心关键词的次数） =========
			
 
				+        # no_match_count = 0  # 无匹配次数初始化为0
			
 
				+        # MAX_NO_MATCH = 10   # 最大无匹配次数阈值
			
 
				+
			
 
				+        #补充没找到关键词的兜底
			
 
				+        not_found_keywords = store_page.locator("div.filter-panel-container-empty-text")
			
 
				+        if not_found_keywords.count() > 0:
			
 
				+            logger.warning(f"⚠️ 关键词「{keyword}」无匹配商品，直接跳过整个关键词采集")
			
 
				+            return []
			
 
				+
			
 
				+
			
 
				+        # 获取当前页面
			
 
				+        # store_page = context.pages[0]  # 从上下文中获取当前页面
			
 
				+        # store_page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+
			
 
				+
			
 
				+        for idx in range(total_limit):
			
 
				+
			
 
				+            detail_page = None
			
 
				+            try:
			
 
				+                item = store_page.locator(PRODUCT_ITEM_SELECTOR).nth(idx)
			
 
				+                collected_count += 1  # 实际采集计数（用于日志）
			
 
				+                # ========= 反爬随机延迟（保留你的原逻辑也行） =========
			
 
				+                store_page.wait_for_load_state("networkidle")
			
 
				+                delay = random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                logger.info(f"📌 「{keyword}」第{page_no}页 第{collected_count}/{total_limit}个商品 - 等待{delay:.2f}秒后采集（反爬）")
			
 
				+
			
 
				+
			
 
				+                # 1. 初始化所有字段默认值
			
 
				+                title = ""
			
 
				+                price = "0.00"
			
 
				+                shop = ""
			
 
				+                expiry_date = "无有效期"
			
 
				+                manufacture_date = "无生产日期"
			
 
				+                approval_number = "无批准文号"
			
 
				+                manufacturer = "未知公司"
			
 
				+                # discount_price = "0.00"
			
 
				+                spec = "未知规格"
			
 
				+                num = 1  # ✅ 默认 1
			
 
				+                platform = '药帮忙'
			
 
				+                current_time = datetime.now().strftime("%Y-%m-%d")
			
 
				+                is_sold_out = 0
			
 
				+                business_license_address = '' #店铺地址为空
			
 
				+
			
 
				+                # =========1、 售罄不跳过 =========
			
 
				+                sold_locator = item.locator('div.product-status')
			
 
				+                if sold_locator.count() > 0:
			
 
				+                    is_sold_out = 1
			
 
				+                    logger.warning(f" 「{keyword}」第{page_no}页 第{collected_count}个商品已售罄")
			
 
				+                    # if collected_count % 5 == 0 and collected_count > 0:
			
 
				+                    #     logger.info("采满5个往下滑")
			
 
				+                    #     slow_scroll_400px(page)
			
 
				+                    #     page.wait_for_load_state("networkidle")
			
 
				+                    # continue
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #2、提取商品ID
			
 
				+                product_id = ''
			
 
				+                product_id_elem = item.locator('div.product-card[data-product-id]')
			
 
				+                if product_id_elem.count() > 0:
			
 
				+                    product_id = product_id_elem.get_attribute("data-product-id")
			
 
				+                    logger.info(f"✅ 提取到data-product-id：{product_id}")  # 输出：5678955
			
 
				+                else:
			
 
				+                    logger.warning("⚠️ 未找到商品ID，使用默认空字符串")
			
 
				+                #3、 提取商品标题（处理空值）
			
 
				+                product_locator = item.locator(PRODUCT_TITLE_SELECTOR)
			
 
				+                if product_locator.count() > 0:
			
 
				+                    title = product_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 列表页标题：{title}{'='*10}")
			
 
				+                else:
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品 - 列表页标题元素未找到，使用默认值：{title}")
			
 
				+
			
 
				+
			
 
				+                #关键词不在标题中，跳过当前商品
			
 
				+                # core_keyword = re.sub(r'^999[\s\(\)（）、·]*', '', keyword)
			
 
				+                # if core_keyword not in title:
			
 
				+                #     no_match_count += 1
			
 
				+                #     logger.warning(f" 「{keyword}」第{collected_count}个商品 - 标题「{title}」不包含核心关键词「{core_keyword}」（无匹配次数：{no_match_count}/{MAX_NO_MATCH}），跳过本次循环")
			
 
				+                #     continue
			
 
				+
			
 
				+                # if no_match_count >= MAX_NO_MATCH:
			
 
				+                #     logger.error(f"❌ 关键词「{keyword}」无匹配商品次数已达{MAX_NO_MATCH}次，直接终止当前关键词采集，进入下一个关键词")
			
 
				+                #     return []
			
 
				+
			
 
				+
			
 
				+                #4、 提取价格（带缺失日志）
			
 
				+                # price_locator = item.locator(PRODUCT_PRICE_SELECTOR)
			
 
				+                price_int = item.locator('//span[@class="price-int"]').text_content().strip()
			
 
				+                # 2. 提取小数部分（注意可能为空，比如价格是整数13）
			
 
				+                price_decimal_elem = item.locator('//span[@class="price-decimal"]')
			
 
				+                if price_decimal_elem.count() > 0:
			
 
				+                    price_decimal = price_decimal_elem.text_content().strip()
			
 
				+                else:
			
 
				+                    price_decimal = ''
			
 
				+                # 3. 拼接完整价格
			
 
				+                full_price = f"{price_int}{price_decimal}"
			
 
				+                # 转成浮点数（便于后续计算/入库）
			
 
				+                full_price_num = float(full_price)
			
 
				+                logger.info(f"✅ 提取到价格：{full_price_num}")
			
 
				+                if full_price_num is None:
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页采购价格元素未找到，使用默认值：{price}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # if full_price_num > 0:
			
 
				+                #     price = price_locator.inner_text(timeout=3000).strip()
			
 
				+                #     logger.info(f"{'='*10}{keyword}」第{collected_count}个商品 - 列表页采购价格：{price}{'='*10}")
			
 
				+                # else:
			
 
				+                #     price = "0.00"  # 初始化默认值，避免后续报错
			
 
				+                #     logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页采购价格元素未找到，使用默认值：{price}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # 5. 提取公司名称（带缺失日志）
			
 
				+                manufacturer_locator = item.locator(PRODUCT_COMPANY_SELECTOR)
			
 
				+                if manufacturer_locator.count() > 0:
			
 
				+                    manufacturer = manufacturer_locator.inner_text(timeout=3000).strip()
			
 
				+
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 列表页公司名：{manufacturer}{'='*10}")
			
 
				+                else:
			
 
				+
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页公司名称元素未找到，使用默认值：{manufacturer}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #6、提取店铺名称
			
 
				+                shop_locator = item.locator(PRODUCT_STORE_SELECTOR)
			
 
				+                if shop_locator.count() > 0:
			
 
				+                    raw_shop = shop_locator.inner_text(timeout=3000).strip()
			
 
				+                    # 2. 清洗店名（核心新增步骤）
			
 
				+                    shop = clean_shop_name(raw_shop)
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 列表页店名：{shop}{'='*10}")
			
 
				+                    logger.info(f"原始店名：{raw_shop}")
			
 
				+                    logger.info(f"清洗后店名：{shop}{'='*10}")
			
 
				+                else:
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页店铺名称元素未找到，使用默认值：{shop}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #7、 提取折扣价
			
 
				+                discount_price_val_origin = ""
			
 
				+                discount_price = ""
			
 
				+                discount_price_locator = item.locator('span[data-v-4cb6cc1f].discount-int').first
			
 
				+                if discount_price_locator.count() > 0:
			
 
				+                    discount_price = discount_price_locator.inner_text(timeout=3000).strip()
			
 
				+                    discount_price_val_origin = discount_price
			
 
				+                    match = re.search(r'\d+\.?\d*', str(discount_price_val_origin))
			
 
				+                    discount_price_val = float(match.group()) if match else 0.00
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页折扣价：{discount_price_val}{'='*10}")
			
 
				+                else:
			
 
				+                    #如果没有拿原价替换
			
 
				+                    # price = float(price.replace("￥", "").replace(",", "")) if price.replace("￥", "").replace(",", "").replace(".", "") else "0.00"
			
 
				+                    discount_price_val = full_price_num
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 折扣价元素未找到，使用采购价兜底：{discount_price_val}")
			
 
				+
			
 
				+                merged_price = f"{full_price_num}{discount_price_val_origin}" if discount_price_val_origin else full_price_num
			
 
				+
			
 
				+                #8、 提取有效期（处理空值）
			
 
				+                expiry_date_locator = item.locator(f"{PRODUCT_VALIDITY_SELECTOR}")
			
 
				+                if expiry_date_locator.count() > 0:
			
 
				+                    expiry_date = expiry_date_locator.inner_text(timeout=3000).strip().replace('-', '')    #.replace('近效期','')
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页有效期：{expiry_date}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 有效期元素未找到，使用默认值：{expiry_date}")
			
 
				+
			
 
				+                #获取product_id
			
 
				+                # product_id = None
			
 
				+                # try:
			
 
				+                #     product_id = item.get_attribute("data-product-id")
			
 
				+                #     if product_id:
			
 
				+                #         product_id = product_id.strip()
			
 
				+                #         logger.info(f"✅ 「{keyword}」第{collected_count}个商品 - 提取到product_id：{product_id}")
			
 
				+
			
 
				+                # ========= 模拟点击商品进入详情页 =========
			
 
				+                logger.info(
			
 
				+                    f"📌 「{keyword}」第{page_no}页 第{collected_count}个商品「{title}」- 模拟鼠标移动并点击"
			
 
				+                )
			
 
				+
			
 
				+                # 点击商品项容器，触发详情展示
			
 
				+                # ========== 点击商品跳详情页 ==========
			
 
				+                # 反爬：模拟真人鼠标移动到商品上再点击（不是直接点击）
			
 
				+                item.hover()  # 先悬停
			
 
				+                random_delay(0.2, 0.5)  # 悬停后延迟
			
 
				+                item.dispatch_event("mousedown")
			
 
				+                random_delay(0.05, 0.15)  # 鼠标按下后延迟
			
 
				+                item.dispatch_event("mouseup")
			
 
				+                random_delay(0.05, 0.1)   # 鼠标松开后延迟
			
 
				+
			
 
				+
			
 
				+
			
 
				+                try:
			
 
				+                    with store_page.context.expect_page(timeout=60000) as p:
			
 
				+                        item.click(delay=random.uniform(0.1, 0.3))
			
 
				+                    detail_page = p.value
			
 
				+                except PlaywrightTimeoutError:
			
 
				+                    logger.warning(
			
 
				+                        f" 「{keyword}」第{page_no}页 第{collected_count}个商品「{title}」- 未检测到新标签页，使用当前页采集详情"
			
 
				+                    )
			
 
				+                    detail_page = None  # 标记为无新标签页，避免关闭列表页
			
 
				+
			
 
				+
			
 
				+                # 等待详情加载（优先用新标签页，无则用列表页）
			
 
				+                target_page = detail_page if detail_page else store_page
			
 
				+                target_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+                delay = random_delay(MIN_PAGE_DELAY, MAX_PAGE_DELAY)
			
 
				+                logger.info(
			
 
				+                    f"📌 「{keyword}」第{page_no}页 第{collected_count}个商品「{title}」- 详情页加载完成，等待{delay:.2f}秒（反爬）"
			
 
				+                )
			
 
				+                # 反爬：检测详情页反爬验证
			
 
				+                # check_anti_crawl(page)
			
 
				+                # ========== 采集详情页的专属信息（有效期/生产日期/批准文号） ==========
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #点击后：1、获取商品详情页链接
			
 
				+                product_link = target_page.url
			
 
				+                logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页链接：{product_link}{'='*10}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # ========= ✅ 去重逻辑，拿商品链接和折扣价和有效期和采集日期 =========
			
 
				+                if check_dup_in_biz_db(product_link, full_price_num, current_time):
			
 
				+                    logger.warning(f" 「{keyword}」第{page_no}页 第{collected_count}个商品（重复）：{title}，跳过")
			
 
				+                    # ========== 关闭新标签页，切回列表页 ==========
			
 
				+                    if detail_page and not detail_page.is_closed():
			
 
				+                        detail_page.close()  # 关闭详情页标签
			
 
				+                        logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 已关闭详情页标签页")
			
 
				+                    # 切回原列表页（第一个标签页）
			
 
				+                    store_page.bring_to_front()  # 激活列表页
			
 
				+                    store_page.mouse.move(random.randint(100, 300), random.randint(200, 400))  # 随机移动鼠标
			
 
				+                    random_delay(0.5, 1.0)  # 增加切换后延迟
			
 
				+                    store_page.wait_for_load_state("networkidle")
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                    logger.info(f" 「{keyword}」第{collected_count}个商品「{title}」- 已切回列表页")
			
 
				+                    if collected_count % 6 == 0 and collected_count > 0:
			
 
				+                            logger.info("采满6个往下滑")
			
 
				+                            slow_scroll_400px(store_page)
			
 
				+                            store_page.wait_for_load_state("networkidle")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+                # key = f"{product_link.strip()}|{discount_price_val}"
			
 
				+                # if key in seen:
			
 
				+                #     logger.warning(
			
 
				+                #         f" 「{keyword}」第{page_no}页 第{collected_count}个商品（重复）：{title}，跳过"
			
 
				+                #     )
			
 
				+                #     if collected_count % 5 == 0 and collected_count > 0:
			
 
				+                #         logger.info("采满15个往下滑")
			
 
				+                #         slow_scroll_400px(page)
			
 
				+                #         page.wait_for_load_state("networkidle")
			
 
				+                #     continue
			
 
				+                # seen.add(key)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #点击后：2、提取生产日期（修复完成）
			
 
				+                manufacture_date_locator = target_page.locator('//div[contains(@class, "spec-info-item") and .//div[contains(@class, "spec-info-item-label") and normalize-space(.)="生产日期"]]//div[contains(@class, "spec-info-item-value-text")]')
			
 
				+                if manufacture_date_locator.count() > 0:
			
 
				+                    manufacture_date = manufacture_date_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页生产日期：{manufacture_date}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 生产日期元素未找到，使用默认值：{manufacture_date}")
			
 
				+
			
 
				+
			
 
				+                #详情页： 3、提取批准文号
			
 
				+                approval_number_locator = target_page.locator('//div[contains(@class, "spec-info-item") and .//div[contains(@class, "spec-info-item-label") and normalize-space(.)="批准文号"]]//div[contains(@class, "spec-info-item-value-text")]')
			
 
				+                if approval_number_locator.count() > 0:
			
 
				+                    approval_number = approval_number_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页批准文号：{approval_number}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 批准文号元素未找到，使用默认值：{approval_number}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #详情页 4、提取规格
			
 
				+                spec_locator = target_page.locator('//div[contains(@class, "spec-info-item") and .//div[contains(@class, "spec-info-item-label") and normalize-space(.)="规格"]]//div[contains(@class, "spec-info-item-value-text")]')
			
 
				+                if spec_locator.count() > 0:
			
 
				+                    spec = spec_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页规格：{spec}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count，补充规格数量不足的提示
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 规格元素数量不足，使用默认值：{spec}")
			
 
				+
			
 
				+                # input("...")
			
 
				+
			
 
				+
			
 
				+                #详情页 5、提取库存
			
 
				+                storage = ''
			
 
				+                storage_locator = target_page.locator('[data-v-51f0e85d].detail-input-num-right-title')
			
 
				+                if storage_locator.count() > 0:
			
 
				+                    storage = storage_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页库存：{storage}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count，补充规格数量不足的提示
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 库存元素数量不足，使用默认值：{storage}")
			
 
				+
			
 
				+
			
 
				+                #详情页 6、提取销量
			
 
				+                sell = ''
			
 
				+                sell_locator = target_page.locator('div.detail-info-content-item-value-price-top-right div[data-v-95163d4a]',has_text='已售')
			
 
				+                if sell_locator.count() > 0:
			
 
				+                    sell = sell_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页销量：{sell}{'='*10}")
			
 
				+                else:
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 没有销量元素，使用默认值：{sell}")
			
 
				+
			
 
				+
			
 
				+                #详情页 7、保存快照url上传到oss
			
 
				+                try:
			
 
				+                    local_path, oss_url = screenshot_target_page_to_local_then_oss(
			
 
				+                        target_page=target_page,
			
 
				+                        full_page=True  # 截取全屏
			
 
				+                    )
			
 
				+                    print(f"最终结果：")
			
 
				+                    print(f"  本地文件路径：{local_path}")
			
 
				+                    logger.info(f"  OSS访问链接：{oss_url}")
			
 
				+                except Exception as e:
			
 
				+                    logger.warning(f"整体流程执行失败：{str(e)}")
			
 
				+                # input("...")
			
 
				+                province = ""
			
 
				+                city = ""
			
 
				+                business_license_company = ""
			
 
				+                qualification_number = ''
			
 
				+
			
 
				+                #如果店名为商品预约中心
			
 
				+                # if shop == '药店品种预约中心':
			
 
				+                #     #https://www.ybm100.com/new-front/product-info/detail?type__1241=222029ad07-tWcfAcrWtc_CSPpP_%2FtW_cfB_ETca0SugQSbgC7gAb5RAdZyTA5UdS%3DUAoogIsKBqyWgKP_tgAPItgePrBgRPrlgQP_ug0PTZgEPrugpPA5lq%3DSQPg%3Dgt2_xg%3D2FPgs0oBgYqwcg9%3DWPTuSgTHgtBsfgGEh%3D%2FXvko2R%3DGvhceloleBnCGBqcG%2F2V_uKVUBftg
			
 
				+                #     #获取pidhttps://www.ybm100.com/new-front/product-info/detail?type__1241=222029ad07-G%2FxP7PxPJgfPUgu%2FIbv7Wg6gpIgwJg5q4PfAg%2FTWZ_Q6gtHaHG%2FgWCPKsClvGsLPVsgQyuBlVVPTqgtvgQgWvG6gOPTkg5%2F_jgAvTog6vT4g5v_6gSU7vC9cggZgvPAtgZJBPgysGg_OuH%2Fg9ToPgjkBgO%2FgaCQggY7KNlo7itg%2FBGP2GrJpPV6%2FQ6f_u6qvMjPvQVIgPg
			
 
				+                #     url = 'https://www.ybm100.com/new-front/product-info/detail?type__1241=222029ad07-G%2FxP7PxPJgfPUgu%2FIbv7Wg6gpIgwJg5q4PfAg%2FTWZ_Q6gtHaHG%2FgWCPKsClvGsLPVsgQyuBlVVPTqgtvgQgWvG6gOPTkg5%2F_jgAvTog6vT4g5v_6gSU7vC9cggZgvPAtgZJBPgysGg_OuH%2Fg9ToPgjkBgO%2FgaCQggY7KNlo7itg%2FBGP2GrJpPV6%2FQ6f_u6qvMjPvQVIgPg'
			
 
				+                #     data = {
			
 
				+                #         'id': f'{product_id}',
			
 
				+                #         'isMainProductVirtualSupplier': 0
			
 
				+                #     }
			
 
				+                #     headers = {
			
 
				+                #         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0",
			
 
				+                #         'Cookie': '_abfpc=48083f46aa22e0eaefbace39874e38acc7c631ea_2.0; cna=2b5bf2a0d04d0ec45367fda825d4fa6b; xyy=MjM2JjE4MDA4NjUwMzAw; JSESSIONID=922A896126C5961D09622E042CAAA01D; xyy_token=eyJhbGciOiJIUzUxMiJ9.eyJhY2NvdW50X2lkIjoyMzYsImRldmljZV9pZCI6IiIsIm9zIjoiV2luZG93cyAxMCIsImxvZ2luX3RpbWUiOjE3NjkxNjAzNDQ5MDYsImJyb3dzZXIiOiJDaHJvbWUgMTQiLCJtZXJjaGFudF9pZCI6MjM2LCJpcF9hZGRyIjoiMTEzLjk4LjYyLjE2NiIsInZlcnNpb24iOiIiLCJsb2dpbl91c2VyX2tleSI6IjM3NzQ2ZjM5LTE3MjQtNDBjYi1hNTk4LWRlYTM5MTU2NjllNSJ9.IN8gFX6p4KuClT2KysZLNVuyQuszfdNW5gz7m_u4yq60zqbvSOg1yo0f7TuKcbZVvd-t5mVsb4hoNBRNV6nsYQ; xyy_principal=236&Y2MwY2FiZGYzZjU4NzUzNGE5OWRkZTIwYmRiMmQ4NTk2ZDg5N2QxOQ&236; xyy_last_login_time=1769160344906; acw_tc=1a0c650c17694095621061999e5d6b6730068c59854298f31bdd661882a009; qt_session=KsnsuMqE_1769409754197; ssxmod_itna=1-eq0xgDnDyAeYqDKi=G0KKG7DRDIEpDpxgGDBP01G7DuExjKidtDUDQulGmFgG4G=oG7iheet3RLKNDlpLeDZDGKQDqx0Eb0iiD4Ns3ImkiT53QQGvqUdaeOENowZaTRbY9oVG6MxfXy/UDgEeDU4GnD068CY6bDYYLDBYD74G_DDeDi2rD84D_DGpdMnudxi33nDeDzqr=xG3txYpdweDgADDB_RiDKkP=hDDlGA7YREbPAcTq6PmzxGU8lCGxUeDMFxGXmikYUQy6MK4rZCSfp1EYH1aDtqD9DgbDb42zvrTbp6ebF_mbS_83r1Ki=3iifhNQ2rt0iC0_Yiofx4lxxfxx3Be5WHiTHDDW=fd1xxq05p71UdznuzuAernD=xIxRtbj=/74anQqf5Dxx4hYb0DnOGK0D3j=bGrxnD4D; ssxmod_itna2=1-eq0xgDnDyAeYqDKi=G0KKG7DRDIEpDpxgGDBP01G7DuExjKidtDUDQulGmFgG4G=oG7iheet3RLFoDiaRAqzbCD7pxTs4GNeYfb78=o8pWc0HY8dN0vO6z5i69OeF5Dg34naHHkD98UZ3tVAb=9/L3BSLIczMds0bxfCAIfG0eY3oTQym5z/oAhmi4qDLetNaD',
			
 
				+                #         'Referer': f'https://www.ybm100.com/new/base/skuDetail?id={product_id}&combination=1&type=1',
			
 
				+                #         "Content-Type" : "application/json"
			
 
				+                #     }
			
 
				+
			
 
				+                #     response = requests.post(url, json=data, headers=headers)
			
 
				+                #     print(response.status_code)
			
 
				+                #     try:
			
 
				+                #         response_json = response.json()
			
 
				+                #         print("✅ 成功解析JSON响应")
			
 
				+
			
 
				+                #         if 'data' in response_json and 'detail' in response_json['data'] and 'pid' in response_json['data']['detail']:
			
 
				+                #             pid = response_json['data']['detail']['pid']
			
 
				+                #             print(f"✅ 提取到pid：{pid}")
			
 
				+                #         elif 'pid' in response_json:
			
 
				+                #             pid = response_json['pid']
			
 
				+                #             print(f"✅ 方式二提取到pid：{pid}")
			
 
				+                #         else:
			
 
				+                #             # 打印响应的前1000个字符，帮助你确认JSON结构
			
 
				+                #             print("⚠️ 未找到pid字段，响应数据预览：")
			
 
				+                #             print(json.dumps(response_json, ensure_ascii=False, indent=2)[:1000])
			
 
				+                #             pid = None
			
 
				+                #     except json.JSONDecodeError:
			
 
				+                #         # 响应不是JSON格式的情况
			
 
				+                #         print("❌ 响应不是JSON格式，无法解析")
			
 
				+                #         print("响应文本：", response.text[:1000])
			
 
				+                #         pid = None
			
 
				+                #     except Exception as e:
			
 
				+                #         # 其他异常
			
 
				+                #         print(f"❌ 提取pid时出错：{str(e)}")
			
 
				+                #         pid = None
			
 
				+                #     target_page.goto(f'https://www.ybm100.com/new/base/skuDetail?id={product_id}&combination=1&type=1')
			
 
				+                #     shop_name_elem = target_page.locator('span[data-v-5485589c]')
			
 
				+                #     shop_name = shop_name_elem.inner_text(timeout=3000).strip()
			
 
				+                #     shop_exists, shop_info = shop_is_exists_database(shop_name)
			
 
				+                #     if not shop_exists:
			
 
				+                #         if shop_info:
			
 
				+                #             province = shop_info['province']
			
 
				+                #             city = shop_info['city']
			
 
				+                #             business_license_company = shop_info['business_license_company']
			
 
				+                #             qualification_number = shop_info['qualification_number']
			
 
				+                # #去往药店品种预约中心后面的链接
			
 
				+                # target_page.goto(f"https://www.ybm100.com/new/base/skuDetail?id={pid}&combination=1&type=1")
			
 
				+                # if not shop_exists:
			
 
				+
			
 
				+                shop_exists, shop_info = shop_is_exists_database(shop)
			
 
				+                shop_page = None
			
 
				+                store_url = ''
			
 
				+
			
 
				+                #店铺名不是药品预约中心且店铺名不在数据库就要点击
			
 
				+                if shop != "药店品种预约中心" and not shop_exists:
			
 
				+                    logger.info("店铺名不是药店品种预约中心且数据库没有该公司的营业执照")
			
 
				+                    # 获取营业执照图片
			
 
				+                    # 进入店铺
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                    entershop_btn = target_page.locator('div[data-v-5485589c].shop-info-container-left-info')
			
 
				+                    # 增强：先等待进入店铺按钮可见
			
 
				+                    entershop_btn.wait_for(state="visible", timeout=10000)
			
 
				+                    entershop_btn.scroll_into_view_if_needed()  # 确保按钮在视口内
			
 
				+                    entershop_btn.hover()  # 先悬停
			
 
				+                    random_delay(0.2, 0.5)  # 悬停后延迟
			
 
				+                    with target_page.expect_popup(timeout=15000) as pop:
			
 
				+                        entershop_btn.click()
			
 
				+                        random_delay(0.05, 0.15)  # 鼠标按下后延迟
			
 
				+                    shop_page = pop.value
			
 
				+                    shop_page.wait_for_load_state("domcontentloaded")  # 比 networkidle 更
			
 
				+
			
 
				+
			
 
				+                    #在这里获取店铺链接
			
 
				+                    store_url = shop_page.url   # 获取店铺链接
			
 
				+                    logger.info(f"📌 获取到店铺链接：{store_url}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                    #点击店铺资质
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                    shop_license_page = shop_page.locator('//div[contains(@class, "shop-info-container-right-btns-item") and contains(span, "资质/售后")]')
			
 
				+                    shop_license_page.wait_for(state="attached", timeout=15000)  # 等待元素加载完成
			
 
				+                    shop_license_page.scroll_into_view_if_needed()  # 确保在视口内
			
 
				+                    shop_license_page.hover()   # 先悬停
			
 
				+                    random_delay(0.2, 0.5)  # 悬停后延迟
			
 
				+                    # shop_license_page.dispatch_event("mousedown")
			
 
				+                    shop_license_page.click()
			
 
				+                    random_delay(0.05, 0.15)  # 鼠标按下后延迟
			
 
				+                    # shop_license_page.dispatch_event("mouseup")
			
 
				+                    random_delay(0.05, 0.1)   # 鼠标松开后延迟
			
 
				+                    shop_page.wait_for_load_state("networkidle")
			
 
				+                    # slow_scroll_400px(shop_page, scroll_distance1=700)
			
 
				+
			
 
				+
			
 
				+                    #获取药品经营许可证图片
			
 
				+                    shop_page.wait_for_load_state("load")
			
 
				+                    ocr_res = None
			
 
				+                    # shop_license_div = target_page.locator('//span[contains(text(), "营业执照")]')
			
 
				+                    shop_license_img = shop_page.locator('//span[contains(text(), "企业营业执照") or contains(text(), "营业执照（正本）")]/ancestor::div[@class="shop-info-drawer-zz-tab1-list-item"]/img').first
			
 
				+                    shop_license_img.wait_for(state="visible", timeout=60000)
			
 
				+
			
 
				+                    try:
			
 
				+                        if shop_license_img.count() > 0:
			
 
				+                            shop_license_src = shop_license_img.get_attribute('src')
			
 
				+                            shop_license_src = shop_license_src.strip() if shop_license_src else None
			
 
				+                            ocr_res = get_ocr_res(shop_license_src)
			
 
				+                            # print(f'ocr_res:{ocr_res}')
			
 
				+                            # input(".....")
			
 
				+                        else:
			
 
				+                            shop_license_src = None
			
 
				+                    except Exception as e:
			
 
				+                            # 捕获定位/提取失败的异常，避免程序崩溃
			
 
				+                            logger.warning(f"提取营业执照图片src失败：{e}")
			
 
				+                            shop_license_src = None
			
 
				+                    print("营业执照图片链接：", shop_license_src)
			
 
				+                    # input("..")
			
 
				+
			
 
				+                    contact_address = ''
			
 
				+                    qualification_number = ocr_res.get('社会信用代码', '') if ocr_res else ''
			
 
				+                    business_license_company = ocr_res.get('单位名称', '') if ocr_res else ''
			
 
				+                    business_license_address = ocr_res.get('地址', '') if ocr_res else ''
			
 
				+                    # scrape_date = ''
			
 
				+
			
 
				+                    # 调用提取函数，获取省份和城市
			
 
				+                    province, city = extract_province_city(business_license_address)
			
 
				+                    logger.info(f"原始地址：{business_license_address}")
			
 
				+                    logger.info(f"提取的省份：{province} | 城市：{city}")
			
 
				+                    insert_result = insert_shop_info_to_db(
			
 
				+                        shop=shop,
			
 
				+                        contact_address=store_url,      #改为店铺链接，到时可以从数据库获取
			
 
				+                        qualification_number=qualification_number,
			
 
				+                        business_license_company=business_license_company,
			
 
				+                        business_license_address=business_license_address,
			
 
				+                        scrape_date=current_time,
			
 
				+                        platform=platform,
			
 
				+                        province=province,
			
 
				+                        city=city,
			
 
				+                        create_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S") ,
			
 
				+                        update_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+                                                            )
			
 
				+                else:
			
 
				+                    logger.info("数据库有该店名，在数据库拿取对应字段填充ybm_drug_middle表")
			
 
				+                    if shop_info:
			
 
				+                        province = shop_info['province']        #公司省份
			
 
				+                        city = shop_info['city']                #公司城市
			
 
				+                        business_license_company = shop_info['business_license_company']        #公司名称
			
 
				+                        qualification_number = shop_info['qualification_number']                #公司统一信用代码
			
 
				+                        business_license_address = shop_info['business_license_address']          #公司地址
			
 
				+
			
 
				+
			
 
				+
			
 
				+                try:
			
 
				+                    if shop_page and not shop_page.is_closed():
			
 
				+                        random_delay(4,8)
			
 
				+                        shop_page.close()
			
 
				+                        logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 已关闭店铺页标签 shop_page")
			
 
				+
			
 
				+                except Exception as e:
			
 
				+                    logger.warning(f"⚠️ 关闭 shop_page 失败：{e}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # # purchase_price = float(price.replace("￥", "").replace(",", "")) if price.replace("￥", "").replace(",", "").replace(".", "").isdigit() else 0.00
			
 
				+
			
 
				+                random_delay(5,8)
			
 
				+
			
 
				+                # ========== 关闭新标签页，切回列表页 ==========
			
 
				+                if detail_page and not detail_page.is_closed():
			
 
				+                    detail_page.close()  # 关闭详情页标签
			
 
				+                    logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 已关闭详情页标签页")
			
 
				+                # 切回原列表页（第一个标签页）
			
 
				+                store_page.bring_to_front()  # 激活列表页
			
 
				+                store_page.mouse.move(random.randint(100, 300), random.randint(200, 400))  # 随机移动鼠标
			
 
				+                random_delay(0.5, 1.0)  # 增加切换后延迟
			
 
				+                store_page.wait_for_load_state("networkidle")
			
 
				+                random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                logger.info(f" 「{keyword}」第{collected_count}个商品「{title}」- 已切回列表页")
			
 
				+                random_delay(2,4)
			
 
				+                # credit_code = ""
			
 
				+                availability = ""
			
 
				+                # input(".....")
			
 
				+
			
 
				+                # 调用函数获取省市ID（修复：传入province和city变量）
			
 
				+                province_id, city_id = get_province_city_ids(province, city)
			
 
				+
			
 
				+                # 组装单条数据（仅新增生产日期/批准文号字段，原有字段顺序/逻辑不变）
			
 
				+                # 构造单条数据元组（适配MySQL字段）
			
 
				+                single_data = {
			
 
				+                    # 核心商品信息
			
 
				+                        "enterprise_id": company_id,
			
 
				+                        'platform_id': 9,
			
 
				+                        'platform_item_id': product_id,
			
 
				+                        'province_id': province_id,
			
 
				+                        'city_id': city_id,
			
 
				+                        'province_name': province,
			
 
				+                        'city_name': city,
			
 
				+                        'area_info': business_license_address,
			
 
				+                        'product_brand': brand,  # 品牌
			
 
				+                        "product_name": keyword,  # 搜索商品名称
			
 
				+                        'product_specs': spec,  # 规格
			
 
				+                        # "my_good_price": merged_price,  # 自定义价格（可与min_price相同或单独提取）
			
 
				+                        "one_box_price": discount_price_val,  # 最低价格
			
 
				+                        "manufacture_date": manufacture_date,  # 生产日期
			
 
				+                        "expiry_date": expiry_date,  # 有效期
			
 
				+                        "manufacturer": manufacturer,  # 生产厂家
			
 
				+                        "approval_number": approval_number,  # 批准文号
			
 
				+                        "is_sold_out": is_sold_out,  # 售罄标记（0/1）
			
 
				+                        'online_posting_count': 1,
			
 
				+                        'continuous_listing_count': 1,
			
 
				+                        'link_url': product_link,
			
 
				+                        "store_name": shop,  # 店铺名称
			
 
				+                        'store_url': store_url,  # 店铺链接
			
 
				+                        'shipment_province_id': 0,
			
 
				+                        'shipment_province_name': '',
			
 
				+                        'shipment_city_id': 0,
			
 
				+                        'shipment_city_name': '',
			
 
				+                        "company_name": business_license_company,  # 营业执照主体（公司名称）
			
 
				+                        "qualification_number": qualification_number,  # 统一信用代码（如有可补充提取）
			
 
				+                        "scrape_date": current_time,  # 采集日期
			
 
				+                        "min_price": discount_price_val,  # 最低价格
			
 
				+                        "number": num,  # 数量（盒数）
			
 
				+                        "sales": sell,      #销量
			
 
				+                        "inventory": storage,       #库存
			
 
				+                        "snapshot_url": oss_url,         #快照链接
			
 
				+                        "insert_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),   # 创建时间
			
 
				+                        "update_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),  # 更新时间
			
 
				+
			
 
				+                        #可能会用到
			
 
				+                        # "province": province,  # 省份
			
 
				+                        # "city": city,  # 城市
			
 
				+                        # "scrape_province": "",  # 采集省份（可留空或根据IP获取）
			
 
				+                        # "availability": availability,  # 库存状态
			
 
				+
			
 
				+                        #暂时用不到
			
 
				+                        # "platform": platform,  # 平台名称（固定或动态获取）
			
 
				+                        # "search_key": keyword,  # 搜索关键词
			
 
				+
			
 
				+
			
 
				+            }
			
 
				+                # 调用逐条插入函数
			
 
				+                insert_single_to_mysql(single_data)
			
 
				+                collect_result.append(single_data)
			
 
				+                logger.info(f" 「{keyword}」第{collected_count}个商品「{title}」采集完成")
			
 
				+                # input("....")
			
 
				+            except Exception as e:
			
 
				+                # 异常处理：关闭详情页，强制切回列表页
			
 
				+                logger.exception(f" 「{keyword}」第{collected_count}个商品采集核心异常：{str(e)}")
			
 
				+                try:
			
 
				+                    if detail_page and not detail_page.is_closed():
			
 
				+                        detail_page.close()
			
 
				+                        logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 异常时关闭详情页标签页")
			
 
				+                    if store_page and not store_page.is_closed():
			
 
				+                        store_page.bring_to_front()  # 切回列表页
			
 
				+                    store_page.wait_for_load_state("networkidle")
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                except Exception as e2:
			
 
				+                    logger.error(f" 「{keyword}」第{collected_count}个商品详情采集异常（处理时）：{str(e2)}，原异常：{str(e)}")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+            # ✅ 每15次滚动一次（修复：用collected_count，且排除0的情况）
			
 
				+            if collected_count % 6 == 0 and collected_count > 0 and collected_count != total_limit:
			
 
				+                logger.info("采满5个往下滑")
			
 
				+                slow_scroll_400px(store_page,)
			
 
				+                store_page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+
			
 
				+
			
 
				+        # ====== 当前页采集完毕，尝试翻页 ======
			
 
				+        delay = random_delay(1.5, 3.0)
			
 
				+        logger.info(f"⏳ 翻页前随机等待 {delay:.2f}s（反爬）")
			
 
				+
			
 
				+        if goto_next_page(store_page):
			
 
				+            logger.info(f"「{keyword}」还有下一页")
			
 
				+            page_no += 1
			
 
				+            store_page.wait_for_load_state("networkidle")
			
 
				+            total_limit = store_page.locator(PRODUCT_ITEM_SELECTOR).count()
			
 
				+            logger.info(f"📌 「{keyword}」第{page_no}页 商品个数更新为：{total_limit}")
			
 
				+            continue
			
 
				+        else:
			
 
				+            logger.info(f" 「{keyword}」已无下一页，关键词采集结束")
			
 
				+            break
			
 
				+    # 关键词采集完成后长延迟
			
 
				+    long_delay = random_delay(MIN_KEYWORD_DELAY, MAX_KEYWORD_DELAY)
			
 
				+    logger.info(f" 「{keyword}」采集完成，共{len(collect_result)}条数据，等待{long_delay:.2f}秒后继续下一个关键词（反爬）")
			
 
				+    return collect_result
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 保存到CSV函数（适配新表头） ====================
			
 
				+# def save_to_csv(data_list):
			
 
				+#     """
			
 
				+#     保存数据到CSV（适配新表头）
			
 
				+#     :param data_list: list - 采集到的字典数据列表
			
 
				+#     :return: bool - 保存是否成功
			
 
				+#     """
			
 
				+#     if not data_list:
			
 
				+#         logger.warning(" 无数据可保存到CSV")
			
 
				+#         return False
			
 
				+
			
 
				+#     try:
			
 
				+#         # 判断文件是否存在，不存在则写入表头
			
 
				+#         file_exists = os.path.exists(CSV_FILE_PATH)
			
 
				+
			
 
				+#         # 打开CSV文件（追加模式，utf-8-sig避免Excel乱码）
			
 
				+#         with open(CSV_FILE_PATH, "a", newline="", encoding="utf-8-sig") as f:
			
 
				+#             # 用新表头作为字段名
			
 
				+#             writer = csv.DictWriter(f, fieldnames=CSV_HEADERS)
			
 
				+
			
 
				+#             # 首次写入表头
			
 
				+#             if not file_exists:
			
 
				+#                 writer.writeheader()
			
 
				+#                 logger.info(f" 已创建CSV文件并写入新表头：{CSV_FILE_PATH}")
			
 
				+
			
 
				+#             # 写入数据行
			
 
				+#             writer.writerows(data_list)
			
 
				+#             logger.info(f" 成功将 {len(data_list)} 条数据写入CSV")
			
 
				+#         return True
			
 
				+
			
 
				+#     except Exception as e:
			
 
				+#         logger.error(f" 保存CSV失败：{str(e)}")
			
 
				+#         return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 主函数（登录+批量搜索） ====================
			
 
				+def main():
			
 
				+    # 在程序启动时加载一次
			
 
				+    load_city_mapping()
			
 
				+
			
 
				+    logger.info("\n" + "="*50)
			
 
				+    logger.info("🚀 药帮忙采集程序启动")
			
 
				+    logger.info(f"⏰ 启动时间：{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
			
 
				+    logger.info("="*50)
			
 
				+
			
 
				+    # 待搜索的关键词列表（直接写在这里，改起来更直观）
			
 
				+    # 存储所有关键词的采集数据
			
 
				+    # all_collect_data = []
			
 
				+
			
 
				+    with sync_playwright() as p:
			
 
				+        # browser = init_browser_with_proxy(p)
			
 
				+        # 启动浏览器（用单个配置变量）
			
 
				+        browser = p.chromium.launch(
			
 
				+            headless=False,  # 不要用无头模式（反爬：无头模式易被识别）
			
 
				+            channel="chrome",  # 使用真实Chrome内核
			
 
				+            slow_mo=random.randint(100, 300),  # 全局操作延迟（模拟真人慢速操作）
			
 
				+            args=[
			
 
				+                "--disable-blink-features=AutomationControlled",  # 禁用webdriver特征（核心！）
			
 
				+                "--enable-automation=false",  # 新增：禁用自动化标识
			
 
				+                "--disable-infobars",  # 新增：禁用信息栏
			
 
				+                "--remote-debugging-port=0",  # 新增：随机调试端口
			
 
				+                "--start-maximized",  # 最大化窗口（模拟真人使用）
			
 
				+                "--disable-extensions",  # 禁用扩展（避免特征）
			
 
				+                "--disable-plugins-discovery",  # 禁用插件发现
			
 
				+                "--no-sandbox",  # 避免沙箱模式特征
			
 
				+                "--disable-dev-shm-usage",  # 避免内存限制导致的异常
			
 
				+                f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36"  # 随机Chrome版本的UA
			
 
				+            ]
			
 
				+        )
			
 
				+        # 创建页面时伪装指纹
			
 
				+        context = browser.new_context(
			
 
				+            locale="zh-CN",  # 中文环境
			
 
				+            timezone_id="Asia/Shanghai",  # 上海时区
			
 
				+            geolocation={"latitude": 31.230416, "longitude": 121.473701},  # 模拟上海地理位置（可选）
			
 
				+            permissions=["geolocation"],  # 授予定位权限（模拟真人）
			
 
				+            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
			
 
				+            viewport={"width": 1800, "height": 1000},
			
 
				+            # 关键：隐藏自动化特征
			
 
				+            java_script_enabled=True,
			
 
				+            bypass_csp=True,
			
 
				+            # user_data_dir="./temp_user_data"  # 模拟真实用户数据目录
			
 
				+        )
			
 
				+
			
 
				+        page = context.new_page()
			
 
				+
			
 
				+
			
 
				+        # 关键：移除navigator.webdriver标识（反爬核心）
			
 
				+        page.add_init_script("""
			
 
				+            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
			
 
				+            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });  // 新增：模拟插件
			
 
				+            Object.defineProperty(navigator, 'mimeTypes', { get: () => [1, 2, 3] });  // 新增：模拟MIME类型
			
 
				+            window.chrome = { runtime: {}, loadTimes: () => ({}) };  // 增强Chrome模拟
			
 
				+            delete window.navigator.languages;
			
 
				+            window.navigator.languages = ['zh-CN', 'zh'];
			
 
				+            // 新增：模拟真实鼠标移动特征
			
 
				+            (() => {
			
 
				+                const originalAddEventListener = EventTarget.prototype.addEventListener;
			
 
				+                EventTarget.prototype.addEventListener = function(type, listener) {
			
 
				+                    if (type === 'mousemove') {
			
 
				+                        return originalAddEventListener.call(this, type, (e) => {
			
 
				+                            e._automation = undefined;
			
 
				+                            listener(e);
			
 
				+                        });
			
 
				+                    }
			
 
				+                    return originalAddEventListener.call(this, type, listener);
			
 
				+                };
			
 
				+            })();
			
 
				+""")
			
 
				+
			
 
				+
			
 
				+        try:
			
 
				+            # ========== 核心：Cookie复用逻辑 ==========
			
 
				+            # 1. 加载本地Cookie
			
 
				+            load_cookies(context)
			
 
				+
			
 
				+            # 2. 验证登录状态
			
 
				+            if not is_login(page):
			
 
				+                # 3. Cookie失效/不存在，执行登录
			
 
				+                page.goto(TARGET_LOGIN_URL)
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+                logger.info("🔑 开始执行登录流程")
			
 
				+
			
 
				+                # 执行登录操作
			
 
				+                login_success = login_operation(page, USERNAME, PASSWORD)
			
 
				+                if not login_success:
			
 
				+                    logger.error(" 登录失败，程序终止")
			
 
				+                    return
			
 
				+
			
 
				+                # 4. 登录成功后保存Cookie
			
 
				+                save_cookies(context)
			
 
				+                logger.info(" 登录并保存Cookie成功！")
			
 
				+
			
 
				+            # 初始化变量：保存首次搜索的新页面对象
			
 
				+            store_page = None
			
 
				+            #统计搜索次数
			
 
				+            nums = 0
			
 
				+
			
 
				+
			
 
				+            # ==================== 1. 核心业务配置 ====================
			
 
				+            # tasks = [
			
 
				+            #     [1, '银力舒头孢丙烯分散片'],
			
 
				+            #     [2, '阿莫西林克拉维酸钾片'],   #可以
			
 
				+            #     # # [3, '阿奇霉素胶囊250mg*6s'],    #
			
 
				+            #     [4, '白云山盐酸达泊西汀'],
			
 
				+            #     [5, '白云山盐酸美金刚'],
			
 
				+            #     # [6, '世福素头孢克房胶囊'],
			
 
				+            #     # [7, '优克诺头抱克洛片'],
			
 
				+            #     [8, '抗之霸乙酰螺旋霉素片'],
			
 
				+            #     [9, '使力康阿奇霉素胶囊'],
			
 
				+            #     # [10, '金力舒莫西林克拉维酸钾片'],
			
 
				+            #     [11, '抗之霸阿莫西林胶囊'],
			
 
				+            #     # [12,'金力舒阿莫西林克拉维酸钾片']
			
 
				+            # ]
			
 
				+            tasks = get_search_keywords_from_db()
			
 
				+            # tasks = [1, '金活','金活依马打正红花油',3],
			
 
				+            if not tasks:
			
 
				+                logger.error("未获取到任何任务，程序退出")
			
 
				+                return
			
 
				+
			
 
				+            store_page =None
			
 
				+            nums = 0
			
 
				+
			
 
				+
			
 
				+            # 2. 批量搜索+采集+保存
			
 
				+            for task_id,brand, keyword, company_id in tasks:
			
 
				+                logger.info(f"\n=====================================")
			
 
				+                logger.info(f"开始处理任务 {task_id}，公司ID：{company_id}，关键词：{keyword}")
			
 
				+                logger.info(f"=====================================")
			
 
				+                # ---------- 第一次上报 ----------
			
 
				+                report_start(task_id, keyword)  # 上报开始任务（仅关键词）
			
 
				+                logger.info("上报状态")
			
 
				+                start_ts = int(time.time())   # 记录开始时间戳
			
 
				+
			
 
				+
			
 
				+                # 采集状态变量
			
 
				+                success = False
			
 
				+                real_count = 0
			
 
				+
			
 
				+                # 执行搜索
			
 
				+                popup_guard(page, "before_search")
			
 
				+                if nums == 0:
			
 
				+                    popup_guard(store_page if store_page else page, "before_search")  # page是你的初始页面对象，需提前定义
			
 
				+                    store_page, search_success = search_operation(page, keyword, is_first_search=True)
			
 
				+                    nums += 1
			
 
				+                else:
			
 
				+                    if store_page is None:
			
 
				+                        logger.error(f"{get_current_time()} ❌ 无可用的搜索页面，跳过「{keyword}」")
			
 
				+                        continue
			
 
				+                    popup_guard(store_page, "before_search")
			
 
				+                    store_page, search_success = search_operation(store_page, keyword, is_first_search=False)
			
 
				+
			
 
				+                # input("")
			
 
				+                popup_guard(store_page, "after_search")
			
 
				+
			
 
				+                # store_page = detail_page
			
 
				+
			
 
				+                if store_page is None:
			
 
				+                    break
			
 
				+
			
 
				+                if not search_success:
			
 
				+                    logger.warning(f" 「{keyword}」搜索失败，跳过采集")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+                # ✅ 再等页面稳定一下（networkidle 有时会等不到，建议加超时或换成 domcontentloaded）
			
 
				+                store_page.wait_for_load_state("domcontentloaded")
			
 
				+
			
 
				+
			
 
				+                store_page.wait_for_load_state('networkidle')
			
 
				+
			
 
				+                # 采集数据
			
 
				+                data_list = collect_data(store_page, brand, keyword, company_id)
			
 
				+                real_count = len(data_list)
			
 
				+                success = True
			
 
				+                logger.info(f"关键词「{keyword}」采集完成，共 {real_count} 条数据")
			
 
				+            # ---------- 第二次上报：结束任务（全部字段） ----------
			
 
				+            report_end(task_id, keyword, company_id,success=success, real_count=real_count, start_ts=start_ts)
			
 
				+
			
 
				+
			
 
				+                # # 保存到CSV
			
 
				+                # if data_list:
			
 
				+                #     save_to_csv(data_list)
			
 
				+                # else:
			
 
				+                #     logger.warning(f" 「{keyword}」无数据，跳过保存")
			
 
				+
			
 
				+            logger.info("\n🎉 所有关键词处理完成！CSV文件路径：" + os.path.abspath(CSV_FILE_PATH))
			
 
				+            # input("\n按回车关闭程序...")
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            logger.error(f" 程序异常：{str(e)}")
			
 
				+        finally:
			
 
				+            browser.close()
			
 
				+            logger.info(" 浏览器已关闭，程序结束")
			
 
				+
			
 
				+
			
 
				+
			
 
# ==================== Program entry point ====================
# Run main() only when this file is executed as a script, not when imported.
if __name__ == '__main__':
    main()
			
--- a/main2.py
+++ b/main2.py
@@ -0,0 +1,1987 @@
 
				+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
			
 
				+from logger_config import logger
			
 
				+from datetime import datetime
			
 
				+import random
			
 
				+import csv
			
 
				+import os
			
 
				+import time
			
 
				+import json
			
 
				+import pymysql
			
 
				+from pymysql.err import OperationalError, ProgrammingError, DataError
			
 
				+from config import *
			
 
				+import re
			
 
				+import uuid
			
 
				+import requests
			
 
				+import base64
			
 
				+from io import BytesIO
			
 
				+from PIL import Image
			
 
				+import traceback
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ===================== 工具函数：获取当前时间字符串 =====================
			
 
				+def get_current_time():
			
 
				+    """统一日志时间格式"""
			
 
				+    return datetime.now().strftime('%Y-%m-%d %H:%M:%S')
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
# Proxy IP pool settings
PROXY_POOL_URL =""  # URL that get_random_proxy() requests to obtain a proxy; empty here
PROXY_VALIDATION_URL = ""  # URL fetched through a proxy to check that the proxy works
PROXY_TIMEOUT = 10  # proxy validation timeout (seconds)
			
 
				+
			
 
				+def get_random_proxy():
			
 
				+    """从代理池获取随机代理IP"""
			
 
				+    try:
			
 
				+        response = requests.get(PROXY_POOL_URL, timeout=10)
			
 
				+        if response.status_code == 200:
			
 
				+            proxy = response.text.strip()
			
 
				+            if validate_proxy(proxy):
			
 
				+                logger.info(f"获取到有效代理: {proxy}")
			
 
				+                return proxy
			
 
				+            logger.warning(f"代理无效: {proxy}")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"获取代理失败: {str(e)}")
			
 
				+    return None
			
 
				+
			
 
				+def validate_proxy(proxy):
			
 
				+    """验证代理IP有效性"""
			
 
				+    try:
			
 
				+        proxies = {
			
 
				+            "http": f"http://{proxy}",
			
 
				+            "https": f"https://{proxy}"
			
 
				+        }
			
 
				+        response = requests.get(
			
 
				+            PROXY_VALIDATION_URL,
			
 
				+            proxies=proxies,
			
 
				+            timeout=PROXY_TIMEOUT
			
 
				+        )
			
 
				+        return response.status_code == 200
			
 
				+    except:
			
 
				+        return False
			
 
				+
			
 
				+def init_browser_with_proxy(playwright):
			
 
				+    proxy = get_random_proxy()
			
 
				+    proxy_config = None
			
 
				+    if proxy:
			
 
				+        proxy_server, proxy_port = proxy.split(":")
			
 
				+        proxy_config = {
			
 
				+        "server": f"http://{proxy_server}:{proxy_port}",
			
 
				+        # "username": "your_proxy_username",
			
 
				+        # "password": "your_proxy_password"
			
 
				+        }
			
 
				+        logger.info(f"使用代理: {proxy_server}:{proxy_port}")
			
 
				+    else:
			
 
				+        logger.warning("未获取到有效代理，将使用本地IP")
			
 
				+
			
 
				+    # 启动浏览器（保留原有反爬配置）
			
 
				+    return playwright.chromium.launch(
			
 
				+        headless=False,  # 非无头模式
			
 
				+        channel="chrome",  # 使用Chrome内核
			
 
				+        slow_mo=random.randint(100, 300),  # 随机操作延迟
			
 
				+        proxy=proxy_config,  # 代理配置（None则不使用代理）
			
 
				+        args=[
			
 
				+            "--disable-blink-features=AutomationControlled",  # 核心反检测
			
 
				+            "--enable-automation=false",
			
 
				+            "--disable-infobars",
			
 
				+            "--remote-debugging-port=0",
			
 
				+            "--start-maximized",
			
 
				+            "--disable-extensions",
			
 
				+            "--disable-plugins-discovery",
			
 
				+            "--no-sandbox",
			
 
				+            "--disable-dev-shm-usage",
			
 
				+            # 随机Chrome版本UA
			
 
				+            f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36"
			
 
				+        ]
			
 
				+    )
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 2. 反爬工具函数 ====================
			
 
				+def random_delay(min_seconds, max_seconds):
			
 
				+    """生成随机延迟（核心反爬：避免固定间隔）"""
			
 
				+    delay = random.uniform(min_seconds, max_seconds)
			
 
				+    time.sleep(delay)
			
 
				+    return delay
			
 
				+
			
 
				+
			
 
				+
			
 
				+def simulate_human_typing(page, locator, text):
			
 
				+    """模拟真人打字（逐个字符输入，带随机间隔）"""
			
 
				+    try:
			
 
				+        locator.click()
			
 
				+        locator.clear()
			
 
				+        for char in text:
			
 
				+            locator.type(char, delay=random.uniform(MIN_INPUT_DELAY, MAX_INPUT_DELAY))
			
 
				+            random_delay(0.05, 0.1)  # 字符间额外小延迟
			
 
				+        logger.info(f" 模拟真人输入完成：{text}")
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"模拟打字失败：{e}")
			
 
				+        locator.fill(text)  # 兜底：直接填充
			
 
				+
			
 
				+
			
 
				+
			
 
				+def save_cookies(context, cookie_path=COOKIE_FILE_PATH):
			
 
				+    """保存Cookie到本地JSON文件"""
			
 
				+    try:
			
 
				+        cookies = context.cookies()
			
 
				+        with open(cookie_path, "w", encoding="utf-8") as f:
			
 
				+            json.dump(cookies, f, ensure_ascii=False, indent=2)
			
 
				+        logger.info(f"Cookie已保存到：{cookie_path}")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 保存Cookie失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def load_cookies(context, cookie_path=COOKIE_FILE_PATH):
			
 
				+    """从本地JSON文件加载Cookie到浏览器上下文"""
			
 
				+    if not os.path.exists(cookie_path):
			
 
				+        logger.warning(f" Cookie文件不存在：{cookie_path}")
			
 
				+        return False
			
 
				+    try:
			
 
				+        with open(cookie_path, "r", encoding="utf-8") as f:
			
 
				+            cookies = json.load(f)
			
 
				+        context.add_cookies(cookies)
			
 
				+        logger.info(f"✅ 已从{cookie_path}加载Cookie")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 加载Cookie失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def is_login(page):
			
 
				+    """验证是否已登录（核心：检测登录态）"""
			
 
				+    try:
			
 
				+        # 访问需要登录的页面
			
 
				+        page.goto(LOGIN_VALIDATE_URL, timeout=300000)
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+        # 检测是否跳转到登录页（URL包含login则未登录）
			
 
				+        if "login" in page.url.lower():
			
 
				+            logger.warning(" Cookie失效，需要重新登录")
			
 
				+            return False
			
 
				+
			
 
				+        # 可选：检测登录后的专属元素（比如用户名、个人中心等）
			
 
				+        # if page.locator("用户中心选择器").count() > 0:
			
 
				+        #     return True
			
 
				+        logger.info(" Cookie有效，已保持登录状态")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 验证登录状态失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 滚动函数重构（核心修改） ====================
			
 
				+def slow_scroll_400px(page,scroll_distance1=400):
			
 
				+    """
			
 
				+    慢速滚动400px±50px（模拟真人滑动）
			
 
				+    :param page: 页面对象
			
 
				+    :return: 滚动是否成功
			
 
				+    """
			
 
				+    try:
			
 
				+        # 生成400±50px的随机滚动距离
			
 
				+        scroll_distance = random.randint(
			
 
				+            scroll_distance1 - SCROLL_OFFSET_RANGE,
			
 
				+            scroll_distance1 + SCROLL_OFFSET_RANGE
			
 
				+        )
			
 
				+        remaining_distance = scroll_distance
			
 
				+        total_steps = int(scroll_distance / SCROLL_STEP)
			
 
				+
			
 
				+        logger.info(
			
 
				+            f"📜 开始慢速滚动（目标距离：{scroll_distance}px，总步数：{total_steps}，总时长约{total_steps*SCROLL_INTERVAL:.2f}秒）"
			
 
				+        )
			
 
				+
			
 
				+        # 渐进式滚动（每步50px，间隔0.05秒）
			
 
				+        for _ in range(total_steps):
			
 
				+            step = min(SCROLL_STEP, remaining_distance)
			
 
				+            page.evaluate(f"window.scrollBy(0, {step});")
			
 
				+            remaining_distance -= step
			
 
				+            time.sleep(SCROLL_INTERVAL)
			
 
				+
			
 
				+        # 处理剩余不足一步的距离
			
 
				+        if remaining_distance > 0:
			
 
				+            page.evaluate(f"window.scrollBy(0, {remaining_distance});")
			
 
				+            time.sleep(SCROLL_INTERVAL)
			
 
				+
			
 
				+        # 滚动后等待懒加载完成
			
 
				+        page.wait_for_load_state("networkidle", timeout=8000)
			
 
				+        random_delay(2.0, 3.0)  # 滚动后额外停顿，模拟真人
			
 
				+        logger.info(f" 慢速滚动完成，实际滚动距离：{scroll_distance - remaining_distance}px")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f" 慢速滚动失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# def check_anti_crawl(page):
			
 
				+#     """检测反爬弹窗/验证码（核心：提前识别反爬）"""
			
 
				+#     anti_crawl_selectors = [
			
 
				+#         "//div[contains(text(), '验证')]",
			
 
				+#         "//div[contains(text(), '人机验证')]",
			
 
				+#         "//div[contains(text(), '访问过于频繁')]",
			
 
				+#         "//button[contains(text(), '验证')]"
			
 
				+#     ]
			
 
				+#     for selector in anti_crawl_selectors:
			
 
				+#         if page.locator(selector).count() > 0:
			
 
				+#             logger.error("❌ 检测到反爬验证弹窗！请手动完成验证后按回车继续...")
			
 
				+#             input()  # 暂停等待手动验证
			
 
				+#             return True
			
 
				+#     return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
# CSV settings
CSV_FILE_PATH = f"ybm_collect_data_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv" # CSV output path; the timestamp is fixed when this module-level line runs
CSV_HEADERS = [
    "商品标题", "商品采购价格", "商品折扣价格", "规格", "盒数",
    "店铺名称", "公司名称",
    "有效日期", "生产日期", "批准文号", "采集时间"
]    # header row (column names)
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 登录函数 ====================
			
 
				+def login_operation(page, username, password):
			
 
				+    """登录操作函数"""
			
 
				+    try:
			
 
				+        # 输入手机号（直接用单个变量）
			
 
				+        page.wait_for_selector(USERNAME_SELECTOR, timeout=ELEMENT_TIMEOUT, state="visible")
			
 
				+        page.wait_for_timeout(timeout=3000)
			
 
				+        page.fill(USERNAME_SELECTOR, username)
			
 
				+        logger.info(" 已输入登录账号")
			
 
				+
			
 
				+        # 输入密码
			
 
				+        page.wait_for_selector(PASSWORD_SELECTOR, timeout=ELEMENT_TIMEOUT, state="visible")
			
 
				+        page.wait_for_timeout(timeout=3000)
			
 
				+        page.fill(PASSWORD_SELECTOR, password)
			
 
				+        logger.info(" 已输入登录密码")
			
 
				+
			
 
				+        random_delay(1, 2)
			
 
				+        agree_btn = page.locator('span.el-checkbox__inner')
			
 
				+        agree_btn.click()
			
 
				+
			
 
				+        # 点击登录按钮
			
 
				+        page.wait_for_selector(LOGIN_BTN_SELECTOR, timeout=ELEMENT_TIMEOUT)
			
 
				+        page.wait_for_timeout(timeout=3000)
			
 
				+        page.click(LOGIN_BTN_SELECTOR)
			
 
				+        logger.info(" 已点击登录按钮")
			
 
				+
			
 
				+        page.wait_for_timeout(LOGIN_AFTER_CLICK)
			
 
				+        return True
			
 
				+
			
 
				+    except PlaywrightTimeoutError as e:
			
 
				+        logger.error(f" 登录失败：元素定位超时 - {str(e)}")
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 登录异常：{str(e)}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def kill_masks(page):
			
 
				+    """
			
 
				+    强制清理残留遮罩层/覆盖层，并恢复 body 可滚动、可点击状态
			
 
				+    """
			
 
				+    page.evaluate(r"""
			
 
				+    () => {
			
 
				+      const removed = [];
			
 
				+      const hidden = [];
			
 
				+
			
 
				+      // 1) 先处理已知常见遮罩
			
 
				+      const knownSelectors = [
			
 
				+        '.v-modal',
			
 
				+        '.el-overlay',
			
 
				+        '.el-overlay-dialog',
			
 
				+        '.el-dialog__wrapper',
			
 
				+        '.el-message-box__wrapper',
			
 
				+        '.el-loading-mask',
			
 
				+        '.el-popup-parent--hidden'
			
 
				+      ];
			
 
				+
			
 
				+      for (const sel of knownSelectors) {
			
 
				+        document.querySelectorAll(sel).forEach(el => {
			
 
				+          // v-modal / overlay 直接 remove 最省事
			
 
				+          removed.push(sel);
			
 
				+          el.remove();
			
 
				+        });
			
 
				+      }
			
 
				+
			
 
				+      // 2) 再做一次“泛化兜底”：全屏 fixed/absolute + 高 z-index 的覆盖层
			
 
				+      //    注意：不要误删页面正常的固定导航，所以加上“近似全屏”的判断
			
 
				+      const all = Array.from(document.querySelectorAll('body *'));
			
 
				+      for (const el of all) {
			
 
				+        const s = window.getComputedStyle(el);
			
 
				+        if (!s) continue;
			
 
				+
			
 
				+        const z = parseInt(s.zIndex || '0', 10);
			
 
				+        const pos = s.position;
			
 
				+        const pe = s.pointerEvents;
			
 
				+
			
 
				+        if ((pos === 'fixed' || pos === 'absolute') && z >= 1000 && pe !== 'none') {
			
 
				+          const r = el.getBoundingClientRect();
			
 
				+          const nearFullScreen =
			
 
				+            r.width >= window.innerWidth * 0.8 &&
			
 
				+            r.height >= window.innerHeight * 0.8 &&
			
 
				+            r.left <= window.innerWidth * 0.1 &&
			
 
				+            r.top <= window.innerHeight * 0.1;
			
 
				+
			
 
				+          // 常见遮罩是半透明背景色，或者透明但拦截点击
			
 
				+          const bg = s.backgroundColor || '';
			
 
				+          const looksLikeMask =
			
 
				+            nearFullScreen && (bg.includes('rgba') || bg.includes('rgb') || s.opacity !== '1');
			
 
				+
			
 
				+          if (nearFullScreen) {
			
 
				+            // 不管透明不透明，只要近似全屏且高 z-index，就先让它不拦截点击
			
 
				+            el.style.pointerEvents = 'none';
			
 
				+            el.style.display = 'none';
			
 
				+            hidden.push(el.tagName + '.' + (el.className || ''));
			
 
				+          }
			
 
				+        }
			
 
				+      }
			
 
				+
			
 
				+      // 3) 恢复 body / html 的滚动与交互（很多弹窗会锁滚动）
			
 
				+      document.documentElement.style.overflow = 'auto';
			
 
				+      document.body.style.overflow = 'auto';
			
 
				+      document.body.style.position = 'static';
			
 
				+      document.body.style.width = 'auto';
			
 
				+      document.body.style.paddingRight = '0px';
			
 
				+
			
 
				+      // 4) 去掉 Element-UI 常见的锁定 class
			
 
				+      document.body.classList.remove('el-popup-parent--hidden');
			
 
				+
			
 
				+      return { removed, hiddenCount: hidden.length, hidden };
			
 
				+    }
			
 
				+    """)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def force_close_popup(page):
			
 
				+    """关闭新手引导/遮罩（多步：下一步/完成/我知道了），并兜底移除遮罩层"""
			
 
				+    try:
			
 
				+        # 1) 尝试连续点“下一步/完成/我知道了/关闭”
			
 
				+        for _ in range(5):  # 最多点5次，足够覆盖多步引导
			
 
				+            btn = page.locator(
			
 
				+                "//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+
			
 
				+            # 有些引导是右上角 X（如果存在就点）
			
 
				+            close_icon = page.locator(
			
 
				+                "xpath=//*[contains(@class,'close') or contains(@class,'el-icon-close') or name()='svg' or name()='i'][1]"
			
 
				+            ).first
			
 
				+            if close_icon.count() > 0 and close_icon.is_visible():
			
 
				+                close_icon.click(timeout=1000)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+
			
 
				+            break
			
 
				+
			
 
				+        # 2) 兜底：移除常见遮罩层（element-ui / 通用 mask/overlay）
			
 
				+        page.evaluate("""
			
 
				+        const selectors = [
			
 
				+          '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+          '[class*="mask"]', '[class*="overlay"]', '[style*="z-index"]'
			
 
				+        ];
			
 
				+        for (const sel of selectors) {
			
 
				+          document.querySelectorAll(sel).forEach(el => {
			
 
				+            const s = window.getComputedStyle(el);
			
 
				+            // 只移除“覆盖层”倾向的元素：fixed/absolute 且 z-index 很高
			
 
				+            if ((s.position === 'fixed' || s.position === 'absolute') && parseInt(s.zIndex || '0', 10) >= 1000) {
			
 
				+              el.remove();
			
 
				+            }
			
 
				+          });
			
 
				+        }
			
 
				+        """)
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+# 调用方式和方案1一致：在搜索后、采集前执行
			
 
				+# force_close_popup(page)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def pick_search_input(page):
			
 
				+    """优先选可见且可用的搜索输入框；第一个不行就尝试第二个"""
			
 
				+    inputs = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+    cnt = inputs.count()
			
 
				+
			
 
				+    # 优先检查前两个（你说只有两个）
			
 
				+    for i in range(min(cnt, 2)):
			
 
				+        candidate = inputs.nth(i)
			
 
				+        try:
			
 
				+            candidate.wait_for(state="visible", timeout=1500)  # 小超时快速试探
			
 
				+            if candidate.is_enabled():
			
 
				+                return candidate
			
 
				+        except PlaywrightTimeoutError:
			
 
				+            continue
			
 
				+
			
 
				+    # 兜底：直接找任意可见的（避免命中 hidden 模板）
			
 
				+    candidate = page.locator(f"{SEARCH_INPUT_SELECTOR}:visible").first
			
 
				+    candidate.wait_for(state="visible", timeout=ELEMENT_TIMEOUT)
			
 
				+    return candidate
			
 
				+
			
 
				+
			
 
				+
			
 
				+def type_slow(locator, text: str, min_delay=0.06, max_delay=0.18):
			
 
				+    """逐字输入，模拟真人打字"""
			
 
				+    for ch in text:
			
 
				+        locator.type(ch, delay=int(random.uniform(min_delay, max_delay) * 1000))
			
 
				+
			
 
				+
			
 
				+# ==================== 搜索操作函数 ====================
			
 
				+def search_operation(page, keyword):
			
 
				+    """搜索框填充+提交搜索"""
			
 
				+    try:
			
 
				+        # 1) 找到“可用”的搜索框（第一个不行就用第二个）
			
 
				+        search_locator = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+
			
 
				+        # 清空并填充搜索框
			
 
				+        search_locator.wait_for(timeout=ELEMENT_TIMEOUT)
			
 
				+
			
 
				+        # 2. 清空搜索框（双重保障：先调用locator的clear，再手动全选删除）
			
 
				+        search_locator.click()  # 聚焦
			
 
				+        search_locator.fill("")
			
 
				+        page.keyboard.down("Control")  # 按住Control键
			
 
				+        page.keyboard.press("a")       # 按a键
			
 
				+        page.keyboard.up("Control")    # 松开Control键
			
 
				+
			
 
				+        page.keyboard.press("Backspace")  # 删除选中内容
			
 
				+
			
 
				+        # 3) 逐字输入
			
 
				+        type_slow(search_locator, keyword, min_delay=0.06, max_delay=0.18)
			
 
				+
			
 
				+        # 3. 输入搜索关键词
			
 
				+        # search_locator.fill(keyword)
			
 
				+        logger.info(f"📝 已输入搜索关键词：{keyword}")
			
 
				+
			
 
				+        # 3) 搜索按钮也建议点可见的那个
			
 
				+        btn = page.locator(f"{SEARCH_BTN_SELECTOR}")
			
 
				+        btn.wait_for(state="visible", timeout=SEARCH_BTN_TIMEOUT)
			
 
				+        # btn.click()
			
 
				+        page.wait_for_timeout(3000)
			
 
				+        #获取新页面对象
			
 
				+        try:
			
 
				+            # 先开始监听新页面事件（在点击前）
			
 
				+            with page.context.expect_page(timeout=60000) as new_page_info:
			
 
				+                # 再执行点击操作
			
 
				+                btn.click()
			
 
				+            # 点击后获取新页面
			
 
				+            detail_page = new_page_info.value
			
 
				+            detail_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+
			
 
				+            #点击出现的按钮
			
 
				+            test_btn = detail_page.locator("div[data-v-c65c36bc].first-time-highlight-message-btn button")
			
 
				+            btn_count = test_btn.count()
			
 
				+            logger.info(f"✅ 匹配到的元素数量：{btn_count}")
			
 
				+            test_btn.wait_for(state="attached", timeout=5000)
			
 
				+            test_btn.click()
			
 
				+            
			
 
				+
			
 
				+        except PlaywrightTimeoutError:
			
 
				+            logger.warning(f"{get_current_time()}   未检测到新标签页")
			
 
				+            return None, False
			
 
				+
			
 
				+        except Exception as e:
			
 
				+                logger.warning(f"{get_current_time()}   等待新标签页异常：{e}")
			
 
				+                return None, False
			
 
				+
			
 
				+        force_close_popup(detail_page)
			
 
				+        kill_masks(detail_page)
			
 
				+        logger.info("✅ 已触发搜索")
			
 
				+        return detail_page, True
			
 
				+
			
 
				+
			
 
				+        # 搜索后等待结果加载
			
 
				+        # page.wait_for_timeout(COLLECT_DELAY)
			
 
				+        # return True
			
 
				+
			
 
				+    except PlaywrightTimeoutError as e:
			
 
				+        logger.error(f" 搜索失败：元素定位超时 - {str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 搜索异常：{str(e)}")
			
 
				+        return None, False  # 失败时返回 (None, False)
			
 
				+
			
 
				+
			
 
				+
			
 
				+#翻下一页
			
 
				+def goto_next_page(page) -> bool:
			
 
				+    """
			
 
				+    尝试翻到下一页；成功返回True，没下一页/翻页失败返回False
			
 
				+    适配常见 ElementUI: .el-pagination .btn-next / .el-pagination__next
			
 
				+    """
			
 
				+    # 多写几个候选，哪个能用就用哪个
			
 
				+    candidates = [
			
 
				+        ".el-pagination button.btn-next:not(.is-disabled)",
			
 
				+        ".el-pagination__next:not(.is-disabled)",
			
 
				+        "button:has-text('下一页'):not([disabled])",
			
 
				+        "a:has-text('下一页')",
			
 
				+    ]
			
 
				+
			
 
				+    next_btn = None
			
 
				+    for sel in candidates:
			
 
				+        loc = page.locator(sel).first
			
 
				+        if loc.count() > 0:
			
 
				+            next_btn = loc
			
 
				+            break
			
 
				+
			
 
				+    if not next_btn:
			
 
				+        return False
			
 
				+
			
 
				+    # 用“当前页第一个商品标题”做翻页完成的判据（比只等networkidle更稳）
			
 
				+    first_title = page.locator(PRODUCT_TITLE_SELECTOR).first
			
 
				+    before = ""
			
 
				+    try:
			
 
				+        if first_title.count() > 0:
			
 
				+            before = first_title.inner_text(timeout=2000).strip()
			
 
				+    except:
			
 
				+        pass
			
 
				+
			
 
				+    try:
			
 
				+        page.evaluate("window.scrollTo(0, 0);")
			
 
				+        next_btn.click(timeout=5000)
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+        # 等列表发生变化（标题变了 / 或者至少第一个标题重新出现）
			
 
				+        if before:
			
 
				+            page.wait_for_function(
			
 
				+                """(sel, oldText) => {
			
 
				+                    const el = document.querySelector(sel);
			
 
				+                    return el && el.innerText && el.innerText.trim() !== oldText;
			
 
				+                }""",
			
 
				+                arg=(PRODUCT_TITLE_SELECTOR, before),
			
 
				+                timeout=5000
			
 
				+            )
			
 
				+        else:
			
 
				+            first_title.wait_for(timeout=1000)
			
 
				+
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f" 翻页失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def popup_guard(page, tag=""):
			
 
				+    """
			
 
				+    全局弹窗/遮罩守卫：多步引导 + 关闭按钮 + 遮罩清理 + 恢复滚动
			
 
				+    tag 仅用于日志区分调用位置
			
 
				+    """
			
 
				+    try:
			
 
				+        # 给弹窗一点出现时间
			
 
				+        page.wait_for_timeout(300)
			
 
				+
			
 
				+        # 1) 连续点“下一步/完成/我知道了/关闭”
			
 
				+        for _ in range(6):
			
 
				+            btn = page.locator(
			
 
				+                "xpath=//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            # 2) 常见的 close icon
			
 
				+            close_btn = page.locator(
			
 
				+                "css=.el-dialog__headerbtn, .el-message-box__headerbtn, .close, .icon-close, .el-icon-close"
			
 
				+            ).first
			
 
				+            if close_btn.count() > 0 and close_btn.is_visible():
			
 
				+                close_btn.click(timeout=1200)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            break
			
 
				+
			
 
				+        # 3) 清遮罩 + 恢复滚动/交互
			
 
				+        page.evaluate(r"""
			
 
				+        () => {
			
 
				+          // 第一步：精准清理已知的遮罩/弹窗类名（Element UI框架常用）
			
 
				+          const selectors = [
			
 
				+            '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+            '.el-message-box__wrapper', '.el-loading-mask'
			
 
				+          ];
			
 
				+          selectors.forEach(sel => document.querySelectorAll(sel).forEach(e => e.remove()));
			
 
				+
			
 
				+          // 泛化兜底：近似全屏 + 高 z-index 的层直接屏蔽
			
 
				+          const all = Array.from(document.querySelectorAll('body *'));
			
 
				+          for (const el of all) {
			
 
				+            const s = getComputedStyle(el); // 获取元素的实际样式（含CSS生效的样式）
			
 
				+            const z = parseInt(s.zIndex || '0', 10);    // 取元素的层级（z-index），默认0
			
 
				+            // 条件1：元素是固定/绝对定位（弹窗/遮罩常见定位方式）+ 层级≥1000（高优先级遮挡）+ 能拦截鼠标事件
			
 
				+            if ((s.position === 'fixed' || s.position === 'absolute') && z >= 1000 && s.pointerEvents !== 'none') {
			
 
				+              const r = el.getBoundingClientRect();     // 获取元素的尺寸和位置
			
 
				+                // 条件2：元素宽度/高度≥屏幕80%（近似全屏遮罩）
			
 
				+              const nearFull = r.width >= innerWidth * 0.8 && r.height >= innerHeight * 0.8;
			
 
				+              if (nearFull) {
			
 
				+                el.style.pointerEvents = 'none';    // 让元素不拦截鼠标点击
			
 
				+                el.style.display = 'none';          // 隐藏元素
			
 
				+              }
			
 
				+            }
			
 
				+          }
			
 
				+        // 第三步：恢复页面滚动功能（弹窗常把页面设为不可滚动）
			
 
				+          document.documentElement.style.overflow = 'auto';     // html标签恢复滚动
			
 
				+          document.body.style.overflow = 'auto';    // body标签恢复滚动
			
 
				+          document.body.classList.remove('el-popup-parent--hidden');  // 移除Element UI的滚动禁用类
			
 
				+        }
			
 
				+        """)
			
 
				+
			
 
				+        logger.info("杀除弹窗成功")
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def open_detail_page(list_page, item, keyword, idx, *, timeout=15000):
			
 
				+    """
			
 
				+    点击商品进入详情页，兼容：
			
 
				+    1) 新开 tab（返回 detail_page != list_page, opened_new_tab=True）
			
 
				+    2) 同 tab 跳转（detail_page == list_page, opened_new_tab=False）
			
 
				+    """
			
 
				+    ctx = list_page.context
			
 
				+    list_url = list_page.url
			
 
				+    detail_page = None
			
 
				+    opened_new_tab = False
			
 
				+
			
 
				+    try:
			
 
				+        # 期望新开 tab（很多站点会这样）
			
 
				+        with ctx.expect_page(timeout=timeout) as p:
			
 
				+            item.click(delay=random.uniform(0.1, 0.3))
			
 
				+        detail_page = p.value
			
 
				+        opened_new_tab = True
			
 
				+        logger.info(f" 「{keyword}」第{idx}个商品 - 新开标签页进入详情")
			
 
				+    except PlaywrightTimeoutError:
			
 
				+        # 兜底：没新开 tab，大概率是同页跳转/弹层
			
 
				+        detail_page = list_page
			
 
				+        opened_new_tab = False
			
 
				+        logger.info(f" 「{keyword}」第{idx}个商品 - 未新开标签页，按同页进入详情处理")
			
 
				+
			
 
				+    return detail_page, opened_new_tab, list_url
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def return_to_list(list_page, detail_page, opened_new_tab, list_url, keyword, idx):
			
 
				+    """
			
 
				+    从详情页返回列表页：
			
 
				+    - 新 tab：关闭 tab，然后 bring_to_front 切回
			
 
				+    - 同 tab：尽量 go_back 回到 list_url；如果没跳转而是弹层，尝试 ESC
			
 
				+    """
			
 
				+    # 如果浏览器/页面已经被关了，直接退出，避免二次异常
			
 
				+    if list_page is None or list_page.is_closed():
			
 
				+        logger.warning(f" 「{keyword}」第{idx}个商品 - 列表页已关闭，无法切回")
			
 
				+        return
			
 
				+
			
 
				+    if opened_new_tab:
			
 
				+        # 只关“新开的详情 tab”，绝不关 list_page
			
 
				+        try:
			
 
				+            if detail_page and (detail_page is not list_page) and (not detail_page.is_closed()):
			
 
				+                detail_page.close()
			
 
				+                logger.info(f"📌 「{keyword}」第{idx}个商品 - 已关闭详情页标签页")
			
 
				+        except Exception as e:
			
 
				+            logger.warning(f" 「{keyword}」第{idx}个商品 - 关闭详情页失败：{e}")
			
 
				+
			
 
				+        # 切回列表页
			
 
				+        try:
			
 
				+            list_page.bring_to_front()
			
 
				+            list_page.mouse.move(random.randint(100, 300), random.randint(200, 400))
			
 
				+            random_delay(0.3, 0.8)
			
 
				+            list_page.wait_for_load_state("networkidle")
			
 
				+            logger.info(f" 「{keyword}」第{idx}个商品 - 已切回列表页（新tab模式）")
			
 
				+        except Exception as e:
			
 
				+            logger.warning(f" 「{keyword}」第{idx}个商品 - 切回列表页失败：{e}")
			
 
				+        return
			
 
				+
			
 
				+    # 同 tab：detail_page == list_page
			
 
				+    try:
			
 
				+        # 1) 如果 URL 变了，说明确实跳转了 → go_back 回去
			
 
				+        if list_page.url != list_url:
			
 
				+            for _ in range(3):  # 最多退 3 次，防止死循环
			
 
				+                list_page.go_back(timeout=15000)
			
 
				+                list_page.wait_for_load_state("domcontentloaded", timeout=15000)
			
 
				+                random_delay(0.2, 0.5)
			
 
				+                if list_page.url == list_url:
			
 
				+                    break
			
 
				+            logger.info(f" 「{keyword}」第{idx}个商品 - 已返回列表页（同tab跳转模式）")
			
 
				+        else:
			
 
				+            # 2) URL 没变：可能是弹层详情 → 尝试 ESC 关闭弹层
			
 
				+            list_page.keyboard.press("Escape")
			
 
				+            random_delay(0.2, 0.5)
			
 
				+            logger.info(f" 「{keyword}」第{idx}个商品 - 已尝试关闭弹层并留在列表页（同tab弹层模式）")
			
 
				+
			
 
				+        list_page.bring_to_front()
			
 
				+        list_page.wait_for_load_state("networkidle")
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f" 「{keyword}」第{idx}个商品 - 同tab返回列表页失败：{e}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+#判断店名是否已经在数据库
			
 
				+def shop_is_exists_database(shop):
			
 
				+    try:
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor(pymysql.cursors.DictCursor)     # 改为字典游标
			
 
				+        query_sql = """
			
 
				+            SELECT province, city, business_license_company, qualification_number FROM ybm_shop_info_middle
			
 
				+            WHERE shop = %s
			
 
				+"""
			
 
				+        cursor.execute(query_sql, (shop,))
			
 
				+        result = cursor.fetchone()
			
 
				+
			
 
				+        # 正确的调试方式（替代cursor._last_executed）
			
 
				+        print(f"【调试】传入的店铺名：{repr(shop)}")  # repr能显示空格/隐藏字符
			
 
				+        print(f"【调试】查询参数：{shop}")
			
 
				+        print(f"【调试】查询结果：{result} → 函数返回：{bool(result)}")
			
 
				+
			
 
				+        is_exists = bool(result)
			
 
				+        if is_exists:
			
 
				+            logger.info(f"【店铺存在校验】店铺已存在 | 店铺名：{repr(shop)} | 结果：存在（True）不要执行采集店铺")
			
 
				+        else:
			
 
				+            logger.info(f"【店铺存在校验】店铺不存在 | 店铺名：{repr(shop)} | 结果：不存在（False）")
			
 
				+
			
 
				+        return is_exists, result
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"查询店铺失败：{e}")
			
 
				+        return False, None  # 异常时明确返回False，避免返回None
			
 
				+    finally:
			
 
				+        # 修复：关闭游标和连接，避免泄露
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+
			
 
				+def insert_shop_info_to_db(shop,contact_address, qualification_number, business_license_company, business_license_address, scrape_date, platform, province, city, create_time, update_time):
			
 
				+    """
			
 
				+    把字段插入到ybm_shop_info_middle表
			
 
				+    :param 各参数: 你要插入的字段值（空字符串也可）
			
 
				+    :return: bool - 插入成功返回True，失败返回False
			
 
				+    """
			
 
				+    # 1. 初始化数据库连接和游标
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    try:
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+        # 2. 构造INSERT SQL语句（参数化查询，防止SQL注入）
			
 
				+        # 注意：请确认ybm_shop_info_middle表的字段名和以下%s的顺序对应！
			
 
				+        # 若表字段名不同，修改INSERT后的字段列表（比如你的表字段是credit_code而非qualification_number，要对应改）
			
 
				+        sql = """
			
 
				+        INSERT INTO ybm_shop_info_middle (
			
 
				+            shop,
			
 
				+            contact_address,
			
 
				+            qualification_number,
			
 
				+            business_license_company,
			
 
				+            business_license_address,
			
 
				+            scrape_date,
			
 
				+            platform,
			
 
				+            province,
			
 
				+            city,
			
 
				+            create_time,
			
 
				+            update_time
			
 
				+        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)
			
 
				+        ON DUPLICATE KEY UPDATE
			
 
				+        contact_address = VALUES(contact_address),  # 重复时更新联系地址
			
 
				+        qualification_number = VALUES(qualification_number),  # 更新社会信用代码
			
 
				+        business_license_company = VALUES(business_license_company),  # 更新公司名
			
 
				+        business_license_address = VALUES(business_license_address),  # 更新地址
			
 
				+        scrape_date = VALUES(scrape_date),
			
 
				+        platform = VALUES(platform),
			
 
				+        province = VALUES(province),
			
 
				+        city = VALUES(city),
			
 
				+        update_time = VALUES(update_time)  # 重复时更新update_time
			
 
				+        """
			
 
				+
			
 
				+
			
 
				+
			
 
				+        # 3. 构造插入的参数（顺序必须和SQL中的%s一一对应）
			
 
				+        params = (
			
 
				+            shop,                          # 店铺名称
			
 
				+            contact_address,               # 联系地址
			
 
				+            qualification_number,          # 社会信用代码
			
 
				+            business_license_company,      # 营业执照公司名
			
 
				+            business_license_address,      # 营业执照地址
			
 
				+            scrape_date,                   # 爬取日期
			
 
				+            platform,                      # 平台名称（药九九）
			
 
				+            province,                      # 省份
			
 
				+            city,                          # 城市
			
 
				+            create_time,                  # create_time（当前时间）
			
 
				+            update_time
			
 
				+        )
			
 
				+
			
 
				+        # 4. 执行SQL并提交事务
			
 
				+        cursor.execute(sql, params)
			
 
				+        conn.commit()
			
 
				+        print(f"✅ 数据插入成功！店铺：{shop} | 公司：{business_license_company}")
			
 
				+        return True
			
 
				+
			
 
				+    except pymysql.MySQLError as e:
			
 
				+        # 数据库相关错误（连接失败、SQL语法错误、字段不匹配等）
			
 
				+        print(f"MySQL插入失败：{e}")
			
 
				+        print(f"详细异常信息：{traceback.format_exc()}")  # 打印详细堆栈，方便排查
			
 
				+        if conn:
			
 
				+            conn.rollback()  # 插入失败回滚事务
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        # 其他未知错误
			
 
				+        print(f"插入数据时发生未知错误：{e}")
			
 
				+        print(f"详细异常信息：{traceback.format_exc()}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    finally:
			
 
				+        # 5. 无论成功/失败，都关闭游标和连接（释放资源）
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+def insert_single_to_mysql(single_data):
			
 
				+    """
			
 
				+    逐条插入单条数据到MySQL数据库
			
 
				+    :param single_data: 单条商品数据元组
			
 
				+    :return: 插入是否成功
			
 
				+    """
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    try:
			
 
				+
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+
			
 
				+        # 2. 确保表存在（兼容表未创建的情况）
			
 
				+        # cursor.execute(CREATE_TABLE_SQL)
			
 
				+        insert_sql = """
			
 
				+        INSERT INTO ybm_drug_middle (
			
 
				+            product, my_good_price, min_price, manufacture_date, expiry_date,
			
 
				+            shop, business_license_company, province, city, manufacturer,
			
 
				+            specification, approval_number, product_link, scrape_date,
			
 
				+            scrape_province, availability, credit_code, platform, search_key,
			
 
				+            number, is_sold_out, sales, inventory, snapshot_url, update_time, create_time
			
 
				+        ) VALUES (%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s);
			
 
				+        """
			
 
				+
			
 
				+
			
 
				+        # 字段值（与SQL占位符顺序严格对应）
			
 
				+        values = (
			
 
				+            single_data["product"],
			
 
				+            single_data["my_good_price"],
			
 
				+            single_data["min_price"],
			
 
				+            single_data["manufacture_date"],
			
 
				+            single_data["expiry_date"],
			
 
				+            single_data["shop"],
			
 
				+            single_data["business_license_company"],
			
 
				+            single_data["province"],
			
 
				+            single_data["city"],
			
 
				+            single_data["manufacturer"],
			
 
				+            single_data["specification"],
			
 
				+            single_data["approval_number"],
			
 
				+            single_data["product_link"],
			
 
				+            single_data["scrape_date"],
			
 
				+            single_data["scrape_province"],
			
 
				+            single_data["availability"],
			
 
				+            single_data["credit_code"],
			
 
				+            single_data["platform"],
			
 
				+            single_data["search_key"],
			
 
				+            single_data["number"],
			
 
				+            single_data["is_sold_out"],
			
 
				+            single_data["sales"],
			
 
				+            single_data["inventory"],
			
 
				+            single_data["snapshot_url"],
			
 
				+            single_data["update_time"],
			
 
				+            single_data["create_time"]
			
 
				+        )
			
 
				+
			
 
				+
			
 
				+        cursor.execute(insert_sql, values)
			
 
				+        conn.commit()
			
 
				+        logger.info(f" 单条数据插入成功：...")  # 仅打印标题前20字
			
 
				+        return True
			
 
				+    except OperationalError as e:
			
 
				+        logger.error(f" MySQL连接失败：{str(e)}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    except ProgrammingError as e:
			
 
				+        logger.error(f" SQL语法错误：{str(e)}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 单条数据插入失败：{str(e)}")
			
 
				+        if conn:
			
 
				+            conn.rollback()
			
 
				+        return False
			
 
				+    finally:
			
 
				+        # 关闭游标和连接
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def check_dup_in_biz_db(product_link, discount_price_val, scrape_date):
			
 
				+    """直接查询业务表是否存在该商品链接+价格"""
			
 
				+    conn = None
			
 
				+    cursor = None
			
 
				+    log_context = (
			
 
				+        f"【去重校验】商品链接：{product_link.strip()} | 价格：{discount_price_val} "
			
 
				+        f"采集日期：{scrape_date.strip()}"
			
 
				+    )
			
 
				+    try:
			
 
				+        conn = pymysql.connect(**MYSQL_CONFIG)
			
 
				+        cursor = conn.cursor()
			
 
				+        sql = """
			
 
				+            SELECT * FROM ybm_drug_middle
			
 
				+            WHERE product_link = %s AND min_price = %s AND scrape_date=%s
			
 
				+            """
			
 
				+        # 先执行查询
			
 
				+        cursor.execute(sql, (product_link.strip(), discount_price_val, scrape_date.strip()))
			
 
				+        # 再判断是否有结果
			
 
				+        # 如果 fetchone() 返回元组（比如(1,)）→ (1,) is not None → 结果为 True；
			
 
				+        # 如果 fetchone() 返回 None → None is not None → 结果为 False。
			
 
				+        is_dup = cursor.fetchone() is not None
			
 
				+
			
 
				+        if is_dup:
			
 
				+            logger.warning(f"{log_context} - 表中已存在重复记录，跳过本次采集")
			
 
				+        else:
			
 
				+            logger.info(f"{log_context} - 表中无重复记录，正常采集")
			
 
				+
			
 
				+        return is_dup
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"查询业务表去重失败：{str(e)}")
			
 
				+        return False
			
 
				+    finally:
			
 
				+        if cursor:
			
 
				+            cursor.close()
			
 
				+        if conn:
			
 
				+            conn.close()
			
 
				+
			
 
				+
			
 
				+# 压缩图片函数
			
 
				+def compress_image(image_data, max_size=4*1024*1024):  # 4MB上限
			
 
				+    try:
			
 
				+        img = Image.open(BytesIO(image_data))
			
 
				+
			
 
				+        # 将RGBA模式转为RGB（兼容JPEG）
			
 
				+        if img.mode in ('RGBA', 'P'):  # P是PNG的调色板模式，也需转换
			
 
				+            # 新建白色背景的RGB图片，把透明图贴上去（避免透明区域变黑）
			
 
				+            bg_img = Image.new('RGB', img.size, (255, 255, 255))
			
 
				+            bg_img.paste(img, mask=img.split()[-1] if img.mode == 'RGBA' else None)
			
 
				+            img = bg_img
			
 
				+
			
 
				+        # 缩小分辨率（按比例缩到宽≤1000px）
			
 
				+        if img.width > 1000:
			
 
				+            ratio = 1000 / img.width
			
 
				+            new_size = (int(img.width*ratio), int(img.height*ratio))
			
 
				+            img = img.resize(new_size, Image.Resampling.LANCZOS)
			
 
				+
			
 
				+        # 降低质量（JPG）/压缩（PNG）
			
 
				+        output = BytesIO()
			
 
				+        img.save(output, format='JPEG', quality=80)  # quality越小体积越小
			
 
				+        compressed_data = output.getvalue()
			
 
				+
			
 
				+        # 若仍超限，继续降质量
			
 
				+        if len(compressed_data) > max_size:
			
 
				+            img.save(output, format='JPEG', quality=60)
			
 
				+            compressed_data = output.getvalue()
			
 
				+        return compressed_data
			
 
				+    except Exception as e:
			
 
				+        logger.debug(f"图片压缩失败：{e}")
			
 
				+        return image_data  # 压缩失败返回原始数据
			
 
				+
			
 
				+
			
 
				+def download_image_to_base64(image_url, save_dir = "./download_images"):
			
 
				+    """下载网络图片，返回图片二进制数据（BytesIO）"""
			
 
				+    try:
			
 
				+        if not os.path.exists(save_dir):
			
 
				+            os.makedirs(save_dir)  # 创建多级目录（比如a/b/c）
			
 
				+            print(f"创建本地保存目录：{save_dir}")
			
 
				+    except Exception as e:
			
 
				+        print(f"创建保存目录失败：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+    try:
			
 
				+        # 模拟浏览器请求头，避免被服务器拦截
			
 
				+        headers = {
			
 
				+            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36"
			
 
				+        }
			
 
				+        response = requests.get(image_url, headers=headers, timeout=15)
			
 
				+        response.raise_for_status()
			
 
				+        compressed_data = compress_image(response.content)
			
 
				+        image_base64 = base64.b64encode(compressed_data).decode("utf-8")
			
 
				+        image_data = compressed_data
			
 
				+
			
 
				+        # 步骤3：提取图片文件名（从URL中截取，避免重复）
			
 
				+        # 示例URL：https://xxx.com/123.jpg → 文件名：123.jpg
			
 
				+        file_name = image_url.split("/")[-1]
			
 
				+        # 处理特殊字符（避免文件名非法）
			
 
				+        file_name = file_name.replace("?", "").replace("&", "").replace("=", "")
			
 
				+        save_path = os.path.join(save_dir, file_name)  # 完整保存路径
			
 
				+
			
 
				+        # 步骤4：保存图片到本地
			
 
				+        with open(save_path, "wb") as f:
			
 
				+            f.write(image_data)
			
 
				+        print(f"图片已保存到本地：{save_path}")
			
 
				+
			
 
				+
			
 
				+        return image_base64
			
 
				+    except requests.exceptions.Timeout:
			
 
				+        print(f"下载图片超时：{image_url}")
			
 
				+        return None
			
 
				+    except requests.exceptions.HTTPError as e:
			
 
				+        print(f"图片URL无效（状态码：{response.status_code}）：{image_url}")
			
 
				+        return None
			
 
				+    except Exception as e:
			
 
				+        print(f"下载图片失败：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+
			
 
				+def get_ocr_res(img):
			
 
				+    try:
			
 
				+        #img地址
			
 
				+        print(f'开始识别图片:{img}')
			
 
				+        request_url = request_url_config
			
 
				+
			
 
				+        img_base64 = download_image_to_base64(img)
			
 
				+        if not img_base64:
			
 
				+            print("图片下载/转Base64失败，终止OCR识别")
			
 
				+            return None
			
 
				+
			
 
				+        # 获取access_token
			
 
				+        access_token = get_access_token()
			
 
				+        if not access_token:
			
 
				+            print("获取access_token失败，无法调用OCR接口")
			
 
				+            return None
			
 
				+        params = {"image": img_base64}
			
 
				+        request_url = request_url + "?access_token=" + access_token
			
 
				+        headers = {'content-type': 'application/x-www-form-urlencoded'}
			
 
				+        response = requests.post(request_url, data=params, headers=headers)
			
 
				+
			
 
				+        if response:
			
 
				+            res = response.json()
			
 
				+            # 检查OCR返回是否有错误
			
 
				+            if "error_code" in res:
			
 
				+                print(f"百度OCR接口错误：{res['error_msg']}（错误码：{res['error_code']}）")
			
 
				+                return None
			
 
				+            # 解析识别结果
			
 
				+            new_dic = dict()
			
 
				+            for ite in res['words_result'].keys():
			
 
				+                new_dic[ite] = res['words_result'][ite]['words']
			
 
				+            print('资质数据信息', new_dic)
			
 
				+            return new_dic
			
 
				+        else:
			
 
				+            print("OCR接口返回空响应")
			
 
				+            return None
			
 
				+    except requests.exceptions.RequestException as e:
			
 
				+        print(f"网络错误（图片下载/OCR请求失败）：{str(e)}")
			
 
				+        return None
			
 
				+    except KeyError as e:
			
 
				+        print(f"OCR响应格式异常，缺失字段：{str(e)}")
			
 
				+        return None
			
 
				+    except Exception as e:
			
 
				+        print(f"OCR识别未知错误：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+def get_access_token():
			
 
				+    AppKey = AppKey_config
			
 
				+    AppSrcret = AppSecret_config
			
 
				+    token_url =token_url_config
			
 
				+    url = f"{token_url}?grant_type=client_credentials&client_id={AppKey}&client_secret={AppSrcret}"
			
 
				+
			
 
				+    payload = ""
			
 
				+    headers = {
			
 
				+        'Content-Type': 'application/json',
			
 
				+        'Accept': 'application/json'
			
 
				+    }
			
 
				+    try:
			
 
				+        response = requests.request("POST", url, headers=headers, data=payload)
			
 
				+        response.raise_for_status()  # 触发HTTP错误
			
 
				+        return response.json()['access_token']
			
 
				+    except Exception as e:
			
 
				+        print(f"获取access_token失败：{str(e)}")
			
 
				+        return None
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def extract_province_city(address):
			
 
				+    """
			
 
				+    从地址中提取省份和城市
			
 
				+    :param address: 营业执照地址（如"福建省福州市马尾区"）
			
 
				+    :return: (province, city) - 提取到的省份/城市，提取失败返回空字符串
			
 
				+    """
			
 
				+    if not address:  # 地址为空，直接返回空
			
 
				+        return "", ""
			
 
				+
			
 
				+    # 正则1：匹配省份（兼容省/自治区/直辖市/特别行政区）
			
 
				+    province_pattern = re.compile(r'([^省]+省|.+自治区|北京市|上海市|天津市|重庆市|.+特别行政区)')
			
 
				+    province_match = province_pattern.search(address)
			
 
				+    province = province_match.group(1) if province_match else ""
			
 
				+
			
 
				+    # 正则2：匹配城市（兼容市/自治州/地区/盟，且排除省份已匹配的部分）
			
 
				+    # 先去掉已匹配的省份，再匹配城市
			
 
				+    address_remain = address.replace(province, "").strip() if province else address.strip()
			
 
				+    city_pattern = re.compile(r'([^市]+市|.+自治州|.+地区|.+盟|^[^\d区县镇]+)')
			
 
				+    city_match = city_pattern.search(address_remain)
			
 
				+    city = city_match.group(1).strip() if city_match else ""
			
 
				+
			
 
				+    # 兼容直辖市（如"北京市朝阳区"→city=北京市）
			
 
				+    if province in ["北京市", "上海市", "天津市", "重庆市"]:
			
 
				+        city = province
			
 
				+
			
 
				+    # 兼容地址不规范的情况（如"福建福州马尾区"，无"省"/"市"字）
			
 
				+    if not province and not city:
			
 
				+        # 匹配前两个地名（如"福建福州"→province=福建，city=福州）
			
 
				+        simple_pattern = re.compile(r'^([^\d区县镇]+)')
			
 
				+        simple_match = simple_pattern.search(address)
			
 
				+        if simple_match:
			
 
				+            city = simple_match.group(1).strip()  # 只有城市，省份留空
			
 
				+
			
 
				+    if city and province in city:
			
 
				+        city = city.replace(province, "").strip()
			
 
				+
			
 
				+    return province.strip(), city.strip()
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+#采集数据核心
			
 
				+def collect_data(page, keyword):
			
 
				+    """
			
 
				+    1) 先获取当前页商品个数（count）
			
 
				+    2) 按循环次数采集；每循环15次滚动一次 slow_scroll_1200px
			
 
				+    3) 当前页循环完 -> goto_next_page；有下一页继续；无下一页结束该关键词
			
 
				+    """
			
 
				+    collect_result = []
			
 
				+    # seen = set()
			
 
				+
			
 
				+    logger.info(f"📊 开始采集「{keyword}」的商品数据")
			
 
				+    page.wait_for_load_state("networkidle")
			
 
				+    #没有找到商品就跳过这个商品
			
 
				+
			
 
				+
			
 
				+    page_no = 1
			
 
				+    while True:
			
 
				+        logger.info(f"\n📄 「{keyword}」开始采集第 {page_no} 页")
			
 
				+
			
 
				+        #  记录列表页URL（可用于你后续兜底）
			
 
				+        list_page_url = page.url
			
 
				+        logger.info(f"📌 已记录商品列表页URL：{list_page_url}")
			
 
				+
			
 
				+        # ✅ 先获取当前页商品个数
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+        total_limit = page.locator(PRODUCT_ITEM_SELECTOR).count()
			
 
				+        logger.info(f"📌 「{keyword}」第{page_no}页 初始商品个数（count）：{total_limit}")
			
 
				+
			
 
				+        # 重置当前页的采集计数
			
 
				+        collected_count = 0
			
 
				+
			
 
				+
			
 
				+        # ========= 初始化无匹配计数器（记录标题不包含核心关键词的次数） =========
			
 
				+        # no_match_count = 0  # 无匹配次数初始化为0
			
 
				+        # MAX_NO_MATCH = 10   # 最大无匹配次数阈值
			
 
				+
			
 
				+        #补充没找到关键词的兜底
			
 
				+        not_found_keywords = page.locator("span:has-text('新品登记')")
			
 
				+        if not_found_keywords.count() > 0:
			
 
				+            logger.warning(f"⚠️ 关键词「{keyword}」无匹配商品，直接跳过整个关键词采集")
			
 
				+            return []
			
 
				+
			
 
				+        for idx in range(total_limit):
			
 
				+            detail_page = None
			
 
				+            try:
			
 
				+                item = page.locator(PRODUCT_ITEM_SELECTOR).nth(idx)
			
 
				+                collected_count += 1  # 实际采集计数（用于日志）
			
 
				+                # ========= 反爬随机延迟（保留你的原逻辑也行） =========
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+                delay = random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                logger.info(f"📌 「{keyword}」第{page_no}页 第{collected_count}/{total_limit}个商品 - 等待{delay:.2f}秒后采集（反爬）")
			
 
				+
			
 
				+
			
 
				+                # 1. 初始化所有字段默认值
			
 
				+                title = "无标题"
			
 
				+                price = "0.00"
			
 
				+                shop = "无店名"
			
 
				+                expiry_date = "无有效期"
			
 
				+                manufacture_date = "无生产日期"
			
 
				+                approval_number = "无批准文号"
			
 
				+                manufacturer = "未知公司"
			
 
				+                # discount_price = "0.00"
			
 
				+                spec = "未知规格"
			
 
				+                num = 1  # ✅ 默认 1
			
 
				+                platform = '药帮忙'
			
 
				+                current_time = datetime.now().strftime("%Y-%m-%d")
			
 
				+                is_sold_out = 0
			
 
				+
			
 
				+
			
 
				+                # ========= 售罄不跳过 =========
			
 
				+                # sold_locator = item.locator('div[data-v-480da687].gc-l1-cirle_tip')
			
 
				+                # if sold_locator.count() > 0:
			
 
				+                #     is_sold_out = 1
			
 
				+                #     logger.warning(f" 「{keyword}」第{page_no}页 第{collected_count}个商品已售罄")
			
 
				+                    # if collected_count % 5 == 0 and collected_count > 0:
			
 
				+                    #     logger.info("采满5个往下滑")
			
 
				+                    #     slow_scroll_400px(page)
			
 
				+                    #     page.wait_for_load_state("networkidle")
			
 
				+                    # continue
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #提取商品ID
			
 
				+                # product_id_elem = item.locator('div.product-card[data-product-id]')
			
 
				+                # if product_id.count() > 0:
			
 
				+                #     product_id = product_id_elem.get_attribute("data-product-id")
			
 
				+                #     logger.info(f"✅ 提取到data-product-id：{product_id}")  # 输出：5678955
			
 
				+
			
 
				+                # 提取商品标题（处理空值）
			
 
				+                product_locator = item.locator(PRODUCT_TITLE_SELECTOR)
			
 
				+                if product_locator.count() > 0:
			
 
				+                    title = product_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 列表页标题：{title}{'='*10}")
			
 
				+                else:
			
 
				+
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品 - 列表页标题元素未找到，使用默认值：{title}")
			
 
				+
			
 
				+
			
 
				+                #关键词不在标题中，跳过当前商品
			
 
				+                # core_keyword = re.sub(r'^999[\s\(\)（）、·]*', '', keyword)
			
 
				+                # if core_keyword not in title:
			
 
				+                #     no_match_count += 1
			
 
				+                #     logger.warning(f" 「{keyword}」第{collected_count}个商品 - 标题「{title}」不包含核心关键词「{core_keyword}」（无匹配次数：{no_match_count}/{MAX_NO_MATCH}），跳过本次循环")
			
 
				+                #     continue
			
 
				+
			
 
				+                # if no_match_count >= MAX_NO_MATCH:
			
 
				+                #     logger.error(f"❌ 关键词「{keyword}」无匹配商品次数已达{MAX_NO_MATCH}次，直接终止当前关键词采集，进入下一个关键词")
			
 
				+                #     return []
			
 
				+
			
 
				+
			
 
				+                # 提取价格（带缺失日志）
			
 
				+                # price_locator = item.locator(PRODUCT_PRICE_SELECTOR)
			
 
				+                price_int = item.locator('//span[@class="price-int"]').text_content().strip()
			
 
				+                # 2. 提取小数部分（注意可能为空，比如价格是整数13）
			
 
				+                price_decimal_elem = item.locator('//span[@class="price-decimal"]')
			
 
				+                if price_decimal_elem.count() > 0:
			
 
				+                    price_decimal = price_decimal_elem.text_content().strip()
			
 
				+                else:
			
 
				+                    price_decimal = ''
			
 
				+                # 3. 拼接完整价格
			
 
				+                full_price = f"{price_int}{price_decimal}"
			
 
				+                # 转成浮点数（便于后续计算/入库）
			
 
				+                full_price_num = float(full_price)
			
 
				+                logger.info(f"✅ 提取到价格：{full_price_num}")
			
 
				+                if full_price_num is None:
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页采购价格元素未找到，使用默认值：{price}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # if full_price_num > 0:
			
 
				+                #     price = price_locator.inner_text(timeout=3000).strip()
			
 
				+                #     logger.info(f"{'='*10}{keyword}」第{collected_count}个商品 - 列表页采购价格：{price}{'='*10}")
			
 
				+                # else:
			
 
				+                #     price = "0.00"  # 初始化默认值，避免后续报错
			
 
				+                #     logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页采购价格元素未找到，使用默认值：{price}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # 5. 提取公司名称（带缺失日志）
			
 
				+                manufacturer_locator = item.locator(PRODUCT_COMPANY_SELECTOR)
			
 
				+                if manufacturer_locator.count() > 0:
			
 
				+                    manufacturer = manufacturer_locator.inner_text(timeout=3000).strip()
			
 
				+
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 列表页公司名：{manufacturer}{'='*10}")
			
 
				+                else:
			
 
				+
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页公司名称元素未找到，使用默认值：{manufacturer}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #提取店铺名称
			
 
				+                shop_locator = item.locator(PRODUCT_STORE_SELECTOR)
			
 
				+                if shop_locator.count() > 0:
			
 
				+                    shop = shop_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 列表页店名：{shop}{'='*10}")
			
 
				+                else:
			
 
				+
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 列表页店铺名称元素未找到，使用默认值：{shop}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # 提取折扣价
			
 
				+                discount_price_val_origin = ""
			
 
				+                discount_price = ""
			
 
				+                discount_price_locator = item.locator('span[data-v-4cb6cc1f].discount-int').first
			
 
				+                if discount_price_locator.count() > 0:
			
 
				+                    discount_price = discount_price_locator.inner_text(timeout=3000).strip()
			
 
				+                    discount_price_val_origin = discount_price
			
 
				+                    match = re.search(r'\d+\.?\d*', str(discount_price_val_origin))
			
 
				+                    discount_price_val = float(match.group()) if match else 0.00
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页折扣价：{discount_price_val}{'='*10}")
			
 
				+                else:
			
 
				+                    #如果没有拿原价替换
			
 
				+                    # price = float(price.replace("￥", "").replace(",", "")) if price.replace("￥", "").replace(",", "").replace(".", "") else "0.00"
			
 
				+                    discount_price_val = full_price_num
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 折扣价元素未找到，使用采购价兜底：{discount_price_val}")
			
 
				+
			
 
				+                merged_price = f"{full_price_num}{discount_price_val_origin}" if discount_price_val_origin else full_price_num
			
 
				+
			
 
				+                # 提取有效期（处理空值）
			
 
				+                expiry_date_locator = item.locator(f"{PRODUCT_VALIDITY_SELECTOR}")
			
 
				+                if expiry_date_locator.count() > 0:
			
 
				+                    expiry_date = expiry_date_locator.inner_text(timeout=3000).strip().replace('-', '')    #.replace('近效期','')
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页有效期：{expiry_date}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 有效期元素未找到，使用默认值：{expiry_date}")
			
 
				+
			
 
				+                #获取product_id
			
 
				+                # product_id = None
			
 
				+                # try:
			
 
				+                #     product_id = item.get_attribute("data-product-id")
			
 
				+                #     if product_id:
			
 
				+                #         product_id = product_id.strip()
			
 
				+                #         logger.info(f"✅ 「{keyword}」第{collected_count}个商品 - 提取到product_id：{product_id}")
			
 
				+
			
 
				+                # ========= 模拟点击商品进入详情页 =========
			
 
				+                logger.info(
			
 
				+                    f"📌 「{keyword}」第{page_no}页 第{collected_count}个商品「{title}」- 模拟鼠标移动并点击"
			
 
				+                )
			
 
				+
			
 
				+                # 点击商品项容器，触发详情展示
			
 
				+                # ========== 点击商品跳详情页 ==========
			
 
				+                # 反爬：模拟真人鼠标移动到商品上再点击（不是直接点击）
			
 
				+                item.hover()  # 先悬停
			
 
				+                random_delay(0.2, 0.5)  # 悬停后延迟
			
 
				+                item.dispatch_event("mousedown")
			
 
				+                random_delay(0.05, 0.15)  # 鼠标按下后延迟
			
 
				+                item.dispatch_event("mouseup")
			
 
				+                random_delay(0.05, 0.1)   # 鼠标松开后延迟
			
 
				+
			
 
				+
			
 
				+
			
 
				+                try:
			
 
				+                    with page.context.expect_page(timeout=60000) as p:
			
 
				+                        item.click(delay=random.uniform(0.1, 0.3))
			
 
				+                    detail_page = p.value
			
 
				+                except PlaywrightTimeoutError:
			
 
				+                    logger.warning(
			
 
				+                        f" 「{keyword}」第{page_no}页 第{collected_count}个商品「{title}」- 未检测到新标签页，使用当前页采集详情"
			
 
				+                    )
			
 
				+                    detail_page = None  # 标记为无新标签页，避免关闭列表页
			
 
				+
			
 
				+
			
 
				+                # 等待详情加载（优先用新标签页，无则用列表页）
			
 
				+                target_page = detail_page if detail_page else page
			
 
				+                target_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+                delay = random_delay(MIN_PAGE_DELAY, MAX_PAGE_DELAY)
			
 
				+                logger.info(
			
 
				+                    f"📌 「{keyword}」第{page_no}页 第{collected_count}个商品「{title}」- 详情页加载完成，等待{delay:.2f}秒（反爬）"
			
 
				+                )
			
 
				+                # 反爬：检测详情页反爬验证
			
 
				+                # check_anti_crawl(page)
			
 
				+                # ========== 采集详情页的专属信息（有效期/生产日期/批准文号） ==========
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #获取商品详情页链接
			
 
				+                product_link = target_page.url
			
 
				+                logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页链接：{product_link}{'='*10}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # ========= ✅ 去重逻辑，拿商品链接和折扣价和有效期和采集日期 =========
			
 
				+                if check_dup_in_biz_db(product_link, full_price_num, current_time):
			
 
				+                    logger.warning(f" 「{keyword}」第{page_no}页 第{collected_count}个商品（重复）：{title}，跳过")
			
 
				+                    # ========== 关闭新标签页，切回列表页 ==========
			
 
				+                    if detail_page and not detail_page.is_closed():
			
 
				+                        detail_page.close()  # 关闭详情页标签
			
 
				+                        logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 已关闭详情页标签页")
			
 
				+                    # 切回原列表页（第一个标签页）
			
 
				+                    page.bring_to_front()  # 激活列表页
			
 
				+                    page.mouse.move(random.randint(100, 300), random.randint(200, 400))  # 随机移动鼠标
			
 
				+                    random_delay(0.5, 1.0)  # 增加切换后延迟
			
 
				+                    page.wait_for_load_state("networkidle")
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                    logger.info(f" 「{keyword}」第{collected_count}个商品「{title}」- 已切回列表页")
			
 
				+                    if collected_count % 5 == 0 and collected_count > 0:
			
 
				+                            logger.info("采满5个往下滑")
			
 
				+                            slow_scroll_400px(page)
			
 
				+                            page.wait_for_load_state("networkidle")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+                # key = f"{product_link.strip()}|{discount_price_val}"
			
 
				+                # if key in seen:
			
 
				+                #     logger.warning(
			
 
				+                #         f" 「{keyword}」第{page_no}页 第{collected_count}个商品（重复）：{title}，跳过"
			
 
				+                #     )
			
 
				+                #     if collected_count % 5 == 0 and collected_count > 0:
			
 
				+                #         logger.info("采满15个往下滑")
			
 
				+                #         slow_scroll_400px(page)
			
 
				+                #         page.wait_for_load_state("networkidle")
			
 
				+                #     continue
			
 
				+                # seen.add(key)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # 提取生产日期（修复完成）
			
 
				+                manufacture_date_locator = target_page.locator('//div[contains(@class, "spec-info-item") and .//div[contains(@class, "spec-info-item-label") and normalize-space(.)="生产日期"]]//div[contains(@class, "spec-info-item-value-text")]')
			
 
				+                if manufacture_date_locator.count() > 0:
			
 
				+                    manufacture_date = manufacture_date_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页生产日期：{manufacture_date}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 生产日期元素未找到，使用默认值：{manufacture_date}")
			
 
				+
			
 
				+
			
 
				+                # 提取批准文号
			
 
				+                approval_number_locator = target_page.locator('//div[contains(@class, "spec-info-item") and .//div[contains(@class, "spec-info-item-label") and normalize-space(.)="批准文号"]]//div[contains(@class, "spec-info-item-value-text")]')
			
 
				+                if approval_number_locator.count() > 0:
			
 
				+                    approval_number = approval_number_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页批准文号：{approval_number}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 批准文号元素未找到，使用默认值：{approval_number}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #提取规格
			
 
				+                spec_locator = target_page.locator('//div[contains(@class, "spec-info-item") and .//div[contains(@class, "spec-info-item-label") and normalize-space(.)="规格"]]//div[contains(@class, "spec-info-item-value-text")]')
			
 
				+                if spec_locator.count() > 0:
			
 
				+                    spec = spec_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页规格：{spec}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count，补充规格数量不足的提示
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 规格元素数量不足，使用默认值：{spec}")
			
 
				+
			
 
				+                # input("...")
			
 
				+
			
 
				+
			
 
				+                #提取库存
			
 
				+                storage = ''
			
 
				+                storage_locator = target_page.locator('[data-v-51f0e85d].detail-input-num-right-title')
			
 
				+                if storage_locator.count() > 0:
			
 
				+                    storage = storage_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页库存：{storage}{'='*10}")
			
 
				+                else:
			
 
				+                    # 修复：替换未定义的i为collected_count，补充规格数量不足的提示
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 库存元素数量不足，使用默认值：{storage}")
			
 
				+
			
 
				+
			
 
				+                #提取销量
			
 
				+                sell = ''
			
 
				+                sell_locator = target_page.locator('div.detail-info-content-item-value-price-top-right div[data-v-95163d4a]')
			
 
				+                if sell_locator.count() > 0:
			
 
				+                    sell = sell_locator.inner_text(timeout=3000).strip()
			
 
				+                    logger.info(f"{'='*10}「{keyword}」第{collected_count}个商品 - 详情页销量：{sell}{'='*10}")
			
 
				+                else:
			
 
				+                    logger.warning(f" 「{keyword}」第{collected_count}个商品「{title}」- 没有销量元素，使用默认值：{sell}")
			
 
				+
			
 
				+
			
 
				+                #保存快照url上传到oss
			
 
				+                try:
			
 
				+                    local_path, oss_url = screenshot_target_page_to_local_then_oss(
			
 
				+                        target_page=target_page,
			
 
				+                        full_page=True  # 截取全屏
			
 
				+                    )
			
 
				+                    print(f"最终结果：")
			
 
				+                    print(f"  本地文件路径：{local_path}")
			
 
				+                    logger.info(f"  OSS访问链接：{oss_url}")
			
 
				+                except Exception as e:
			
 
				+                    logger.warning(f"整体流程执行失败：{str(e)}")
			
 
				+                # input("...")
			
 
				+                province = ""
			
 
				+                city = ""
			
 
				+                business_license_company = ""
			
 
				+                qualification_number = ''
			
 
				+
			
 
				+                #如果店名为商品预约中心
			
 
				+                # if shop == '药店品种预约中心':
			
 
				+                #     #https://www.ybm100.com/new-front/product-info/detail?type__1241=222029ad07-tWcfAcrWtc_CSPpP_%2FtW_cfB_ETca0SugQSbgC7gAb5RAdZyTA5UdS%3DUAoogIsKBqyWgKP_tgAPItgePrBgRPrlgQP_ug0PTZgEPrugpPA5lq%3DSQPg%3Dgt2_xg%3D2FPgs0oBgYqwcg9%3DWPTuSgTHgtBsfgGEh%3D%2FXvko2R%3DGvhceloleBnCGBqcG%2F2V_uKVUBftg
			
 
				+                #     #获取pidhttps://www.ybm100.com/new-front/product-info/detail?type__1241=222029ad07-G%2FxP7PxPJgfPUgu%2FIbv7Wg6gpIgwJg5q4PfAg%2FTWZ_Q6gtHaHG%2FgWCPKsClvGsLPVsgQyuBlVVPTqgtvgQgWvG6gOPTkg5%2F_jgAvTog6vT4g5v_6gSU7vC9cggZgvPAtgZJBPgysGg_OuH%2Fg9ToPgjkBgO%2FgaCQggY7KNlo7itg%2FBGP2GrJpPV6%2FQ6f_u6qvMjPvQVIgPg
			
 
				+                #     url = 'https://www.ybm100.com/new-front/product-info/detail?type__1241=222029ad07-G%2FxP7PxPJgfPUgu%2FIbv7Wg6gpIgwJg5q4PfAg%2FTWZ_Q6gtHaHG%2FgWCPKsClvGsLPVsgQyuBlVVPTqgtvgQgWvG6gOPTkg5%2F_jgAvTog6vT4g5v_6gSU7vC9cggZgvPAtgZJBPgysGg_OuH%2Fg9ToPgjkBgO%2FgaCQggY7KNlo7itg%2FBGP2GrJpPV6%2FQ6f_u6qvMjPvQVIgPg'
			
 
				+                #     data = {
			
 
				+                #         'id': f'{product_id}',
			
 
				+                #         'isMainProductVirtualSupplier': 0
			
 
				+                #     }
			
 
				+                #     headers = {
			
 
				+                #         "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/144.0.0.0 Safari/537.36 Edg/144.0.0.0",
			
 
				+                #         'Cookie': '_abfpc=48083f46aa22e0eaefbace39874e38acc7c631ea_2.0; cna=2b5bf2a0d04d0ec45367fda825d4fa6b; xyy=MjM2JjE4MDA4NjUwMzAw; JSESSIONID=922A896126C5961D09622E042CAAA01D; xyy_token=eyJhbGciOiJIUzUxMiJ9.eyJhY2NvdW50X2lkIjoyMzYsImRldmljZV9pZCI6IiIsIm9zIjoiV2luZG93cyAxMCIsImxvZ2luX3RpbWUiOjE3NjkxNjAzNDQ5MDYsImJyb3dzZXIiOiJDaHJvbWUgMTQiLCJtZXJjaGFudF9pZCI6MjM2LCJpcF9hZGRyIjoiMTEzLjk4LjYyLjE2NiIsInZlcnNpb24iOiIiLCJsb2dpbl91c2VyX2tleSI6IjM3NzQ2ZjM5LTE3MjQtNDBjYi1hNTk4LWRlYTM5MTU2NjllNSJ9.IN8gFX6p4KuClT2KysZLNVuyQuszfdNW5gz7m_u4yq60zqbvSOg1yo0f7TuKcbZVvd-t5mVsb4hoNBRNV6nsYQ; xyy_principal=236&Y2MwY2FiZGYzZjU4NzUzNGE5OWRkZTIwYmRiMmQ4NTk2ZDg5N2QxOQ&236; xyy_last_login_time=1769160344906; acw_tc=1a0c650c17694095621061999e5d6b6730068c59854298f31bdd661882a009; qt_session=KsnsuMqE_1769409754197; ssxmod_itna=1-eq0xgDnDyAeYqDKi=G0KKG7DRDIEpDpxgGDBP01G7DuExjKidtDUDQulGmFgG4G=oG7iheet3RLKNDlpLeDZDGKQDqx0Eb0iiD4Ns3ImkiT53QQGvqUdaeOENowZaTRbY9oVG6MxfXy/UDgEeDU4GnD068CY6bDYYLDBYD74G_DDeDi2rD84D_DGpdMnudxi33nDeDzqr=xG3txYpdweDgADDB_RiDKkP=hDDlGA7YREbPAcTq6PmzxGU8lCGxUeDMFxGXmikYUQy6MK4rZCSfp1EYH1aDtqD9DgbDb42zvrTbp6ebF_mbS_83r1Ki=3iifhNQ2rt0iC0_Yiofx4lxxfxx3Be5WHiTHDDW=fd1xxq05p71UdznuzuAernD=xIxRtbj=/74anQqf5Dxx4hYb0DnOGK0D3j=bGrxnD4D; ssxmod_itna2=1-eq0xgDnDyAeYqDKi=G0KKG7DRDIEpDpxgGDBP01G7DuExjKidtDUDQulGmFgG4G=oG7iheet3RLFoDiaRAqzbCD7pxTs4GNeYfb78=o8pWc0HY8dN0vO6z5i69OeF5Dg34naHHkD98UZ3tVAb=9/L3BSLIczMds0bxfCAIfG0eY3oTQym5z/oAhmi4qDLetNaD',
			
 
				+                #         'Referer': f'https://www.ybm100.com/new/base/skuDetail?id={product_id}&combination=1&type=1',
			
 
				+                #         "Content-Type" : "application/json"
			
 
				+                #     }
			
 
				+
			
 
				+                #     response = requests.post(url, json=data, headers=headers)
			
 
				+                #     print(response.status_code)
			
 
				+                #     try:
			
 
				+                #         response_json = response.json()
			
 
				+                #         print("✅ 成功解析JSON响应")
			
 
				+
			
 
				+                #         if 'data' in response_json and 'detail' in response_json['data'] and 'pid' in response_json['data']['detail']:
			
 
				+                #             pid = response_json['data']['detail']['pid']
			
 
				+                #             print(f"✅ 提取到pid：{pid}")
			
 
				+                #         elif 'pid' in response_json:
			
 
				+                #             pid = response_json['pid']
			
 
				+                #             print(f"✅ 方式二提取到pid：{pid}")
			
 
				+                #         else:
			
 
				+                #             # 打印响应的前1000个字符，帮助你确认JSON结构
			
 
				+                #             print("⚠️ 未找到pid字段，响应数据预览：")
			
 
				+                #             print(json.dumps(response_json, ensure_ascii=False, indent=2)[:1000])
			
 
				+                #             pid = None
			
 
				+                #     except json.JSONDecodeError:
			
 
				+                #         # 响应不是JSON格式的情况
			
 
				+                #         print("❌ 响应不是JSON格式，无法解析")
			
 
				+                #         print("响应文本：", response.text[:1000])
			
 
				+                #         pid = None
			
 
				+                #     except Exception as e:
			
 
				+                #         # 其他异常
			
 
				+                #         print(f"❌ 提取pid时出错：{str(e)}")
			
 
				+                #         pid = None
			
 
				+                #     target_page.goto(f'https://www.ybm100.com/new/base/skuDetail?id={product_id}&combination=1&type=1')
			
 
				+                #     shop_name_elem = target_page.locator('span[data-v-5485589c]')
			
 
				+                #     shop_name = shop_name_elem.inner_text(timeout=3000).strip()
			
 
				+                #     shop_exists, shop_info = shop_is_exists_database(shop_name)
			
 
				+                #     if not shop_exists:
			
 
				+                #         if shop_info:
			
 
				+                #             province = shop_info['province']
			
 
				+                #             city = shop_info['city']
			
 
				+                #             business_license_company = shop_info['business_license_company']
			
 
				+                #             qualification_number = shop_info['qualification_number']
			
 
				+                # #去往药店品种预约中心后面的链接
			
 
				+                # target_page.goto(f"https://www.ybm100.com/new/base/skuDetail?id={pid}&combination=1&type=1")
			
 
				+                # if not shop_exists:
			
 
				+
			
 
				+                shop_exists, shop_info = shop_is_exists_database(shop)
			
 
				+                shop_page = None
			
 
				+
			
 
				+
			
 
				+                #店铺名不是药品预约中心且店铺名不在数据库就要点击
			
 
				+                if shop != "药店品种预约中心" and not shop_exists:
			
 
				+                    logger.info("店铺名不是药店品种预约中心且数据库没有该公司的营业执照")
			
 
				+                    # 获取营业执照图片
			
 
				+                    # 进入店铺
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                    entershop_btn = target_page.locator('div[data-v-5485589c].shop-info-container-left-info')
			
 
				+                    # 增强：先等待进入店铺按钮可见
			
 
				+                    entershop_btn.wait_for(state="visible", timeout=10000)
			
 
				+                    entershop_btn.scroll_into_view_if_needed()  # 确保按钮在视口内
			
 
				+                    entershop_btn.hover()  # 先悬停
			
 
				+                    random_delay(0.2, 0.5)  # 悬停后延迟
			
 
				+                    with target_page.expect_popup(timeout=15000) as pop:
			
 
				+                        entershop_btn.click()
			
 
				+                        random_delay(0.05, 0.15)  # 鼠标按下后延迟
			
 
				+                    shop_page = pop.value
			
 
				+                    shop_page.wait_for_load_state("domcontentloaded")  # 比 networkidle 更
			
 
				+
			
 
				+
			
 
				+                    #点击店铺资质
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                    shop_license_page = shop_page.locator('//div[contains(@class, "shop-info-container-right-btns-item") and contains(span, "资质/售后")]')
			
 
				+                    shop_license_page.wait_for(state="attached", timeout=15000)  # 等待元素加载完成
			
 
				+                    shop_license_page.scroll_into_view_if_needed()  # 确保在视口内
			
 
				+                    shop_license_page.hover()   # 先悬停
			
 
				+                    random_delay(0.2, 0.5)  # 悬停后延迟
			
 
				+                    # shop_license_page.dispatch_event("mousedown")
			
 
				+                    shop_license_page.click()
			
 
				+                    random_delay(0.05, 0.15)  # 鼠标按下后延迟
			
 
				+                    # shop_license_page.dispatch_event("mouseup")
			
 
				+                    random_delay(0.05, 0.1)   # 鼠标松开后延迟
			
 
				+                    shop_page.wait_for_load_state("networkidle")
			
 
				+                    # slow_scroll_400px(shop_page, scroll_distance1=700)
			
 
				+
			
 
				+
			
 
				+                    #获取药品经营许可证图片
			
 
				+                    shop_page.wait_for_load_state("load")
			
 
				+                    ocr_res = None
			
 
				+                    # shop_license_div = target_page.locator('//span[contains(text(), "营业执照")]')
			
 
				+                    shop_license_img = shop_page.locator('//span[contains(text(), "企业营业执照")]/ancestor::div[@class="shop-info-drawer-zz-tab1-list-item"]/img').first
			
 
				+                    shop_license_img.wait_for(state="visible", timeout=60000)
			
 
				+
			
 
				+                    try:
			
 
				+                        if shop_license_img.count() > 0:
			
 
				+                            shop_license_src = shop_license_img.get_attribute('src')
			
 
				+                            shop_license_src = shop_license_src.strip() if shop_license_src else None
			
 
				+                            ocr_res = get_ocr_res(shop_license_src)
			
 
				+                            # print(f'ocr_res:{ocr_res}')
			
 
				+                            # input(".....")
			
 
				+                        else:
			
 
				+                            shop_license_src = None
			
 
				+                    except Exception as e:
			
 
				+                            # 捕获定位/提取失败的异常，避免程序崩溃
			
 
				+                            logger.warning(f"提取营业执照图片src失败：{e}")
			
 
				+                            shop_license_src = None
			
 
				+                    print("营业执照图片链接：", shop_license_src)
			
 
				+                    # input("..")
			
 
				+
			
 
				+                    contact_address = ''
			
 
				+                    qualification_number = ocr_res.get('社会信用代码', '') if ocr_res else ''
			
 
				+                    business_license_company = ocr_res.get('单位名称', '') if ocr_res else ''
			
 
				+                    business_license_address = ocr_res.get('地址', '') if ocr_res else ''
			
 
				+                    # scrape_date = ''
			
 
				+
			
 
				+                    # 调用提取函数，获取省份和城市
			
 
				+                    province, city = extract_province_city(business_license_address)
			
 
				+                    logger.info(f"原始地址：{business_license_address}")
			
 
				+                    logger.info(f"提取的省份：{province} | 城市：{city}")
			
 
				+                    insert_result = insert_shop_info_to_db(
			
 
				+                        shop=shop,
			
 
				+                        contact_address=contact_address,
			
 
				+                        qualification_number=qualification_number,
			
 
				+                        business_license_company=business_license_company,
			
 
				+                        business_license_address=business_license_address,
			
 
				+                        scrape_date=current_time,
			
 
				+                        platform=platform,
			
 
				+                        province=province,
			
 
				+                        city=city,
			
 
				+                        create_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S") ,
			
 
				+                        update_time=datetime.now().strftime("%Y-%m-%d %H:%M:%S")
			
 
				+                                                            )
			
 
				+                else:
			
 
				+                    logger.info("数据库有该店名，在数据库拿取对应字段填充ybm_drug_middle表")
			
 
				+                    if shop_info:
			
 
				+                        province = shop_info['province']
			
 
				+                        city = shop_info['city']
			
 
				+                        business_license_company = shop_info['business_license_company']
			
 
				+                        qualification_number = shop_info['qualification_number']
			
 
				+
			
 
				+
			
 
				+                try:
			
 
				+                    if shop_page and not shop_page.is_closed():
			
 
				+                        random_delay(4,8)
			
 
				+                        shop_page.close()
			
 
				+                        logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 已关闭店铺页标签 shop_page")
			
 
				+
			
 
				+                except Exception as e:
			
 
				+                    logger.warning(f"⚠️ 关闭 shop_page 失败：{e}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # # purchase_price = float(price.replace("￥", "").replace(",", "")) if price.replace("￥", "").replace(",", "").replace(".", "").isdigit() else 0.00
			
 
				+
			
 
				+                random_delay(5,8)
			
 
				+
			
 
				+                # ========== 关闭新标签页，切回列表页 ==========
			
 
				+                if detail_page and not detail_page.is_closed():
			
 
				+                    detail_page.close()  # 关闭详情页标签
			
 
				+                    logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 已关闭详情页标签页")
			
 
				+                # 切回原列表页（第一个标签页）
			
 
				+                page.bring_to_front()  # 激活列表页
			
 
				+                page.mouse.move(random.randint(100, 300), random.randint(200, 400))  # 随机移动鼠标
			
 
				+                random_delay(0.5, 1.0)  # 增加切换后延迟
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+                random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                logger.info(f" 「{keyword}」第{collected_count}个商品「{title}」- 已切回列表页")
			
 
				+                random_delay(2,4)
			
 
				+                # credit_code = ""
			
 
				+                availability = ""
			
 
				+                # input(".....")
			
 
				+
			
 
				+                # 组装单条数据（仅新增生产日期/批准文号字段，原有字段顺序/逻辑不变）
			
 
				+                # 构造单条数据元组（适配MySQL字段）
			
 
				+                single_data = {
			
 
				+                    # 核心商品信息
			
 
				+                        "product": title,  # 商品名称
			
 
				+                        "my_good_price": merged_price,  # 自定义价格（可与min_price相同或单独提取）
			
 
				+                        "min_price": discount_price_val,  # 最低价格
			
 
				+                        "manufacture_date": manufacture_date,  # 生产日期
			
 
				+                        "expiry_date": expiry_date,  # 有效期
			
 
				+                        "shop": shop,  # 店铺名
			
 
				+                        "business_license_company": business_license_company,  # 营业执照主体（公司名称）
			
 
				+                        "province": province,  # 省份
			
 
				+                        "city": city,  # 城市
			
 
				+                        "manufacturer": manufacturer,  # 生产厂家
			
 
				+                        "specification": spec,  # 规格
			
 
				+                        "approval_number": approval_number,  # 批准文号
			
 
				+                        "product_link": product_link,  # 商品链接
			
 
				+                        "scrape_date": current_time,  # 采集日期
			
 
				+                        "scrape_province": "",  # 采集省份（可留空或根据IP获取）
			
 
				+                        "availability": availability,  # 库存状态
			
 
				+                        "credit_code": qualification_number,  # 统一信用代码（如有可补充提取）
			
 
				+                        "platform": platform,  # 平台名称（固定或动态获取）
			
 
				+                        "search_key": keyword,  # 搜索关键词
			
 
				+                        "number": num,  # 数量（盒数）
			
 
				+                        "is_sold_out": is_sold_out,  # 售罄标记（0/1）
			
 
				+                        "sales": sell,      #销量
			
 
				+                        "inventory": storage,       #库存
			
 
				+                        "snapshot_url": oss_url,         #快照链接
			
 
				+                        "update_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S"),  # 更新时间
			
 
				+                        "create_time": datetime.now().strftime("%Y-%m-%d %H:%M:%S")   # 创建时间
			
 
				+            }
			
 
				+                # 调用逐条插入函数
			
 
				+                insert_single_to_mysql(single_data)
			
 
				+                collect_result.append(single_data)
			
 
				+                logger.info(f" 「{keyword}」第{collected_count}个商品「{title}」采集完成")
			
 
				+                # input("....")
			
 
				+            except Exception as e:
			
 
				+                # 异常处理：关闭详情页，强制切回列表页
			
 
				+                logger.exception(f" 「{keyword}」第{collected_count}个商品采集核心异常：{str(e)}")
			
 
				+                try:
			
 
				+                    if detail_page and not detail_page.is_closed():
			
 
				+                        detail_page.close()
			
 
				+                        logger.info(f"📌 「{keyword}」第{collected_count}个商品 - 异常时关闭详情页标签页")
			
 
				+                    if page and not page.is_closed():
			
 
				+                        page.bring_to_front()  # 切回列表页
			
 
				+                    page.wait_for_load_state("networkidle")
			
 
				+                    random_delay(MIN_CLICK_DELAY, MAX_CLICK_DELAY)
			
 
				+                except Exception as e2:
			
 
				+                    logger.error(f" 「{keyword}」第{collected_count}个商品详情采集异常（处理时）：{str(e2)}，原异常：{str(e)}")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+            # ✅ 每15次滚动一次（修复：用collected_count，且排除0的情况）
			
 
				+            if collected_count % 5 == 0 and collected_count > 0 and collected_count != total_limit:
			
 
				+                logger.info("采满5个往下滑")
			
 
				+                slow_scroll_400px(page,)
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+
			
 
				+
			
 
				+        # ====== 当前页采集完毕，尝试翻页 ======
			
 
				+        delay = random_delay(1.5, 3.0)
			
 
				+        logger.info(f"⏳ 翻页前随机等待 {delay:.2f}s（反爬）")
			
 
				+
			
 
				+        if goto_next_page(page):
			
 
				+            page_no += 1
			
 
				+            continue
			
 
				+        else:
			
 
				+            logger.info(f" 「{keyword}」已无下一页，关键词采集结束")
			
 
				+            break
			
 
				+    # 关键词采集完成后长延迟
			
 
				+    long_delay = random_delay(MIN_KEYWORD_DELAY, MAX_KEYWORD_DELAY)
			
 
				+    logger.info(f" 「{keyword}」采集完成，共{len(collect_result)}条数据，等待{long_delay:.2f}秒后继续下一个关键词（反爬）")
			
 
				+    return collect_result
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 保存到CSV函数（适配新表头） ====================
			
 
				+# def save_to_csv(data_list):
			
 
				+#     """
			
 
				+#     保存数据到CSV（适配新表头）
			
 
				+#     :param data_list: list - 采集到的字典数据列表
			
 
				+#     :return: bool - 保存是否成功
			
 
				+#     """
			
 
				+#     if not data_list:
			
 
				+#         logger.warning(" 无数据可保存到CSV")
			
 
				+#         return False
			
 
				+
			
 
				+#     try:
			
 
				+#         # 判断文件是否存在，不存在则写入表头
			
 
				+#         file_exists = os.path.exists(CSV_FILE_PATH)
			
 
				+
			
 
				+#         # 打开CSV文件（追加模式，utf-8-sig避免Excel乱码）
			
 
				+#         with open(CSV_FILE_PATH, "a", newline="", encoding="utf-8-sig") as f:
			
 
				+#             # 用新表头作为字段名
			
 
				+#             writer = csv.DictWriter(f, fieldnames=CSV_HEADERS)
			
 
				+
			
 
				+#             # 首次写入表头
			
 
				+#             if not file_exists:
			
 
				+#                 writer.writeheader()
			
 
				+#                 logger.info(f" 已创建CSV文件并写入新表头：{CSV_FILE_PATH}")
			
 
				+
			
 
				+#             # 写入数据行
			
 
				+#             writer.writerows(data_list)
			
 
				+#             logger.info(f" 成功将 {len(data_list)} 条数据写入CSV")
			
 
				+#         return True
			
 
				+
			
 
				+#     except Exception as e:
			
 
				+#         logger.error(f" 保存CSV失败：{str(e)}")
			
 
				+#         return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+# ==================== 主函数（登录+批量搜索） ====================
			
 
				+def main():
			
 
				+    logger.info("\n" + "="*50)
			
 
				+    logger.info("🚀 药帮忙采集程序启动")
			
 
				+    logger.info(f"⏰ 启动时间：{datetime.now().strftime('%Y-%m-%d %H:%M:%S')}")
			
 
				+    logger.info("="*50)
			
 
				+
			
 
				+    # 待搜索的关键词列表（直接写在这里，改起来更直观）
			
 
				+    # 存储所有关键词的采集数据
			
 
				+    # all_collect_data = []
			
 
				+
			
 
				+    with sync_playwright() as p:
			
 
				+        # browser = init_browser_with_proxy(p)
			
 
				+        # 启动浏览器（用单个配置变量）
			
 
				+        browser = p.chromium.launch(
			
 
				+            headless=False,  # 不要用无头模式（反爬：无头模式易被识别）
			
 
				+            channel="chrome",  # 使用真实Chrome内核
			
 
				+            slow_mo=random.randint(100, 300),  # 全局操作延迟（模拟真人慢速操作）
			
 
				+            args=[
			
 
				+                "--disable-blink-features=AutomationControlled",  # 禁用webdriver特征（核心！）
			
 
				+                "--enable-automation=false",  # 新增：禁用自动化标识
			
 
				+                "--disable-infobars",  # 新增：禁用信息栏
			
 
				+                "--remote-debugging-port=0",  # 新增：随机调试端口
			
 
				+                "--start-maximized",  # 最大化窗口（模拟真人使用）
			
 
				+                "--disable-extensions",  # 禁用扩展（避免特征）
			
 
				+                "--disable-plugins-discovery",  # 禁用插件发现
			
 
				+                "--no-sandbox",  # 避免沙箱模式特征
			
 
				+                "--disable-dev-shm-usage",  # 避免内存限制导致的异常
			
 
				+                f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36"  # 随机Chrome版本的UA
			
 
				+            ]
			
 
				+        )
			
 
				+        # 创建页面时伪装指纹
			
 
				+        context = browser.new_context(
			
 
				+            locale="zh-CN",  # 中文环境
			
 
				+            timezone_id="Asia/Shanghai",  # 上海时区
			
 
				+            geolocation={"latitude": 31.230416, "longitude": 121.473701},  # 模拟上海地理位置（可选）
			
 
				+            permissions=["geolocation"],  # 授予定位权限（模拟真人）
			
 
				+            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
			
 
				+            viewport={"width": 1800, "height": 1000},
			
 
				+            # 关键：隐藏自动化特征
			
 
				+            java_script_enabled=True,
			
 
				+            bypass_csp=True,
			
 
				+            # user_data_dir="./temp_user_data"  # 模拟真实用户数据目录
			
 
				+        )
			
 
				+
			
 
				+        page = context.new_page()
			
 
				+
			
 
				+
			
 
				+        # 关键：移除navigator.webdriver标识（反爬核心）
			
 
				+        page.add_init_script("""
			
 
				+            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
			
 
				+            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });  // 新增：模拟插件
			
 
				+            Object.defineProperty(navigator, 'mimeTypes', { get: () => [1, 2, 3] });  // 新增：模拟MIME类型
			
 
				+            window.chrome = { runtime: {}, loadTimes: () => ({}) };  // 增强Chrome模拟
			
 
				+            delete window.navigator.languages;
			
 
				+            window.navigator.languages = ['zh-CN', 'zh'];
			
 
				+            // 新增：模拟真实鼠标移动特征
			
 
				+            (() => {
			
 
				+                const originalAddEventListener = EventTarget.prototype.addEventListener;
			
 
				+                EventTarget.prototype.addEventListener = function(type, listener) {
			
 
				+                    if (type === 'mousemove') {
			
 
				+                        return originalAddEventListener.call(this, type, (e) => {
			
 
				+                            e._automation = undefined;
			
 
				+                            listener(e);
			
 
				+                        });
			
 
				+                    }
			
 
				+                    return originalAddEventListener.call(this, type, listener);
			
 
				+                };
			
 
				+            })();
			
 
				+""")
			
 
				+
			
 
				+
			
 
				+        try:
			
 
				+            # ========== 核心：Cookie复用逻辑 ==========
			
 
				+            # 1. 加载本地Cookie
			
 
				+            load_cookies(context)
			
 
				+
			
 
				+            # 2. 验证登录状态
			
 
				+            if not is_login(page):
			
 
				+                # 3. Cookie失效/不存在，执行登录
			
 
				+                page.goto(TARGET_LOGIN_URL)
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+                logger.info("🔑 开始执行登录流程")
			
 
				+
			
 
				+                # 执行登录操作
			
 
				+                login_success = login_operation(page, USERNAME, PASSWORD)
			
 
				+                if not login_success:
			
 
				+                    logger.error(" 登录失败，程序终止")
			
 
				+                    return
			
 
				+
			
 
				+                # 4. 登录成功后保存Cookie
			
 
				+                save_cookies(context)
			
 
				+                logger.info(" 登录并保存Cookie成功！")
			
 
				+
			
 
				+
			
 
				+
			
 
				+            # 2. 批量搜索+采集+保存
			
 
				+            for keyword_idx, keyword in enumerate(SEARCH_KEYWORDS, 1):
			
 
				+                logger.info(f"\n=====================================")
			
 
				+                logger.info(f"🔍 开始处理第{keyword_idx}/{len(SEARCH_KEYWORDS)}个关键词：{keyword}")
			
 
				+                logger.info(f"=====================================")
			
 
				+
			
 
				+                # 执行搜索
			
 
				+                popup_guard(page, "before_search")
			
 
				+                detail_page,search_success = search_operation(page, keyword)
			
 
				+                # input("")
			
 
				+                popup_guard(detail_page, "after_search")
			
 
				+
			
 
				+                if detail_page is None:
			
 
				+                    break
			
 
				+
			
 
				+                if not search_success:
			
 
				+                    logger.warning(f" 「{keyword}」搜索失败，跳过采集")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # ✅ 再等页面稳定一下（networkidle 有时会等不到，建议加超时或换成 domcontentloaded）
			
 
				+                detail_page.wait_for_load_state("domcontentloaded")
			
 
				+
			
 
				+
			
 
				+                detail_page.wait_for_load_state('networkidle')
			
 
				+
			
 
				+                # 采集数据
			
 
				+                data_list = collect_data(detail_page, keyword)
			
 
				+
			
 
				+                # # 保存到CSV
			
 
				+                # if data_list:
			
 
				+                #     save_to_csv(data_list)
			
 
				+                # else:
			
 
				+                #     logger.warning(f" 「{keyword}」无数据，跳过保存")
			
 
				+
			
 
				+            logger.info("\n🎉 所有关键词处理完成！CSV文件路径：" + os.path.abspath(CSV_FILE_PATH))
			
 
				+            # input("\n按回车关闭程序...")
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            logger.error(f" 程序异常：{str(e)}")
			
 
				+        finally:
			
 
				+            browser.close()
			
 
				+            logger.info(" 浏览器已关闭，程序结束")
			
 
				+
			
 
				+# ==================== 程序入口 ====================
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/nextpage.py
+++ b/nextpage.py
@@ -0,0 +1,766 @@
 
				+from playwright.sync_api import sync_playwright, TimeoutError as PlaywrightTimeoutError
			
 
				+import os
			
 
				+import json
			
 
				+import random
			
 
				+from logger_config import logger
			
 
				+from config import *
			
 
				+import re
			
 
				+COOKIE_FILE_PATH = "ybm_cookies.json"  # Cookie保存路径
			
 
				+LOGIN_VALIDATE_URL = "https://www.ybm100.com/new/"
			
 
				+TARGET_LOGIN_URL = "https://www.ybm100.com/new/login"
			
 
				+
			
 
				+
			
 
				+# ========== 新增：滚动相关配置（可根据需要调整） ==========
			
 
				+SCROLL_STEP = 200  # 每次滚动的步长（像素），越小越慢越逼真
			
 
				+SCROLL_DELAY = 0.15  # 每步滚动的延迟（秒），越大越慢
			
 
				+MAX_SCROLL_ATTEMPTS = 50  # 最大滚动次数（防止无限循环）
			
 
				+
			
 
				+
			
 
				+
			
 
				+def load_cookies(context, cookie_path=COOKIE_FILE_PATH):
			
 
				+    """从本地JSON文件加载Cookie到浏览器上下文"""
			
 
				+    if not os.path.exists(cookie_path):
			
 
				+        # logger.warning(f" Cookie文件不存在：{cookie_path}")
			
 
				+        return False
			
 
				+    try:
			
 
				+        with open(cookie_path, "r", encoding="utf-8") as f:
			
 
				+            cookies = json.load(f)
			
 
				+        context.add_cookies(cookies)
			
 
				+        # logger.info(f"✅ 已从{cookie_path}加载Cookie")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        # logger.error(f" 加载Cookie失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def is_login(page):
			
 
				+    """验证是否已登录（核心：检测登录态）"""
			
 
				+    try:
			
 
				+        # 访问需要登录的页面
			
 
				+        page.goto(LOGIN_VALIDATE_URL, timeout=5000)
			
 
				+        page.wait_for_load_state("networkidle")
			
 
				+
			
 
				+        # 检测是否跳转到登录页（URL包含login则未登录）
			
 
				+        if "login" in page.url.lower():
			
 
				+            # logger.warning(" Cookie失效，需要重新登录")
			
 
				+            return False
			
 
				+
			
 
				+        # 可选：检测登录后的专属元素（比如用户名、个人中心等）
			
 
				+        # if page.locator("用户中心选择器").count() > 0:
			
 
				+        #     return True
			
 
				+        # logger.info(" Cookie有效，已保持登录状态")
			
 
				+        return True
			
 
				+    except Exception as e:
			
 
				+        # logger.error(f" 验证登录状态失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+def popup_guard(page, tag=""):
			
 
				+    """
			
 
				+    全局弹窗/遮罩守卫：多步引导 + 关闭按钮 + 遮罩清理 + 恢复滚动
			
 
				+    tag 仅用于日志区分调用位置
			
 
				+    """
			
 
				+    try:
			
 
				+        # 给弹窗一点出现时间
			
 
				+        page.wait_for_timeout(300)
			
 
				+
			
 
				+        # 1) 连续点“下一步/完成/我知道了/关闭”
			
 
				+        for _ in range(6):
			
 
				+            btn = page.locator(
			
 
				+                "xpath=//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            # 2) 常见的 close icon
			
 
				+            close_btn = page.locator(
			
 
				+                "css=.el-dialog__headerbtn, .el-message-box__headerbtn, .close, .icon-close, .el-icon-close"
			
 
				+            ).first
			
 
				+            if close_btn.count() > 0 and close_btn.is_visible():
			
 
				+                close_btn.click(timeout=1200)
			
 
				+                page.wait_for_timeout(250)
			
 
				+                continue
			
 
				+
			
 
				+            break
			
 
				+
			
 
				+        # 3) 清遮罩 + 恢复滚动/交互
			
 
				+        page.evaluate(r"""
			
 
				+        () => {
			
 
				+          // 第一步：精准清理已知的遮罩/弹窗类名（Element UI框架常用）
			
 
				+          const selectors = [
			
 
				+            '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+            '.el-message-box__wrapper', '.el-loading-mask'
			
 
				+          ];
			
 
				+          selectors.forEach(sel => document.querySelectorAll(sel).forEach(e => e.remove()));
			
 
				+
			
 
				+          // 泛化兜底：近似全屏 + 高 z-index 的层直接屏蔽
			
 
				+          const all = Array.from(document.querySelectorAll('body *'));
			
 
				+          for (const el of all) {
			
 
				+            const s = getComputedStyle(el); // 获取元素的实际样式（含CSS生效的样式）
			
 
				+            const z = parseInt(s.zIndex || '0', 10);    // 取元素的层级（z-index），默认0
			
 
				+            // 条件1：元素是固定/绝对定位（弹窗/遮罩常见定位方式）+ 层级≥1000（高优先级遮挡）+ 能拦截鼠标事件
			
 
				+            if ((s.position === 'fixed' || s.position === 'absolute') && z >= 1000 && s.pointerEvents !== 'none') {
			
 
				+              const r = el.getBoundingClientRect();     // 获取元素的尺寸和位置
			
 
				+                // 条件2：元素宽度/高度≥屏幕80%（近似全屏遮罩）
			
 
				+              const nearFull = r.width >= innerWidth * 0.8 && r.height >= innerHeight * 0.8;
			
 
				+              if (nearFull) {
			
 
				+                el.style.pointerEvents = 'none';    // 让元素不拦截鼠标点击
			
 
				+                el.style.display = 'none';          // 隐藏元素
			
 
				+              }
			
 
				+            }
			
 
				+          }
			
 
				+        // 第三步：恢复页面滚动功能（弹窗常把页面设为不可滚动）
			
 
				+          document.documentElement.style.overflow = 'auto';     // html标签恢复滚动
			
 
				+          document.body.style.overflow = 'auto';    // body标签恢复滚动
			
 
				+          document.body.classList.remove('el-popup-parent--hidden');  // 移除Element UI的滚动禁用类
			
 
				+        }
			
 
				+        """)
			
 
				+
			
 
				+        # logger.info("杀除弹窗成功")
			
 
				+    except Exception:
			
 
				+        pass
			
 
				+
			
 
				+SEARCH_INPUT_SELECTOR = "input[placeholder*='药品名称/厂家名称']"
			
 
				+
			
 
				+def pick_search_input(page):
			
 
				+    """优先选可见且可用的搜索输入框；第一个不行就尝试第二个"""
			
 
				+    inputs = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+    cnt = inputs.count()
			
 
				+
			
 
				+    # 优先检查前两个（你说只有两个）
			
 
				+    for i in range(min(cnt, 2)):
			
 
				+        candidate = inputs.nth(i)
			
 
				+        try:
			
 
				+            candidate.wait_for(state="visible", timeout=1500)  # 小超时快速试探
			
 
				+            if candidate.is_enabled():
			
 
				+                return candidate
			
 
				+        except PlaywrightTimeoutError:
			
 
				+            continue
			
 
				+
			
 
				+    # 兜底：直接找任意可见的（避免命中 hidden 模板）
			
 
				+    candidate = page.locator(f"{SEARCH_INPUT_SELECTOR}:visible").first
			
 
				+    candidate.wait_for(state="visible", timeout=5000)
			
 
				+    return candidate
			
 
				+
			
 
				+
			
 
				+def type_slow(locator, text: str, min_delay=0.06, max_delay=0.18):
			
 
				+    """逐字输入，模拟真人打字"""
			
 
				+    for ch in text:
			
 
				+        locator.type(ch, delay=int(random.uniform(min_delay, max_delay) * 1000))
			
 
				+
			
 
				+SEARCH_BTN_SELECTOR = 'div.home-search-container-search-head-btn[data-scmd="text-搜索"]'
			
 
				+
			
 
				+
			
 
				+
			
 
				+def force_close_popup(page):
			
 
				+    """增强版：优先清理遮罩，再处理引导按钮"""
			
 
				+    try:
			
 
				+        # 第一步：先强制移除所有遮罩层（前置操作，关键！）
			
 
				+        page.evaluate("""
			
 
				+        () => {
			
 
				+            // 1. 移除所有高优先级遮罩
			
 
				+            const maskSelectors = [
			
 
				+                '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+                '.el-message-box__wrapper', '.el-loading-mask', '[class*="mask"]', '[class*="overlay"]',
			
 
				+                '[style*="position: fixed"][style*="z-index: 9999"]', '[style*="position: absolute"][style*="z-index: 9999"]'
			
 
				+            ];
			
 
				+            maskSelectors.forEach(sel => {
			
 
				+                document.querySelectorAll(sel).forEach(el => {
			
 
				+                    el.remove(); // 直接删除遮罩元素
			
 
				+                });
			
 
				+            });
			
 
				+            // 2. 恢复body和列表容器的交互
			
 
				+            document.body.style.overflow = 'auto';
			
 
				+            document.body.style.pointerEvents = 'auto';
			
 
				+            // 3. 强制解除列表容器的样式限制
			
 
				+            document.querySelectorAll('.product-list-container, .list-container, .el-table__body-wrapper').forEach(el => {
			
 
				+                el.style.overflow = 'auto !important';
			
 
				+                el.style.height = 'auto !important';
			
 
				+                el.style.maxHeight = 'calc(100vh - 200px) !important';
			
 
				+                el.style.pointerEvents = 'auto !important';
			
 
				+            });
			
 
				+        }
			
 
				+        """)
			
 
				+        page.wait_for_timeout(500)
			
 
				+
			
 
				+        # 第二步：处理引导按钮（下一步/完成/关闭）
			
 
				+        for _ in range(5):
			
 
				+            btn = page.locator(
			
 
				+                "//button[normalize-space()='下一步' or normalize-space()='完成' or normalize-space()='我知道了' or normalize-space()='关闭']"
			
 
				+            ).first
			
 
				+            if btn.count() > 0 and btn.is_visible():
			
 
				+                btn.click(timeout=1500)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+
			
 
				+            close_icon = page.locator(
			
 
				+                "xpath=//*[contains(@class,'close') or contains(@class,'el-icon-close') or name()='svg' or name()='i'][1]"
			
 
				+            ).first
			
 
				+            if close_icon.count() > 0 and close_icon.is_visible():
			
 
				+                close_icon.click(timeout=1000)
			
 
				+                page.wait_for_timeout(300)
			
 
				+                continue
			
 
				+            break
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f"⚠️ 强制清理弹窗时异常：{e}")
			
 
				+
			
 
				+def kill_masks(page):
			
 
				+    """增强版：强制解除所有样式限制"""
			
 
				+    page.evaluate(r"""
			
 
				+    () => {
			
 
				+        // 1. 移除所有已知遮罩类
			
 
				+        const knownSelectors = [
			
 
				+            '.v-modal', '.el-overlay', '.el-overlay-dialog', '.el-dialog__wrapper',
			
 
				+            '.el-message-box__wrapper', '.el-loading-mask', '.el-popup-parent--hidden'
			
 
				+        ];
			
 
				+        knownSelectors.forEach(sel => {
			
 
				+            document.querySelectorAll(sel).forEach(el => el.remove());
			
 
				+        });
			
 
				+
			
 
				+        // 2. 泛化清理所有高z-index遮挡层
			
 
				+        const all = Array.from(document.querySelectorAll('body *'));
			
 
				+        for (const el of all) {
			
 
				+            const s = getComputedStyle(el);
			
 
				+            if (!s) continue;
			
 
				+
			
 
				+            const z = parseInt(s.zIndex || '0', 10);
			
 
				+            const pos = s.position;
			
 
				+            const pe = s.pointerEvents;
			
 
				+
			
 
				+            if ((pos === 'fixed' || pos === 'absolute') && z >= 1000) {
			
 
				+                el.remove(); // 直接删除高优先级遮挡元素
			
 
				+            }
			
 
				+        }
			
 
				+
			
 
				+        // 3. 强制恢复页面所有元素的交互和滚动
			
 
				+        document.documentElement.style.overflow = 'auto !important';
			
 
				+        document.body.style.overflow = 'auto !important';
			
 
				+        document.body.style.position = 'static !important';
			
 
				+        document.body.style.width = 'auto !important';
			
 
				+        document.body.style.paddingRight = '0px !important';
			
 
				+        document.body.style.pointerEvents = 'auto !important';
			
 
				+        document.body.classList.remove('el-popup-parent--hidden');
			
 
				+
			
 
				+        // 4. 强制恢复商品列表容器的样式（关键！）
			
 
				+        document.querySelectorAll('.product-list-container').forEach(el => {
			
 
				+            el.style.overflow = 'auto !important';
			
 
				+            el.style.height = 'auto !important';
			
 
				+            el.style.maxHeight = 'calc(100vh - 200px) !important';
			
 
				+            el.style.display = 'block !important';
			
 
				+            el.style.visibility = 'visible !important';
			
 
				+            el.style.pointerEvents = 'auto !important';
			
 
				+        });
			
 
				+
			
 
				+        return { success: true };
			
 
				+    }
			
 
				+    """)
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def slow_scroll_to_bottom(page):
			
 
				+    """
			
 
				+    模拟真人慢速滚动到页面最底部（适配全局/局部滚动容器）
			
 
				+    :param page: 页面对象
			
 
				+    :return: None
			
 
				+    """
			
 
				+    try:
			
 
				+        logger.info("📜 开始慢速滚动到页面底部...")
			
 
				+        # ========== 前置：强制刷新容器样式 ==========
			
 
				+        page.evaluate("""
			
 
				+        () => {
			
 
				+            // 1. 强制重绘商品列表容器
			
 
				+            const container = document.querySelector('.product-list-container');
			
 
				+            if (container) {
			
 
				+                container.style.overflow = 'auto !important';
			
 
				+                container.offsetHeight; // 触发重绘，更新高度
			
 
				+            }
			
 
				+            // 2. 强制刷新页面滚动高度
			
 
				+            document.documentElement.offsetHeight;
			
 
				+        }
			
 
				+        """)
			
 
				+        page.wait_for_timeout(500)
			
 
				+
			
 
				+        # 调试日志：打印滚动容器信息（保留）
			
 
				+        container_debug = page.evaluate("""
			
 
				+        () => {
			
 
				+            const container = document.querySelector('.product-list-container') || document.documentElement;
			
 
				+            return {
			
 
				+                tag: container.tagName,
			
 
				+                className: container.className,
			
 
				+                scrollHeight: container.scrollHeight,
			
 
				+                clientHeight: container.clientHeight,
			
 
				+                scrollTop: container.scrollTop
			
 
				+            };
			
 
				+        }
			
 
				+        """)
			
 
				+        logger.info(f"滚动容器信息：{container_debug}")
			
 
				+
			
 
				+        scroll_attempts = 0
			
 
				+        last_scroll_height = -1  # 初始值改为-1，避免首次误判
			
 
				+
			
 
				+        while scroll_attempts < MAX_SCROLL_ATTEMPTS:
			
 
				+            # ========== 关键：每次循环都重新获取容器信息 ==========
			
 
				+            scroll_container_info = page.evaluate("""
			
 
				+            () => {
			
 
				+                // 优先找商品列表容器
			
 
				+                const container = document.querySelector('.product-list-container') || document.documentElement;
			
 
				+                return {
			
 
				+                    isGlobal: container === document.documentElement,
			
 
				+                    scrollHeight: container.scrollHeight,
			
 
				+                    scrollTop: container.scrollTop,
			
 
				+                    clientHeight: container.clientHeight
			
 
				+                };
			
 
				+            }
			
 
				+            """)
			
 
				+
			
 
				+            current_scroll_height = scroll_container_info["scrollHeight"]
			
 
				+            current_scroll_top = scroll_container_info["scrollTop"]
			
 
				+            client_height = scroll_container_info["clientHeight"]
			
 
				+            is_global = scroll_container_info["isGlobal"]
			
 
				+
			
 
				+            # ========== 优化到底判断逻辑 ==========
			
 
				+            # 条件1：滚动高度无变化（连续2次相同）；条件2：已滚到底（留50px余量）
			
 
				+            is_height_same = current_scroll_height == last_scroll_height
			
 
				+            is_scroll_bottom = (current_scroll_top + client_height) >= (current_scroll_height - 50)
			
 
				+            if is_height_same and scroll_attempts > 2:  # 至少滚动2次再判断高度无变化
			
 
				+                logger.info(f"✅ 滚动高度无变化，判定已到底部")
			
 
				+                break
			
 
				+            if is_scroll_bottom:
			
 
				+                logger.info(f"✅ 已滚动到容器底部")
			
 
				+                break
			
 
				+
			
 
				+            # ========== 执行滚动 ==========
			
 
				+            random_delay = random.uniform(SCROLL_DELAY - 0.05, SCROLL_DELAY + 0.05)
			
 
				+            if is_global:
			
 
				+                # 全局滚动
			
 
				+                page.evaluate(f"window.scrollBy(0, {SCROLL_STEP})")
			
 
				+            else:
			
 
				+                # 局部容器滚动（核心！）
			
 
				+                page.evaluate(f"""
			
 
				+                () => {{
			
 
				+                    const container = document.querySelector('.product-list-container');
			
 
				+                    if (container) {{
			
 
				+                        container.scrollTop += {SCROLL_STEP};
			
 
				+                        // 滚动后触发重绘
			
 
				+                        container.offsetHeight;
			
 
				+                    }}
			
 
				+                }}
			
 
				+                """)
			
 
				+
			
 
				+            page.wait_for_timeout(int(random_delay * 1000))
			
 
				+
			
 
				+            # 更新状态
			
 
				+            last_scroll_height = current_scroll_height
			
 
				+            scroll_attempts += 1
			
 
				+
			
 
				+        # 最后强制滚到底
			
 
				+        page.evaluate("""
			
 
				+        () => {
			
 
				+            const container = document.querySelector('.product-list-container') || document.documentElement;
			
 
				+            container.scrollTop = container.scrollHeight;
			
 
				+        }
			
 
				+        """)
			
 
				+        page.wait_for_timeout(500)
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f"⚠️ 慢速滚动到底部时出现异常：{e}")
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+
			
 
				+def search_operation(page, keyword, is_first_search: bool = True):
			
 
				+    """搜索框填充+提交搜索（遮罩前置清理）"""
			
 
				+    try:
			
 
				+        # ========== 前置清理：先清遮罩，再操作搜索框 ==========
			
 
				+        force_close_popup(page)
			
 
				+        kill_masks(page)
			
 
				+
			
 
				+        search_locator = page.locator(SEARCH_INPUT_SELECTOR)
			
 
				+        search_locator.wait_for(timeout=ELEMENT_TIMEOUT)
			
 
				+
			
 
				+        # 清空搜索框
			
 
				+        search_locator.click(force=True)
			
 
				+        search_locator.fill("")
			
 
				+        page.keyboard.down("Control")
			
 
				+        page.keyboard.press("a")
			
 
				+        page.keyboard.up("Control")
			
 
				+        page.keyboard.press("Backspace")
			
 
				+
			
 
				+        # 逐字输入
			
 
				+        type_slow(search_locator, keyword, min_delay=0.06, max_delay=0.18)
			
 
				+        logger.info(f"📝 已输入搜索关键词：{keyword}")
			
 
				+
			
 
				+        # ========== 再次清理：点击搜索前再清一次 ==========
			
 
				+        force_close_popup(page)
			
 
				+
			
 
				+        # 点击搜索按钮
			
 
				+        btn = page.locator(f"{SEARCH_BTN_SELECTOR}")
			
 
				+        btn.wait_for(state="visible", timeout=SEARCH_BTN_TIMEOUT)
			
 
				+        page.wait_for_timeout(3000)
			
 
				+
			
 
				+        detail_page = page
			
 
				+        if is_first_search:
			
 
				+            try:
			
 
				+                with page.context.expect_page(timeout=60000) as new_page_info:
			
 
				+                    btn.click()
			
 
				+                detail_page = new_page_info.value
			
 
				+                # ========== 新页面立即清遮罩 ==========
			
 
				+                detail_page.wait_for_load_state("domcontentloaded", timeout=20000)
			
 
				+                force_close_popup(detail_page)
			
 
				+                kill_masks(detail_page)
			
 
				+                detail_page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+            except PlaywrightTimeoutError:
			
 
				+                logger.warning(f"   未检测到新标签页")
			
 
				+                return None, False
			
 
				+            except Exception as e:
			
 
				+                logger.warning(f"   等待新标签页异常：{e}")
			
 
				+                return None, False
			
 
				+        else:
			
 
				+            btn.click()
			
 
				+            # ========== 原页面跳转后立即清遮罩 ==========
			
 
				+            page.wait_for_load_state("domcontentloaded", timeout=20000)
			
 
				+            force_close_popup(page)
			
 
				+            kill_masks(page)
			
 
				+            page.wait_for_load_state("networkidle", timeout=20000)
			
 
				+            detail_page = page
			
 
				+            logger.info("✅ 后续搜索：已在原页面完成跳转加载")
			
 
				+
			
 
				+        # 处理引导按钮
			
 
				+        test_btn = detail_page.locator("div[data-v-c65c36bc].first-time-highlight-message-btn button")
			
 
				+        btn_count = test_btn.count()
			
 
				+        logger.info(f"✅ 匹配到的引导按钮数量：{btn_count}")
			
 
				+        if btn_count > 0:
			
 
				+            test_btn.wait_for(state="attached", timeout=5000)
			
 
				+            test_btn.click()
			
 
				+
			
 
				+        # 最终清理：确保无残留遮罩
			
 
				+        force_close_popup(detail_page)
			
 
				+        kill_masks(detail_page)
			
 
				+        logger.info("✅ 已触发搜索并清理弹窗")
			
 
				+
			
 
				+        # ========== 关键：等待列表渲染 + 强制刷新容器高度 ==========
			
 
				+        # 等待商品列表容器加载
			
 
				+        detail_page.wait_for_selector(".product-list-container", timeout=5000)
			
 
				+        # 强制刷新容器样式（解决高度计算错误）
			
 
				+        detail_page.evaluate("""
			
 
				+        () => {
			
 
				+            const container = document.querySelector('.product-list-container');
			
 
				+            if (container) {
			
 
				+                container.style.overflow = 'auto !important';
			
 
				+                // 强制重绘容器
			
 
				+                container.offsetHeight; // 触发重绘
			
 
				+            }
			
 
				+        }
			
 
				+        """)
			
 
				+        detail_page.wait_for_timeout(1000)
			
 
				+
			
 
				+        return detail_page, True
			
 
				+
			
 
				+    except PlaywrightTimeoutError as e:
			
 
				+        logger.error(f" 搜索失败：元素定位超时 - {str(e)}")
			
 
				+        return None, False
			
 
				+    except Exception as e:
			
 
				+        logger.error(f" 搜索异常：{str(e)}")
			
 
				+        return None, False
			
 
				+
			
 
				+
			
 
				+#翻下一页
			
 
				+def goto_next_page(page) -> bool:
			
 
				+    """
			
 
				+    核心修改：基于 button.btn-next 的 aria-disabled 属性判断是否有下一页
			
 
				+    :param page: 搜索结果页面对象（detail_page）
			
 
				+    :return: True=翻页成功，False=无下一页/翻页失败
			
 
				+    """
			
 
				+    try:
			
 
				+        # 1. 定位下一页按钮（精准匹配你指定的元素）
			
 
				+        next_btn = page.locator("button.btn-next").first
			
 
				+
			
 
				+        # 2. 先等待按钮加载（确保元素存在）
			
 
				+        next_btn.wait_for(state="attached", timeout=3000)
			
 
				+
			
 
				+        # 3. 获取 aria-disabled 属性值（核心判断依据）
			
 
				+        aria_disabled = next_btn.get_attribute("aria-disabled")
			
 
				+        logger.info(f"下一页按钮 aria-disabled 属性值：{aria_disabled}")
			
 
				+
			
 
				+        # 4. 判断是否有下一页：aria-disabled="true" 表示无下一页
			
 
				+        if aria_disabled == "true":
			
 
				+            logger.warning("⚠️ 下一页按钮 aria-disabled=true，已无更多页面")
			
 
				+            return False
			
 
				+
			
 
				+        # 5. 按钮可用（aria-disabled="false"），先滚动到顶部（避免按钮被遮挡）
			
 
				+        page.evaluate("window.scrollTo(0, 0);")
			
 
				+        page.wait_for_timeout(500)
			
 
				+
			
 
				+        # 6. 确保按钮可见且可点击（强制点击兜底）
			
 
				+        if next_btn.is_visible() and next_btn.is_enabled():
			
 
				+            next_btn.click(timeout=5000)
			
 
				+        else:
			
 
				+            # 兜底：强制点击（避免元素不可见但实际可点击的情况）
			
 
				+            next_btn.click(force=True, timeout=5000)
			
 
				+
			
 
				+        # 7. 等待页面加载完成（确保翻页后内容刷新）
			
 
				+        page.wait_for_load_state("networkidle", timeout=15000)
			
 
				+
			
 
				+        # 8. 翻页后清理遮罩（避免新页面遮罩影响）
			
 
				+        force_close_popup(page)
			
 
				+        kill_masks(page)
			
 
				+
			
 
				+        logger.info("✅ 翻页成功，下一页按钮 aria-disabled=false")
			
 
				+        return True
			
 
				+
			
 
				+    except PlaywrightTimeoutError:
			
 
				+        logger.warning("⚠️ 下一页按钮加载超时，判定无更多页面")
			
 
				+        return False
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f"⚠️ 翻页操作异常：{e}，判定无更多页面")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+import time
			
 
				+import random
			
 
				+# from playwright.sync_api import MouseWheelDirection
			
 
				+
			
 
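				+# ========== Scroll config (NOTE: this rebinds SCROLL_STEP, so the value 50 below replaces the earlier 200 for every function in this module) ==========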
			
 
				+SCROLL_STEP = 50  # 每步滚动50px
			
 
				+SCROLL_INTERVAL = 0.05  # 每步间隔0.05秒
			
 
				+SCROLL_OFFSET_RANGE = 50  # 滚动距离随机偏移±50px
			
 
				+MIN_CLICK_DELAY = 0.5
			
 
				+MAX_CLICK_DELAY = 1.0
			
 
				+
			
 
				+def random_delay(min_delay, max_delay):
			
 
				+    """随机延迟（模拟真人操作）"""
			
 
				+    time.sleep(random.uniform(min_delay, max_delay))
			
 
				+
			
 
				+def slow_scroll_400px(page, scroll_distance1=400):
			
 
				+    """
			
 
				+    慢速滚动400px±50px（模拟真人鼠标滚轮+强制解除滚动限制）
			
 
				+    适配：全局滚动条动，但JS scrollBy无效的场景
			
 
				+    :param page: 页面对象
			
 
				+    :return: 滚动是否成功
			
 
				+    """
			
 
				+    try:
			
 
				+        # 1. 前置：强制解除页面所有滚动限制（核心！）
			
 
				+        page.evaluate("""
			
 
				+            () => {
			
 
				+                // 强制恢复html/body的滚动能力
			
 
				+                document.documentElement.style.overflow = 'auto !important';
			
 
				+                document.body.style.overflow = 'auto !important';
			
 
				+                document.documentElement.style.pointerEvents = 'auto !important';
			
 
				+                document.body.style.pointerEvents = 'auto !important';
			
 
				+                document.documentElement.style.position = 'static !important';
			
 
				+                document.body.style.position = 'static !important';
			
 
				+
			
 
				+                // 移除所有可能禁用滚动的类/属性
			
 
				+                document.body.classList.remove('el-popup-parent--hidden', 'no-scroll');
			
 
				+                document.documentElement.classList.remove('el-popup-parent--hidden', 'no-scroll');
			
 
				+
			
 
				+                // 强制刷新滚动高度（避免计算错误）
			
 
				+                document.documentElement.offsetHeight;
			
 
				+            }
			
 
				+        """)
			
 
				+        page.wait_for_timeout(300)
			
 
				+
			
 
				+        # 2. 生成随机滚动距离（400±50px）
			
 
				+        scroll_distance = random.randint(
			
 
				+            scroll_distance1 - SCROLL_OFFSET_RANGE,
			
 
				+            scroll_distance1 + SCROLL_OFFSET_RANGE
			
 
				+        )
			
 
				+        # 转换为鼠标滚轮的“步长”（1个滚轮步长≈100px，需适配）
			
 
				+        wheel_steps = int(scroll_distance / 100)
			
 
				+        remaining_pixels = scroll_distance % 100
			
 
				+
			
 
				+        logger.info(
			
 
				+            f"📜 开始模拟鼠标滚轮滚动（目标距离：{scroll_distance}px，滚轮步数：{wheel_steps}步 + {remaining_pixels}px）"
			
 
				+        )
			
 
				+
			
 
				+        # 3. 第一步：用鼠标滚轮模拟真人滚动（反爬友好）
			
 
				+        # 先把鼠标移到页面中间（商品列表区域），避免滚动空白处
			
 
				+        page.mouse.move(random.randint(300, 800), random.randint(400, 600))
			
 
				+        for _ in range(wheel_steps):
			
 
				+            # 模拟鼠标滚轮向下滚动（1步≈100px）
			
 
				+            page.mouse.wheel(delta_x=0, delta_y=100)
			
 
				+            time.sleep(random.uniform(SCROLL_INTERVAL*2, SCROLL_INTERVAL*5))  # 随机间隔，更像真人
			
 
				+
			
 
				+        # 4. 第二步：处理剩余不足1步的像素（用scrollTo兜底）
			
 
				+        if remaining_pixels > 0:
			
 
				+            current_scroll_top = page.evaluate("window.scrollY || document.documentElement.scrollTop")
			
 
				+            target_scroll_top = current_scroll_top + remaining_pixels
			
 
				+            # 用scrollTo强制滚动（比scrollBy更稳定）
			
 
				+            page.evaluate(f"window.scrollTo(0, {target_scroll_top});")
			
 
				+            time.sleep(SCROLL_INTERVAL)
			
 
				+
			
 
				+        # 5. 验证滚动是否生效
			
 
				+        final_scroll_top = page.evaluate("window.scrollY || document.documentElement.scrollTop")
			
 
				+        logger.info(f" 滚动完成，当前全局滚动位置：{final_scroll_top}px")
			
 
				+
			
 
				+        # 6. 滚动后等待懒加载+模拟真人停顿
			
 
				+        page.wait_for_load_state("networkidle", timeout=8000)
			
 
				+        random_delay(2.0, 3.0)
			
 
				+        return True
			
 
				+
			
 
				+    except Exception as e:
			
 
				+        logger.warning(f" 慢速滚动失败：{e}")
			
 
				+        return False
			
 
				+
			
 
				+
			
 
				+
			
 
				+def main():
			
 
				+    with sync_playwright() as p:
			
 
				+        browser = p.chromium.launch(
			
 
				+            headless=False,  # 不要用无头模式（反爬：无头模式易被识别）
			
 
				+            channel="chrome",  # 使用真实Chrome内核
			
 
				+            slow_mo=random.randint(100, 300),  # 全局操作延迟（模拟真人慢速操作）
			
 
				+            args=[
			
 
				+                "--disable-blink-features=AutomationControlled",  # 禁用webdriver特征（核心！）
			
 
				+                "--enable-automation=false",  # 新增：禁用自动化标识
			
 
				+                "--disable-infobars",  # 新增：禁用信息栏
			
 
				+                "--remote-debugging-port=0",  # 新增：随机调试端口
			
 
				+                "--start-maximized",  # 最大化窗口（模拟真人使用）
			
 
				+                "--disable-extensions",  # 禁用扩展（避免特征）
			
 
				+                "--disable-plugins-discovery",  # 禁用插件发现
			
 
				+                "--no-sandbox",  # 避免沙箱模式特征
			
 
				+                "--disable-dev-shm-usage",  # 避免内存限制导致的异常
			
 
				+                f"--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/{random.randint(110, 120)}.0.0.0 Safari/537.36"  # 随机Chrome版本的UA
			
 
				+            ]
			
 
				+        )
			
 
				+        # 创建页面时伪装指纹
			
 
				+        context = browser.new_context(
			
 
				+            locale="zh-CN",  # 中文环境
			
 
				+            timezone_id="Asia/Shanghai",  # 上海时区
			
 
				+            geolocation={"latitude": 31.230416, "longitude": 121.473701},  # 模拟上海地理位置（可选）
			
 
				+            permissions=["geolocation"],  # 授予定位权限（模拟真人）
			
 
				+            user_agent="Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36",
			
 
				+            no_viewport=True,
			
 
				+            # 关键：隐藏自动化特征
			
 
				+            java_script_enabled=True,
			
 
				+            bypass_csp=True,
			
 
				+            # user_data_dir="./temp_user_data"  # 模拟真实用户数据目录
			
 
				+        )
			
 
				+        # input("...")
			
 
				+        page = context.new_page()
			
 
				+
			
 
				+
			
 
				+        # 关键：移除navigator.webdriver标识（反爬核心）
			
 
				+        page.add_init_script("""
			
 
				+            Object.defineProperty(navigator, 'webdriver', { get: () => undefined });
			
 
				+            Object.defineProperty(navigator, 'plugins', { get: () => [1, 2, 3] });  // 新增：模拟插件
			
 
				+            Object.defineProperty(navigator, 'mimeTypes', { get: () => [1, 2, 3] });  // 新增：模拟MIME类型
			
 
				+            window.chrome = { runtime: {}, loadTimes: () => ({}) };  // 增强Chrome模拟
			
 
				+            delete window.navigator.languages;
			
 
				+            window.navigator.languages = ['zh-CN', 'zh'];
			
 
				+            // 新增：模拟真实鼠标移动特征
			
 
				+            (() => {
			
 
				+                const originalAddEventListener = EventTarget.prototype.addEventListener;
			
 
				+                EventTarget.prototype.addEventListener = function(type, listener) {
			
 
				+                    if (type === 'mousemove') {
			
 
				+                        return originalAddEventListener.call(this, type, (e) => {
			
 
				+                            e._automation = undefined;
			
 
				+                            listener(e);
			
 
				+                        });
			
 
				+                    }
			
 
				+                    return originalAddEventListener.call(this, type, listener);
			
 
				+                };
			
 
				+            })();
			
 
				+""")
			
 
				+
			
 
				+
			
 
				+        try:
			
 
				+            # ========== 核心：Cookie复用逻辑 ==========
			
 
				+            # 1. 加载本地Cookie
			
 
				+            load_cookies(context)
			
 
				+
			
 
				+            # 2. 验证登录状态
			
 
				+            if not is_login(page):
			
 
				+                # 3. Cookie失效/不存在，执行登录
			
 
				+                page.goto(TARGET_LOGIN_URL)
			
 
				+                page.wait_for_load_state("networkidle")
			
 
				+                # logger.info("🔑 开始执行登录流程")
			
 
				+
			
 
				+                # 执行登录操作
			
 
				+                # login_success = login_operation(page, USERNAME, PASSWORD)
			
 
				+                # if not login_success:
			
 
				+                #     logger.error(" 登录失败，程序终止")
			
 
				+                #     return
			
 
				+
			
 
				+                # # 4. 登录成功后保存Cookie
			
 
				+                # save_cookies(context)
			
 
				+                # logger.info(" 登录并保存Cookie成功！")
			
 
				+
			
 
				+            KEYWORDS = ['999皮炎平 糠酸莫米松凝胶']
			
 
				+            # get_search_keywords_from_db()
			
 
				+            # 执行搜索
			
 
				+            total_num = 0
			
 
				+            # current_page = page
			
 
				+            detail_page = None
			
 
				+            nums = 0
			
 
				+            page_no = 1
			
 
				+            for kw in KEYWORDS:
			
 
				+                popup_guard(page, "before_search")
			
 
				+                if nums == 0:
			
 
				+                    popup_guard(detail_page if detail_page else page, "before_search")  # page是你的初始页面对象，需提前定义
			
 
				+                    detail_page, search_success = search_operation(page, kw, is_first_search=True)
			
 
				+                    nums += 1
			
 
				+                else:
			
 
				+                    if detail_page is None:
			
 
				+                        logger.error(f" ❌ 无可用的搜索页面，跳过「{kw}」")
			
 
				+                        continue
			
 
				+                    popup_guard(detail_page, "before_search")
			
 
				+                    detail_page, search_success = search_operation(detail_page, kw, is_first_search=False)
			
 
				+
			
 
				+                if not search_success:
			
 
				+                    print(f"❌ 搜索失败：{kw}")
			
 
				+                    continue
			
 
				+
			
 
				+                if detail_page is None:
			
 
				+                    break
			
 
				+
			
 
				+                popup_guard(detail_page, "after_search")
			
 
				+
			
 
				+
			
 
				+
			
 
				+                #找不到数据跳过判断和出现杂数据跳过
			
 
				+                not_found_keywords = detail_page.locator("div.filter-panel-container-empty-text")
			
 
				+                if not_found_keywords.count() > 0:
			
 
				+                    logger.warning(f"⚠️ 关键词「{kw}」无匹配商品，直接跳过整个关键词采集")
			
 
				+                    continue
			
 
				+
			
 
				+
			
 
				+
			
 
				+                # detail_page.wait_for_selector("div[class*='product-list'], .el-table", timeout=5000)
			
 
				+
			
 
				+                # slow_scroll_to_bottom(detail_page)
			
 
				+
			
 
				+                while True:
			
 
				+                    # ✅ 先获取当前页商品个数
			
 
				+                    detail_page.wait_for_load_state("domcontentloaded")  # 先等DOM加载
			
 
				+                    detail_page.wait_for_load_state("networkidle")
			
 
				+                    detail_page.wait_for_timeout(500)                   # 额外等待渲染稳定
			
 
				+                    goods_item = detail_page.locator("div.product-list-item").count()
			
 
				+                    print(f"这页商品有{goods_item}个")
			
 
				+
			
 
				+
			
 
				+                    slow_scroll_400px(detail_page)
			
 
				+                    if goto_next_page(detail_page):
			
 
				+                        logger.info(f"「{kw}」还有下一页")
			
 
				+                        page_no += 1
			
 
				+                        continue
			
 
				+                    else:
			
 
				+                        logger.info(f" 「{kw}」已无下一页，关键词采集结束")
			
 
				+                        break
			
 
				+
			
 
				+        except Exception as e:
			
 
				+            print(f" 程序异常：{str(e)}")
			
 
				+        finally:
			
 
				+            browser.close()
			
 
				+            print(" 浏览器已关闭，程序结束")
			
 
				+
			
 
				+# ==================== 程序入口 ====================
			
 
				+if __name__ == '__main__':
			
 
				+    main()
			
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -0,0 +1,7 @@
 
				+[project]
			
 
				+name = "ybm-project"
			
 
				+version = "0.1.0"
			
 
				+description = "Add your description here"
			
 
				+readme = "README.md"
			
 
				+requires-python = ">=3.14"
			
 
				+dependencies = []
			
--- a/scheduler.py
+++ b/scheduler.py
@@ -0,0 +1,60 @@
 
				+import time
			
 
				+
			
 
				+from logger_config import logger
			
 
				+from config import get_search_keywords_from_db, has_running_task
			
 
				+import main as main_module
			
 
				+
			
 
				+
			
 
# Polling interval: check for work once every 1800 seconds (30 minutes).
POLL_SECONDS = 1800
# Platform filter: only tasks with platform=9 are processed.
PLATFORM = 9
			
 
				+
			
 
				+
			
 
				+def tick(platform: int = PLATFORM) -> None:
			
 
				+    """执行一轮调度检查。"""
			
 
				+    try:
			
 
				+        # 1) 先看是否已有执行中任务（status=2），有则跳过，避免打断
			
 
				+        if has_running_task(platform=platform):
			
 
				+            logger.info(f"[调度器] 检测到执行中任务(status=2, platform={platform})，本轮跳过。")
			
 
				+            return
			
 
				+
			
 
				+        # 2) 查询待执行任务（status=1）
			
 
				+        tasks = get_search_keywords_from_db(platform=platform)
			
 
				+        logger.info(f"[调度器] 当前待执行任务数: {len(tasks)}")
			
 
				+        if not tasks:
			
 
				+            logger.info(f"[调度器] 未发现待执行任务(status=1, platform={platform})。")
			
 
				+            return
			
 
				+
			
 
				+        # 3) 有任务则触发 main.py 单次执行流程
			
 
				+        logger.info(
			
 
				+            f"[调度器] 发现 {len(tasks)} 个待执行任务(platform={platform})，开始执行 main.main()。"
			
 
				+        )
			
 
				+        main_module.main()
			
 
				+    except Exception as e:
			
 
				+        logger.error(f"[调度器] 本轮调度执行失败: {str(e)}")
			
 
				+
			
 
				+
			
 
				+def run_scheduler(interval_seconds: int = POLL_SECONDS, platform: int = PLATFORM) -> None:
			
 
				+    """持续运行调度器：启动后立即执行一次，随后按固定间隔轮询。"""
			
 
				+    logger.info(
			
 
				+        f"[调度器] 已启动，轮询间隔={interval_seconds}秒，platform={platform}。"
			
 
				+        f"启动后立即执行首轮检查。"
			
 
				+    )
			
 
				+    while True:
			
 
				+        # 记录本轮开始时间，用于计算剩余休眠时长
			
 
				+        started = time.time()
			
 
				+        try:
			
 
				+            tick(platform=platform)
			
 
				+        except Exception as e:
			
 
				+            logger.error(f"[调度器] 调度循环出现未预期异常: {str(e)}")
			
 
				+
			
 
				+        # 休眠“间隔 - 本轮耗时”，保证轮询节奏稳定
			
 
				+        elapsed = time.time() - started
			
 
				+        sleep_seconds = max(0, interval_seconds - elapsed)
			
 
				+        logger.info(f"[调度器] 下一轮将在 {sleep_seconds:.1f} 秒后执行。")
			
 
				+        time.sleep(sleep_seconds)
			
 
				+
			
 
				+
			
 
# Start the polling loop only when this file is executed as a script.
if __name__ == "__main__":
    run_scheduler()
			
--- a/ybm_cookies.json
+++ b/ybm_cookies.json
@@ -0,0 +1,132 @@
 
				+[
			
 
				+  {
			
 
				+    "name": "_abfpc",
			
 
				+    "value": "3e5802e74d6b1f7671f6bed1de99e61b79f83598_2.0",
			
 
				+    "domain": ".ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1803720191.559186,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": true,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "cna",
			
 
				+    "value": "e3360d62ac68ee48485b5f7516e60876",
			
 
				+    "domain": "qt.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1807157215.041262,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": true,
			
 
				+    "sameSite": "None"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "cna",
			
 
				+    "value": "e3360d62ac68ee48485b5f7516e60876",
			
 
				+    "domain": ".ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1810181200.955104,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": true,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "acw_tc",
			
 
				+    "value": "276077e617756211968315303e2112335bbd43395844dce3e30acc0e6d7dd2",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1775622998.166995,
			
 
				+    "httpOnly": true,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "xyy",
			
 
				+    "value": "MjM2JjE4MDA4NjUwMzAw",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1778213212.789501,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "JSESSIONID",
			
 
				+    "value": "CEFE61BFC25EC7F0438E02D9333F2B8B",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": -1,
			
 
				+    "httpOnly": true,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "xyy_token",
			
 
				+    "value": "eyJhbGciOiJIUzUxMiJ9.eyJhY2NvdW50X2lkIjoyMzYsImRldmljZV9pZCI6IiIsIm9zIjoiV2luZG93cyAxMCIsImxvZ2luX3RpbWUiOjE3NzU2MjEyMTEzNzEsImJyb3dzZXIiOiJDaHJvbWUgMTIiLCJtZXJjaGFudF9pZCI6MjM2LCJpcF9hZGRyIjoiMTE2LjMwLjIyOS4xMDMiLCJ2ZXJzaW9uIjoiIiwibG9naW5fdXNlcl9rZXkiOiI2NThmNzI5NS02OWMwLTRmYzAtOTdlNS1iMDU1OTcwN2JiMmUifQ.owO3fCXjp4YxOE-RRRQiiX1-4W1ujjjmer9snqzkeMSQmKpozZAHeLztAAtVIyTnXpYzmTlvNfOx_YRLJHQg4A",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1778213212.789323,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "xyy_principal",
			
 
				+    "value": "236&ODRkNTZhNDU3MDY2NWI3MGU3ZTc4ODk1NmY2YmMxZWU4Y2YyMThjOQ&236",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1778213212.789396,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "xyy_last_login_time",
			
 
				+    "value": "1775621211371",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1778213212.78945,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "jg_login",
			
 
				+    "value": "1",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1775707612.789604,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "qt_session",
			
 
				+    "value": "S7872LTW_1775621212097",
			
 
				+    "domain": "www.ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1778213213.709563,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "ssxmod_itna",
			
 
				+    "value": "1-Qq_x9DgDRmD=DtG0YYDOKG7=i=Mx_D0dGMD3qq7U3GcD8rx0px74Gkf46LyQWYWDDq08qQe37tDlr7TYDSxD6FDK4GTh8xx7tqcDUhohp3fx3yqhdP4Co89eFvn0B=6TqV/H2zOyV7YDU4GnD06xWY5exYAfDBYD74G_DDeDirrD84D_DGpouWuoxifYfAeDzwidxG3QxYpokeDg_DDBGbiDKqPdx4DlFqYI_jK7cWiOdoHDAkD=oGGox0tWDBL=fKAoLc8jxRT=1K8HTf2TNPGuDG6bIeGmYQH7jeWHqg0mtQm6WBhMTSxqAK37qqExcBYkBxYDxqli/kDzDxN0DPxo_7dOQeDDfwY_KjXinDPDbupt9ZUBxwi4vpYK2QGmjPxRGX7G5WKrIw8jwzlqIWwKDR=0iqCi4_AO34xD",
			
 
				+    "domain": ".ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1775623015,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  },
			
 
				+  {
			
 
				+    "name": "ssxmod_itna2",
			
 
				+    "value": "1-Qq_x9DgDRmD=DtG0YYDOKG7=i=Mx_D0dGMD3qq7U3GcD8rx0px74Gkf46LyQWYWDDq08qQe37YDipwBwikzDe033m8k0qDlrBxcU=DIar=E=YN7i9Nb=QiSG=OxdPSkP_VbB18PYzst6xfbxyiTQylkGK9ff4TMDf9ddq_W26I83q5TfQHCqZ8EdqkAxE8or/YLP4RjI6CLP35YQakQ6aEG3GfRDGziLqS8wKgbhizAxPuO3iI4_fi9b4nLgRzA34UnfheF2yB3MEyUIUdamiD41ckGE1Sg549AKbaN1w=KYxD",
			
 
				+    "domain": ".ybm100.com",
			
 
				+    "path": "/",
			
 
				+    "expires": 1775623015,
			
 
				+    "httpOnly": false,
			
 
				+    "secure": false,
			
 
				+    "sameSite": "Lax"
			
 
				+  }
			
 
				+]