Parcourir la source

从店铺表获取已有的店铺数据回填省市

feelsocode il y a 1 semaine
Parent
commit
7009a02ee6

+ 113 - 0
.idea/workspace.xml

@@ -0,0 +1,113 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<project version="4">
+  <component name="AutoImportSettings">
+    <option name="autoReloadType" value="SELECTIVE" />
+  </component>
+  <component name="ChangeListManager">
+    <list default="true" id="ff35c8dd-dae0-4980-b102-1ac3ac1f38ed" name="更改" comment="" />
+    <option name="SHOW_DIALOG" value="false" />
+    <option name="HIGHLIGHT_CONFLICTS" value="true" />
+    <option name="HIGHLIGHT_NON_ACTIVE_CHANGELIST" value="false" />
+    <option name="LAST_RESOLUTION" value="IGNORE" />
+  </component>
+  <component name="FileTemplateManagerImpl">
+    <option name="RECENT_TEMPLATES">
+      <list>
+        <option value="Python Script" />
+      </list>
+    </option>
+  </component>
+  <component name="Git.Settings">
+    <option name="RECENT_GIT_ROOT_PATH" value="$PROJECT_DIR$" />
+  </component>
+  <component name="ProjectColorInfo"><![CDATA[{
+  "associatedIndex": 2
+}]]></component>
+  <component name="ProjectId" id="3CR12zNmCDVR6Fs7SPNdePavjb6" />
+  <component name="ProjectViewState">
+    <option name="hideEmptyMiddlePackages" value="true" />
+    <option name="showLibraryContents" value="true" />
+  </component>
+  <component name="PropertiesComponent"><![CDATA[{
+  "keyToString": {
+    "ASKED_SHARE_PROJECT_CONFIGURATION_FILES": "true",
+    "ModuleVcsDetector.initialDetectionPerformed": "true",
+    "Python.main.executor": "Run",
+    "RunOnceActivity.ShowReadmeOnStart": "true",
+    "RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager.252": "true",
+    "SHARE_PROJECT_CONFIGURATION_FILES": "true",
+    "git-widget-placeholder": "main",
+    "last_opened_file_path": "C:/Users/datai/Desktop/PDD/pdd1",
+    "settings.editor.selected.configurable": "com.jetbrains.python.configuration.PyActiveSdkModuleConfigurable"
+  }
+}]]></component>
+  <component name="RecentsManager">
+    <key name="CopyFile.RECENT_KEYS">
+      <recent name="C:\Users\datai\Desktop\PDD\pdd1" />
+      <recent name="C:\Users\datai\Desktop\PDD\pdd1\省市回填脚本" />
+    </key>
+    <key name="MoveFile.RECENT_KEYS">
+      <recent name="C:\Users\datai\Desktop\PDD\pdd1\process_shop" />
+    </key>
+  </component>
+  <component name="RunManager">
+    <configuration name="main" type="PythonConfigurationType" factoryName="Python" nameIsGenerated="true">
+      <module name="PDD" />
+      <option name="ENV_FILES" value="" />
+      <option name="INTERPRETER_OPTIONS" value="" />
+      <option name="PARENT_ENVS" value="true" />
+      <envs>
+        <env name="PYTHONUNBUFFERED" value="1" />
+      </envs>
+      <option name="SDK_HOME" value="" />
+      <option name="WORKING_DIRECTORY" value="$PROJECT_DIR$" />
+      <option name="IS_MODULE_SDK" value="true" />
+      <option name="ADD_CONTENT_ROOTS" value="true" />
+      <option name="ADD_SOURCE_ROOTS" value="true" />
+      <option name="SCRIPT_NAME" value="$PROJECT_DIR$/main.py" />
+      <option name="PARAMETERS" value="" />
+      <option name="SHOW_COMMAND_LINE" value="false" />
+      <option name="EMULATE_TERMINAL" value="false" />
+      <option name="MODULE_MODE" value="false" />
+      <option name="REDIRECT_INPUT" value="false" />
+      <option name="INPUT_FILE" value="" />
+      <method v="2" />
+    </configuration>
+  </component>
+  <component name="SharedIndexes">
+    <attachedChunks>
+      <set>
+        <option value="bundled-python-sdk-4e2b1448bda8-9a97661f3031-com.jetbrains.pycharm.pro.sharedIndexes.bundled-PY-252.27397.106" />
+      </set>
+    </attachedChunks>
+  </component>
+  <component name="TaskManager">
+    <task active="true" id="Default" summary="默认任务">
+      <changelist id="ff35c8dd-dae0-4980-b102-1ac3ac1f38ed" name="更改" comment="" />
+      <created>1776333366641</created>
+      <option name="number" value="Default" />
+      <option name="presentableId" value="Default" />
+      <updated>1776333366641</updated>
+    </task>
+    <servers />
+  </component>
+  <component name="XDebuggerManager">
+    <breakpoint-manager>
+      <breakpoints>
+        <line-breakpoint enabled="true" suspend="THREAD" type="python-line">
+          <url>file://$PROJECT_DIR$/main.py</url>
+          <line>8</line>
+          <option name="timeStamp" value="1" />
+        </line-breakpoint>
+      </breakpoints>
+    </breakpoint-manager>
+  </component>
+  <component name="github-copilot-workspace">
+    <instructionFileLocations>
+      <option value=".github/instructions" />
+    </instructionFileLocations>
+    <promptFileLocations>
+      <option value=".github/prompts" />
+    </promptFileLocations>
+  </component>
+</project>

+ 68 - 0
logs/pdd_spider.log

@@ -0,0 +1,68 @@
+2026-04-21 00:09:53,422 - INFO - 当前没有待执行任务
+2026-04-21 00:19:53,518 - INFO - 当前没有待执行任务
+2026-04-21 00:29:53,587 - INFO - 当前没有待执行任务
+2026-04-21 00:39:53,668 - INFO - 当前没有待执行任务
+2026-04-21 00:49:53,736 - INFO - 当前没有待执行任务
+2026-04-21 00:59:53,813 - INFO - 当前没有待执行任务
+2026-04-21 01:09:53,966 - INFO - 当前没有待执行任务
+2026-04-21 01:19:54,033 - INFO - 当前没有待执行任务
+2026-04-21 01:29:54,122 - INFO - 当前没有待执行任务
+2026-04-21 01:39:54,228 - INFO - 当前没有待执行任务
+2026-04-21 01:49:54,307 - INFO - 当前没有待执行任务
+2026-04-21 01:59:54,389 - INFO - 当前没有待执行任务
+2026-04-21 02:09:54,455 - INFO - 当前没有待执行任务
+2026-04-21 02:19:54,511 - INFO - 当前没有待执行任务
+2026-04-21 02:29:54,604 - INFO - 当前没有待执行任务
+2026-04-21 02:39:54,676 - INFO - 当前没有待执行任务
+2026-04-21 02:49:54,765 - INFO - 当前没有待执行任务
+2026-04-21 02:59:54,861 - INFO - 当前没有待执行任务
+2026-04-21 03:09:54,943 - INFO - 当前没有待执行任务
+2026-04-21 03:19:55,044 - INFO - 当前没有待执行任务
+2026-04-21 03:29:55,229 - INFO - 当前没有待执行任务
+2026-04-21 03:39:55,333 - INFO - 当前没有待执行任务
+2026-04-21 03:49:55,413 - INFO - 当前没有待执行任务
+2026-04-21 03:59:55,516 - INFO - 当前没有待执行任务
+2026-04-21 04:09:55,599 - INFO - 当前没有待执行任务
+2026-04-21 04:19:55,674 - INFO - 当前没有待执行任务
+2026-04-21 04:29:55,755 - INFO - 当前没有待执行任务
+2026-04-21 04:39:55,853 - INFO - 当前没有待执行任务
+2026-04-21 04:49:55,954 - INFO - 当前没有待执行任务
+2026-04-21 04:59:56,116 - INFO - 当前没有待执行任务
+2026-04-21 05:09:56,195 - INFO - 当前没有待执行任务
+2026-04-21 05:19:56,279 - INFO - 当前没有待执行任务
+2026-04-21 05:29:56,572 - INFO - 当前没有待执行任务
+2026-04-21 05:39:56,674 - INFO - 当前没有待执行任务
+2026-04-21 05:49:56,755 - INFO - 当前没有待执行任务
+2026-04-21 05:59:56,959 - INFO - 当前没有待执行任务
+2026-04-21 06:09:57,140 - INFO - 当前没有待执行任务
+2026-04-21 06:19:57,225 - INFO - 当前没有待执行任务
+2026-04-21 06:29:57,322 - INFO - 当前没有待执行任务
+2026-04-21 06:39:57,409 - INFO - 当前没有待执行任务
+2026-04-21 06:49:57,499 - INFO - 当前没有待执行任务
+2026-04-21 06:59:57,583 - INFO - 当前没有待执行任务
+2026-04-21 07:09:57,741 - INFO - 当前没有待执行任务
+2026-04-21 07:19:57,829 - INFO - 当前没有待执行任务
+2026-04-21 07:29:57,918 - INFO - 当前没有待执行任务
+2026-04-21 07:39:58,010 - INFO - 当前没有待执行任务
+2026-04-21 07:49:58,085 - INFO - 当前没有待执行任务
+2026-04-21 07:59:58,171 - INFO - 当前没有待执行任务
+2026-04-21 08:09:58,288 - INFO - 当前没有待执行任务
+2026-04-21 08:19:58,377 - INFO - 当前没有待执行任务
+2026-04-21 08:29:58,462 - INFO - 当前没有待执行任务
+2026-04-21 08:39:58,544 - INFO - 当前没有待执行任务
+2026-04-21 08:49:58,637 - INFO - 当前没有待执行任务
+2026-04-21 08:59:58,733 - INFO - 当前没有待执行任务
+2026-04-21 09:09:58,841 - INFO - 当前没有待执行任务
+2026-04-21 09:19:58,918 - INFO - 当前没有待执行任务
+2026-04-21 09:29:59,047 - INFO - 当前没有待执行任务
+2026-04-21 09:39:59,203 - INFO - 当前没有待执行任务
+2026-04-21 09:42:54,438 - INFO - PDD 调度器启动,轮询间隔 600 秒
+2026-04-21 09:42:54,566 - INFO - 当前没有待执行任务
+2026-04-21 09:52:54,678 - INFO - 当前没有待执行任务
+2026-04-21 10:02:54,777 - INFO - 当前没有待执行任务
+2026-04-21 10:12:54,873 - INFO - 当前没有待执行任务
+2026-04-21 10:22:55,049 - INFO - 当前没有待执行任务
+2026-04-21 10:32:55,387 - INFO - 当前没有待执行任务
+2026-04-21 10:42:55,494 - INFO - 当前没有待执行任务
+2026-04-21 10:52:55,609 - INFO - 当前没有待执行任务
+2026-04-21 11:02:55,705 - INFO - 当前没有待执行任务

+ 58 - 0
logs/pdd_spider.log.2026-04-20

@@ -0,0 +1,58 @@
+2026-04-20 14:39:50,045 - INFO - PDD 调度器启动,轮询间隔 600 秒
+2026-04-20 14:39:50,123 - INFO - 当前没有待执行任务
+2026-04-20 14:49:50,285 - INFO - 当前没有待执行任务
+2026-04-20 14:59:50,357 - INFO - 当前没有待执行任务
+2026-04-20 15:09:50,449 - INFO - 当前没有待执行任务
+2026-04-20 15:19:50,688 - INFO - 当前没有待执行任务
+2026-04-20 15:29:50,883 - INFO - 当前没有待执行任务
+2026-04-20 15:39:50,955 - INFO - 当前没有待执行任务
+2026-04-20 15:49:51,055 - INFO - 当前没有待执行任务
+2026-04-20 15:59:51,146 - INFO - 当前没有待执行任务
+2026-04-20 16:09:51,271 - INFO - 当前没有待执行任务
+2026-04-20 16:19:51,472 - INFO - 当前没有待执行任务
+2026-04-20 16:29:51,553 - INFO - 当前没有待执行任务
+2026-04-20 16:39:51,639 - INFO - 当前没有待执行任务
+2026-04-20 16:49:51,707 - INFO - 当前没有待执行任务
+2026-04-20 16:59:51,780 - INFO - 当前没有待执行任务
+2026-04-20 17:09:51,903 - INFO - 当前没有待执行任务
+2026-04-20 17:19:52,014 - INFO - 当前没有待执行任务
+2026-04-20 17:29:52,099 - INFO - 当前没有待执行任务
+2026-04-20 17:39:52,184 - INFO - 当前没有待执行任务
+2026-04-20 17:49:52,271 - INFO - 当前没有待执行任务
+2026-04-20 17:59:52,367 - INFO - 当前没有待执行任务
+2026-04-20 18:09:52,494 - INFO - 当前没有待执行任务
+2026-04-20 18:19:52,556 - INFO - 当前没有待执行任务
+2026-04-20 18:29:52,914 - INFO - 当前没有待执行任务
+2026-04-20 18:39:53,024 - INFO - 当前没有待执行任务
+2026-04-20 18:49:53,117 - INFO - 当前没有待执行任务
+2026-04-20 18:59:53,204 - INFO - 当前没有待执行任务
+2026-04-20 19:09:53,292 - INFO - 当前没有待执行任务
+2026-04-20 19:19:53,373 - INFO - 当前没有待执行任务
+2026-04-20 19:29:53,522 - INFO - 当前没有待执行任务
+2026-04-20 19:39:53,594 - INFO - 当前没有待执行任务
+2026-04-20 19:49:53,703 - INFO - 当前没有待执行任务
+2026-04-20 19:59:53,788 - INFO - 当前没有待执行任务
+2026-04-20 20:09:53,882 - INFO - 当前没有待执行任务
+2026-04-20 20:19:53,963 - INFO - 当前没有待执行任务
+2026-04-20 20:29:54,044 - INFO - 当前没有待执行任务
+2026-04-20 20:39:54,142 - INFO - 当前没有待执行任务
+2026-04-20 20:49:54,222 - INFO - 当前没有待执行任务
+2026-04-20 20:59:54,306 - INFO - 当前没有待执行任务
+2026-04-20 21:09:54,396 - INFO - 当前没有待执行任务
+2026-04-20 21:19:54,484 - INFO - 当前没有待执行任务
+2026-04-20 21:29:54,557 - INFO - 当前没有待执行任务
+2026-04-20 21:39:54,638 - INFO - 当前没有待执行任务
+2026-04-20 21:49:54,710 - INFO - 当前没有待执行任务
+2026-04-20 21:59:54,798 - INFO - 当前没有待执行任务
+2026-04-20 22:09:54,887 - INFO - 当前没有待执行任务
+2026-04-20 22:19:54,972 - INFO - 当前没有待执行任务
+2026-04-20 22:29:55,021 - INFO - 当前没有待执行任务
+2026-04-20 22:39:55,108 - INFO - 当前没有待执行任务
+2026-04-20 22:49:55,172 - INFO - 当前没有待执行任务
+2026-04-20 22:59:55,221 - INFO - 当前没有待执行任务
+2026-04-20 23:09:55,322 - INFO - 当前没有待执行任务
+2026-04-20 23:19:53,012 - INFO - 当前没有待执行任务
+2026-04-20 23:29:53,133 - INFO - 当前没有待执行任务
+2026-04-20 23:39:53,196 - INFO - 当前没有待执行任务
+2026-04-20 23:49:53,259 - INFO - 当前没有待执行任务
+2026-04-20 23:59:53,346 - INFO - 当前没有待执行任务

+ 16 - 0
main.py

@@ -0,0 +1,16 @@
+# 这是一个示例 Python 脚本。
+
+# 按 Shift+F10 执行或将其替换为您的代码。
+# 按 双击 Shift 在所有地方搜索类、文件、工具窗口、操作和设置。
+
+
+def print_hi(name):
+    # 在下面的代码行中使用断点来调试脚本。
+    print(f'Hi, {name}')  # 按 Ctrl+F8 切换断点。
+
+
+# 按装订区域中的绿色按钮以运行脚本。
+if __name__ == '__main__':
+    print_hi('PyCharm')
+
+# 访问 https://www.jetbrains.com/help/pycharm/ 获取 PyCharm 帮助

BIN
pdd1/__pycache__/logger.cpython-314.pyc


BIN
pdd1/__pycache__/pdd_config.cpython-314.pyc


BIN
pdd1/__pycache__/pdd_new.cpython-314.pyc


+ 1929 - 0
pdd1/city.json

@@ -0,0 +1,1929 @@
+[
+    {
+        "id": 1,
+        "name": "北京",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 2,
+                "name": "北京市",
+                "pid": 1
+            }
+        ]
+    },
+    {
+        "id": 368,
+        "name": "天津",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 369,
+                "name": "天津市",
+                "pid": 368
+            }
+        ]
+    },
+    {
+        "id": 685,
+        "name": "河北省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 686,
+                "name": "石家庄市",
+                "pid": 685
+            },
+            {
+                "id": 991,
+                "name": "唐山市",
+                "pid": 685
+            },
+            {
+                "id": 1261,
+                "name": "秦皇岛市",
+                "pid": 685
+            },
+            {
+                "id": 1373,
+                "name": "邯郸市",
+                "pid": 685
+            },
+            {
+                "id": 1640,
+                "name": "邢台市",
+                "pid": 685
+            },
+            {
+                "id": 1861,
+                "name": "保定市",
+                "pid": 685
+            },
+            {
+                "id": 2247,
+                "name": "张家口市",
+                "pid": 685
+            },
+            {
+                "id": 2513,
+                "name": "承德市",
+                "pid": 685
+            },
+            {
+                "id": 2745,
+                "name": "沧州市",
+                "pid": 685
+            },
+            {
+                "id": 2974,
+                "name": "廊坊市",
+                "pid": 685
+            },
+            {
+                "id": 3109,
+                "name": "衡水市",
+                "pid": 685
+            }
+        ]
+    },
+    {
+        "id": 3252,
+        "name": "山西省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 3253,
+                "name": "太原市",
+                "pid": 3252
+            },
+            {
+                "id": 3373,
+                "name": "大同市",
+                "pid": 3252
+            },
+            {
+                "id": 3512,
+                "name": "阳泉市",
+                "pid": 3252
+            },
+            {
+                "id": 3561,
+                "name": "长治市",
+                "pid": 3252
+            },
+            {
+                "id": 3723,
+                "name": "晋城市",
+                "pid": 3252
+            },
+            {
+                "id": 3807,
+                "name": "朔州市",
+                "pid": 3252
+            },
+            {
+                "id": 3887,
+                "name": "晋中市",
+                "pid": 3252
+            },
+            {
+                "id": 4029,
+                "name": "运城市",
+                "pid": 3252
+            },
+            {
+                "id": 4204,
+                "name": "忻州市",
+                "pid": 3252
+            },
+            {
+                "id": 4393,
+                "name": "临汾市",
+                "pid": 3252
+            },
+            {
+                "id": 4569,
+                "name": "吕梁市",
+                "pid": 3252
+            }
+        ]
+    },
+    {
+        "id": 4737,
+        "name": "内蒙古自治区",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 4738,
+                "name": "呼和浩特市",
+                "pid": 4737
+            },
+            {
+                "id": 4848,
+                "name": "包头市",
+                "pid": 4737
+            },
+            {
+                "id": 4950,
+                "name": "乌海市",
+                "pid": 4737
+            },
+            {
+                "id": 4976,
+                "name": "赤峰市",
+                "pid": 4737
+            },
+            {
+                "id": 5186,
+                "name": "通辽市",
+                "pid": 4737
+            },
+            {
+                "id": 5339,
+                "name": "鄂尔多斯市",
+                "pid": 4737
+            },
+            {
+                "id": 5437,
+                "name": "呼伦贝尔市",
+                "pid": 4737
+            },
+            {
+                "id": 5610,
+                "name": "巴彦淖尔市",
+                "pid": 4737
+            },
+            {
+                "id": 5707,
+                "name": "乌兰察布市",
+                "pid": 4737
+            },
+            {
+                "id": 5829,
+                "name": "兴安盟",
+                "pid": 4737
+            },
+            {
+                "id": 5926,
+                "name": "锡林郭勒盟",
+                "pid": 4737
+            },
+            {
+                "id": 6039,
+                "name": "阿拉善盟",
+                "pid": 4737
+            }
+        ]
+    },
+    {
+        "id": 6083,
+        "name": "辽宁省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 6084,
+                "name": "沈阳市",
+                "pid": 6083
+            },
+            {
+                "id": 6288,
+                "name": "大连市",
+                "pid": 6083
+            },
+            {
+                "id": 6451,
+                "name": "鞍山市",
+                "pid": 6083
+            },
+            {
+                "id": 6555,
+                "name": "抚顺市",
+                "pid": 6083
+            },
+            {
+                "id": 6635,
+                "name": "本溪市",
+                "pid": 6083
+            },
+            {
+                "id": 6691,
+                "name": "丹东市",
+                "pid": 6083
+            },
+            {
+                "id": 6787,
+                "name": "锦州市",
+                "pid": 6083
+            },
+            {
+                "id": 6899,
+                "name": "营口市",
+                "pid": 6083
+            },
+            {
+                "id": 6976,
+                "name": "阜新市",
+                "pid": 6083
+            },
+            {
+                "id": 7067,
+                "name": "辽阳市",
+                "pid": 6083
+            },
+            {
+                "id": 7125,
+                "name": "盘锦市",
+                "pid": 6083
+            },
+            {
+                "id": 7180,
+                "name": "铁岭市",
+                "pid": 6083
+            },
+            {
+                "id": 7294,
+                "name": "朝阳市",
+                "pid": 6083
+            },
+            {
+                "id": 7465,
+                "name": "葫芦岛市",
+                "pid": 6083
+            }
+        ]
+    },
+    {
+        "id": 7604,
+        "name": "吉林省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 7605,
+                "name": "长春市",
+                "pid": 7604
+            },
+            {
+                "id": 7852,
+                "name": "吉林市",
+                "pid": 7604
+            },
+            {
+                "id": 8015,
+                "name": "四平市",
+                "pid": 7604
+            },
+            {
+                "id": 8100,
+                "name": "辽源市",
+                "pid": 7604
+            },
+            {
+                "id": 8153,
+                "name": "通化市",
+                "pid": 7604
+            },
+            {
+                "id": 8270,
+                "name": "白山市",
+                "pid": 7604
+            },
+            {
+                "id": 8344,
+                "name": "松原市",
+                "pid": 7604
+            },
+            {
+                "id": 8498,
+                "name": "白城市",
+                "pid": 7604
+            },
+            {
+                "id": 8631,
+                "name": "延边朝鲜族自治州",
+                "pid": 7604
+            }
+        ]
+    },
+    {
+        "id": 8758,
+        "name": "黑龙江省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 8759,
+                "name": "哈尔滨市",
+                "pid": 8758
+            },
+            {
+                "id": 9106,
+                "name": "齐齐哈尔市",
+                "pid": 8758
+            },
+            {
+                "id": 9332,
+                "name": "鸡西市",
+                "pid": 8758
+            },
+            {
+                "id": 9437,
+                "name": "鹤岗市",
+                "pid": 8758
+            },
+            {
+                "id": 9515,
+                "name": "双鸭山市",
+                "pid": 8758
+            },
+            {
+                "id": 9634,
+                "name": "大庆市",
+                "pid": 8758
+            },
+            {
+                "id": 9763,
+                "name": "伊春市",
+                "pid": 8758
+            },
+            {
+                "id": 9824,
+                "name": "佳木斯市",
+                "pid": 8758
+            },
+            {
+                "id": 9973,
+                "name": "七台河市",
+                "pid": 8758
+            },
+            {
+                "id": 10022,
+                "name": "牡丹江市",
+                "pid": 8758
+            },
+            {
+                "id": 10125,
+                "name": "黑河市",
+                "pid": 8758
+            },
+            {
+                "id": 10292,
+                "name": "绥化市",
+                "pid": 8758
+            },
+            {
+                "id": 10550,
+                "name": "大兴安岭地区",
+                "pid": 8758
+            }
+        ]
+    },
+    {
+        "id": 10601,
+        "name": "上海",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 10602,
+                "name": "上海市",
+                "pid": 10601
+            }
+        ]
+    },
+    {
+        "id": 10853,
+        "name": "江苏省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 10854,
+                "name": "南京市",
+                "pid": 10853
+            },
+            {
+                "id": 11006,
+                "name": "无锡市",
+                "pid": 10853
+            },
+            {
+                "id": 11102,
+                "name": "徐州市",
+                "pid": 10853
+            },
+            {
+                "id": 11300,
+                "name": "常州市",
+                "pid": 10853
+            },
+            {
+                "id": 11374,
+                "name": "苏州市",
+                "pid": 10853
+            },
+            {
+                "id": 11503,
+                "name": "南通市",
+                "pid": 10853
+            },
+            {
+                "id": 11648,
+                "name": "连云港市",
+                "pid": 10853
+            },
+            {
+                "id": 11765,
+                "name": "淮安市",
+                "pid": 10853
+            },
+            {
+                "id": 11887,
+                "name": "盐城市",
+                "pid": 10853
+            },
+            {
+                "id": 12052,
+                "name": "扬州市",
+                "pid": 10853
+            },
+            {
+                "id": 12158,
+                "name": "镇江市",
+                "pid": 10853
+            },
+            {
+                "id": 12226,
+                "name": "泰州市",
+                "pid": 10853
+            },
+            {
+                "id": 12342,
+                "name": "宿迁市",
+                "pid": 10853
+            }
+        ]
+    },
+    {
+        "id": 12458,
+        "name": "浙江省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 12459,
+                "name": "杭州市",
+                "pid": 12458
+            },
+            {
+                "id": 12669,
+                "name": "宁波市",
+                "pid": 12458
+            },
+            {
+                "id": 12842,
+                "name": "温州市",
+                "pid": 12458
+            },
+            {
+                "id": 13046,
+                "name": "嘉兴市",
+                "pid": 12458
+            },
+            {
+                "id": 13126,
+                "name": "湖州市",
+                "pid": 12458
+            },
+            {
+                "id": 13204,
+                "name": "绍兴市",
+                "pid": 12458
+            },
+            {
+                "id": 13314,
+                "name": "金华市",
+                "pid": 12458
+            },
+            {
+                "id": 13470,
+                "name": "衢州市",
+                "pid": 12458
+            },
+            {
+                "id": 13577,
+                "name": "舟山市",
+                "pid": 12458
+            },
+            {
+                "id": 13618,
+                "name": "台州市",
+                "pid": 12458
+            },
+            {
+                "id": 13764,
+                "name": "丽水市",
+                "pid": 12458
+            }
+        ]
+    },
+    {
+        "id": 13947,
+        "name": "安徽省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 13948,
+                "name": "合肥市",
+                "pid": 13947
+            },
+            {
+                "id": 14121,
+                "name": "芜湖市",
+                "pid": 13947
+            },
+            {
+                "id": 14210,
+                "name": "蚌埠市",
+                "pid": 13947
+            },
+            {
+                "id": 14307,
+                "name": "淮南市",
+                "pid": 13947
+            },
+            {
+                "id": 14410,
+                "name": "马鞍山市",
+                "pid": 13947
+            },
+            {
+                "id": 14476,
+                "name": "淮北市",
+                "pid": 13947
+            },
+            {
+                "id": 14518,
+                "name": "铜陵市",
+                "pid": 13947
+            },
+            {
+                "id": 14583,
+                "name": "安庆市",
+                "pid": 13947
+            },
+            {
+                "id": 14763,
+                "name": "黄山市",
+                "pid": 13947
+            },
+            {
+                "id": 14880,
+                "name": "滁州市",
+                "pid": 13947
+            },
+            {
+                "id": 15015,
+                "name": "阜阳市",
+                "pid": 13947
+            },
+            {
+                "id": 15196,
+                "name": "宿州市",
+                "pid": 13947
+            },
+            {
+                "id": 15326,
+                "name": "六安市",
+                "pid": 13947
+            },
+            {
+                "id": 15480,
+                "name": "亳州市",
+                "pid": 13947
+            },
+            {
+                "id": 15581,
+                "name": "池州市",
+                "pid": 13947
+            },
+            {
+                "id": 15652,
+                "name": "宣城市",
+                "pid": 13947
+            }
+        ]
+    },
+    {
+        "id": 15768,
+        "name": "福建省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 15769,
+                "name": "福州市",
+                "pid": 15768
+            },
+            {
+                "id": 15969,
+                "name": "厦门市",
+                "pid": 15768
+            },
+            {
+                "id": 16030,
+                "name": "莆田市",
+                "pid": 15768
+            },
+            {
+                "id": 16093,
+                "name": "三明市",
+                "pid": 15768
+            },
+            {
+                "id": 16252,
+                "name": "泉州市",
+                "pid": 15768
+            },
+            {
+                "id": 16440,
+                "name": "漳州市",
+                "pid": 15768
+            },
+            {
+                "id": 16598,
+                "name": "南平市",
+                "pid": 15768
+            },
+            {
+                "id": 16751,
+                "name": "龙岩市",
+                "pid": 15768
+            },
+            {
+                "id": 16892,
+                "name": "宁德市",
+                "pid": 15768
+            }
+        ]
+    },
+    {
+        "id": 17032,
+        "name": "江西省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 17033,
+                "name": "南昌市",
+                "pid": 17032
+            },
+            {
+                "id": 17182,
+                "name": "景德镇市",
+                "pid": 17032
+            },
+            {
+                "id": 17246,
+                "name": "萍乡市",
+                "pid": 17032
+            },
+            {
+                "id": 17310,
+                "name": "九江市",
+                "pid": 17032
+            },
+            {
+                "id": 17551,
+                "name": "新余市",
+                "pid": 17032
+            },
+            {
+                "id": 17588,
+                "name": "鹰潭市",
+                "pid": 17032
+            },
+            {
+                "id": 17651,
+                "name": "赣州市",
+                "pid": 17032
+            },
+            {
+                "id": 17980,
+                "name": "吉安市",
+                "pid": 17032
+            },
+            {
+                "id": 18246,
+                "name": "宜春市",
+                "pid": 17032
+            },
+            {
+                "id": 18476,
+                "name": "抚州市",
+                "pid": 17032
+            },
+            {
+                "id": 18669,
+                "name": "上饶市",
+                "pid": 17032
+            }
+        ]
+    },
+    {
+        "id": 18911,
+        "name": "山东省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 18912,
+                "name": "济南市",
+                "pid": 18911
+            },
+            {
+                "id": 19090,
+                "name": "青岛市",
+                "pid": 18911
+            },
+            {
+                "id": 19242,
+                "name": "淄博市",
+                "pid": 18911
+            },
+            {
+                "id": 19339,
+                "name": "枣庄市",
+                "pid": 18911
+            },
+            {
+                "id": 19412,
+                "name": "东营市",
+                "pid": 18911
+            },
+            {
+                "id": 19472,
+                "name": "烟台市",
+                "pid": 18911
+            },
+            {
+                "id": 19645,
+                "name": "潍坊市",
+                "pid": 18911
+            },
+            {
+                "id": 19781,
+                "name": "济宁市",
+                "pid": 18911
+            },
+            {
+                "id": 19952,
+                "name": "泰安市",
+                "pid": 18911
+            },
+            {
+                "id": 20047,
+                "name": "威海市",
+                "pid": 18911
+            },
+            {
+                "id": 20128,
+                "name": "日照市",
+                "pid": 18911
+            },
+            {
+                "id": 20190,
+                "name": "临沂市",
+                "pid": 18911
+            },
+            {
+                "id": 20360,
+                "name": "德州市",
+                "pid": 18911
+            },
+            {
+                "id": 20508,
+                "name": "聊城市",
+                "pid": 18911
+            },
+            {
+                "id": 20653,
+                "name": "滨州市",
+                "pid": 18911
+            },
+            {
+                "id": 20753,
+                "name": "菏泽市",
+                "pid": 18911
+            }
+        ]
+    },
+    {
+        "id": 20932,
+        "name": "河南省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 20933,
+                "name": "郑州市",
+                "pid": 20932
+            },
+            {
+                "id": 21157,
+                "name": "开封市",
+                "pid": 20932
+            },
+            {
+                "id": 21291,
+                "name": "洛阳市",
+                "pid": 20932
+            },
+            {
+                "id": 21501,
+                "name": "平顶山市",
+                "pid": 20932
+            },
+            {
+                "id": 21662,
+                "name": "安阳市",
+                "pid": 20932
+            },
+            {
+                "id": 21810,
+                "name": "鹤壁市",
+                "pid": 20932
+            },
+            {
+                "id": 21865,
+                "name": "新乡市",
+                "pid": 20932
+            },
+            {
+                "id": 22051,
+                "name": "焦作市",
+                "pid": 20932
+            },
+            {
+                "id": 22175,
+                "name": "濮阳市",
+                "pid": 20932
+            },
+            {
+                "id": 22273,
+                "name": "许昌市",
+                "pid": 20932
+            },
+            {
+                "id": 22386,
+                "name": "漯河市",
+                "pid": 20932
+            },
+            {
+                "id": 22451,
+                "name": "三门峡市",
+                "pid": 20932
+            },
+            {
+                "id": 22540,
+                "name": "南阳市",
+                "pid": 20932
+            },
+            {
+                "id": 22812,
+                "name": "商丘市",
+                "pid": 20932
+            },
+            {
+                "id": 23023,
+                "name": "信阳市",
+                "pid": 20932
+            },
+            {
+                "id": 23259,
+                "name": "周口市",
+                "pid": 20932
+            },
+            {
+                "id": 23480,
+                "name": "驻马店市",
+                "pid": 20932
+            },
+            {
+                "id": 23700,
+                "name": "省直辖县级行政区划",
+                "pid": 20932
+            }
+        ]
+    },
+    {
+        "id": 23718,
+        "name": "湖北省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 23719,
+                "name": "武汉市",
+                "pid": 23718
+            },
+            {
+                "id": 23926,
+                "name": "黄石市",
+                "pid": 23718
+            },
+            {
+                "id": 23990,
+                "name": "十堰市",
+                "pid": 23718
+            },
+            {
+                "id": 24128,
+                "name": "宜昌市",
+                "pid": 23718
+            },
+            {
+                "id": 24255,
+                "name": "襄阳市",
+                "pid": 23718
+            },
+            {
+                "id": 24395,
+                "name": "鄂州市",
+                "pid": 23718
+            },
+            {
+                "id": 24427,
+                "name": "荆门市",
+                "pid": 23718
+            },
+            {
+                "id": 24517,
+                "name": "孝感市",
+                "pid": 23718
+            },
+            {
+                "id": 24653,
+                "name": "荆州市",
+                "pid": 23718
+            },
+            {
+                "id": 24792,
+                "name": "黄冈市",
+                "pid": 23718
+            },
+            {
+                "id": 24969,
+                "name": "咸宁市",
+                "pid": 23718
+            },
+            {
+                "id": 25065,
+                "name": "随州市",
+                "pid": 23718
+            },
+            {
+                "id": 25120,
+                "name": "恩施土家族苗族自治州",
+                "pid": 23718
+            },
+            {
+                "id": 25223,
+                "name": "省直辖县级行政区划",
+                "pid": 23718
+            }
+        ]
+    },
+    {
+        "id": 25313,
+        "name": "湖南省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 25314,
+                "name": "长沙市",
+                "pid": 25313
+            },
+            {
+                "id": 25496,
+                "name": "株洲市",
+                "pid": 25313
+            },
+            {
+                "id": 25618,
+                "name": "湘潭市",
+                "pid": 25313
+            },
+            {
+                "id": 25697,
+                "name": "衡阳市",
+                "pid": 25313
+            },
+            {
+                "id": 25903,
+                "name": "邵阳市",
+                "pid": 25313
+            },
+            {
+                "id": 26122,
+                "name": "岳阳市",
+                "pid": 25313
+            },
+            {
+                "id": 26271,
+                "name": "常德市",
+                "pid": 25313
+            },
+            {
+                "id": 26460,
+                "name": "张家界市",
+                "pid": 25313
+            },
+            {
+                "id": 26539,
+                "name": "益阳市",
+                "pid": 25313
+            },
+            {
+                "id": 26646,
+                "name": "郴州市",
+                "pid": 25313
+            },
+            {
+                "id": 26820,
+                "name": "永州市",
+                "pid": 25313
+            },
+            {
+                "id": 27029,
+                "name": "怀化市",
+                "pid": 25313
+            },
+            {
+                "id": 27250,
+                "name": "娄底市",
+                "pid": 25313
+            },
+            {
+                "id": 27345,
+                "name": "湘西土家族苗族自治州",
+                "pid": 25313
+            }
+        ]
+    },
+    {
+        "id": 27470,
+        "name": "广东省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 27471,
+                "name": "广州市",
+                "pid": 27470
+            },
+            {
+                "id": 27661,
+                "name": "韶关市",
+                "pid": 27470
+            },
+            {
+                "id": 27784,
+                "name": "深圳市",
+                "pid": 27470
+            },
+            {
+                "id": 27873,
+                "name": "珠海市",
+                "pid": 27470
+            },
+            {
+                "id": 27908,
+                "name": "汕头市",
+                "pid": 27470
+            },
+            {
+                "id": 27983,
+                "name": "佛山市",
+                "pid": 27470
+            },
+            {
+                "id": 28030,
+                "name": "江门市",
+                "pid": 27470
+            },
+            {
+                "id": 28120,
+                "name": "湛江市",
+                "pid": 27470
+            },
+            {
+                "id": 28255,
+                "name": "茂名市",
+                "pid": 27470
+            },
+            {
+                "id": 28388,
+                "name": "肇庆市",
+                "pid": 27470
+            },
+            {
+                "id": 28503,
+                "name": "惠州市",
+                "pid": 27470
+            },
+            {
+                "id": 28593,
+                "name": "梅州市",
+                "pid": 27470
+            },
+            {
+                "id": 28719,
+                "name": "汕尾市",
+                "pid": 27470
+            },
+            {
+                "id": 28787,
+                "name": "河源市",
+                "pid": 27470
+            },
+            {
+                "id": 28895,
+                "name": "阳江市",
+                "pid": 27470
+            },
+            {
+                "id": 28963,
+                "name": "清远市",
+                "pid": 27470
+            },
+            {
+                "id": 29064,
+                "name": "东莞市",
+                "pid": 27470
+            },
+            {
+                "id": 29102,
+                "name": "中山市",
+                "pid": 27470
+            },
+            {
+                "id": 29127,
+                "name": "潮州市",
+                "pid": 27470
+            },
+            {
+                "id": 29182,
+                "name": "揭阳市",
+                "pid": 27470
+            },
+            {
+                "id": 29296,
+                "name": "云浮市",
+                "pid": 27470
+            }
+        ]
+    },
+    {
+        "id": 29373,
+        "name": "广西壮族自治区",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 29374,
+                "name": "南宁市",
+                "pid": 29373
+            },
+            {
+                "id": 29525,
+                "name": "柳州市",
+                "pid": 29373
+            },
+            {
+                "id": 29654,
+                "name": "桂林市",
+                "pid": 29373
+            },
+            {
+                "id": 29820,
+                "name": "梧州市",
+                "pid": 29373
+            },
+            {
+                "id": 29894,
+                "name": "北海市",
+                "pid": 29373
+            },
+            {
+                "id": 29929,
+                "name": "防城港市",
+                "pid": 29373
+            },
+            {
+                "id": 29965,
+                "name": "钦州市",
+                "pid": 29373
+            },
+            {
+                "id": 30041,
+                "name": "贵港市",
+                "pid": 29373
+            },
+            {
+                "id": 30121,
+                "name": "玉林市",
+                "pid": 29373
+            },
+            {
+                "id": 30239,
+                "name": "百色市",
+                "pid": 29373
+            },
+            {
+                "id": 30389,
+                "name": "贺州市",
+                "pid": 29373
+            },
+            {
+                "id": 30456,
+                "name": "河池市",
+                "pid": 29373
+            },
+            {
+                "id": 30609,
+                "name": "来宾市",
+                "pid": 29373
+            },
+            {
+                "id": 30687,
+                "name": "崇左市",
+                "pid": 29373
+            }
+        ]
+    },
+    {
+        "id": 30783,
+        "name": "海南省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 30784,
+                "name": "海口市",
+                "pid": 30783
+            },
+            {
+                "id": 30835,
+                "name": "三亚市",
+                "pid": 30783
+            },
+            {
+                "id": 30844,
+                "name": "三沙市",
+                "pid": 30783
+            },
+            {
+                "id": 30853,
+                "name": "儋州市",
+                "pid": 30783
+            },
+            {
+                "id": 30873,
+                "name": "省直辖县级行政区划",
+                "pid": 30783
+            }
+        ]
+    },
+    {
+        "id": 31059,
+        "name": "重庆",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 31060,
+                "name": "重庆市",
+                "pid": 31059
+            },
+            {
+                "id": 31737,
+                "name": "自治县",
+                "pid": 31059
+            }
+        ]
+    },
+    {
+        "id": 32131,
+        "name": "四川省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 32132,
+                "name": "成都市",
+                "pid": 32131
+            },
+            {
+                "id": 32414,
+                "name": "自贡市",
+                "pid": 32131
+            },
+            {
+                "id": 32511,
+                "name": "攀枝花市",
+                "pid": 32131
+            },
+            {
+                "id": 32566,
+                "name": "泸州市",
+                "pid": 32131
+            },
+            {
+                "id": 32700,
+                "name": "德阳市",
+                "pid": 32131
+            },
+            {
+                "id": 32791,
+                "name": "绵阳市",
+                "pid": 32131
+            },
+            {
+                "id": 32968,
+                "name": "广元市",
+                "pid": 32131
+            },
+            {
+                "id": 33122,
+                "name": "遂宁市",
+                "pid": 32131
+            },
+            {
+                "id": 33223,
+                "name": "内江市",
+                "pid": 32131
+            },
+            {
+                "id": 33312,
+                "name": "乐山市",
+                "pid": 32131
+            },
+            {
+                "id": 33456,
+                "name": "南充市",
+                "pid": 32131
+            },
+            {
+                "id": 33708,
+                "name": "眉山市",
+                "pid": 32131
+            },
+            {
+                "id": 33795,
+                "name": "宜宾市",
+                "pid": 32131
+            },
+            {
+                "id": 33942,
+                "name": "广安市",
+                "pid": 32131
+            },
+            {
+                "id": 34073,
+                "name": "达州市",
+                "pid": 32131
+            },
+            {
+                "id": 34281,
+                "name": "雅安市",
+                "pid": 32131
+            },
+            {
+                "id": 34386,
+                "name": "巴中市",
+                "pid": 32131
+            },
+            {
+                "id": 34531,
+                "name": "资阳市",
+                "pid": 32131
+            },
+            {
+                "id": 34625,
+                "name": "阿坝藏族羌族自治州",
+                "pid": 32131
+            },
+            {
+                "id": 34817,
+                "name": "甘孜藏族自治州",
+                "pid": 32131
+            },
+            {
+                "id": 35125,
+                "name": "凉山彝族自治州",
+                "pid": 32131
+            }
+        ]
+    },
+    {
+        "id": 35447,
+        "name": "贵州省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 35448,
+                "name": "贵阳市",
+                "pid": 35447
+            },
+            {
+                "id": 35605,
+                "name": "六盘水市",
+                "pid": 35447
+            },
+            {
+                "id": 35702,
+                "name": "遵义市",
+                "pid": 35447
+            },
+            {
+                "id": 35971,
+                "name": "安顺市",
+                "pid": 35447
+            },
+            {
+                "id": 36070,
+                "name": "毕节市",
+                "pid": 35447
+            },
+            {
+                "id": 36358,
+                "name": "铜仁市",
+                "pid": 35447
+            },
+            {
+                "id": 36550,
+                "name": "黔西南布依族苗族自治州",
+                "pid": 35447
+            },
+            {
+                "id": 36699,
+                "name": "黔东南苗族侗族自治州",
+                "pid": 35447
+            },
+            {
+                "id": 36933,
+                "name": "黔南布依族苗族自治州",
+                "pid": 35447
+            }
+        ]
+    },
+    {
+        "id": 37055,
+        "name": "云南省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 37056,
+                "name": "昆明市",
+                "pid": 37055
+            },
+            {
+                "id": 37221,
+                "name": "曲靖市",
+                "pid": 37055
+            },
+            {
+                "id": 37368,
+                "name": "玉溪市",
+                "pid": 37055
+            },
+            {
+                "id": 37454,
+                "name": "保山市",
+                "pid": 37055
+            },
+            {
+                "id": 37536,
+                "name": "昭通市",
+                "pid": 37055
+            },
+            {
+                "id": 37698,
+                "name": "丽江市",
+                "pid": 37055
+            },
+            {
+                "id": 37770,
+                "name": "普洱市",
+                "pid": 37055
+            },
+            {
+                "id": 37884,
+                "name": "临沧市",
+                "pid": 37055
+            },
+            {
+                "id": 37977,
+                "name": "楚雄彝族自治州",
+                "pid": 37055
+            },
+            {
+                "id": 38091,
+                "name": "红河哈尼族彝族自治州",
+                "pid": 37055
+            },
+            {
+                "id": 38244,
+                "name": "文山壮族苗族自治州",
+                "pid": 37055
+            },
+            {
+                "id": 38359,
+                "name": "西双版纳傣族自治州",
+                "pid": 37055
+            },
+            {
+                "id": 38405,
+                "name": "大理白族自治州",
+                "pid": 37055
+            },
+            {
+                "id": 38530,
+                "name": "德宏傣族景颇族自治州",
+                "pid": 37055
+            },
+            {
+                "id": 38592,
+                "name": "怒江傈僳族自治州",
+                "pid": 37055
+            },
+            {
+                "id": 38628,
+                "name": "迪庆藏族自治州",
+                "pid": 37055
+            }
+        ]
+    },
+    {
+        "id": 38662,
+        "name": "西藏自治区",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 38663,
+                "name": "拉萨市",
+                "pid": 38662
+            },
+            {
+                "id": 38745,
+                "name": "日喀则市",
+                "pid": 38662
+            },
+            {
+                "id": 38970,
+                "name": "昌都市",
+                "pid": 38662
+            },
+            {
+                "id": 39120,
+                "name": "林芝市",
+                "pid": 38662
+            },
+            {
+                "id": 39184,
+                "name": "山南市",
+                "pid": 38662
+            },
+            {
+                "id": 39282,
+                "name": "那曲市",
+                "pid": 38662
+            },
+            {
+                "id": 39408,
+                "name": "阿里地区",
+                "pid": 38662
+            }
+        ]
+    },
+    {
+        "id": 39453,
+        "name": "陕西省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 39454,
+                "name": "西安市",
+                "pid": 39453
+            },
+            {
+                "id": 39640,
+                "name": "铜川市",
+                "pid": 39453
+            },
+            {
+                "id": 39683,
+                "name": "宝鸡市",
+                "pid": 39453
+            },
+            {
+                "id": 39816,
+                "name": "咸阳市",
+                "pid": 39453
+            },
+            {
+                "id": 39973,
+                "name": "渭南市",
+                "pid": 39453
+            },
+            {
+                "id": 40121,
+                "name": "延安市",
+                "pid": 39453
+            },
+            {
+                "id": 40252,
+                "name": "汉中市",
+                "pid": 39453
+            },
+            {
+                "id": 40441,
+                "name": "榆林市",
+                "pid": 39453
+            },
+            {
+                "id": 40638,
+                "name": "安康市",
+                "pid": 39453
+            },
+            {
+                "id": 40788,
+                "name": "商洛市",
+                "pid": 39453
+            }
+        ]
+    },
+    {
+        "id": 40906,
+        "name": "甘肃省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 40907,
+                "name": "兰州市",
+                "pid": 40906
+            },
+            {
+                "id": 41033,
+                "name": "嘉峪关市",
+                "pid": 40906
+            },
+            {
+                "id": 41040,
+                "name": "金昌市",
+                "pid": 40906
+            },
+            {
+                "id": 41061,
+                "name": "白银市",
+                "pid": 40906
+            },
+            {
+                "id": 41145,
+                "name": "天水市",
+                "pid": 40906
+            },
+            {
+                "id": 41283,
+                "name": "武威市",
+                "pid": 40906
+            },
+            {
+                "id": 41396,
+                "name": "张掖市",
+                "pid": 40906
+            },
+            {
+                "id": 41481,
+                "name": "平凉市",
+                "pid": 40906
+            },
+            {
+                "id": 41602,
+                "name": "酒泉市",
+                "pid": 40906
+            },
+            {
+                "id": 41694,
+                "name": "庆阳市",
+                "pid": 40906
+            },
+            {
+                "id": 41822,
+                "name": "定西市",
+                "pid": 40906
+            },
+            {
+                "id": 41952,
+                "name": "陇南市",
+                "pid": 40906
+            },
+            {
+                "id": 42161,
+                "name": "临夏回族自治州",
+                "pid": 40906
+            },
+            {
+                "id": 42301,
+                "name": "甘南藏族自治州",
+                "pid": 40906
+            }
+        ]
+    },
+    {
+        "id": 42412,
+        "name": "青海省",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 42413,
+                "name": "西宁市",
+                "pid": 42412
+            },
+            {
+                "id": 42501,
+                "name": "海东市",
+                "pid": 42412
+            },
+            {
+                "id": 42606,
+                "name": "海北藏族自治州",
+                "pid": 42412
+            },
+            {
+                "id": 42643,
+                "name": "黄南藏族自治州",
+                "pid": 42412
+            },
+            {
+                "id": 42681,
+                "name": "海南藏族自治州",
+                "pid": 42412
+            },
+            {
+                "id": 42727,
+                "name": "果洛藏族自治州",
+                "pid": 42412
+            },
+            {
+                "id": 42778,
+                "name": "玉树藏族自治州",
+                "pid": 42412
+            },
+            {
+                "id": 42834,
+                "name": "海西蒙古族藏族自治州",
+                "pid": 42412
+            }
+        ]
+    },
+    {
+        "id": 42888,
+        "name": "宁夏回族自治区",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 42889,
+                "name": "银川市",
+                "pid": 42888
+            },
+            {
+                "id": 42958,
+                "name": "石嘴山市",
+                "pid": 42888
+            },
+            {
+                "id": 42998,
+                "name": "吴忠市",
+                "pid": 42888
+            },
+            {
+                "id": 43054,
+                "name": "固原市",
+                "pid": 42888
+            },
+            {
+                "id": 43125,
+                "name": "中卫市",
+                "pid": 42888
+            }
+        ]
+    },
+    {
+        "id": 43175,
+        "name": "新疆维吾尔自治区",
+        "pid": 0,
+        "sons": [
+            {
+                "id": 43176,
+                "name": "乌鲁木齐市",
+                "pid": 43175
+            },
+            {
+                "id": 43315,
+                "name": "克拉玛依市",
+                "pid": 43175
+            },
+            {
+                "id": 43339,
+                "name": "吐鲁番市",
+                "pid": 43175
+            },
+            {
+                "id": 43378,
+                "name": "哈密市",
+                "pid": 43175
+            },
+            {
+                "id": 43440,
+                "name": "昌吉回族自治州",
+                "pid": 43175
+            },
+            {
+                "id": 43554,
+                "name": "博尔塔拉蒙古自治州",
+                "pid": 43175
+            },
+            {
+                "id": 43593,
+                "name": "巴音郭楞蒙古自治州",
+                "pid": 43175
+            },
+            {
+                "id": 43712,
+                "name": "阿克苏地区",
+                "pid": 43175
+            },
+            {
+                "id": 43836,
+                "name": "克孜勒苏柯尔克孜自治州",
+                "pid": 43175
+            },
+            {
+                "id": 43887,
+                "name": "喀什地区",
+                "pid": 43175
+            },
+            {
+                "id": 44109,
+                "name": "和田地区",
+                "pid": 43175
+            },
+            {
+                "id": 44224,
+                "name": "伊犁哈萨克自治州",
+                "pid": 43175
+            },
+            {
+                "id": 44386,
+                "name": "塔城地区",
+                "pid": 43175
+            },
+            {
+                "id": 44514,
+                "name": "阿勒泰地区",
+                "pid": 43175
+            },
+            {
+                "id": 44592,
+                "name": "自治区直辖县级行政区划",
+                "pid": 43175
+            }
+        ]
+    }
+]

+ 0 - 0
pdd1/logs/yjj_crawl_log_20260420_143950.txt


+ 0 - 0
pdd1/logs/yjj_crawl_log_20260421_094254.txt


+ 1954 - 0
pdd1/pdd_new2.py

@@ -0,0 +1,1954 @@
+import requests
+import base64
+import uiautomator2 as u2
+import time
+import sys
+import subprocess
+import re
+import random
+import json
+from aip import AipOcr
+import numpy as np
+import cv2
+import os
+from pdd_config import Config
+import logging
+from logger import setup_logger
+import pymysql
+from 拼多多盒数处理脚本.main import extract_box_number
+import datetime
+import threading
+setup_logger("pdd_spider")  # Initialise logging
+
+# Purpose: this module pulls pending Pinduoduo (PDD) tasks from the database, dispatches
+# them to idle devices, and then drives the app from a per-device thread to perform
+# search, collection, validation, de-duplication and persistence.
+# Scope: it mainly orchestrates scheduling and the collection flow. It does not define the
+# database schema, nor does it implement the OCR service, the slider-captcha recognition
+# service or the box-count extraction logic; those all come from external dependencies.
+
+# 功能:创建新的 MySQL 连接,供调度查询和采集落库逻辑复用。
+# 返回:返回可直接用于 cursor/commit/rollback 的连接对象。
+# 副作用/失败:连接关闭由调用方负责;如果连接失败会由上层调用点捕获异常。
+def get_mysql():
+    return pymysql.connect(
+        host='120.24.49.2',  # 修改后的主机
+        port=3306,  # 添加端口号
+        user='drug_retrieve',  # 修改后的用户名
+        password='ksCt3xm6chzdkafj',  # 修改后的密码
+        db='drug_retrieve',  # 修改后的数据库名
+        charset='utf8mb4'
+    )
+
+def get_shop_mysql():
+    return pymysql.connect(
+        host='120.24.49.2',  # 修改后的主机
+        port=3306,  # 添加端口号
+        user='drug_retrieve',  # 修改后的用户名
+        password='ksCt3xm6chzdkafj',  # 修改后的密码
+        db='drug_retrieve',  # 修改后的数据库名
+        charset='utf8mb4'
+    )
+
+
+# Name suggests the scheduler polling period in seconds; the scheduler loop itself is
+# not visible in this part of the file — TODO confirm.
+SCHEDULER_INTERVAL_SECONDS = 600
+# Value matched against the `platform` column when selecting pending tasks.
+PLATFORM_PDD = 3
+# Value matched against the task `status` column when selecting pending tasks.
+TASK_STATUS_PENDING = 1
+# Value matched against the equipment `status` column when looking up an idle device.
+DEVICE_STATUS_IDLE = 0
+# Fallback used for a task's max_counts_limit when the task row has no usable value.
+DEFAULT_MAX_COUNTS_LIMIT = 300
+
+# These collections only describe occupancy inside the current process.
+# The database may still report a device as idle, so dispatching must also consult this
+# in-memory state to avoid handing out the same task or device twice.
+dispatch_lock = threading.Lock()  # guards the shared containers below
+running_task_ids = set()  # task ids reserved or running in this process
+running_device_ids = set()  # device ids reserved or running in this process
+worker_threads = {}  # device id -> worker thread object
+scheduler_stop_event = threading.Event()  # presumably signals the scheduler to stop; usage not shown here
+scheduler_timer = None  # presumably holds the scheduler's timer; usage not shown here
+
+# 功能:把外部输入安全转成整数;当值为空或格式不合法时回退到默认值。
+# 输入约束:允许传入 None、空字符串、数字字符串或整数。
+# 返回:成功时返回 int,失败时返回 default。
+def parse_optional_int(value, default=None):
+    if value in (None, ""):
+        return default
+    try:
+        return int(value)
+    except (TypeError, ValueError):
+        return default
+
+def fetch_pending_tasks():
+    # 功能:查询数据库里当前仍处于待执行状态的拼多多任务。
+    # 返回:按任务 id 升序排列的任务列表,便于旧任务优先执行。
+    # 调用 get_mysql() 的目的是先建立查询连接,后续 cursor.execute() 依赖这个连接对象可用。
+    conn = None
+    try:
+        conn = get_mysql()
+        with conn.cursor() as cursor:
+            sql = """
+                SELECT *
+                FROM retrieve_collect_task_allocate
+                WHERE platform = %s AND status = %s
+                ORDER BY id ASC
+            """
+            cursor.execute(sql, (PLATFORM_PDD, TASK_STATUS_PENDING))
+            return cursor.fetchall()
+    except Exception as e:
+        logging.exception(f"读取待执行任务失败: {e}")
+        return []
+    finally:
+        if conn:
+            conn.close()
+
+def fetch_idle_device_by_equipment_id(equipment_id):
+    # 功能:按设备 id 查询指定终端是否空闲,避免任务和设备错配。
+    # 调用 get_mysql() 的目的是查询指定设备是否空闲,避免把任务错误派发到其他终端。
+    conn = None
+    try:
+        conn = get_mysql()
+        with conn.cursor() as cursor:
+            sql = """
+                SELECT *
+                FROM retrieve_collect_equipment
+                WHERE name LIKE %s AND id = %s AND status = %s
+                LIMIT 1
+            """
+            cursor.execute(sql, ('%pdd%', equipment_id, DEVICE_STATUS_IDLE))
+            return cursor.fetchone()
+    except Exception as e:
+        logging.exception(f"读取空闲设备失败 equipment_id={equipment_id}: {e}")
+        return None
+    finally:
+        if conn:
+            conn.close()
+
+def build_task_payload(task_row, device_row):
+    # 功能:把数据库原始任务行和设备行整理成线程入口可直接消费的任务上下文。
+    # 返回:统一字段名的字典,避免后续线程逻辑继续依赖固定列下标。
+    start_page = parse_optional_int(task_row[9] if len(task_row) > 9 else None, 0)
+    end_page = parse_optional_int(task_row[10] if len(task_row) > 10 else None, None)
+    max_counts_limit = parse_optional_int(
+        task_row[11] if len(task_row) > 11 else None,
+        DEFAULT_MAX_COUNTS_LIMIT
+    )
+
+    return {
+        "task_id": task_row[0],
+        "equipment_id": task_row[2],
+        "enterprise_id": task_row[3],
+        "platform": task_row[4],
+        "title_key": task_row[5],
+        "spec_list": task_row[6],
+        "brand": task_row[7],
+        "search_key": f"{task_row[7]}{task_row[5]}",
+        "save_search_key": f"{task_row[7]}{task_row[5]}",
+        "start_page": start_page,
+        "end_page": end_page,
+        "max_counts_limit": max_counts_limit,
+        "sort": "升序",
+        "device_id": device_row[2],
+        "task_row": task_row,
+    }
+
+
+def fetch_runnable_task_payloads():
+    # 功能:从待执行任务中筛出“数据库层面空闲 + 当前进程未占用”的任务集合。
+    # 返回:可以直接交给 worker 线程执行的 payload 列表。
+    tasks = fetch_pending_tasks()
+    if not tasks:
+        logging.info("当前没有待执行任务")
+        return []
+
+    payloads = []
+    # 单次轮询内也要避免一个设备被多个任务重复命中。
+    reserved_equipment_ids = set()
+
+    for task_row in tasks:
+        task_id = task_row[0]
+        equipment_id = task_row[2]
+
+        with dispatch_lock:
+            if task_id in running_task_ids:
+                continue
+            if equipment_id in reserved_equipment_ids:
+                continue
+
+        device_row = fetch_idle_device_by_equipment_id(equipment_id)
+        if not device_row:
+            logging.info(f"任务 {task_id} 对应设备 {equipment_id} 当前不空闲,跳过本轮")
+            continue
+
+        device_id = device_row[2]
+        with dispatch_lock:
+            if device_id in running_device_ids:
+                logging.info(f"设备 {device_id} 已在本进程执行任务,跳过任务 {task_id}")
+                continue
+            # 线程真正启动前先登记占用,缩小轮询竞争窗口。
+            running_task_ids.add(task_id)
+            running_device_ids.add(device_id)
+            reserved_equipment_ids.add(equipment_id)
+
+        payloads.append(build_task_payload(task_row, device_row))
+
+    return payloads
+
+def cleanup_finished_workers():
+    # 功能:同步 worker_threads 字典,只保留仍然存活的线程引用,避免失效线程长期占位。
+    dead_threads = []
+    with dispatch_lock:
+        for device_id, thread in worker_threads.items():
+            if not thread.is_alive():
+                dead_threads.append(device_id)
+        for device_id in dead_threads:
+            worker_threads.pop(device_id, None)
+
+
+
+def run_task_worker(task_payload):
+    # 功能:单个任务线程的主入口,负责构建 PDD 实例、执行采集并在异常时兜底收尾。
+    task_id = task_payload["task_id"]
+    device_id = task_payload["device_id"]
+    pdd = None
+    try:
+        logging.info(f"[任务 {task_id}] 开始执行,设备: {device_id}")
+        print(task_payload)
+        # 调用 PDD(...) 的目的是把本次任务涉及的筛选条件、页码边界和设备信息固化到实例状态里,
+        # 后续 main()、详情页解析和落库逻辑都会依赖这些实例属性。
+        pdd = PDD(
+            task_payload["search_key"],
+            device_id,
+            title_key=task_payload.get("title_key"),
+            spec_list=task_payload.get("spec_list"),
+            brand=task_payload.get("brand", ""),
+            save_search_key=task_payload.get("save_search_key"),
+            start_page=task_payload.get("start_page"),
+            end_page=task_payload.get("end_page"),
+            max_counts_limit=task_payload.get("max_counts_limit"),
+            direct_shop_lookup=task_payload.get("direct_shop_lookup", False),
+            sort=task_payload.get("sort"),
+            platform=task_payload.get("platform"),
+            task_id=task_payload.get("task_id"),
+            enterprise_id=task_payload.get("enterprise_id"),
+        )
+        # 这里调用 pdd.main(),是为了把调度层切换到具体采集流程;
+        # 调用结果会决定当前任务是按“正常完成”还是“已结束/异常”记录日志。
+        completed_normally = pdd.main(device_id, 1, 0)
+        if completed_normally:
+            logging.info(f"[任务 {task_id}] 执行完成,设备: {device_id}")
+        else:
+            logging.info(f"[任务 {task_id}] 已结束,设备: {device_id}")
+    except Exception as e:
+        end_page = task_payload.get("start_page")
+        if pdd is not None:
+            end_page = getattr(pdd, "page", end_page)
+            pdd.finish_task_abnormally(end_page, f"任务执行异常: {e}")
+        else:
+            report_api(task_id, end_page=end_page, start=4, end_time=int(time.time()),finish_status=0)
+        logging.exception(f"[任务 {task_id}] 执行异常,设备: {device_id},错误: {e}")
+    finally:
+        # 无论任务正常还是异常结束,都必须释放进程内占用标记。
+        with dispatch_lock:
+            running_task_ids.discard(task_id)
+            running_device_ids.discard(device_id)
+            worker_threads.pop(device_id, None)
+
+
+def dispatch_pending_tasks():
+    # 功能:执行一轮派单,把每个可运行任务绑定到对应设备线程。
+    # 阶段 1:先清理已经结束的线程引用,避免占用状态过期。
+    cleanup_finished_workers()
+    # 阶段 2:重新计算本轮真正可运行的任务集合。
+    task_payloads = fetch_runnable_task_payloads()
+    if not task_payloads:
+        return
+
+    for task_payload in task_payloads:
+        device_id = task_payload["device_id"]
+        try:
+            # 阶段 3:为每个任务创建独立线程,让不同设备可以并发执行采集。
+            thread = threading.Thread(
+                target=run_task_worker,
+                args=(task_payload,),
+                daemon=True,
+                name=f"pdd-{device_id}",
+            )
+            with dispatch_lock:
+                worker_threads[device_id] = thread
+            thread.start()
+            logging.info(f"[任务 {task_payload['task_id']}] 已分发到设备 {device_id}")
+        except Exception:
+            with dispatch_lock:
+                # 线程创建失败时同步回滚占用状态,避免任务被“卡住”。
+                running_task_ids.discard(task_payload["task_id"])
+                running_device_ids.discard(device_id)
+                worker_threads.pop(device_id, None)
+            raise
+
+def schedule_dispatch(delay_seconds=SCHEDULER_INTERVAL_SECONDS):
+    # 功能:安排下一次轮询触发时间。
+    # 功能:注册下一轮调度定时器,让调度器按固定间隔持续轮询。
+    global scheduler_timer
+    if scheduler_stop_event.is_set():
+        return
+    # 采用递归定时器而不是死循环,便于后续统一停机。
+    scheduler_timer = threading.Timer(delay_seconds, scheduled_dispatch_job)
+    scheduler_timer.daemon = False
+    scheduler_timer.name = "pdd-scheduler"
+    scheduler_timer.start()
+
+def scheduled_dispatch_job():
+    # 功能:执行一次定时派单,并保证下一轮轮询仍会继续注册。
+    # 功能:包装一次定时调度执行,确保本轮出错也不会中断后续轮询。
+    try:
+        dispatch_pending_tasks()
+    except Exception as e:
+        logging.exception(f"PDD 定时调度异常: {e}")
+    finally:
+        schedule_dispatch(SCHEDULER_INTERVAL_SECONDS)
+
+def report_api(task_id,page=None,start=None,end_page=None,end_time=None,finish_status=None):
+    # 功能:向外部调度中心上报任务页码、状态和结束信息。
+    # 外部接口只负责收状态;具体状态值由调用方按当前阶段传入。
+    # 调用这个接口的目的是把当前任务状态同步给外部调度系统,
+    # 这样任务中心才能感知“开始执行 / 正常结束 / 异常结束”等阶段变化。
+    params = {
+        "collect_task_allocate_id": task_id,
+        "statr_page":page if page is not None else '',
+        "end_page": end_page if end_page is not None else '',
+        "status": start,
+        "finish_status": finish_status if finish_status is not None else 0,
+        "start_time": int(time.time()),
+        "end_time": end_time if end_time is not None else '',
+    }
+    print(params)
+    url = "http://schedule.dfwy.tech/api/collect_equipment_execute/result_report"
+    res = requests.get(url, params=params, timeout=20)
+    print(res.text)
+
+# 获取滑块验证中滑块需要移动的距离
+def slide_verify(img_path):
+    # 功能:把本地截图交给第三方打码服务,换回滑块缺口位移。
+    # 返回:识别成功时返回滑块位移结果,失败时返回服务端给出的空结果。
+    with open(img_path, 'rb') as f:
+        b = base64.b64encode(f.read()).decode()  ## 图片二进制流base64字符串
+    url = "http://api.jfbym.com/api/YmServer/customApi"
+    data = {
+        ## 关于参数,一般来说有3个;不同类型id可能有不同的参数个数和参数名,找客服获取
+        "token": "1nDVocTE2mJ0yLEYb2sZJ5uUY2VIEoGTkIpW44X7Kgk",
+        "type": "22222",
+        "image": b,
+    }
+    _headers = {
+        "Content-Type": "application/json"
+    }
+    # 识别结果来自第三方服务,当前逻辑只读取它约定的 msg/data 字段。
+    response = requests.request("POST", url, headers=_headers, json=data).json()
+    print(response)
+
+    if response.get("msg") == "识别成功":
+        # 获取 data 中的 data 字段
+        result = response.get("data", {}).get("data")
+        if result:
+            print(result)  # 输出结果
+        else:
+            print("无法获取数据")
+    else:
+        print("识别未成功")
+
+    return result
+
+class PDD:
+    # 功能:这个类负责维护单次拼多多采集任务的运行状态,并封装设备连接、
+    # 页面导航、数据提取、说明书解析、去重校验和写库等操作。
+    # 边界:这个类负责“如何跑完整个采集流程”,但不负责外部调度器的轮询策略,
+    # 也不负责 OCR/盒数提取等外部依赖的底层实现。
+    def __init__(
+            self,
+            search_key,
+            device_id,
+            title_key=None,
+            spec_list=None,
+            brand="",
+            save_search_key=None,
+            start_page=0,
+            end_page=None,
+            max_counts_limit=None,
+            direct_shop_lookup=False,
+            sort=None,
+            platform = None,
+            task_id = None,
+            enterprise_id=None,
+    ):
+        """Store one task's parameters on the instance and reset the run counters.
+
+        :param search_key: search term; also the fallback for ``title_key`` and
+            ``save_search_key``.
+        :param device_id: stored as ``self.device_id``.
+        :param title_key: title filter; falls back to ``search_key`` when None.
+        :param spec_list: single value or collection, normalized to a list of strings.
+        :param brand: stored unchanged.
+        :param save_search_key: falls back to ``search_key`` when falsy.
+        :param start_page: parsed with ``parse_optional_int`` and clamped to >= 0.
+        :param end_page: parsed with ``parse_optional_int``; raised to ``start_page``
+            when it is smaller.
+        :param max_counts_limit: stored unchanged.
+        :param direct_shop_lookup: stored unchanged.
+        :param sort: stored unchanged.
+        :param platform: stored unchanged.
+        :param task_id: stored unchanged.
+        :param enterprise_id: stored unchanged.
+        """
+        # Phase 1: base dependencies (app package, OCR client, table names, logger).
+        # NOTE(review): the OCR credentials below are hard-coded in source.
+        self.package_name = 'com.xunmeng.pinduoduo'
+        self.APP_ID = '116857964'
+        self.API_KEY = '1gAzACJOAr7BeILKqkqPOETh'
+        self.SECRET_KEY = 'ZNArANb9GwJYgLKg4EfYhukKBfPdl1n3'
+        self.client = AipOcr(self.APP_ID, self.API_KEY, self.SECRET_KEY)
+
+        self.table_name = "retrieve_scrape_data" # "pdd_drug"
+        self.shop_table_name = "pdd_shop_info_middle"  # "pdd_shop_info"
+
+        self.loggerPdd = logging.getLogger()
+
+        self.clipboard = ""  # clipboard content starts out empty
+
+        # Phase 2: fix this task's filters, page bounds and search parameters.
+        self.enterprise_id = enterprise_id
+        self.task_id = task_id
+        self.platform = platform
+        self.sort = sort
+        self.sort_key = 0
+        self.search_key = search_key  # example values: drug product names
+        # title_key lets the search term and the title filter differ; it falls
+        # back to the search term when not given.
+        self.title_key = title_key if title_key is not None else search_key
+        # Specs are normalized to a list so matching code handles a single shape.
+        self.spec_list = self._normalize_rule_list(spec_list)
+        self.brand = brand
+        self.save_search_key = save_search_key or search_key
+        # Page bounds are corrected here so later code only sees normalized values.
+        self.start_page = max(parse_optional_int(start_page, 0), 0)
+        self.end_page = parse_optional_int(end_page, None)
+        self.max_counts_limit = max_counts_limit
+        self.direct_shop_lookup = direct_shop_lookup
+        self.unrelated_data = 0  # count of unrelated items
+        self.device_id = device_id
+        self.page = self.start_page
+        if self.end_page is not None and self.end_page < self.start_page:
+            self.end_page = self.start_page
+
+        # Phase 3: runtime counters.
+        # Number of sold-out items seen.
+        self.sold_out_counts = 0
+
+        # Timestamp string taken when this instance was created.
+        self.program_start_time = self.app_start_time()
+
+        # Number of saved rows; compared against max_counts_limit.
+        self.max_counts = 0
+
+        # Number of product clicks.
+        self.click_counts = 0
+
+        # Position of the product within the list.
+        self.search_key_loc = 0
+        # finish_reported ensures the end-of-task status is reported only once,
+        # whichever exit branch is taken.
+        self.finish_reported = False
+
+        # OSS settings, copied from Config.
+        self.oss_config = {
+            "access_key_id": Config.access_key_id,
+            "access_key_secret": Config.access_key_secret,
+            "endpoint": Config.endpoint,  # e.g. oss-cn-beijing.aliyuncs.com
+            "bucket_name": Config.bucket_name,
+            "oss_prefix": Config.oss_prefix  # key prefix (virtual folder) for screenshots in OSS
+        }
+
+    # 异常处理
+    def wr_re(self, mod, device_id, sort=None, page=None):
+        # 功能:读写或删除本地进度文件,给断点续跑保留入口。
+        file_path = f'./ycwj/{device_id}_{self.title_key}.txt'
+        if mod == "写":
+            try:
+                data = {
+                    "page": page if page else "",
+                    "sort": sort if sort else "",
+                }
+                os.makedirs(os.path.dirname(file_path), exist_ok=True)
+                with open(file_path, 'w', encoding='utf-8') as f:
+                    json.dump(data, f, ensure_ascii=False, indent=2)
+                print(f"进度保存成功:{sort},{page}页")
+            except Exception as e:
+                print("保存进度失败")
+        elif mod == "读":
+            try:
+                if not os.path.exists(file_path):
+                    return None
+                with open(file_path, 'r', encoding='utf-8') as f:
+                    data = json.load(f)
+                    print(self.sort)
+                    if self.sort and self.sort_key == 0:
+                        self.li_or_lo(self.sort)
+                    if data['page'] != '':
+                        progress_page = int(data['page'])
+                        self.page = max(progress_page, self.start_page)
+                        self.scroll_to_target_page(self.page)
+                    else:
+                        return None
+                return data
+            except Exception as e:
+                print(f"读取进度失败", e)
+                return None
+        elif mod == "删":
+            try:
+                if os.path.exists(file_path):
+                    os.remove(file_path)
+                    print(f"进度文件已删除:{file_path}")
+            except Exception as e:
+                print(f"删除进度文件失败:{e}")
+        return None
+
+    def clear_progress_file(self):
+        # 功能:统一封装“清理断点进度文件”动作;当前保留空实现以兼容旧调用点。
+        # self.wr_re("删", self.device_id, self.sort)
+        pass
+
+    def is_max_count_reached(self):
+        # 功能:判断当前采集数量是否达到任务设定上限。
+        return bool(self.max_counts_limit and self.max_counts >= self.max_counts_limit)
+
+    def scroll_to_target_page(self, target_page):
+        # 功能:按目标页数执行固定次数滑动,用于恢复上次采集的大致位置。
+        target_page = int(target_page or 0)
+        if target_page <= 0:
+            return
+        # 这里按“页数约等于滑动次数”的经验规则恢复列表位置,宁可保守,不做复杂校准。
+        for _ in range(target_page):
+            end_y = 300
+            self.d.swipe(200, 1400, 200, end_y, 0.4)
+            time.sleep(self.get_sleep_time())
+
+    def finish_task_normally(self, end_page, reason):
+        # 功能:以“正常完成”状态结束任务并保证只上报一次。
+        # 多个退出分支都会走到这里,因此先判断是否已经上报过结束状态。
+        if not self.finish_reported:
+            report_api(self.task_id, end_page=end_page, start=3, end_time=int(time.time()),finish_status=1)
+            self.finish_reported = True
+        print(reason)
+        return True
+
+    def finish_task_abnormally(self, end_page, reason, finish_status=0):
+        # 功能:以“异常结束”状态结束任务并保证只上报一次。
+        # 异常结束与正常结束共享同一幂等保护,避免重复通知外部调度系统。
+        if not self.finish_reported:
+            report_api(
+                self.task_id,
+                end_page=end_page,
+                start=4,
+                end_time=int(time.time()),
+                finish_status=finish_status
+            )
+            self.finish_reported = True
+        print(reason)
+        return False
+
+    def finish_task_with_max_count(self, end_page):
+        # 功能:达到采集上限时复用正常结束逻辑,只替换结束原因。
+        return self.finish_task_normally(
+            end_page,
+            f"达到最大采集数量 {self.max_counts_limit},当前已采集 {self.max_counts} 条,停止任务"
+        )
+
+    # 排序
+    def li_or_lo(self, key):
+        # 功能:进入搜索结果页后切换价格排序方式。
+
+        if key == "升序":
+            self.sort_key += 1
+            self.d.xpath('//*[@text="价格"]').click()
+            n = self.d.xpath('//*[@text="总价低到高"]')
+            if n.exists:
+                n.click()
+            time.sleep(self.get_sleep_time())
+
+        if key == "降序":
+            self.sort_key += 1
+            self.d.xpath('//*[@text="价格"]').click()
+            n = self.d.xpath('//*[@text="单粒价格低到高"]')
+            if n:
+                n.click()
+            else:
+                self.d.xpath('//*[@text="价格"]').click()
+
+    # 返回列表页
+    def back_to_list_page(self):
+        # 功能:通过多次尝试返回键,尽量把页面恢复到商品列表页。
+        for i in range(10):
+            if self.distinct_target():
+                return True
+            print(f'第{i}次尝试退回到列表页')
+            self.swipe_back(1)
+            time.sleep(1)
+        print('页面出错,没有退回到列表页')
+        return False
+
+    def get_drug_lis(self, idx):
+        # 功能:根据当前页布局拿到可点击的商品卡片列表。
+        # 这里区分 idx==0 和后续页面,是因为首屏与翻页后的 RecyclerView 层级不完全一致。
+        if idx == 0:
+            drug_lis = self.d.xpath(
+                '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[2]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout').all()
+        else:
+            for i in range(1, 6):
+                drug_lis = self.d.xpath(
+                    f'/hierarchy/android.widget.FrameLayout[{i}]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.support.v7.widget.RecyclerView[1]/android.widget.FrameLayout').all()
+                if drug_lis:
+                    break
+        return drug_lis
+
+    # 代码运行那时候的时间
+    def app_current_time(self):
+        # 功能:返回当前时刻的格式化时间字符串,主要用于日志打印。
+        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    def slide_link(self):
+        # 功能:在分享面板中横向滑动,把“复制链接”附近的目标入口滑到可见区域。
+        value_tag = None
+        if self.d.xpath('//*[@text="微信"]').exists:
+            value_tag = self.d.xpath('//*[@text="微信"]').info['bounds']
+            self.d.swipe(400, value_tag['top'], 100, value_tag['top'], 0.3)
+            return
+        if self.d.xpath('//*[@text="朋友圈"]').exists:
+            value_tag = self.d.xpath('//*[@text="朋友圈"]').info['bounds']
+            self.d.swipe(400, value_tag['top'], 100, value_tag['top'], 0.3)
+            return
+        if self.d.xpath('//*[@text="QQ好友"]').exists:
+            value_tag = self.d.xpath('//*[@text="QQ好友"]').info['bounds']
+            self.d.swipe(400, value_tag['top'], 100, value_tag['top'], 0.3)
+            return
+
+    def app_start_time(self):
+        """
+        获取app启动时间
+        :return:
+        """
+        return datetime.datetime.now().strftime('%Y-%m-%d %H:%M:%S')
+
+    def stop_app(self):
+        # 功能:停止拼多多 App,并等待设备状态稳定下来。
+        self.d.app_stop(self.package_name)
+        time.sleep(5)
+
+    def start_app(self):
+        # 功能:启动拼多多 App,并预留加载时间。
+        self.d.app_start(self.package_name)
+        time.sleep(5)
+
+    def restart_app(self):
+        """
+        重启app
+        :return:
+        """
+        self.stop_app()
+        # 这里先调用 stop_app(),是为了清掉上一次运行残留的页面状态;
+        # 后续 start_app() 依赖应用已经被完全拉起,搜索流程才能从稳定起点开始。
+        self.start_app()
+
+    @staticmethod
+    def get_sleep_time():
+        # 功能:生成短随机等待时间,减少固定节奏操作带来的页面未加载或风控风险。
+        return random.randint(1, 2)
+        # return random.randint(5, 8)
+
+    @staticmethod
+    def get_current_date():
+        # 功能:返回当前采集日期,作为去重和落库字段使用。
+        return datetime.datetime.now().strftime('%Y/%m/%d')
+
+    @staticmethod
+    def _normalize_rule_list(value):
+        # 功能:把单值或集合统一归一成非空字符串列表,供过滤逻辑直接复用。
+        if value is None:
+            return []
+        if isinstance(value, (list, tuple, set)):
+            raw_values = value
+        else:
+            raw_values = [value]
+        result = []
+        for item in raw_values:
+            item_str = str(item).strip()
+            if item_str:
+                result.append(item_str)
+        return result
+
+    @staticmethod
+    def _normalize_match_text(value):
+        # 功能:把待匹配文本做去空白和小写归一,减少页面文案格式差异带来的误判。
+        return re.sub(r'\s+', '', str(value or '')).lower()
+
+    def _match_any_keyword(self, text, keywords):
+        # 功能:判断目标文本是否命中任一过滤词;过滤词为空时直接放行。
+        keyword_list = self._normalize_rule_list(keywords)
+        if not keyword_list:
+            # 没有配置过滤词时默认放行,让调用方只在需要时开启细筛。
+            return True
+        normalized_text = self._normalize_match_text(text)
+        return any(self._normalize_match_text(keyword) in normalized_text for keyword in keyword_list)
+
+    def is_link_spec_useful(self, product_title, specifications=''):
+        # 功能:判断标题或说明书规格里是否包含目标品规。
+        if not self.spec_list:
+            return True
+
+        title_text = self._normalize_match_text(product_title)
+        spec_text = self._normalize_match_text(specifications)
+
+        for spec in self.spec_list:
+            normalized_spec = self._normalize_match_text(spec)
+            if normalized_spec in title_text or normalized_spec in spec_text:
+                return True
+        return False
+
+    def is_link_useful(self, product_title, specifications=''):
+        # 功能:统一做标题、品牌、规格三层过滤,尽量在早期就排除无关商品。
+        if not self._match_any_keyword(product_title, self.title_key):
+            print(f"当前商品名称:{product_title} 不包含{self.title_key}关键字")
+            return False
+        if not self._match_any_keyword(product_title, self.brand):
+            print(f"当前商品名称:{product_title} 不包含{self.brand}品牌")
+            return False
+        if not self.is_link_spec_useful(product_title, specifications):
+            print(f"当前商品名称:{product_title} 不包含{self.spec_list}品规")
+            return False
+        return True
+
+    def remove_watermark(self, img_path):
+        # 功能:弱化截图中的水印或遮罩,提升后续 OCR 识别成功率。
+        """
+        图片去水印(将水印部分变成白色背景)并将数据转化为二进制数据
+        :param img_path: 图片路径
+        :return: 二进制图片数据
+        """
+        img = cv2.imdecode(np.fromfile(img_path, dtype=np.uint8), -1)
+        endswith = os.path.splitext(img_path)[1]
+        new = np.clip(1.4057577998008846 * img - 38.33089999653017, 0, 255).astype(np.uint8)
+        _, img_binary = cv2.imencode(endswith, new)
+        return img_binary
+
+    def get_shop_name(self):
+        # Purpose: read the shop name from the current detail page; when that
+        # element is absent, open the shop page and read the name there.
+        """
+        Get the shop name.
+        :return: the shop name text, '' when no element could be found,
+            or None when an exception occurred
+        """
+        try:
+            xpath = '//*[@text="进店"]/preceding-sibling::android.view.ViewGroup/android.widget.LinearLayout/android.widget.TextView'
+
+            # Cheapest path: read the name directly on the current detail page.
+            if self.d.xpath(xpath).exists:
+                shop_name = self.d.xpath(xpath).text
+                self.loggerPdd.info(f'1-获取到店铺名:{shop_name}')
+            else:
+                # Fallback: open the shop page and read the name from it.
+                shop_btn_xpath = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]'
+                if self.d.xpath(shop_btn_xpath).exists:
+                    self.d.xpath(shop_btn_xpath).click()
+
+                    # Short fixed wait after the click before reading the new page.
+                    time.sleep(1)
+                    # self.d.xpath('//*[@text="店铺"]').click()
+
+                    xpath_shop_name = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.view.ViewGroup[1]/android.widget.LinearLayout[1]/android.widget.RelativeLayout[1]/android.widget.LinearLayout[1]/android.support.v7.widget.RecyclerView[1]/android.widget.RelativeLayout[1]/android.view.ViewGroup[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.TextView[1]'
+                    if self.d.xpath(xpath_shop_name).exists:
+                        shop_name = self.d.xpath(xpath_shop_name).text
+                        self.loggerPdd.info(f'2-获取到店铺名:{shop_name}')
+                    else:
+                        shop_name = ''
+                        self.loggerPdd.info(f'3-获取到店铺名:{shop_name}')
+
+                    # Leave the shop page again after reading (or failing to read) the name.
+                    self.swipe_back(1)  #
+                else:
+                    shop_name = ''
+                    self.loggerPdd.info('4-因为shop_btn_xpath不存在,获取到店铺名为空')
+                    # time.sleep(10000)
+            return shop_name
+        except Exception as e:
+            # Any failure is printed and logged; the caller receives None.
+            print(f'获取店铺名出错:{e}')
+            self.loggerPdd.error(f'获取店铺名出错:{e}')
+            return None
+
+    def save_to_shop_database(self, data):
+        # 功能:把当前商品采集结果落库;只有 commit 成功后才计入采集数量。
+        print(f'保存店铺数据到店铺数据库:{data}')
+        max_retries = 5
+        # 数据库偶发抖动时允许短重试,但只有 commit 成功后才算真正采集到一条数据。
+        for attempt in range(max_retries):
+            conn = None
+            try:
+                conn = get_mysql()
+                with conn.cursor() as cur:
+                    add_sql = """
+                                INSERT INTO pdd_shop_info_middle (
+                                    shop, contact_address, qualification_number, business_license_company,
+                                    business_license_address, store_url, scrape_date, platform,
+                                    province,city, create_time, update_time  
+                                ) VALUES (
+                                    %s, %s, %s, %s, %s,
+                                    %s, %s, %s, %s, %s,
+                                    %s, %s
+                                )
+                            """
+                    cur.execute(add_sql, (
+                        data['shop'],
+                        None,
+                        None,
+                        None,
+                        None,
+                        data['store_url'],
+                        data['scrape_date'],
+                        data['platform'],
+                        None,
+                        None,
+                        data['create_time'],
+                        data['update_time'],
+                    ))
+                conn.commit()
+                self.max_counts += 1
+                print(f"存入数据库成功")
+                return True
+            except Exception as e:
+                print(f'保存数据库异常 (尝试 {attempt + 1}/{max_retries}): {e}')
+                if conn:
+                    conn.rollback()
+                    conn.close()
+                if attempt == max_retries - 1:
+                    print("达到最大重试次数,保存失败")
+                    return False
+                time.sleep(2)
+
+
+    def save_to_database(self, data):
+        # 功能:把当前商品采集结果落库;只有 commit 成功后才计入采集数量。
+        print(f'保存数据到数据库:{data}')
+        max_retries = 5
+        # 数据库偶发抖动时允许短重试,但只有 commit 成功后才算真正采集到一条数据。
+        for attempt in range(max_retries):
+            conn = None
+            try:
+                conn = get_mysql()
+                with conn.cursor() as cur:
+                    add_sql = """
+                        INSERT INTO retrieve_scrape_data (
+                            enterprise_id, platform_id, platform_item_id, province_id, city_id,
+                            province_name, city_name, area_info, product_name, product_specs,
+                            one_box_price, manufacture_date, expiry_date, manufacturer, approval_number,
+                            is_sold_out, online_posting_count, continuous_listing_count, link_url,
+                            store_name, store_url, shipment_province_id, shipment_province_name,
+                            shipment_city_id, shipment_city_name, company_name, qualification_number,
+                            scrape_date, min_price, number, sales, inventory, snapshot_url
+                        ) VALUES (
+                            %s, %s, %s, %s, %s,
+                            %s, %s, %s, %s, %s,
+                            %s, %s, %s, %s, %s,
+                            %s, %s, %s, %s,
+                            %s, %s, %s, %s,
+                            %s, %s, %s, %s,
+                            %s, %s, %s, %s, %s, %s
+                        )
+                    """
+                    cur.execute(add_sql, (
+                        data['enterprise_id'],
+                        data['platform_id'],
+                        data['platform_item_id'],
+                        data['province_id'],
+                        data['city_id'],
+                        data['province_name'],
+                        data['city_name'],
+                        data['area_info'],
+                        data['product_name'],
+                        data['product_specs'],
+                        data['one_box_price'],
+                        data['manufacture_date'],
+                        data['expiry_date'],
+                        data['manufacturer'],
+                        data['approval_number'],
+                        data['is_sold_out'],
+                        data['online_posting_count'],
+                        data['continuous_listing_count'],
+                        data['link_url'],
+                        data['store_name'],
+                        data['store_url'],
+                        data['shipment_province_id'],
+                        data['shipment_province_name'],
+                        data['shipment_city_id'],
+                        data['shipment_city_name'],
+                        data['company_name'],
+                        data['qualification_number'],
+                        data['scrape_date'],
+                        data['min_price'],
+                        data['number'],
+                        data['sales'],
+                        data['inventory'],
+                        data['snapshot_url'],
+                    ))
+                conn.commit()
+                self.max_counts += 1
+                print(f"存入数据库成功,当前已采集 {self.max_counts} 条")
+                return True
+            except Exception as e:
+                print(f'保存数据库异常 (尝试 {attempt + 1}/{max_retries}): {e}')
+                if conn:
+                    conn.rollback()
+                    conn.close()
+                if attempt == max_retries - 1:
+                    print("达到最大重试次数,保存失败")
+                    return False
+                time.sleep(2)
+
+    def click_target_product_by_search_key(self, fuzzy_match=False, timeout=10):
+        # 功能:在列表页重新定位当前搜索词对应的商品,常用于异常恢复后的重新对焦。
+        """
+        动态匹配self.search_key对应的商品并点击
+        :param fuzzy_match: 是否模糊匹配(应对商品名带额外后缀/前缀的情况) 不模糊匹配
+        :param timeout: 等待元素出现的超时时间(秒)
+        :return: 点击是否成功(bool)
+        """
+        try:
+            # 1. 定义定位条件(动态使用self.search_key)
+            # 异常恢复后需要重新找到“当前任务真正想点的那一个商品”,
+            # 这里支持精确和模糊两种定位策略。
+            if fuzzy_match:
+                # 模糊匹配:包含search_key即可(推荐,适配搜索结果商品名略有差异)
+                locator = self.d(textContains=self.search_key)
+                print(f"🔍 模糊匹配商品:包含「{self.search_key}」的元素")
+            else:
+                # 精确匹配:商品名与search_key完全一致
+                locator = self.d(text=self.search_key)
+                print(f"🔍 精确匹配商品:「{self.search_key}」")
+
+            # 2. 等待元素出现(核心:避免元素未加载就点击)
+            if locator.wait(timeout=timeout):
+                print(f"✅ 找到匹配的商品,准备点击")
+                # 执行点击(优先点击可点击的元素)
+                locator.click()
+                print(f"✅ 成功点击「{self.search_key}」对应的商品")
+                # 点击后等待页面加载
+                time.sleep(self.get_sleep_time())
+                return True
+            else:
+
+                print(f"❌ 滑动后仍未找到「{self.search_key}」对应的商品")
+                return False
+        except Exception as e:
+            print(f"❌ 点击「{self.search_key}」对应商品时异常:{e}")
+            return False
+
+    def swipe_down(self):
+        # 功能:执行带随机性的向下滑动,兼顾页面恢复、回找搜索框和设备适配。
+        """
+        下滑(模拟真人操作,抗风控+设备适配+容错)
+        核心:起点在屏幕上方,终点在屏幕下方(和上滑相反)
+        :return: None
+        """
+        try:
+            # 1. 获取屏幕尺寸(兼容不同设备,给默认值避免获取失败)
+            screen_width = self.d.info.get('displayWidth', 1080)  # 默认1080px宽度
+            screen_height = self.d.info.get('displayHeight', 2400)  # 默认2400px高度
+
+            # 2. 随机滑动时长(0.1~0.3秒,避免固定值被风控,且不设0秒)
+            duration_rate = random.uniform(0.1, 0.3)
+
+            # 3. 计算滑动坐标(用屏幕比例,适配所有设备)
+            start_x = screen_width // 2  # 水平居中(和上滑一致,符合真人操作习惯)
+            start_y = screen_height * 0.2  # 起点:屏幕20%高度(上方偏下)
+            end_y = screen_height * 0.8  # 终点:屏幕80%高度(下方偏上)
+            # 强制确保起点y < 终点y(必为向下滑,避免逻辑错误)
+            start_y, end_y = min(start_y, end_y - 10), max(end_y, start_y + 10)
+
+            # 4. 核心向下滑动操作
+            self.d.swipe(start_x, start_y, start_x, end_y, duration=duration_rate)
+            # 滑动后全局等待(确保页面加载,避免元素定位失败)
+            time.sleep(self.get_sleep_time())
+
+
+        except Exception as e:
+            # 异常捕获:避免设备断开/滑动失败导致程序崩溃
+            print(f"向下滑动失败:{e}")
+            # 兜底方案:用固定坐标重试(适配主流1080x2400设备)
+            self.d.swipe(540, 480, 540, 1920, duration=0.2)
+            time.sleep(self.get_sleep_time())
+
+    def swipe_up(self):
+        # 功能:执行向上滑动,用于翻页或继续向下浏览详情。
+        """
+        上滑
+        :return:
+        """
+        screen_width = self.d.info['displayWidth']
+        screen_height = self.d.info['displayHeight']
+        duration_rate = random.uniform(0, 0.3)
+        self.d.swipe(screen_width // 2, screen_height - 100, screen_width // 2, 100, duration=duration_rate)
+        no = random.uniform(0, 1)
+        if no > 0.85:
+            # 有的时候卡着 再稍微往上滑一点点
+            self.d.swipe_ext("up", 0.1)
+            time.sleep(self.get_sleep_time())
+
+    def swipe_back(self, no):
+        # 功能:按指定次数执行返回,但只有当前不在列表页时才真正后退。
+        """
+        返回
+        :param no: 回退次数
+        :return:
+        """
+        if not self.distinct_target():
+            for idx in range(no):
+                self.d.press('back')
+                time.sleep(self.get_sleep_time())
+
+    def drug_price(self):
+        # 功能:直接从详情页读取价格,作为规格弹窗取价失败时的兜底方案。
+        """
+        获取药品价格
+        :return:
+        """
+        try:
+            xpath = '//*[@text="¥"]/following-sibling::android.widget.TextView[1]'
+            price_str = self.d.xpath(xpath).text
+            price = float(re.search(r'[\d\.]+', price_str).group())
+            print(f'获取到价格:{price}')
+            return float(price)
+        except Exception as e:
+            print(f'提取价格出错-->{e}')
+            return None
+
+    def drug_price_ex(self):
+        # 功能:优先从规格选择弹窗里同时提取价格和“已选规格”文本。
+
+        price_str = ''  # 价格初始化
+        ext = ''  # 初始化已选择的信息
+        price = ''
+
+        # 阶段 1:先尝试打开规格/品规弹窗,因为后续价格和规格文本都依赖这个弹窗内容。
+        # 这是点击进入品规的按钮
+        button_xpath_1 = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[2]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.view.ViewGroup[last()]'
+
+        button_xpath_2 = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[2]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[1]/android.view.ViewGroup[last()]'
+
+        # 调试
+        # test_button = self.d.xpath(button_xpath_1).exists
+        # print(test_button)
+
+        # test_button_2 = self.d.xpath(button_xpath_2).exists
+        # print(test_button_2)
+        # time.sleep(1000)
+        # if self.d.xpath('//*[@text="发起拼单"]').exists:
+        #     self.d.xpath('//*[@text="发起拼单"]').click()
+        # elif self.d.xpath('//*[@text="去复诊开药"]').exists:
+        #     self.d.xpath('//*[@text="去复诊开药"]').click()
+
+        if self.d.xpath(button_xpath_1).exists:
+            self.d.xpath(button_xpath_1).click()
+        elif self.d.xpath(button_xpath_2).exists:
+            self.d.xpath(button_xpath_2).click()
+        else:
+            print("button1 and button_2 all not exist")
+            return price, ext
+
+        # 阶段 2:根据不同弹窗布局选择对应的 XPath 解析策略。
+        select_xpath_1 = '//*[@resource-id="android:id/content"]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.TextView[last()]'
+        select_xpath_2 = '//*[@resource-id="android:id/content"]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.TextView[last()]'
+        select_xpath_3 = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.view.ViewGroup[2]/android.widget.LinearLayout[1]/android.view.ViewGroup[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.view.ViewGroup[1]/android.widget.TextView[last()]'
+        select_xpath_3_2 = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.view.ViewGroup[2]/android.widget.LinearLayout[1]/android.view.ViewGroup[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.view.ViewGroup[1]/android.widget.TextView[last()-1]'
+
+        price_xpath_1 = '//*[@resource-id="android:id/content"]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.TextView[1]'
+        price_xpath_2 = '//*[@resource-id="android:id/content"]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.TextView[1]'
+        price_xpath_3 = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.view.ViewGroup[2]/android.widget.LinearLayout[1]/android.view.ViewGroup[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.view.ViewGroup[1]//android.widget.TextView[1]'
+
+        if self.d.xpath(select_xpath_1).exists:
+            text1 = self.d.xpath(select_xpath_1).text
+
+            print(f"select_xpath_1--text1={text1}")
+
+            # 这里先判断是否已经有默认规格,是为了减少额外点击;
+            # 如果已经存在“已选”文本,后续可以直接读取价格和规格。
+            if '已选' in text1:
+                if self.d.xpath(price_xpath_1).exists:
+                    price_str = self.d.xpath(price_xpath_1).text
+                    print(f"select_xpath_1--price_str-1={price_str}")
+                else:
+                    print("select_xpath_1--price_xpath_1-1 not exist")
+
+                ext = text1
+            elif '请选择' in text1:
+                # 调用 click() 的目的是补齐一次规格选择动作,
+                # 调用后价格文本和“已选规格”文案才会稳定刷新出来。
+                # 需要再下面点击选择
+                scroll_xpath_1 = '//*[@resource-id="android:id/content"]//android.widget.ScrollView[1]/android.widget.LinearLayout[1]/android.support.v7.widget.RecyclerView[1]/android.widget.LinearLayout[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[last()]'
+                scroll_xpath_2 = ''
+                if self.d.xpath(scroll_xpath_1).exists:
+                    self.d.xpath(scroll_xpath_1).click()
+                    time.sleep(2)  # 延时2秒钟,选择了之后价格会刷新
+                    if self.d.xpath(select_xpath_1).exists:
+                        text2 = self.d.xpath(select_xpath_1).text
+                        if '已选' in text2:
+                            print(f"select_xpath_1--已选择2:text2={text2}")
+                            if self.d.xpath(price_xpath_1).exists:
+                                price_str = self.d.xpath(price_xpath_1).text
+                                print(f"select_xpath_1--price_str-2={price_str}")
+                            else:
+                                print("select_xpath_1--price_xpath_1-2 not exist")
+
+                            ext = text2
+                else:
+                    print("select_xpath_1--scroll_xpath_1 not exist")
+        elif self.d.xpath(select_xpath_2).exists:
+            text1 = self.d.xpath(select_xpath_2).text
+            print(f"xpath2--text1={text1}")
+            if '已选' in text1:
+                ext = text1
+                if self.d.xpath(price_xpath_2).exists:
+                    price_str = self.d.xpath(price_xpath_2).text
+                    print(f"select_xpath_2--price_str-2={price_str}")
+                else:
+                    print("select_xpath_2--price_xpath_2-1 not exist")
+            elif '请选择' in text1:
+                # 当前布局下如果不先选择一个规格,后续既拿不到准确价格,也无法计算盒数。
+                print('come in here')
+                # 需要再下面点击选择
+                scroll_xpath_1 = '//*[@resource-id="android:id/content"]//android.widget.ScrollView[1]/android.widget.LinearLayout[1]/android.support.v7.widget.RecyclerView[1]/android.widget.LinearLayout[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]'
+
+                if self.d.xpath(scroll_xpath_1).exists:
+                    print("scroll_xpath_1 exists")
+                    self.d.xpath(scroll_xpath_1).click()
+                    time.sleep(2)  # 延时2秒钟,选择了之后价格可能会刷新
+                    if self.d.xpath(select_xpath_2).exists:
+                        text2 = self.d.xpath(select_xpath_2).text
+                        if '已选' in text2:
+                            ext = text2
+                            print(f"select_xpath_2--已选择2:text2={text2}")
+                            if self.d.xpath(price_xpath_2).exists:
+                                price_str = self.d.xpath(price_xpath_2).text
+                                print(f"select_xpath_2--price_str-2={price_str}")
+                            else:
+                                print("select_xpath_2--price_xpath_2-2 not exist")
+                else:
+                    print("scroll_xpath_1 not exists")
+            else:
+                print("not exist 请选择 or 已选")
+        elif self.d.xpath(select_xpath_3).exists:
+            text1 = self.d.xpath(select_xpath_3).text
+            print(f"xpath3--text1-1={text1}")
+
+            if ('请选择' not in text1) and ('已选' not in text1):
+                text1 = self.d.xpath(select_xpath_3_2).text
+                print(f"xpath3--text1-2={text1}")
+
+            if '已选' in text1:
+                ext = text1
+                if self.d.xpath(price_xpath_3).exists:
+                    price_str = self.d.xpath(price_xpath_3).text
+                    print(f"select_xpath_3--price_str-3-3-1={price_str}")
+                else:
+                    print("select_xpath_3--price_xpath_3-3-1 not exist")
+            elif '请选择' in text1:
+                # 这一支兼容另一类规格弹窗结构,核心目标仍然是先拿到“已选”文本。
+                print('come in here')
+                # 需要再下面点击选择
+                scroll_xpath_1 = '//*[@resource-id="android:id/content"]//android.widget.ScrollView[1]/android.widget.LinearLayout[1]/android.support.v7.widget.RecyclerView[1]/android.widget.LinearLayout[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]'
+                recycler_view_xpath = '//*[@resource-id="android:id/content"]//android.support.v7.widget.RecyclerView[1]/android.widget.LinearLayout[1]/android.widget.LinearLayout[last()]/android.view.ViewGroup[1]/android.view.ViewGroup[1]'
+
+                if self.d.xpath(scroll_xpath_1).exists:
+                    print("scroll_xpath_1 exists")
+                    self.d.xpath(scroll_xpath_1).click()
+                    time.sleep(2)  # 延时2秒钟,选择了之后价格可能会刷新
+                    if self.d.xpath(select_xpath_3).exists:
+                        text2 = self.d.xpath(select_xpath_3).text
+                        if '已选' in text2:
+                            ext = text2
+                            print(f"select_xpath_3--已选择2:text2={text2}")
+                            if self.d.xpath(price_xpath_3).exists:
+                                price_str = self.d.xpath(price_xpath_3).text
+                                print(f"select_xpath_3--price_str-3-2={price_str}")
+                            else:
+                                print("select_xpath_3--price_xpath_3-3-2 not exist")
+                elif self.d.xpath(recycler_view_xpath).exists:
+                    self.d.xpath(recycler_view_xpath).click()
+                    time.sleep(2)  # 延时2秒钟,选择了之后价格可能会刷新
+                    if self.d.xpath(select_xpath_3).exists:
+                        text2 = self.d.xpath(select_xpath_3).text
+                        if '已选' in text2:
+                            ext = text2
+                            print(f"select_xpath_3--已选择2:text2={text2}")
+                            if self.d.xpath(price_xpath_3).exists:
+                                price_str = self.d.xpath(price_xpath_3).text
+                                print(f"select_xpath_3--price_str-3-3={price_str}")
+                            else:
+                                print("select_xpath_3--price_xpath_3-3-3 not exist")
+
+                else:
+                    print("scroll_xpath_1 not exists")
+            else:
+                print(f"xpath3--text1-不包含请选择和已选择")
+
+        else:
+            print("select_xpath_1 and select_xpath_2 and select_xpath_3 all not exist")
+
+        # 阶段 3:从界面文案中抽取纯价格值,供后续去重和单盒价格计算。
+        if price_str:
+            # price = float(re.search('[\d\.]+', price_str).group())
+            match = re.search(r'¥([\d\.]+)', price_str)
+            if match:
+                price = float(match.group(1))
+            else:
+                price = ''
+
+                # price = float(re.search(r'¥([\d\.]+)', price_str).group(1))
+            print(f'获取到价格:{price}')
+
+        print(f"ext={ext}")
+
+        # 调用 swipe_back() 的目的是把页面从规格弹窗恢复回商品详情页,
+        # 后续提取店铺名、链接和说明书都依赖当前仍停留在详情页。
+        self.swipe_back(1)  #
+
+        return price, ext
+
+    def restart_uiautomator_services(self, device_id):
+        # 功能:重启设备上的 atx-agent/uiautomator 服务,恢复自动化控制能力。
+        """
+        重启atx的uiautomator 服务
+        :param device_id:
+        :return:
+        """
+        stop_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d --stop'
+        start_uiautomator_services = f'adb -s {device_id} shell /data/local/tmp/atx-agent server -d'
+
+        subprocess.run(stop_uiautomator_services, capture_output=True, text=True, shell=True)
+        time.sleep(self.get_sleep_time())
+        subprocess.run(start_uiautomator_services, capture_output=True, text=True, shell=True)
+        time.sleep(self.get_sleep_time())
+
+    def connect_devices(self, device_id):
+        # 功能:建立 USB 设备连接,并把自动化服务重置到可用状态。
+        """
+        连接设备
+        :return:
+        """
+        try:
+            self.d = u2.connect_usb(device_id)
+            # 设置隐形等待时间
+            # self.d.implicitly_wait(5)
+            # 连上设备后主动重启 atx-agent,减少长时间运行后的控件失效问题。
+            self.restart_uiautomator_services(device_id)
+            print(f'[{self.program_start_time}]连接到设备:{device_id}')
+        except Exception as e:
+            print(f'{device_id} 连接错误: {e}')
+            raise Exception(e)
+
+    def get_ocr_res(self, img):
+        # 功能:对截图做去水印后调用百度 OCR,返回识别出的文字结果列表。
+        try:
+            image = self.remove_watermark(img)
+            res_image = self.client.basicGeneral(image)
+            data = res_image.get('words_result', '')
+            print(f'百度api返回结果:{data}')
+
+            return data
+        except:
+            return None
+
+    def get_title(self):
+        # 功能:从商品详情页提取当前标题,作为第一层匹配和落库名称来源。
+        try:
+            print('开始提取标题')
+            time.sleep(self.get_sleep_time())
+            title_xpath = '//*[@resource-id="com.xunmeng.pinduoduo:id/tv_title"]'
+            if self.d.xpath(title_xpath).exists:
+                title = self.d.xpath(title_xpath).info['contentDescription'].strip()
+            else:
+                return None
+            # title = self.d.xpath('//*[@resource-id="com.xunmeng.pinduoduo:id/tv_title"]').info['contentDescription'].strip()
+            print(f'提取到标题:{title}')
+            return title
+        except Exception as e:
+            print(f'获取标题出错:{e}')
+            return None
+        # 从里面匹配出药品名和规格
+        # drugs_name
+        # specifications
+        # match = re.search(r'([^\d]+)([\d\D]+)', title)
+
+        # match = re.search(r'(\[[^\]]+\])(.+?)(\d+.*)', title)
+        # if match:
+        #     drugs_name = match.group(1).strip() + match.group(2).strip()
+        #     specifications = match.group(3).strip()
+        #     print("药品名:", drugs_name)
+        #     print("规格:", specifications)
+        #     print('完整药名:', drugs_name + specifications)
+        #     return drugs_name, specifications
+        # else:
+        #     print("没有匹配到预期格式")
+
+    def enter_shop(self):
+        # 功能:进入店铺页,供后续读取店铺或资质信息时使用。
+        """
+        进店,方便提取资质环境
+        :return:
+        """
+        # self.d.xpath('//*[@text="进店"]').click()
+        self.d.xpath('//*[@text="店铺"]').click()
+        time.sleep(self.get_sleep_time())
+
+    #店铺去重
+    def shop_is_exists(self, data):
+        # 功能:按店铺去重校验,避免同类数据重复入库。
+        # 1. 验证必要字段
+        # 先校验去重所需字段是否齐全,避免把不完整的数据带到 SQL 条件里。
+        required_keys = ['shop']
+        if not all(key in data for key in required_keys):
+            missing = [key for key in required_keys if key not in data]
+
+            print(f"缺少必要字段: {', '.join(missing)}")
+            return None
+
+        shop_value = data.get('shop')
+        if not shop_value or not str(shop_value).strip():
+            print("shop 字段为空,无法执行去重查询")
+            return False
+
+        conn = None
+        try:
+            conn = get_mysql()
+            with conn.cursor() as cur:
+                query_sql = """
+                    SELECT * FROM {} 
+                    WHERE shop = %s 
+                    LIMIT 1
+                """.format(self.shop_table_name)
+
+                cur.execute(query_sql, (
+                    data['shop']
+                ))
+
+                result = cur.fetchone()
+
+            return bool(result)  # 如果存在返回True,否则False
+        except Exception as e:
+            print(f"MySQL 错误: {str(e)}")
+        finally:
+            if conn:
+                conn.close()
+
+    def get_province_city(self,data):
+        """
+        从 pdd_shop_info_middle 表中查询已存在的 province 和 city,
+        并赋值给 data['province_name'] 和 data['city_name']
+        """
+        print("获取店铺营业公司对应的省份和城市")
+        shop_name = data.get('shop')
+        if not shop_name:
+            print("shop 字段为空,无法执行查询")
+            return
+        conn = None
+        try:
+            conn = get_mysql()
+            with conn.cursor() as cur:
+                # 查询 shop_info_middle 表,获取 province 和 city
+                sql = "SELECT province, city FROM pdd_shop_info_middle WHERE shop = %s LIMIT 1"
+                cur.execute(sql, (shop_name,))
+                result = cur.fetchone()
+
+                if result:
+                    province, city = result
+                    data['province_name'] = province if province else ''
+                    data['city_name'] = city if city else ''
+                    print(f"店铺 {shop_name} 对应的省份和城市为: {province}, {city}")
+                else:
+                    print(f"未在 shop_info_middle 表中找到店铺:{shop_name}")
+                    # 可根据业务需求设置默认值或保持原样
+                    data['province_name'] = ''
+                    data['city_name'] = ''
+        except Exception as e:
+            print(f"查询省市信息失败: {str(e)}")
+            # 异常时也可设置默认空值,避免后续代码因缺少键而报错
+            data['province_name'] = ''
+            data['city_name'] = ''
+        finally:
+            if conn:
+                conn.close()
+
+
+
+
+    def data_is_exists(self, data):
+        # 功能:按价格、店铺、日期、平台做去重校验,避免同类数据重复入库。
+        # 1. 验证必要字段
+        # 先校验去重所需字段是否齐全,避免把不完整的数据带到 SQL 条件里。
+        required_keys = ['min_price', 'shop', 'scrape_date', 'platform']
+        if not all(key in data for key in required_keys):
+            missing = [key for key in required_keys if key not in data]
+
+            print(f"缺少必要字段: {', '.join(missing)}")
+            return None
+
+        conn = None
+        try:
+            conn = get_mysql()
+            with conn.cursor() as cur:
+                query_sql = """
+                    SELECT * FROM {} 
+                    WHERE min_price = %s 
+                    AND store_name = %s 
+                    AND scrape_date = %s 
+                    AND platform_id = %s
+                """.format(self.table_name)
+
+                cur.execute(query_sql, (
+                    data['min_price'],
+                    data['shop'],
+                    data['scrape_date'],
+                    data['platform']
+                ))
+
+                result = cur.fetchone()
+
+            return bool(result)  # 如果存在返回True,否则False
+        except Exception as e:
+            print(f"MySQL 错误: {str(e)}")
+        finally:
+            if conn:
+                conn.close()
+
+    def get_instructions_data(self):
+        # 功能:在详情页中提取说明书/商品参数区域的关键字段,整理成统一字典。
+        """
+        确定有详情页之后之后,提取所有的详情页数据
+        :return:
+        """
+        # 先把页面滚到说明书/参数区域附近,再开始解析键值对。
+        for i in range(8):
+
+            if self.d.xpath('//*[@text="品牌"]').exists or self.d.xpath('//*[@text="药品通用名"]').exists:
+                self.d.swipe_ext("up", scale=0.1)
+                print('开始采集详情数据')
+                break
+            self.d.swipe_ext("up", scale=0.5)
+            time.sleep(self.get_sleep_time())
+
+        # 阶段 2:进入“查看全部”区域,把折叠的参数信息完整展开。
+        # 点击查看全部
+        if self.d.xpath('//*[@text="品牌"]').exists:
+            self.d.xpath('//*[@text="品牌"]').click()
+        else:
+            self.d.xpath('//*[@text="药品通用名"]').click()
+        time.sleep(self.get_sleep_time())
+        attr = dict()
+        # 阶段 3:批量解析键值对文本,构造说明书字段字典。
+        # # 获取详情页信息
+        xpath = '//*[starts-with(@text,"商品参数")]/parent::*/parent::*/following-sibling::*/*/*/android.view.ViewGroup//android.widget.TextView'
+        ddd = self.d.xpath(xpath).all()
+        for i in range(0, len(ddd), 2):
+            group = ddd[i:i + 2]
+            attr[group[0].text] = group[1].text
+        # 截图获取未获取到的数据
+        # if not all(i in ['有效期', '生产企业', '批准文号', '药品规格', '产品规格'] for i in attr.keys()):
+        if not all(i in ['有效期', '生产企业', '批准文号', '药品规格'] for i in attr.keys()):
+            # 首轮解析拿不到关键字段时再补一次较短滑动,兼容参数区未完整展示的情况。
+            self.d.swipe_ext("up", 0.4)
+            time.sleep(self.get_sleep_time())
+            xpath = '//*[starts-with(@text,"商品参数")]/parent::*/parent::*/following-sibling::*/*/*/android.view.ViewGroup//android.widget.TextView'
+            ddd = self.d.xpath(xpath).all()
+            for i in range(0, len(ddd), 2):
+                group = ddd[i:i + 2]
+                attr[group[0].text] = group[1].text
+        print(f'当前说明书规格参数:{attr}')
+        res_data = {
+            # "有效期": attr.get('有效期',''),
+            # "生产单位": attr['生产企业'],
+            # "批准文号": attr['批准文号'],
+            # "产品规格": attr.get('药品规格') if attr.get('药品规格', '') else attr.get('药品规格')
+            "有效期": attr.get('有效期', ''),
+            "生产单位": attr.get('生产企业', ''),
+            "批准文号": attr.get('批准文号', ''),
+            "产品规格": attr.get('药品规格', '')
+        }
+        print(f'当前规格参数字典数据:{res_data}')
+        return res_data
+
+    def has_instructions(self):
+        # 功能:判断当前详情页能否找到说明书/商品详情区域。
+        """
+        是否有详情页
+        :return:如果有详情页返回True,否则返回False
+        """
+        # 没有说明书的无法采集具体数据
+        max_attempts = 12  # 最大尝试次数
+        attempt = 0  # 当前尝试次数
+
+        while attempt < max_attempts:
+            time.sleep(0.5)
+            xpath = '//*[@text="商品详情"]'
+            is_has_instructions = self.d.xpath(xpath).exists
+            if is_has_instructions:
+                return True  # 如果找到“商品详情”,则返回True
+            self.d.swipe_ext("up", 0.3)
+            attempt += 1
+
+        return False  # 如果尝试次数达到最大次数,则返回False
+
+    def distinct_target(self):
+        # 功能:判断当前页面是否已经回到商品列表页。
+        # 这里同时检查多个锚点,是为了兼容拼多多不同活动页和不同 UI 版本。
+        result = False
+
+        is_position = self.d.xpath('//*[@content-desc="拍照搜索"]').exists
+        is_position2 = self.d.xpath('//*[@text="年货节大促"]').exists
+        is_position3 = self.d.xpath('//*[@text="筛选"]').exists
+        is_position4 = self.d.xpath('//*[@text="回头客常拼"]').exists
+
+        list_page_xpath = '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[2]/android.view.ViewGroup[1]/android.widget.LinearLayout[1]//android.support.v7.widget.RecyclerView[1]'
+
+        is_position_new = self.d.xpath(list_page_xpath).exists
+        print(f'is_position_new={is_position_new}')
+
+        if is_position or is_position2 or is_position3 or is_position4 or is_position_new:
+            result = True
+
+        return result
+
+    def enter_target_page(self):
+        # 功能:进入搜索页、输入关键字并恢复排序/页位,为主循环建立起始页面。
+        # 阶段 1:进入搜索框并提交当前任务的搜索词。
+        self.d.xpath(
+            '//*[@resource-id="android:id/content"]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]/android.widget.FrameLayout[1]/android.widget.LinearLayout[1]').click()
+        time.sleep(self.get_sleep_time())
+        self.d(className='android.widget.EditText').click()
+        time.sleep(self.get_sleep_time())
+        self.d.send_keys(self.search_key, clear=True)
+        time.sleep(self.get_sleep_time())
+        self.d.xpath('//*[@text="搜索"]').click()
+        time.sleep(self.get_sleep_time())
+        # 阶段 2:如果任务要求排序,则在首次进入结果页后先切到目标排序方式。
+        # 排序只在进入列表后的第一次执行,避免恢复进度时重复切换排序方向。
+        if self.sort and self.sort_key == 0:
+            self.li_or_lo(self.sort)
+        # progress = self.wr_re("读", self.device_id)
+        progress = None
+        # 阶段 3:如有历史页码,则把列表大致恢复到目标位置。
+        # 进度恢复逻辑目前停用,但保留按页滑动的入口,便于后续重新启用断点续跑。
+        if not progress and self.page > 0:
+            self.scroll_to_target_page(self.page)
+
+    def get_clipboard(self):
+        # 功能:读取设备剪贴板内容,并去掉空值和首尾空白。
+        self.loggerPdd.info(f"Clipboard content:{self.d.clipboard}")  # 打印调试信息
+        clipboard_content = self.d.clipboard
+        if clipboard_content is None:
+            return ''
+        return clipboard_content.strip()
+
+    def get_product_link(self):
+        # 功能:通过商品详情页的分享入口复制商品链接。
+        product_link = ''
+        print('开始获取商品链接')
+
+        content_frame = self.d.xpath('//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]').exists
+        print(content_frame)
+
+        relative_layout = self.d.xpath(
+            '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]').exists
+        print(relative_layout)
+
+        relative_layout2 = self.d.xpath(
+            '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]').exists
+        print(relative_layout2)
+
+        Frame_Layout = self.d.xpath(
+            '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[2]').exists
+        print(Frame_Layout)
+        ImageView = self.d.xpath(
+            '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[2]/android.view.View[1]').exists
+        print(ImageView)
+
+        ImageView2 = self.d.xpath(
+            '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[3]/android.view.View[1]').exists
+        print(ImageView2)
+        # 多种可能的“分享”按钮
+        # 分享入口在不同商品页布局里位置不稳定,因此保留多套候选 XPath。
+        dots_xpaths = [
+            # '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[2]/android.view.View[1]',
+            '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[1]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[last()]/android.view.View[1]',
+            # '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.RelativeLayout[1]/android.widget.FrameLayout[2]/android.view.View[1]',
+            # '//*[@resource-id="android:id/content"]/android.widget.FrameLayout[1]/android.widget.RelativeLayout[2]/android.widget.RelativeLayout[2]/android.widget.FrameLayout[3]/android.widget.ImageView[1]',
+        ]
+
+        # 阶段 1:遍历候选分享入口,找到当前布局下可点击的“更多/分享”按钮。
+        max_retry = 5  # 最多尝试次数
+        # 分享面板偶尔会因为动画或按钮未露出而失败,因此允许多次重试。
+        for idx in range(1, max_retry + 1):
+            if product_link:  # 已经拿到则退出
+                break
+
+            for xp in dots_xpaths:
+                if self.d.xpath(xp).exists:
+                    # print(f'{idx}-进入分享点点点')
+                    self.loggerPdd.info(f'{idx}-进入分享点点点')
+
+                    # 调用 click() 的目的是打开分享面板;
+                    # 后续 slide_link() 和“复制链接”点击都依赖分享面板已经展开。
+                    self.d.xpath(xp).click()
+                    time.sleep(1)
+                    self.loggerPdd.info('开始滑动')
+                    # 这里先调用 slide_link(),是为了把“复制链接”按钮滑到当前可见区域。
+                    self.slide_link()
+                    time.sleep(0.2)
+                    # 调用 click_exists() 的目的是直接触发系统复制动作,
+                    # 调用后 get_clipboard() 才有机会读到最新商品链接。
+                    self.d.xpath('//*[@text="复制链接"]').click_exists()
+                    time.sleep(1)
+                    product_link = self.get_clipboard()
+                    time.sleep(0.5)
+
+                    self.loggerPdd.info(f'{idx}-商品链接:{product_link}')
+                    break  # 找到并执行后跳出内层循环
+
+        if not product_link and idx < max_retry:
+            time.sleep(0.5)  # 最后一次不需要再等待
+
+        # time.sleep(100000)
+
+        return product_link
+
+    def integrate_data_v2(self):
+        # Purpose: on a single product detail page, run the whole pipeline of price,
+        # link, shop, instructions, de-duplication and persistence.
+        """
+        Collect one product from its detail page and store it.
+
+        Title, brand and specification are validated through ``is_link_useful``
+        (per the original note, driven by the entry configuration instead of
+        hard-coded branches). The method returns None on every path; each early
+        exit navigates back first via ``swipe_back`` or ``back_to_list_page``.
+        """
+        # Stage 1: get price and title first and filter unrelated products early.
+        # The price is taken from the specification popup first because that call
+        # also returns the selected-specification text (ext).
+        min_price, ext = self.drug_price_ex()
+        title_info = self.get_title()
+
+        if not title_info:
+            print('标题获取为空')
+            self.swipe_back(1)
+            return
+
+        # Coarse filter on the title alone, to drop unrelated products as early as possible.
+        if not self.is_link_useful(title_info):
+            self.swipe_back(1)
+            self.unrelated_data += 1
+            return
+
+        # When the popup did not yield a price, fall back to reading it from the detail page.
+        if not min_price:
+            min_price = self.drug_price()
+            if not min_price:
+                print('提取价格出错,回退到列表页')
+                self.swipe_back(1)
+                self.unrelated_data += 1
+                return
+
+        # Stage 2: collect the product link and the shop name; both are needed for
+        # de-duplication and for the stored record.
+        product_link = self.get_product_link()
+        time.sleep(2)
+
+        # The shop block is not always on the first screen: depending on
+        # self.direct_shop_lookup, read it directly or scroll until "进店" is visible.
+        if self.direct_shop_lookup:
+            shop = self.get_shop_name()
+        else:
+            for _ in range(15):
+                if self.d(textStartsWith="进店").exists:
+                    print('开始获取店铺名')
+                    break
+                self.d.swipe_ext("up", scale=0.3)
+                time.sleep(self.get_sleep_time())
+                if self.d(textStartsWith="进店").exists:
+                    print('可以开始获取店铺名')
+            shop = self.get_shop_name()
+
+        if not shop:
+            print('当前店铺名称为空')
+            self.swipe_back(1)
+            self.unrelated_data += 1
+            return
+
+        scrape_date = self.get_current_date()
+        # NOTE(review): 'platform' is the literal '3' here, while save_data below
+        # stores self.platform as platform_id — confirm the two agree.
+        dup_data = {
+            'min_price': min_price,
+            'shop': shop,
+            'scrape_date': scrape_date,
+            'platform': '3'
+        }
+        # Same day + same shop + same price counts as a duplicate and is not stored again.
+        if self.data_is_exists(dup_data):
+            print('存在相同数据不入库')
+            self.back_to_list_page()
+            return
+
+        shop_data = {
+            'shop': shop,
+            'store_url': product_link,
+            'scrape_date': scrape_date,
+            'platform': '拼多多',
+            'create_time': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+            'update_time': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S")
+        }
+
+
+        # Province/city stay empty unless the shop is already known (see below).
+        province_name = ''
+        city_name = ''
+
+        # Known shop: back-fill province/city from the stored shop row.
+        # Unknown shop: insert it into the shop table.
+        if self.shop_is_exists(shop_data):
+            print("店铺数据已存在,进行省市回填")
+            self.get_province_city(shop_data)
+            province_name = shop_data['province_name']
+            city_name = shop_data['city_name']
+        else:
+            print(f"店铺数据不存在,插入{self.shop_table_name}店铺表")
+            self.save_to_shop_database(shop_data)
+
+
+
+        # Stage 3: check for the instructions section and, when present, extract
+        # specification, manufacturer and approval number from it.
+        is_has_instructions = self.has_instructions()
+        self.loggerPdd.info(f'是否有说明书:{is_has_instructions}')
+        manufacture_date = ''
+        # The selected-specification text from the popup is what the box count is parsed from.
+        credit_code = ext
+        # Not every product has the section; without it the record is still stored
+        # with these fields left empty.
+        if is_has_instructions:
+            try:
+                instructions_info = self.get_instructions_data()
+                expiry_date = instructions_info['有效期'].strip('。')
+                manufacturer = instructions_info['生产单位'].strip('。')
+                approval_number = instructions_info['批准文号'].strip('。')
+                specifications = instructions_info['产品规格'].strip('。')
+            except Exception as e:
+                print(f'获取详情页规格参数出错:{e}')
+                self.swipe_back(2)
+                return
+        else:
+            expiry_date = ''
+            manufacturer = ''
+            approval_number = ''
+            specifications = ''
+
+        # Second validation, now including the specification from the instructions,
+        # to avoid false hits from fuzzy title matching.
+        if not self.is_link_useful(title_info, specifications):
+            self.swipe_back(1)
+            self.unrelated_data += 1
+            return
+
+        # A valid product resets the counter of consecutive unrelated items.
+        self.unrelated_data = 0
+        # A failed box-count extraction does not stop the flow; the per-box price becomes 0.
+        if extract_box_number(credit_code):
+            one_box_price = min_price / extract_box_number(credit_code)
+        else:
+            print("单瓶药品价格没处理成功")
+            one_box_price = 0
+
+        # Stage 4: assemble the record in the layout expected by save_to_database.
+        save_data = {
+            'enterprise_id': self.enterprise_id,
+            'platform_id': self.platform,
+            'platform_item_id': '',
+            'province_id': 0,
+            'city_id': 0,
+            'province_name': province_name,
+            'city_name': city_name,
+            'area_info': "",
+            'product_name': title_info,
+            'product_specs': specifications,
+            'one_box_price': one_box_price,
+            'manufacture_date': manufacture_date,
+            'expiry_date': expiry_date,
+            'manufacturer': manufacturer,
+            'approval_number': approval_number,
+            'is_sold_out': 0,
+            'online_posting_count': 1,
+            'continuous_listing_count': 1,
+            'link_url': product_link,
+            'store_name': shop,
+            'store_url': '',
+            'shipment_province_id': 0,
+            'shipment_province_name': "",
+            'shipment_city_id': 0,
+            'shipment_city_name': "",
+            'company_name': "",
+            'qualification_number': "",
+            'scrape_date': scrape_date,
+            'min_price': min_price,
+            'number': 0,
+            'sales': "",
+            'inventory': "",
+            'snapshot_url': "",
+            'insert_time': datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
+        }
+        # Persist immediately so that later page navigation or an interruption
+        # cannot lose the validated record.
+        self.save_to_database(save_data)
+    def main(self, device_id, search_key_length, keyword_idx):
+        # 功能:执行单设备的完整采集主循环,直到达到结束页、采集上限或异常退出条件。
+        completed_normally = False
+        stop_by_max_count = False
+        spider_no = 0
+        current_page = self.page
+        # 阶段 1:建立设备连接,准备进入搜索页面。
+        self.connect_devices(device_id)
+        time.sleep(self.get_sleep_time())
+
+        # 第一个关键字会重启 App 并重新进入搜索页,后续关键字只复用当前输入框。
+        if keyword_idx == 0:
+            print("搜索前,先重启APP")
+            self.restart_app()
+            # 搜索关键字
+            self.enter_target_page()
+        else:
+            print("清空前面的文字,再输入关键词")
+            self.d.send_keys(self.search_key, clear=True)
+            time.sleep(1)
+            print("点击搜索")
+            self.d.xpath('//*[@text="搜索"]').click()
+            time.sleep(1)
+
+        # 阶段 2:向调度系统上报“执行中”,然后开始按页扫描商品列表。
+        #  上报状态
+        # 进入主循环前先上报“执行中”,让调度系统能看到设备已经开始跑任务。
+        report_api(self.task_id, self.page, 2,finish_status=0)
+        for idx in range(300):
+            print(f'第{current_page}页')
+            # self.wr_re("写", self.device_id, self.sort, current_page)
+            if spider_no > 30:
+                time.sleep(300)
+                spider_no = 0
+            # 连续命中太多无关商品时,认为当前搜索结果已偏离目标,主动收尾。
+            if self.unrelated_data > 30:
+                print(f'[{self.program_start_time}]----{self.search_key}----连续超过30个不达标的数据则停止采集')
+                print(
+                    f"[程序启动时间:{self.program_start_time}-----程序结束时间:{self.app_current_time()}]----搜索关键词:{self.search_key}----点击了{self.click_counts}个商品")
+                self.swipe_down()
+                time.sleep(self.get_sleep_time())  # 下滑后等待页面稳定
+                click_success = self.click_target_product_by_search_key(fuzzy_match=False)
+                if not click_success:
+                    self.finish_task_abnormally(
+                        current_page,
+                        f"连续超过30个不达标的数据后,关键词「{self.search_key}」商品点击失败",
+                        finish_status=1
+                    )
+                    return
+                print("点击搜索框")
+                self.d(className='android.widget.EditText').click()
+                time.sleep(self.get_sleep_time())
+
+                if keyword_idx == search_key_length - 1:
+                    print("程序最后一个品规采集完毕,返回主屏幕")
+                    completed_normally = self.finish_task_normally(
+                        current_page,
+                        '连续超过30个不达标的数据,结束采集'
+                    )
+
+                else:
+                    completed_normally = True
+                break
+            # 达到采集上限后不再继续翻页,直接走正常结束分支。
+            if self.is_max_count_reached():
+                completed_normally = self.finish_task_with_max_count(current_page)
+                # 向下滑
+                self.swipe_down()
+                time.sleep(self.get_sleep_time())
+                # 点击搜索框
+                click_success = self.click_target_product_by_search_key(fuzzy_match=False)
+                if not click_success:
+                    print(f"关键词「{self.search_key}」商品点击失败")
+                    return
+                print("点击搜索框")
+                self.d(className='android.widget.EditText').click()
+                time.sleep(self.get_sleep_time())
+                break
+
+            # 售罄次数大于4基本就是号废了但是如果下次点击不会出现这种情况就要重置为0
+            # 连续多次命中售罄商品时,认为当前账号/结果页已失去采集价值,提前退出。
+            if self.sold_out_counts > 4:
+                self.finish_task_abnormally(
+                    current_page,
+                    "====商品已售罄4次,结束采集(号不能用)",
+                    finish_status=1
+                )
+                print(
+                    f"[程序启动时间:{self.program_start_time}-----程序结束时间:{self.app_current_time()}]----搜索关键词:{self.search_key}----点击了{self.click_counts}个商品")
+                break
+            # 阶段 3:获取当前页可见商品卡片,并逐个点击进入详情页采集。
+            drug_lis = self.get_drug_lis(idx)
+            print('数量', len(drug_lis))
+            for idd, drug_one in enumerate(drug_lis):
+                print(idd + 1, drug_one.info)
+                time.sleep(self.get_sleep_time())
+                top = drug_one.info['bounds']['top']
+                bottom = drug_one.info['bounds']['bottom']
+                if bottom <= 1524 and top >= 258:
+                    drug_one.click()
+                    self.click_counts += 1
+                    time.sleep(self.get_sleep_time())
+                    # 先判断是否售罄次数是否大于4
+                    if self.sold_out_counts >= 4:
+                        print(
+                            f"[程序启动时间:{self.program_start_time}-----程序结束时间:{self.app_current_time()}]----搜索关键词:{self.search_key}----点击了{self.click_counts}个商品")
+                        self.finish_task_abnormally(
+                            current_page,
+                            "====这是在第一页有两个,商品已售罄4次,结束采集(号不能用)====",
+                            finish_status=1
+                        )
+                        time.sleep(self.get_sleep_time())
+                        self.d.press('home')
+                        return
+                    # 这里先判断“商品已售罄”,是为了尽早放弃无效详情页;
+                    # 如果不先做这一步,后续详情采集会浪费时间且可能干扰账号状态判断。
+                    if self.d.xpath('//*[contains(@text, "商品已售罄")]').wait(timeout=5):
+                        print("======商品已售罄======")
+                        self.sold_out_counts += 1
+                        if self.back_to_list_page():
+                            continue
+                    # 采集药品信息
+                    # 进入详情页后的采集与回退是最容易卡死的阶段,需要单独兜底。
+                    try:
+                        # 重置商品售罄次数
+                        self.sold_out_counts = 0
+
+                        self.integrate_data_v2()
+                        # 检测下是否回退到列表页
+                        if self.back_to_list_page():
+                            print('回退到列表页', True)
+                        else:
+                            print(f'[{self.app_current_time()}] 回退到列表页失败')
+                            print(
+                                f"[程序启动时间:{self.program_start_time}-----结束时间:{self.app_current_time()}]----搜索关键词:{self.search_key}----点击了{self.click_counts}个商品")
+                            self.finish_task_abnormally(current_page, "回退到列表页失败,结束采集")
+                            return
+                        time.sleep(self.get_sleep_time())
+                        spider_no += 1
+                        if self.is_max_count_reached():
+                            completed_normally = self.finish_task_with_max_count(current_page)
+                            stop_by_max_count = True
+                            break
+                    except Exception as e:
+                        self.loggerPdd.error(f'采集药品详情数据出错:{e}')
+                        if not self.back_to_list_page():
+                            self.finish_task_abnormally(current_page, '采集药品详情数据出错且无法回到列表页,结束采集')
+                            return
+                        else:
+                            continue
+            # 阶段 4:处理翻页前的收尾条件,包括采集上限、结束页和列表到底。
+            if stop_by_max_count:
+                break
+            # 配置了结束页时,以调用方传入的页边界作为最高优先级退出条件。
+            if self.end_page is not None and current_page >= self.end_page:
+                completed_normally = self.finish_task_normally(
+                    current_page,
+                    f"已采集到结束页 {self.end_page},结束任务"
+                )
+                break
+            if self.d(textStartsWith="抱歉,没有更多商品啦~").exists:
+                completed_normally = self.finish_task_normally(current_page, '已经到达列表页最底部')
+                break
+            # 阶段 5:当前页还没触发任何结束条件时,继续滑到下一页。
+            print('开始滑入下一页')
+            end_y = 300
+            self.d.swipe(200, 1400, 200, end_y, 0.4)
+            time.sleep(self.get_sleep_time())
+        # 阶段 6:根据最终状态做统一收尾,保证任务一定会走到正常或异常结束分支之一。
+        if completed_normally:
+            self.clear_progress_file()
+        elif not self.finish_reported:
+            self.finish_task_abnormally(current_page, "采集流程异常结束")
+        return completed_normally
+# pdd
+def main():
+    """Scheduler entry point.
+
+    Logs the polling interval, runs one dispatch round immediately, hands the
+    interval to ``schedule_dispatch`` for the following rounds, and then blocks
+    on ``scheduler_stop_event``.
+    """
+    # Purpose: start the scheduler; dispatch once right away, then register
+    # the subsequent polling.
+    logging.info(f"PDD 调度器启动,轮询间隔 {SCHEDULER_INTERVAL_SECONDS} 秒")
+    dispatch_pending_tasks()
+    schedule_dispatch(SCHEDULER_INTERVAL_SECONDS)
+    # Presumably a threading.Event that keeps the main thread alive until a
+    # stop is requested — confirm against its definition.
+    scheduler_stop_event.wait()
+
+if __name__ == '__main__':
+   main()

+ 4 - 0
pdd1/prob.txt

@@ -0,0 +1,4 @@
+1、补充今天采集的数据的省市
+
+
+2、

+ 20 - 0
pdd1/process_shop/doub.py

@@ -0,0 +1,20 @@
+import os
+import json
+from dotenv import load_dotenv
+import pymysql
+from typing import List, Dict, Optional
+import time
+from playwright.sync_api import (
+    sync_playwright,
+    TimeoutError as PlaywrightTimeoutError,
+    BrowserContext
+)
+import requests
+from urllib.parse import urlparse
+# 补充百度OCR所需依赖
+import base64
+from PIL import Image
+import io
+
+
+#

+ 1477 - 0
pdd1/process_shop/get_pdd_shop_name.py

@@ -0,0 +1,1477 @@
+import os
+import json
+from dotenv import load_dotenv
+import pymysql
+from typing import List, Dict, Optional
+import time
+from playwright.sync_api import (
+    sync_playwright,
+    TimeoutError as PlaywrightTimeoutError,
+    BrowserContext
+)
+import requests
+from urllib.parse import urlparse
+# 补充百度OCR所需依赖
+import base64
+from PIL import Image
+import io
+
+import asyncio
+
+# Load environment variables.
+load_dotenv()
+
+# ===================== Global constants (kept in one place for easy editing) =====================
+# Default database configuration.
+DEFAULT_DB_CONFIG = {
+    "host": "localhost",
+    "port": 3306,
+    "user": "root",
+    "password": "",
+    "db_name": "",
+    "table_name": ""
+}
+
+
+# Playwright configuration.
+PLAYWRIGHT_CONFIG = {
+    "headless": False,
+    "slow_mo": 300,
+    "browser_args": [
+        "--start-maximized",
+        "--disable-blink-features=AutomationControlled",  # core anti-detection flag
+        "--no-sandbox",  # Windows/Linux compatibility
+        "--disable-dev-shm-usage",  # work around low shared memory
+        "--disable-popup-blocking",  # disable the popup blocker
+        "--disable-extensions",  # disable extensions
+        "--disable-gpu",  # disable GPU acceleration
+        "--lang=zh-CN,zh",  # Chinese UI language
+        "--user-agent=Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.0.0 Safari/537.36"
+    ],
+    "viewport": {"width": 2050, "height": 1200},
+    "locale": "zh-CN",
+    "timezone_id": "Asia/Shanghai",
+    "default_timeout": 15000,
+    "navigation_timeout": 30000,
+    "login_state_path": "pdd_login_state.json",  # persisted login-state file
+    "tianyancha_login_state": "tianyancha_login_state.json"  # where the Tianyancha login state is saved
+}
+
+
+# Baidu OCR configuration (credentials are read from the environment).
+BAIDU_OCR_CONFIG = {
+    "api_key": os.getenv('APP_KEY'),
+    "secret_key": os.getenv('APP_SECRET'),
+    "scale": 1.5  # upscale factor applied to images before OCR
+}
+
+
+
+
+# Image saving configuration.
+IMAGE_CONFIG = {
+    "save_dir": "pdd_goods_images",  # dedicated folder name (under the project root)
+    "timeout": 10,  # image download timeout in seconds
+    "retry": 1  # number of retries after a failed download
+}
+
+# 1. SQL that assigns the date session variable used by QUERY_SQL; both
+#    statements must therefore run on the same connection.
+SET_DATE_SQL = "SET @date_constant = CURDATE();"
+
+
+# 2. Core query: one row per shop (rn = 1) among products scraped on or after
+#    @date_constant whose shop has no business_license_company in
+#    pdd_shop_info_middle (LEFT JOIN + IS NULL).
+QUERY_SQL = """
+    SELECT 
+      product, 
+      shop, 
+      product_link, 
+      scrape_date, 
+      business_license_company, 
+      search_key
+    FROM (
+      SELECT 
+        pd.product,
+        pd.shop,
+        pd.product_link,
+        pd.scrape_date,
+        psi.business_license_company,
+        pd.search_key,
+        ROW_NUMBER() OVER (PARTITION BY pd.shop ORDER BY pd.search_key ASC) AS rn
+      FROM pdd_drug_middle pd
+      LEFT JOIN pdd_shop_info_middle psi ON psi.shop = pd.shop
+      WHERE pd.scrape_date >= @date_constant 
+        AND psi.business_license_company IS NULL
+    ) AS sub
+    WHERE rn = 1
+    ORDER BY search_key;
+"""
+
+
+
+# ===================== 百度OCR类(完整整合)=====================
+class BaiduOCR:
+    """百度 OCR 文字识别封装类"""
+    def __init__(self, api_key: str, secret_key: str):
+        """
+        初始化百度 OCR
+        :param api_key: 百度智能云应用的 API Key
+        :param secret_key: 百度智能云应用的 Secret Key
+        """
+        self.api_key = api_key
+        self.secret_key = secret_key
+        self.access_token: Optional[str] = None
+        # 获取 access_token(有效期30天,建议缓存)
+        self._get_access_token()
+
+    def _get_access_token(self) -> bool:
+        """
+        获取百度 OCR 的 access_token(有效期30天)
+        :return: 是否获取成功
+        """
+        url = "https://aip.baidubce.com/oauth/2.0/token"
+        params = {
+            "grant_type": "client_credentials",
+            "client_id": self.api_key,
+            "client_secret": self.secret_key
+        }
+        try:
+            response = requests.post(url, params=params, timeout=10)
+            response.raise_for_status()
+            result = response.json()
+            if "access_token" in result:
+                self.access_token = result["access_token"]
+                print(f"✅ 成功获取 access_token:{self.access_token[:20]}...")
+                return True
+            else:
+                print(f"❌ 获取 access_token 失败:{result}")
+                return False
+        except Exception as e:
+            print(f"❌ 获取 access_token 异常:{e}")
+            return False
+
+    def _enlarge_and_crop_image(self, image_path: str, scale: float=1.5, crop_ratio: float=0.5) -> bytes:
+        """
+        先裁剪图片上半部分(保留有效内容),再放大图片(解决OCR尺寸错误)
+        :param image_path: 原图路径
+        :param scale: 放大倍数(推荐1.5~3.0)
+        :param crop_ratio: 裁剪比例(0.5=保留上50%,0.6=保留上60%,可根据图片调整)
+        :return: 处理后的图片二进制数据
+        """
+        try:
+            with Image.open(image_path) as img:
+                # ========== 步骤1:裁剪上半部分(核心修复) ==========
+                # 计算裁剪区域:左=0,上=0,右=原图宽度,下=原图高度×裁剪比例
+                crop_box = (
+                    0,  # 左边界
+                    0,  # 上边界
+                    img.width,  # 右边界
+                    int(img.height * crop_ratio)  # 下边界(只保留上半部分)
+                )
+                img_cropped = img.crop(crop_box)  # 执行裁剪
+                print(f"✅ 图片裁剪完成:保留上{int(crop_ratio * 100)}%区域,尺寸={img_cropped.size}")
+
+                # ========== 步骤2:放大裁剪后的图片 ==========
+                new_width = int(img_cropped.width * scale)
+                new_height = int(img_cropped.height * scale)
+                # 高质量放大(Lanczos算法,最清晰)
+                img_resized = img_cropped.resize(
+                    (new_width, new_height),
+                    Image.Resampling.LANCZOS
+                )
+
+                # ========== 仅5行,强制缩到4096×4096以内(核心微调) ==========
+                MAX_OCR_SIZE = 4096  # 百度OCR最大允许宽度/高度
+                if img_resized.width > MAX_OCR_SIZE:
+                    ratio = MAX_OCR_SIZE / img_resized.width  # 计算缩放比例
+                    img_resized = img_resized.resize(
+                        (MAX_OCR_SIZE, int(img_resized.height * ratio)),
+                        Image.Resampling.LANCZOS
+                    )
+
+
+
+                if img_resized.mode == 'RGBA':
+                    # 创建白色背景的RGB画布
+                    rgb_img = Image.new('RGB', img_resized.size, (255, 255, 255))
+                    # 将RGBA图片粘贴到RGB画布(透明区域显示白色)
+                    rgb_img.paste(img_resized, mask=img_resized.split()[3])  # mask=alpha通道
+                    img_resized = rgb_img
+
+                # # ========== 保存处理后图片到本地 ==========
+                # # 1. 确保pdd_goods_images文件夹存在(不存在则创建)
+                # save_dir = "pdd_goods_images"
+                # if not os.path.exists(save_dir):
+                #     os.makedirs(save_dir)
+                # # 2. 提取原图片文件名(比如从image_path中拿到"鸿祥堂大药房旗舰店_1773649991220.jpeg")
+                # file_name = os.path.basename(image_path)
+                # # 3. 拼接保存路径
+                # save_path = os.path.join(save_dir, file_name)
+                # # 4. 保存图片到本地(质量和OCR用的一致)
+                # img_resized.save(save_path, format='JPEG', quality=95)
+                # print(f"✅ 处理后图片已保存到:{save_path}")
+                # # ======================================================
+
+
+                # 保存到内存(不生成本地文件)
+                img_byte_arr = io.BytesIO()
+                # 保存为 JPG,保证清晰度
+                img_resized.save(img_byte_arr, format='JPEG', quality=95)
+                img_byte_arr = img_byte_arr.getvalue()
+
+                # 校验文件大小(超4MB则再次压缩)
+                file_size = len(img_byte_arr) / 1024 / 1024  # 转MB
+                if file_size > 4:
+                    print(f"⚠️ 文件超4MB({file_size:.2f}MB),二次压缩...")
+                    img_byte_arr = io.BytesIO()
+                    img_resized.save(img_byte_arr, format='JPEG', quality=70, optimize=True)
+                    img_byte_arr = img_byte_arr.getvalue()
+
+                # 打印最终尺寸(方便调试)
+                print(f"✅ 图片放大完成:最终尺寸={img_resized.size}")
+                return img_byte_arr
+        except Exception as e:
+            print(f"❌ 图片裁剪/放大失败:{str(e)}")
+            return b''
+
+    def general_ocr(self, image_path: str, scale: float = 1.5) -> Optional[Dict]:
+        """
+        调用百度通用文字识别(支持图片放大)
+        :param image_path: 本地图片路径
+        :param scale: 放大倍数,默认2倍
+        :return: OCR识别结果
+        """
+        if not self.access_token:
+            print("❌ access_token 无效,请先初始化")
+            return None
+
+        try:
+            """
+            百度OCR通用识别(整合裁剪+放大)
+            """
+            # 替换原放大逻辑为「裁剪+放大」
+            image_data = self._enlarge_and_crop_image(image_path, scale=scale, crop_ratio=0.5)
+            if not image_data:
+                print("❌ 图片处理失败,无法识别")
+                return {}
+            image_base64 = base64.b64encode(image_data).decode("utf-8")
+
+        except Exception as e:
+            print(f"❌ 图片放大/读取失败:{e}")
+            return None
+
+        # 调用 OCR 接口
+        url = f"https://aip.baidubce.com/rest/2.0/ocr/v1/general_basic?access_token={self.access_token}"
+        headers = {"Content-Type": "application/x-www-form-urlencoded"}
+        data = {"image": image_base64}
+
+        try:
+            response = requests.post(url, headers=headers, data=data, timeout=10)
+            response.raise_for_status()
+            result = response.json()
+            if "words_result" in result:
+                print(f"✅ 识别成功,共识别到 {len(result['words_result'])} 行文字")
+                return result
+            else:
+                print(f"❌ 识别失败:{result}")
+                return None
+        except Exception as e:
+            print(f"❌ 调用 OCR 接口异常:{e}")
+            return None
+
+    def extract_enterprise_info(self, ocr_result: Dict) -> Dict:
+        """
+        从OCR识别结果中提取企业名称和社会信用代码
+        :param ocr_result: general_ocr 返回的识别结果字典
+        :return: 包含企业名称和社会信用代码的JSON格式字典
+                 格式:{"enterprise_name": "企业名称", "credit_code": "社会信用代码"}
+        """
+        # 初始化返回结果(默认空值)
+        enterprise_info = {
+            "enterprise_name": "",
+            "credit_code": ""
+        }
+
+        if not ocr_result or "words_result" not in ocr_result:
+            print("❌ OCR识别结果为空,无法提取企业信息")
+            return enterprise_info
+
+        all_text_lines = []
+        # 遍历所有识别的文字行,匹配关键词
+        for item in ocr_result["words_result"]:
+            line_text = item["words"].strip()  # 去除首尾空格
+            if line_text and line_text not in all_text_lines:  # 去空+去重
+                all_text_lines.append(line_text)
+        print(f"📝 OCR识别的有效行:{all_text_lines}")
+
+        # ==================== 1. 提取并清洗企业名称 ====================
+        enterprise_name = ""
+        # 名称匹配关键词(覆盖所有场景)
+        name_keywords = ["企业名称", "名称:", "名:", "称:"]
+        # 常见错别字修正映射
+        name_correction = {
+            "人药房": "大药房",
+            "有松司": "有限公司",
+            "松司": "公司",
+            "关药房": "大药房"
+        }
+
+        for idx, line_text in enumerate(all_text_lines):
+            # 场景1:包含"企业名称"(处理括号冗余,如"企业名称(名称xxx")
+            if "企业名称" in line_text:
+                # 移除所有括号及内部内容,再提取名称
+                import re
+                # 正则移除括号(()/())及内容
+                clean_line = re.sub(r'\([^)]*\)|\([^)]*\)', '', line_text)
+                # 提取"企业名称"后的所有内容
+                name_part = clean_line.split("企业名称")[-1].strip()
+                # 若还有"名称"前缀,继续拆分
+                if "名称" in name_part:
+                    name_part = name_part.split("名称")[-1].strip()
+                enterprise_name = name_part
+                break
+            # 场景2:单行包含"名称:"/"名:"/"称:"
+            elif any(key in line_text for key in ["名称:", "名:", "称:"]):
+                name_part = line_text.split(":")[-1].strip() if ":" in line_text else line_text.split(":")[-1].strip()
+                enterprise_name = name_part
+                break
+            # 场景3:跨行拆分(前一行是"名",当前行以"称:"开头)
+            elif idx > 0 and all_text_lines[idx - 1] == "名" and line_text.startswith("称:"):
+                name_part = line_text.split(":")[-1].strip()
+                enterprise_name = name_part
+                break
+
+        # 清洗企业名称:修正错别字、移除多余空格
+        if enterprise_name:
+            for wrong, right in name_correction.items():
+                enterprise_name = enterprise_name.replace(wrong, right)
+
+            # 步骤2:移除开头/结尾的冒号(中文+英文)、空格、特殊符号
+            enterprise_name = enterprise_name.strip(":: \t\n\r")
+            # 步骤3:移除中间多余空格
+            enterprise_name = enterprise_name.replace(" ", "")  # 移除所有空格
+
+        # ==================== 2. 提取并清洗社会信用代码 ====================
+        credit_code = ""
+        # 信用代码匹配关键词(兼容错别字+多格式)
+        code_keywords = ["社会信用代码:", "统一社会信用代码:", "社会震用代码:"]
+        for line_text in all_text_lines:
+            # 匹配任意关键词
+            for keyword in code_keywords:
+                if keyword in line_text:
+                    code_part = line_text.split(keyword)[-1].strip()
+                    credit_code = code_part.replace(" ", "")  # 移除空格(如"91360105 MAEGBDKMXF")
+                    break
+            if credit_code:  # 找到后跳出循环
+                break
+
+        # ==================== 赋值并打印结果 ====================
+        enterprise_info["enterprise_name"] = enterprise_name
+        enterprise_info["credit_code"] = credit_code
+        # enterprise_info["address"] = address
+
+        # 打印提取结果
+        if enterprise_name:
+            print(f"✅ 提取到企业名称:{enterprise_name}")
+        else:
+            print("⚠️ 未识别到企业名称字段")
+
+        if credit_code:
+            print(f"✅ 提取到社会信用代码:{credit_code}")
+        else:
+            print("⚠️ 未识别到社会信用代码字段")
+
+        return enterprise_info
+
+
+
+
+
+
+# ===================== 数据库读取类 =====================
+class DBGoodsReader:
+    """数据库商品链接读取器"""
+
+    def __init__(
+            self,
+            host: str = DEFAULT_DB_CONFIG["host"],
+            port: int = DEFAULT_DB_CONFIG["port"],
+            user: str = DEFAULT_DB_CONFIG["user"],
+            password: str = DEFAULT_DB_CONFIG["password"],
+            db_name: str = DEFAULT_DB_CONFIG["db_name"],
+            charset: str = "utf8mb4"
+    ):
+        self.host = host
+        self.port = port
+        self.user = user
+        self.password = password
+        self.db_name = db_name
+        self.charset = charset
+        self.conn: Optional[pymysql.connections.Connection] = None
+        self.cursor: Optional[pymysql.cursors.DictCursor] = None
+
+
+
+
+    def connect_db(self) -> bool:
+        """连接数据库(带重试机制)"""
+        max_retry = 2
+        for retry in range(max_retry + 1):
+            try:
+                self.conn = pymysql.connect(
+                    host=self.host,
+                    port=self.port,
+                    user=self.user,
+                    password=self.password,
+                    database=self.db_name,
+                    charset=self.charset,
+                    cursorclass=pymysql.cursors.DictCursor,
+                    connect_timeout=10  # 连接超时
+                )
+                self.cursor = self.conn.cursor()
+                print(f"✅ 成功连接数据库:{self.db_name}")
+                return True
+            except pymysql.MySQLError as e:
+                if retry < max_retry:
+                    print(f"❌ 数据库连接失败(重试{retry + 1}/{max_retry}):{e}")
+                    time.sleep(1)
+                    continue
+                print(f"❌ 数据库连接最终失败:{e}")
+                return False
+
+    def get_shop_and_goods(self) -> List[Dict]:
+        """
+        读取待补充企业信息的店铺数据(每个店铺仅取1条)
+        返回:包含product/shop/product_link等字段的字典列表
+        """
+        if not self.conn or not self.cursor:
+            print("❌ 未连接数据库,请先调用 connect_db()")
+            return []
+
+        try:
+            # 步骤1:执行日期变量赋值
+            self.cursor.execute(SET_DATE_SQL)
+
+            # 步骤2:执行核心查询
+            self.cursor.execute(QUERY_SQL)
+
+            # 步骤3:获取结果(DictCursor返回字典格式,字段名对应SQL列名)
+            results = self.cursor.fetchall()
+            print(f"✅ 成功读取 {len(results)} 条待补充企业信息的店铺数据")
+            return results
+
+        except pymysql.MySQLError as e:
+            print(f"❌ 读取数据失败:{e}")
+            return []
+
+    def _get_next_id(self) -> int:
+        """获取表中最大ID并+1,用于生成新插入数据的ID(若ID非自增)"""
+        try:
+            sql = "SELECT IFNULL(MAX(id), 0) + 1 AS next_id FROM pdd_shop_info_middle"
+            self.cursor.execute(sql)
+            result = self.cursor.fetchone()
+            next_id = result.get("next_id", 9078)  # 默认初始值9078(兼容示例)
+            print(f"✅ 获取到下一个可用ID:{next_id}")
+            return next_id
+        except pymysql.MySQLError as e:
+            print(f"❌ 获取自增ID失败,使用默认值9078:{e}")
+            return 9078
+
+    def insert_enterprise_info(self, shop_name: str, enterprise_info: Dict) -> bool:
+        """
+        向pdd_shop_info_middle表插入企业信息(替代原更新逻辑)
+        :param shop_name: 店铺名称(关联表的shop字段)
+        :param enterprise_info: 包含tyc_company_name/tyc_company_code/tyc_company_address的字典
+        :return: 插入是否成功
+        """
+        if not self.conn or not self.cursor:
+            print("❌ 未连接数据库,请先调用 connect_db()")
+            return False
+
+        if not shop_name:
+            print("❌ 店铺名称为空,无法更新")
+            return False
+
+        business_company_name = enterprise_info.get("tyc_company_name", "").strip()
+        qualification_number = enterprise_info.get("tyc_company_code", "").strip()
+        contact_address = enterprise_info.get("tyc_company_address", "").strip()
+        business_license_address = contact_address  # 两个地址字段都用同一个值
+
+        # 空值校验提示
+        empty_fields = []
+        if not business_company_name:
+            empty_fields.append("企业名称")
+        if not qualification_number:
+            empty_fields.append("统一信用代码")
+        if not contact_address:
+            empty_fields.append("企业地址")
+        if empty_fields:
+            print(f"⚠️ 店铺[{shop_name}]以下字段为空:{','.join(empty_fields)},仍继续插入(空值)")
+
+
+        # 生成插入ID(若表ID为自增主键,可删除ID相关逻辑,SQL中也去掉id字段)
+        next_id = self._get_next_id()
+
+        insert_sql = """
+                    INSERT INTO `test2`.`pdd_shop_info_middle` (
+                        `id`, 
+                        `shop`, 
+                        `contact_address`, 
+                        `qualification_number`, 
+                        `business_license_company`, 
+                        `business_license_address`, 
+                        `scrape_date`, 
+                        `platform`, 
+                        `province`, 
+                        `city`, 
+                        `create_time`, 
+                        `update_time`
+                    ) VALUES (%s, %s, %s, %s, %s, %s, CURDATE(), '拼多多', '', '', NOW(), NOW())
+                """
+
+        # 组装插入参数
+        insert_params = [
+            next_id,
+            shop_name,
+            contact_address,
+            qualification_number,
+            business_company_name,
+            business_license_address
+        ]
+
+        try:
+            # 执行插入
+            self.cursor.execute(insert_sql, insert_params)
+            self.conn.commit()
+
+            # 检查影响行数
+            affected_rows = self.cursor.rowcount
+            if affected_rows > 0:
+                print(f"✅ 店铺[{shop_name}]成功插入1条数据(ID:{next_id})")
+                print(f"   插入内容:企业名称={business_company_name} | 信用代码={qualification_number} | 地址={contact_address}")
+                return True
+            else:
+                print(f"⚠️ 店铺[{shop_name}]插入0行数据,无数据变更")
+                return False
+        except pymysql.MySQLError as e:
+            print(f"❌ 店铺[{shop_name}]插入失败:{e}")
+            self.conn.rollback()  # 回滚事务
+            return False
+        except Exception as e:
+            print(f"❌ 店铺[{shop_name}]插入异常:{e}")
+            self.conn.rollback()
+            return False
+
+
+
+
+
+    def close_db(self) -> None:
+        """安全关闭数据库连接"""
+        if self.cursor:
+            try:
+                self.cursor.close()
+            except Exception:
+                pass
+        if self.conn:
+            try:
+                self.conn.close()
+                print("✅ 数据库连接已关闭")
+            except Exception:
+                pass
+
+
+
+
+
+
+
+# ===================== 天眼查浏览器类 =====================
+class TianyanchaBrowser:
+    """天眼查浏览器:自动打开、登录、搜索企业名"""
+    def __init__(self):
+        """Create an uninitialised wrapper; ``init_browser`` fills in the handles."""
+        # Playwright engine, browser, context and page stay None until init_browser() runs.
+        self.pw = None
+        self.browser = None
+        self.context: Optional[BrowserContext] = None
+        self.page = None
+        # File used to persist and restore the Tianyancha login state.
+        self.login_state_path = PLAYWRIGHT_CONFIG["tianyancha_login_state"]
+
+
+
+    def check_scan_login_prompt(self):
+        """
+        检查是否出现「扫码登录」提示,若出现则暂停并提示手动扫码
+        """
+        try:
+            # 定位扫码登录提示文本(结合父div,避免误匹配其他页面文本)
+            scan_prompt_locator = self.page.locator(
+                "div.scan-title",
+                has_text="扫码登录 更快 更安全"
+            )
+
+            scan_prompt_locator.wait_for(
+                state="visible",
+                timeout=10000  # 超时10秒,可根据网络调整
+            )
+
+            # 提示出现,暂停脚本让你手动扫码
+            print("⚠️ 检测到天眼查扫码登录提示!")
+            input("请打开天眼查APP扫码完成登录后,按回车键继续执行脚本...")
+
+        except PlaywrightTimeoutError:
+            # 超时未出现,说明无需扫码,直接继续
+            print("✅ 未检测到扫码登录提示,跳过扫码步骤")
+
+
+    def _load_login_state(self) -> Optional[Dict]:
+        """加载本地登录状态"""
+        if os.path.exists(self.login_state_path):
+            try:
+                with open(self.login_state_path, "r", encoding="utf-8") as f:
+                    return json.load(f)
+            except json.JSONDecodeError:
+                print(f"⚠️ 天眼查登录状态文件损坏,将重新登录")
+                os.remove(self.login_state_path)
+        return None
+
+
+    def _save_login_state(self) -> None:
+        """保存登录状态"""
+        if self.context:
+            try:
+                self.context.storage_state(path=self.login_state_path) # 同步保存
+                print(f"✅ 天眼查登录状态已保存到:{self.login_state_path}")
+            except Exception as e:
+                print(f"⚠️ 天眼查登录状态保存失败:{e}")
+
+    def init_browser(self, pw) -> bool:  # ✅ 保持async
+        """初始化天眼查浏览器"""
+        try:
+            self.pw = pw  # ✅ 核心修改:直接使用传进来的 playwright 引擎,不再自己 start()
+            # 启动防检测浏览器
+            self.browser = self.pw.chromium.launch(
+                headless=PLAYWRIGHT_CONFIG["headless"],
+                slow_mo=PLAYWRIGHT_CONFIG["slow_mo"],
+                args=PLAYWRIGHT_CONFIG["browser_args"],
+                ignore_default_args=["--enable-automation"],
+                timeout=60000
+            )
+
+
+            # 加载登录状态或手动登录
+            login_state = self._load_login_state()
+            if login_state:
+                self.context = self.browser.new_context(
+                    viewport=None,  # ✅ 设为None,适配最大化窗口
+                    locale=PLAYWRIGHT_CONFIG["locale"],
+                    timezone_id=PLAYWRIGHT_CONFIG["timezone_id"],
+                    ignore_https_errors=True,
+                    storage_state=login_state  # ✅ 加载已保存的登录状态
+                )
+                print("✅ 已加载天眼查本地登录状态")
+            else:  # ✅ 无登录状态:提示手动登录
+                self.context = self.browser.new_context(
+                    viewport=None,
+                    locale=PLAYWRIGHT_CONFIG["locale"],
+                    timezone_id=PLAYWRIGHT_CONFIG["timezone_id"],
+                    ignore_https_errors=True,
+                )
+
+
+
+            # 初始化页面
+            self.page = self.context.new_page()
+
+            # self.page.window_maximize()  # ✅ 强制窗口最大化(兜底)
+
+            self.page.set_default_timeout(PLAYWRIGHT_CONFIG['default_timeout'])
+            self.page.set_default_navigation_timeout(PLAYWRIGHT_CONFIG['navigation_timeout'])
+            return True
+        except Exception as e:
+            print(f"❌ 天眼查浏览器初始化失败:{e}")
+            self.close()  # ✅ await关闭
+            return False
+
+
+    def search_enterprise(self, enterprise_name: str) -> bool:
+        """
+        打开天眼查并搜索指定企业名
+        :param enterprise_name: 从OCR提取的企业名称
+        :return: 搜索是否成功
+        """
+        if not self.page:
+            print("❌ 天眼查浏览器未初始化!")
+            return False
+        if not enterprise_name or enterprise_name.strip() == '':
+            print("❌ 企业名称为空!无法搜索")
+            return False
+
+        try:
+            #设置一个计数器,去往官网只运行一次
+
+
+            # 1. 打开天眼查首页(替换你指定的链接)
+            print(f"\n📌 打开天眼查:https://www.tianyancha.com/")
+            self.page.goto(
+                "https://www.tianyancha.com/",
+                wait_until="networkidle",
+                timeout=30000
+            )
+            # input("天眼查登录")
+
+            # 2. 定位天眼查搜索框(适配最新页面结构)
+            # 搜索框selector:优先用placeholder匹配,兼容不同版本
+            # 先检查是否需要扫码登录
+            # self.check_scan_login_prompt()
+
+            # ========== 核心:自动检测并处理登录 ==========
+            # 定位「登录/注册」按钮(完全匹配你提供的HTML结构)
+            login_button = self.page.locator(
+                "div.tyc-header-nav-item.tyc-nav-user span.tyc-nav-user-btn",
+                has_text="登录/注册"
+            ).nth(0)
+
+            try:
+                # 等待按钮出现(最多10秒),如果出现说明未登录
+                login_button.wait_for(state="visible", timeout=10000)
+                print("⚠️ 检测到未登录状态,正在点击「登录/注册」按钮...")
+                login_button.click()  # 点击按钮,唤起扫码登录弹窗
+
+                # 提示你手动扫码登录
+                print("\n🔔 请打开天眼查APP,扫描页面上的登录二维码,只有四十秒,登录完成后按回车键继续...")
+
+                # 等待登录完成:等待「登录/注册」按钮消失(说明已成功登录)
+                self.page.wait_for_selector(
+                    "div.tyc-header-nav-item.tyc-nav-user span.tyc-nav-user-btn",
+                    state="hidden",  # 等待元素隐藏
+                    timeout=40000  # 最多等30秒,给足扫码时间
+                )
+                print("✅ 扫码登录成功!")
+            except PlaywrightTimeoutError:
+                # 10秒内没找到「登录/注册」按钮 → 说明已经处于登录状态
+                print("✅ 检测到已登录状态,无需重复登录")
+
+            print("\n⚠️ 请先完成天眼查登录!")
+            # self.page = self.context.new_page()  # ✅ await创建页面
+            # self.page.goto("https://www.tianyancha.com", timeout=30000)  # ✅ await跳转
+            # input("请在浏览器中完成天眼查登录,登录后按回车继续...")
+            self.context.storage_state(path=self.login_state_path)  # ✅ await保存状态
+            print(f"✅ 天眼查登录状态已保存到:{self.login_state_path}")
+
+
+            search_locator = None
+            try:
+                # 优先定位:placeholder匹配
+                search_locator = self.page.locator('input[placeholder="请输入公司名称、老板姓名、品牌名称等"]')
+                if search_locator.count() > 1:
+                    search_locator = self.page.locator('input[placeholder="请输入公司名称、老板姓名、品牌名称等"]').nth(1)
+                else:
+                    search_locator = self.page.locator('input[placeholder="请输入公司名称、老板姓名、品牌名称等"]')
+                # # 备用定位:ID匹配
+                # if not search_locator.count():
+                #     search_locator = self.page.locator('input#header-company-search')
+
+                # 等待搜索框加载(超时会触发TimeoutError)
+                search_locator.wait_for(timeout=10000, state="visible")
+                print("✅ 定位到天眼查搜索框")
+            except PlaywrightTimeoutError:
+                print(f"❌ 搜索框定位超时:页面加载过慢或搜索框元素不存在")
+                return False
+            except Exception as e:
+                print(f"❌ 搜索框定位失败:{str(e)}")
+                return False
+
+
+
+            # 3. 清空搜索框 + 输入企业名 + 回车搜索
+            search_locator.click()
+            search_locator.clear()
+            print(f"📌 输入企业名:{enterprise_name}")
+            # 模拟真人输入延迟
+            search_locator.fill(enterprise_name)
+            self.page.wait_for_timeout(1000)
+
+            # 推荐:直接用键盘回车触发搜索,这在大部分前端框架中最稳定
+            search_locator.press("Enter")
+            print("🖱️ 已触发回车搜索")
+            #点击搜索按钮
+            # search_btn = self.page.locator("button.50ab4.tyc-header-suggest-button_52bf6")
+            # await  search_btn.click()  # 回车搜索
+
+            # 4. 等待搜索结果加载
+            self.page.wait_for_load_state("networkidle", timeout=20000)
+            print(f"✅ 天眼查搜索完成!已搜索:{enterprise_name}")
+            return True
+
+        except PlaywrightTimeoutError:
+            print(f"❌ 天眼查搜索超时(企业名:{enterprise_name})")
+            return False
+        except Exception as e:
+            print(f"❌ 天眼查搜索异常:{e}")
+            return False
+
+    def get_enterprise_info(self) -> Dict:
+        """
+        从天眼查搜索结果页提取核心字段(可自定义字段)
+        返回:包含三个字段的字典(示例:法定代表人、注册资本、成立日期)
+        """
+        enterprise_detail = {
+            "tyc_company_name": "",     # 公司名
+            "tyc_company_code": "",             # 统一社会信用代码
+            "tyc_company_address": ""           # 成立日期
+        }
+        if not self.page:
+            print("❌ 天眼查页面未初始化")
+            return enterprise_detail
+        try:
+            # 等待详情页加载
+            # self.page.('div.company-header-container', timeout=8000)
+            self.page.wait_for_timeout(timeout=4000)
+            try:
+                # 提取公司名,可能会出现很多个结果,但路径都一样,一般取第一个。
+                company_name_locator = self.page.locator("div.index_name__qEdWi span").nth(0)
+                if company_name_locator.count():
+                    company_name = company_name_locator.inner_text()
+                    enterprise_detail['tyc_company_name'] = company_name.strip()
+                    print(f"获取到公司名:{enterprise_detail['tyc_company_name']}")
+                else:
+                    print(f"没有获取到企业名,网页路径有问题")
+            except Exception as e:
+                input("提取企业元素发生问题,检查一下")
+                print(f"提取企业名时发生异常:{str(e)},网页路径或元素定位异常")
+                enterprise_detail['tyc_company_name'] = ""
+
+            try:
+                # 提取统一社会信用代码
+                code_locator = self.page.locator("div.index_info-col__UVcZb.index_credit-code__kWuDZ span").nth(0)
+                if code_locator.count():
+                    code = code_locator.inner_text()
+                    enterprise_detail['tyc_company_code'] = code.strip()
+                    print(f"获取到企业信用代码:{enterprise_detail['tyc_company_code']}")
+                else:
+                    print(f"没有获取到企业信用代码,网页路径有问题")
+            except Exception as e:
+                print(f"提取统一社会信用代码时发生异常:{str(e)},网页路径或元素定位异常")
+                enterprise_detail['tyc_company_code'] = ""
+
+            try:
+                address_locator = self.page.locator("div.index_contact-col__7AboU.index_address__mHjQD .index_value__Pl0Nh").nth(0)
+                if address_locator.count():
+                    address = address_locator.inner_text()
+                    enterprise_detail['tyc_company_address'] = address.strip()
+                    print(f"获取到企业地址:{enterprise_detail['tyc_company_address']}")
+                else:
+                    print(f"没有获取到企业地址,网页路径有问题")
+            except Exception as e:
+                print(f"提取企业地址时发生异常:{str(e)},网页路径或元素定位异常")
+                enterprise_detail['tyc_company_address'] = ""
+
+
+
+            print("\n📌 提取的企业核心信息:")
+            print(f"公司名:{enterprise_detail['tyc_company_name']}")
+            print(f"企业信用代码:{enterprise_detail['tyc_company_code']}")
+            print(f"企业地址:{enterprise_detail['tyc_company_address']}")
+            return enterprise_detail
+
+        except Exception as e:
+            print(f"❌ 提取企业信息失败:{e}")
+            return enterprise_detail
+
+
+
+    def close(self) -> None:
+        """关闭浏览器"""
+        if self.page:
+            try:
+                self.page.close()
+            except Exception:
+                pass
+        if self.context:
+            try:
+                self.context.close()
+            except Exception:
+                pass
+        if self.browser:
+            try:
+                self.browser.close()
+                print("✅ 天眼查浏览器已关闭")
+            except Exception:
+                pass
+
+
+
+
+
+
+
+
+
+
+class PddLinkBrowser:
+    """拼多多链接浏览器(支持登录持久化+图片下载+OCR识别)"""
+
+    def __init__(self, login_state_path: str = PLAYWRIGHT_CONFIG["login_state_path"]):
+        self.login_state_path = login_state_path
+        self.browser = None
+        self.context: Optional[BrowserContext] = None
+        self.page = None
+        # 初始化图片保存文件夹
+        self._init_image_dir()
+
+        # 初始化百度OCR实例
+        self.ocr_client: Optional[BaiduOCR] = None
+        self._init_ocr_client()
+
+        # ========== 初始化天眼查浏览器 ==========
+        self.tyc_browser = TianyanchaBrowser()
+
+    def _init_ocr_client(self):
+        """初始化百度OCR客户端"""
+        api_key = BAIDU_OCR_CONFIG["api_key"]
+        secret_key = BAIDU_OCR_CONFIG["secret_key"]
+        if not api_key or not secret_key:
+            print("⚠️ 未配置百度OCR的API_KEY/SECRET_KEY,将跳过OCR识别")
+            return
+        self.ocr_client = BaiduOCR(api_key=api_key, secret_key=secret_key)
+        print("✅ 百度OCR客户端初始化完成")
+
+
+
+    # ========== 检测手机登录框并等待手动登录 ==========
+    def _check_login_box(self) -> bool:
+        """
+        检测是否出现「手机登录」框(div.phone-login 包含"手机登录"文本)
+        :return: True=检测到并完成登录;False=未检测到登录框
+        """
+        if not self.page:
+            print("❌ 页面未初始化,无法检测登录框")
+            return False
+        try:
+            # 精准定位登录框元素:div.phone-login 下的 span(包含"手机登录"文本)
+            login_locator = self.page.locator("div.phone-login span")
+            # 等待元素可见(最多5秒,超时则认为无登录框)
+            login_locator.wait_for(timeout=5000, state="visible")
+            # 获取元素文本(包含伪元素的"手机登录")
+            login_text = login_locator.inner_text().strip()
+
+            if "手机登录" in login_text:
+                print("\n⚠️ 检测到【手机登录】框,请手动完成登录!")
+                input("登录完成后,请按回车键继续执行脚本...")
+                # 登录后等待页面重新加载完成(确保登录状态生效)
+                self.page.wait_for_load_state("networkidle", timeout=15000)
+                print("✅ 登录已完成,继续处理当前商品")
+                return True
+            return False
+        except PlaywrightTimeoutError:
+            # 超时说明页面没有登录框,直接返回False
+            return False
+        except Exception as e:
+            print(f"⚠️ 检测登录框时发生异常:{str(e)[:60]},继续执行")
+            return False
+    # ========== 登录检测方法结束 ==========
+
+
+
+
+    # ========== 自定义向下滑动方法 ==========
+    def _scroll_down(self, distance: int = 500, step: int = 50, interval: int =100):
+        """
+        分步向下滑动指定距离(解决图片懒加载,避免一次性跳转)
+        :param distance: 总滑动距离(像素,默认500)
+        :param step: 每次滑动的步长(像素,默认50,越小越平缓)
+        :param interval: 每次滑动后的间隔时间(毫秒,默认100)
+        """
+        if not self.page:
+            print("❌ 浏览器页面未初始化,无法滑动")
+            return
+
+        # 容错处理:步长/总距离为非正数时直接返回
+        if step <= 0 or distance <= 0:
+            print(f"⚠️ 无效的滑动参数(总距离:{distance},步长:{step}),跳过滑动")
+            return
+
+
+        try:
+            remaining = distance  # 剩余未滑动的距离
+            print(f"📝 开始分步滑动:总距离{distance}像素,每次滑{step}像素,间隔{interval}ms")
+
+            while remaining > 0:
+                current_step = min(step, remaining)
+                self.page.evaluate(f"window.scrollBy(0, {current_step})")
+                remaining -= current_step
+                self.page.wait_for_timeout(interval)
+
+            self.page.wait_for_timeout(2000)
+            print(f"✅ 分步滑动完成,总滑动距离:{distance}像素")
+        except Exception as e:
+            print(f"⚠️ 分步滑动失败:{str(e)[:50]}")
+
+    # ========== 滑动方法结束 ==========
+
+
+
+    def _init_image_dir(self):
+        """创建图片保存文件夹(不存在则创建)"""
+        if not os.path.exists(IMAGE_CONFIG["save_dir"]):
+            os.makedirs(IMAGE_CONFIG['save_dir'])
+            print(f"✅ 图片保存文件夹已创建:{os.path.abspath(IMAGE_CONFIG['save_dir'])}")
+        else:
+            print(f"✅ 图片保存文件夹已存在:{os.path.abspath(IMAGE_CONFIG['save_dir'])}")
+
+
+    def _get_image_filename(self, img_src: str, shop_name: str) -> str:
+        """生成唯一的图片文件名(避免重复)"""
+        # 提取原始文件名后缀(如.png/.jpg)
+        parsed_url = urlparse(img_src)
+        ext = os.path.splitext(parsed_url.path)[-1] or '.png'
+        # 清洗店铺名(避免特殊字符)
+        clean_shop = "".join([c for c in shop_name if c.isalnum() or c in ["_", "-"]])[:20]
+        # 时间戳+店铺名+随机数,确保唯一
+        timestamp = str(int(time.time() * 1000))
+        filename = f"{clean_shop}_{timestamp}{ext}"
+        return filename
+
+    def _download_image(self, img_src: str, shop_name: str) -> Optional[str]:
+        """
+        下载图片到指定文件夹
+        :return: 成功返回保存路径,失败返回None
+        """
+        if not img_src:
+            print("⚠️ 图片链接为空,跳过下载")
+            return None
+
+        # 生成唯一文件名
+        filename = self._get_image_filename(img_src, shop_name)
+        save_path = os.path.join(IMAGE_CONFIG["save_dir"], filename)
+
+        # 下载重试逻辑
+        for retry in range(IMAGE_CONFIG["retry"] + 1):
+            try:
+                # 发送请求下载图片(添加headers模拟浏览器)
+                headers = {
+                    "User-Agent": PLAYWRIGHT_CONFIG["browser_args"][-1].split("=")[1],
+                    "Referer": "https://www.pinduoduo.com/",
+                    "Accept": "image/avif,image/webp,image/apng,image/svg+xml,image/*,*/*;q=0.8"  # 新增:支持jpeg格式
+                }
+                response = requests.get(
+                    img_src,
+                    headers=headers,
+                    timeout=IMAGE_CONFIG["timeout"],
+                    stream=True, # 流式下载,避免内存溢出
+                    allow_redirects=True  # 显式开启重定向(拼多多签名链接可能302)
+                )
+                response.raise_for_status()  # 抛出HTTP错误(4xx/5xx)
+
+                # 保存图片到文件
+                with open(save_path, "wb") as f:
+                    for chunk in response.iter_content(chunk_size=8192):
+                        f.write(chunk)
+
+                # 校验文件是否保存成功
+                if os.path.getsize(save_path) > 0:
+                    print(f"✅ 图片下载成功:{save_path}")
+                    return save_path
+                else:
+                    os.remove(save_path)  # 删除空文件
+                    print(f"⚠️ 图片下载为空,重试{retry+1}/{IMAGE_CONFIG['retry']}")
+            except requests.exceptions.HTTPError as e:
+                if e.response.status_code == 403:
+                    print(f"❌ 图片签名过期/无权限:{img_src[:50]}...")
+                    return None  # 403无需重试,直接跳过
+                elif retry < IMAGE_CONFIG["retry"]:
+                    print(f"⚠️ HTTP错误(重试{retry + 1}/{IMAGE_CONFIG['retry']}):{e}")
+                    time.sleep(1)
+                    continue
+                print(f"❌ 图片下载失败:{e}")
+                return None
+            except Exception as e:
+                if retry < IMAGE_CONFIG["retry"]:
+                    print(f"⚠️ 下载失败(重试{retry + 1}/{IMAGE_CONFIG['retry']}):{str(e)[:50]}")
+                    time.sleep(1)
+                    continue
+                print(f"❌ 图片最终下载失败:{str(e)[:50]}")
+                return None
+
+
+
+    def _process_ocr(self, image_path: str) -> Optional[Dict]:
+        """OCR识别后立即调用天眼查搜索+提取企业信息"""
+        """
+        调用OCR识别并提取企业信息
+        :param image_path: 图片路径
+        :return: 企业信息字典
+        """
+        if not self.ocr_client:
+            print("⚠️ OCR客户端未初始化,跳过识别")
+            return None
+
+        if not os.path.exists(image_path):
+            print(f"❌ 图片文件不存在:{image_path}")
+            return None
+
+        # 调用OCR识别
+        ocr_result = self.ocr_client.general_ocr(
+            image_path=image_path,
+            scale=BAIDU_OCR_CONFIG["scale"]
+        )
+
+        print(f"识别结果{ocr_result}")
+        if not ocr_result:
+            return None
+
+        # 提取企业信息
+        enterprise_info = self.ocr_client.extract_enterprise_info(ocr_result)
+        print("\n📌 提取的企业信息:")
+        enterprise_name = enterprise_info.get("enterprise_name", "")
+        if not enterprise_name:
+            print("⚠️ 未提取到企业名称,跳过天眼查")
+            return enterprise_info
+
+        # 2. 调用天眼查搜索+提取字段
+        if self.tyc_browser.search_enterprise(enterprise_name):
+            # 提取三个核心字段
+            tyc_info = self.tyc_browser.get_enterprise_info()
+            # 合并OCR结果和天眼查字段
+            enterprise_info.update(tyc_info)
+
+
+
+        print("\n📌 最终整合结果:")
+        print(json.dumps(enterprise_info, ensure_ascii=False, indent=4))
+        return enterprise_info
+
+
+
+
+
+    def _load_login_state(self) -> Optional[Dict]:
+        """加载本地登录状态"""
+        if os.path.exists(self.login_state_path):
+            try:
+                with open(self.login_state_path, "r", encoding="utf-8") as f:
+                    return json.load(f)
+            except json.JSONDecodeError:
+                print(f"⚠️ 登录状态文件损坏:{self.login_state_path},将重新登录")
+                os.remove(self.login_state_path)
+        return None
+
+
+    def _save_login_state(self) -> None:
+        """保存登录状态到本地"""
+        if self.context:
+            try:
+                self.context.storage_state(path=self.login_state_path)
+                print(f"✅ 登录状态已保存到:{self.login_state_path}")
+            except Exception as e:
+                print(f"⚠️ 保存登录状态失败:{e}")
+
+    def init_browser(self) -> bool:
+        """初始化浏览器(加载登录状态/提示登录)"""
+        try:
+            # ✅ 核心修改 1:全局只启动【一次】 Playwright 引擎!存到 self.pw 中
+            self.pw = sync_playwright().start()
+
+            # ✅ 核心修改 2:把启动好的引擎传给天眼查去用
+            self.tyc_browser.init_browser(self.pw)
+
+            # 启动优化后的浏览器
+            # ✅ 核心修改 3:拼多多也用这同一个引擎启动浏览器
+            self.browser = self.pw.chromium.launch(
+                headless=PLAYWRIGHT_CONFIG["headless"],
+                slow_mo=PLAYWRIGHT_CONFIG["slow_mo"],
+                args=PLAYWRIGHT_CONFIG["browser_args"],
+                ignore_default_args=["--enable-automation"],  # 隐藏自动化标识
+                timeout=60000
+            )
+
+            # 加载登录状态或创建新上下文
+            login_state = self._load_login_state()
+            if login_state:
+                self.context = self.browser.new_context(
+                    viewport=PLAYWRIGHT_CONFIG["viewport"],
+                    locale=PLAYWRIGHT_CONFIG["locale"],
+                    timezone_id=PLAYWRIGHT_CONFIG["timezone_id"],
+                    ignore_https_errors=True,
+                    storage_state=login_state  # 加载登录状态
+                )
+                print("✅ 已加载本地登录状态")
+            else:
+                self.context = self.browser.new_context(
+                    viewport=PLAYWRIGHT_CONFIG["viewport"],
+                    locale=PLAYWRIGHT_CONFIG["locale"],
+                    timezone_id=PLAYWRIGHT_CONFIG["timezone_id"],
+                    ignore_https_errors=True
+                )
+                print("\n⚠️ 未检测到登录状态,请先完成拼多多登录!")
+                self.page = self.context.new_page()
+                self.page.goto("https://www.pinduoduo.com", timeout=30000)
+                input("请在浏览器中完成登录,登录后按回车继续...")
+                self.context.storage_state(path=self.login_state_path)  # ✅ 加await保存状态
+
+            # 初始化页面
+            self.page = self.context.new_page()
+            self.page.set_default_timeout(PLAYWRIGHT_CONFIG["default_timeout"])
+            self.page.set_default_navigation_timeout(PLAYWRIGHT_CONFIG["navigation_timeout"])
+            return True
+        except Exception as e:
+            print(f"❌ 浏览器初始化失败:{e}")
+            self.close()  # ✅ 核心修复:加await
+            return False
+
+    def open_links(self, goods_data: List[Dict], db_reader: DBGoodsReader) -> List[Dict]:
+        """依次打开商品链接(支持店名过滤+图片下载+OCR识别+天眼查搜索提取)"""
+        if not self.page:
+            print("❌ 浏览器未初始化")
+            return []
+
+        total = len(goods_data)
+        if total == 0:
+            print("⚠️ 无商品链接可处理")
+            return []
+
+        print(f"\n📋 共待处理 {total} 条商品链接")
+
+        # 收集所有抓取到的结果(可选,如果想最后统一保存的话)
+        all_results = []
+
+        for idx, item in enumerate(goods_data, 1):
+            shop = item.get("shop", "未知店铺").strip()
+            link = (item.get("product_link") or "").strip()
+
+            if not link:
+                print(f"\n⚠️ 第{idx}/{total}条:店铺【{shop}】链接为空,跳过")
+                continue
+
+            print(f"\n{'=' * 15} 第 {idx}/{total} 条 {'=' * 15}")
+            print(f"🏪 数据库店名:{shop}")
+            print(f"🔗 商品链接:{link}")
+
+            # ========== 判断店名是否包含“旗舰店” ==========
+            if "旗舰店" not in shop:
+                print(f"⚠️ 第{idx}/{total}条:店铺【{shop}】名称不含“旗舰店”,跳过")
+                #涉及突破滑块验证的部分了。
+
+
+                continue
+            else:
+                print(f"⚠️ 第{idx}/{total}条:店铺【{shop}】名称包含“旗舰店”,打开商品链接")
+
+            try:
+                # 1. 打开商品链接
+                self.page.goto(
+                    link,
+                    wait_until="load",
+                    timeout=PLAYWRIGHT_CONFIG["navigation_timeout"]
+                )
+                self.page.wait_for_load_state("networkidle", timeout=15000)
+                print(f"✅ 页面加载成功:{self.page.title()}...")
+
+                # 检测登录框
+                self._check_login_box()
+
+
+                #如果已售罄,不跳过。
+                # sold_out_locator  = self.page.locator("")
+                # if sold_out_locator.count() > 0 :
+                #     print("该商品已售罄,跳过这次采集")
+                #
+
+                # 2. 店名匹配判断
+                page_shop_locator = self.page.locator("div.BAq4Lzv7")
+                try:
+                    page_shop_locator.wait_for(timeout=5000)
+
+                    page_shop_text = (page_shop_locator.inner_text()).strip().lower()
+                except PlaywrightTimeoutError:
+                    print(f"❌ 未找到页面店名元素,可能页面结构改变或被风控,跳过")
+                    continue
+
+                db_shop_text = shop.lower()
+                print(f"🏪 页面元素店铺名:{page_shop_text}")
+                if page_shop_text != db_shop_text:
+                    print(f"❌ 店名不匹配(数据库:{db_shop_text} | 页面:{page_shop_text}),跳过")
+
+                    self.page.wait_for_timeout(2000)
+                    continue
+                print(f"✅ 店名匹配成功!")
+
+                # 自定义滑动距离,触发图片懒加载
+                self._scroll_down(distance=2100)
+
+                # ========== 获取图片src并下载 ==========
+                # shop_name = 'pdd_shop_info_middle_back'
+                final_enterprise_info = None
+                try:
+                    # ========== 原定位策略(优先使用) ==========
+                    img_locators = self.page.locator("img[role='img'][aria-label='查看图片']")
+
+                    img_count = img_locators.count()
+
+                    # ========== 原定位不足时,切换到备用定位 ==========
+                    if img_count < 2:
+                        print(f"⚠️ 原定位仅匹配到{img_count}个图片,尝试备用定位(拼多多懒加载图片)...")
+                        input("请手动检查页面图片元素,按回车继续...")
+                        continue
+                        # 备用定位:匹配截图里的「pdd-lazy-image」类资质图片(带水印的营业执照)
+                        # backup_img_locators = self.page.locator(
+                        #     "img.pdd-lazy-image.loaded"  # 精准匹配已加载的懒加载图片
+                        # )
+                        #
+                        # backup_count = backup_img_locators.count()
+                        #
+                        # if backup_count >= 2:
+                        #     img_locators = backup_img_locators
+                        #     img_count = backup_count
+                        #     print(f"✅ 备用定位生效,匹配到图片元素:{img_count} 个")
+                        # else:
+                        #     print(f"⚠️ 原定位({img_count}个) + 备用定位({backup_count}个)均不足2个,跳过下载")
+                        #     input("请手动检查页面图片元素,按回车继续...")
+                        #     continue  # 跳过当前店铺,避免卡死
+
+
+                    print(f"📸 匹配到图片元素:{img_count} 个")
+                    # 3. 定位第二个元素
+                    target_img_locator = img_locators.nth(1)
+
+                    target_img_locator.wait_for(timeout=5000, state="visible")
+
+                    # 4. 获取第二个图片的src
+                    img_src = target_img_locator.get_attribute("src")
+                    if img_src:
+                        print(f"🖼️ 第2个图片 src:{img_src[:80]}...")
+                        image_path = self._download_image(img_src, shop)
+
+                        if image_path:
+                            # ========== 核心:调用OCR并获取最终的天眼查数据 ==========
+                            final_enterprise_info = self._process_ocr(image_path)
+
+                    else:
+                        print(f"⚠️ 第2个图片的src为空")
+                except Exception as e:
+                    print(f"❌ 获取图片/识别失败:{str(e)[:100]}")
+
+                # 3. 收集数据并自动循环
+                if final_enterprise_info:
+                    # 将原数据库的店名也塞进去,方便后续入库对比
+                    print(f"天眼查---查出来的数据为{final_enterprise_info}")
+
+                    # final_enterprise_info['pdd_shop_name'] = shop
+                    all_results.append(final_enterprise_info)
+
+                    # 获取到的数据回填数据库
+                    update_success = db_reader.insert_enterprise_info(
+                        shop_name=shop,
+                        enterprise_info=final_enterprise_info,  # 直接传入天眼查返回的字典
+                    )
+                    if update_success:
+                        print(f"✅ 店铺[{shop}]数据回填成功")
+                    else:
+                        print(f"❌ 店铺[{shop}]数据回填失败")
+
+
+                    print(f"\n🎉 成功获取数据,准备进入下一条...")
+                else:
+                    print(f"\n⚠️ 本条未获取到有效企业信息,准备进入下一条...")
+                self.page.wait_for_timeout(5000)
+
+            except PlaywrightTimeoutError:
+                print(f"⏰ 页面加载/元素定位超时:{link}")
+                input("排查问题")
+                continue
+            except Exception as e:
+                print(f"❌ 第{idx}条处理异常:{str(e)[:100]}...,跳过")
+                continue
+
+        return all_results
+
+    def close(self) -> None:
+        """关闭浏览器(异步版,补全所有await)"""
+        # 先关闭天眼查浏览器
+        if hasattr(self, 'tyc_browser') and self.tyc_browser:
+            self.tyc_browser.close()
+
+        if hasattr(self, 'pw') and self.pw:
+            try:
+                self.pw.stop()
+                print("✅ Playwright 驱动已彻底停止")
+            except Exception:
+                pass
+
+        # 关闭拼多多浏览器
+        if self.page:
+            try:
+                self.page.close()
+            except Exception:
+                pass
+        if self.context:
+            try:
+                self.context.close()
+            except Exception:
+                pass
+        if self.browser:
+            try:
+                self.browser.close()
+                print("✅ 拼多多浏览器已关闭")
+            except Exception:
+                pass
+
+def main():
+    """主函数:整合数据库读取+链接浏览"""
+    # 1. 读取环境变量并补全默认值
+    db_config = {
+        "host": os.getenv("DB_HOST", DEFAULT_DB_CONFIG["host"]),
+        "port": int(os.getenv("DB_PORT", DEFAULT_DB_CONFIG["port"])),
+        "user": os.getenv("DB_USERNAME", DEFAULT_DB_CONFIG["user"]),
+        "password": os.getenv("DB_PASSWORD", DEFAULT_DB_CONFIG["password"]),
+        "db_name": os.getenv("DB_DATABASE", DEFAULT_DB_CONFIG["db_name"]),
+        "table_name": os.getenv("DB_TABLENAME", DEFAULT_DB_CONFIG["table_name"])
+    }
+
+    # 2. 初始化数据库读取器
+    db_reader = DBGoodsReader(
+        host=db_config["host"],
+        port=db_config["port"],
+        user=db_config["user"],
+        password=db_config["password"],
+        db_name=db_config["db_name"]
+    )
+
+
+    if not db_reader.connect_db():
+        return
+
+    # 3. 读取商品链接
+    goods_data = db_reader.get_shop_and_goods()
+    # 预览前5条数据
+    if goods_data:
+        print("\n📌 数据预览(前5条):")
+        for idx, item in enumerate(goods_data[:5], 1):
+            print(f"第{idx}条 | 店铺:{item['shop'][:20]} | 链接:{item['product_link'][:50]}...")
+
+    # 4. 初始化浏览器并打开链接
+    # 初始化拼多多浏览器
+    pdd_browser = PddLinkBrowser()
+    if not pdd_browser.init_browser():
+        return
+
+    # 接收返回的所有提取结果
+    extracted_data = pdd_browser.open_links(goods_data, db_reader)
+
+    # 打印最终统计
+    print(f"\n📊 爬取任务结束,共成功提取 {len(extracted_data)} 条企业信息!")
+    if extracted_data:
+        # 这里你可以将 extracted_data 写入数据库,或者存为 json/csv
+        # 例如打印第一条看看:
+        print("💡 最终数据示例:", json.dumps(extracted_data[0], ensure_ascii=False, indent=2))
+
+    pdd_browser.close()
+    db_reader.close_db()
+
+
+if __name__ == "__main__":
+    try:
+        main()
+    except KeyboardInterrupt:
+        print("\n⚠️ 程序被用户中断")
+    except Exception as e:
+        print(f"\n❌ 程序运行出错:{e}")

Fichier diff supprimé car celui-ci est trop grand
+ 0 - 0
pdd1/process_shop/pdd_login_state.json


Fichier diff supprimé car celui-ci est trop grand
+ 0 - 0
pdd1/process_shop/tianyancha_login_state.json


BIN
pdd1/拼多多盒数处理脚本/__pycache__/config.cpython-314.pyc


BIN
pdd1/拼多多盒数处理脚本/__pycache__/logger_config.cpython-314.pyc


BIN
pdd1/拼多多盒数处理脚本/__pycache__/main.cpython-314.pyc


Certains fichiers n'ont pas été affichés car il y a eu trop de fichiers modifiés dans ce diff