config.py 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522
  1. # config.py - 药帮忙数据采集配置文件
  2. from datetime import datetime
  3. import pymysql
  4. from dotenv import load_dotenv
  5. import os
  6. import oss2
  7. from PIL import Image
  8. from logger_config import logger
  9. # 第一步:加载.env文件(必须放在配置读取前)
  10. # load_dotenv() 默认读取当前目录的.env文件;若.env在其他路径,可指定:load_dotenv("/path/to/.env")
  11. # load_dotenv()
  12. # MySQL配置(和你原有MYSQL_CONFIG结构一致)
  13. MYSQL_CONFIG = {
  14. "host": "47.119.164.65", # 本地MySQL地址
  15. "port": 3306, # 端口
  16. "user": "test_c", # 你的MySQL用户名
  17. "password": "Dfwy@2025", # 你的MySQL密码
  18. "database": "test2", # 已建好的数据库名
  19. "charset": "utf8mb4" # 字符集(避免中文乱码)
  20. }
  21. # MYSQL_CONFIG = {
  22. # "host": os.getenv("MYSQL_HOST"), # 读取.env中的MYSQL_HOST
  23. # "user": os.getenv("MYSQL_USER"),
  24. # "password": os.getenv("MYSQL_PASSWORD"), # 敏感值从.env读取
  25. # "database": os.getenv("MYSQL_DATABASE"),
  26. # "port": int(os.getenv("MYSQL_PORT", 3306)), # 可选配置:设置默认值3306,避免.env缺失时报错
  27. # "charset": "utf8mb4"
  28. # }
  29. #模糊匹配url
  30. def fuzzy_match_product_url_in_db_mysql(product_url):
  31. # 先做非空判断和通配符转义
  32. if not product_url:
  33. logger.warning("⚠️ 待匹配的 product_url 为空,跳过数据库查询")
  34. return None
  35. # # 转义 product_url 中的 % 和 _,避免被当作 SQL 通配符
  36. # escaped_product_url = product_url.replace("%", "\%").replace("_", "\_")
  37. try:
  38. conn = pymysql.connect(**MYSQL_CONFIG)
  39. cursor = conn.cursor()
  40. # 2. 执行 MySQL 模糊查询 SQL(核心逻辑不变,占位符用 %s)
  41. # 说明:%product_url% 实现包含式模糊匹配,MySQL 同样支持该通配符
  42. sql = "SELECT * FROM ybm_drug_middle WHERE product_link LIKE %s"
  43. match_value = f"%{product_url}%"
  44. cursor.execute(sql, (match_value,))
  45. # 3. 获取查询结果并格式化(方便后续使用)
  46. result = cursor.fetchone() # 获取第一条匹配结果(返回元组,如 (id, product_url, price, ...))
  47. if result:
  48. # 提取数据表字段名,将元组转换为字典(更易读取和使用)
  49. column_names = [desc[0] for desc in cursor.description]
  50. result_dict = dict(zip(column_names, result))
  51. return result_dict # 匹配成功,返回格式化字典
  52. else:
  53. return None # 无匹配结果,返回 None
  54. except Exception as e:
  55. logger.error(f"❌ MySQL 数据库模糊匹配失败:{str(e)}")
  56. return None
  57. finally:
  58. # 4. 关闭数据库连接,避免资源泄露(无论成功与否,都要关闭)
  59. if 'conn' in locals() and conn:
  60. conn.close()
  61. # ==================== 从数据库提取商品 ====================
  62. def get_search_keywords_from_db():
  63. """从数据库读取keywords字段,生成SEARCH_KEYWORDS列表"""
  64. keywords = []
  65. conn = None
  66. cursor = None
  67. try:
  68. # 校验MYSQL_CONFIG完整性
  69. required_configs = ['host', 'user', 'password', 'database']
  70. for cfg in required_configs:
  71. if cfg not in MYSQL_CONFIG:
  72. raise ValueError(f"MYSQL_CONFIG缺失必要配置:{cfg}")
  73. # 建立数据库连接
  74. conn = pymysql.connect(**MYSQL_CONFIG)
  75. cursor = conn.cursor()
  76. sql = 'SELECT scrape_name FROM ybm_scape_name_config WHERE status = 1'
  77. cursor.execute(sql)
  78. # 提取所有keywords字段值,生成列表
  79. results = cursor.fetchall()
  80. keywords = [row[0].strip() for row in results if row[0].strip()]
  81. print(f"成功从数据库读取 {len(keywords)} 个关键词(status=1)")
  82. except Exception as e:
  83. print(f"读取数据库关键词失败:{str(e)}")
  84. # 读取失败时,可返回空列表或备用列表(可选)
  85. keywords = []
  86. finally:
  87. print("读取到的关键词示例:")
  88. print(keywords[:5])
  89. # 关闭游标和连接(容错处理)
  90. if cursor:
  91. try:
  92. cursor.close()
  93. except:
  94. pass
  95. if conn:
  96. try:
  97. conn.close()
  98. except:
  99. pass
  100. return keywords
  101. # ==================== 1. 核心业务配置 ====================
  102. # 搜索关键词列表
  103. SEARCH_KEYWORDS = get_search_keywords_from_db()
  104. # get_search_keywords_from_db()
  105. # ['999荆防颗粒','999 感冒灵颗粒']
  106. # [
  107. # "999复方感冒灵颗粒",
  108. # "999糠酸莫米松凝胶",
  109. # "999感冒灵颗粒",
  110. # "999皮炎平复方醋酸地塞米松乳膏",
  111. # "三九胃泰颗粒",
  112. # "顺峰康王酮康他索乳膏",
  113. # "999强力枇杷露",
  114. # "999小柴胡颗粒",
  115. # "999板蓝根颗粒",
  116. # "999抗病毒口服液",
  117. # "温胃舒颗粒",
  118. # "养胃舒颗粒",
  119. # "999盐酸氨溴索口服溶液",
  120. # "999蒲地蓝消炎片",
  121. # "999速复康复方氨酚烷胺胶囊",
  122. # "999咽炎片",
  123. # "999小儿止咳糖浆",
  124. # "999小儿感冒颗粒",
  125. # "999小儿氨酚黄那敏颗粒",
  126. # "999感冒清热颗粒",
  127. # "999藿香正气合剂",
  128. # "999皮炎平曲安奈德益康唑乳膏",
  129. # "999必无忧盐酸特比萘芬凝胶",
  130. # "999精装感冒灵颗粒",
  131. # "999感冒灵胶囊",
  132. # "999荆防颗粒",
  133. # "999精氨酸布洛芬颗粒",
  134. # "999盐酸特比萘芬喷雾剂",
  135. # "999止咳枇杷糖浆",
  136. # "999复方金银花颗粒",
  137. # "999盐酸特比萘芬乳膏",
  138. # "999复方板蓝根颗粒",
  139. # "999布洛芬混悬液",
  140. # "999布洛芬缓释胶囊",
  141. # "999速复康磷酸奥司他韦胶囊",
  142. # "999维生素EC颗粒",
  143. # "999玉屏风口服液",
  144. # "史达功右美沙芬愈创甘油醚糖浆",
  145. # "999对乙酰氨基酚口服溶液",
  146. # "999小儿感冒宁颗粒",
  147. # "999葡萄糖酸锌口服溶液",
  148. # "999黄芪精",
  149. # "今维多赐多康牌蛋白粉",
  150. # "999小儿咳喘灵颗粒",
  151. # "999小儿咳喘灵口服液",
  152. # "华润神鹿儿泻停颗粒",
  153. # "999小儿咽扁颗粒",
  154. # "999速复康铝碳酸镁咀嚼片",
  155. # "999选平硝酸咪康唑乳膏",
  156. # "三九胃泰胶囊",
  157. # "999正天胶囊",
  158. # "999正天丸",
  159. # "壮骨关节胶囊",
  160. # "999壮骨关节丸",
  161. # "999银菊清咽颗粒",
  162. # "999表虚感冒颗粒"
  163. # ]
  164. # MySQL表结构(确保和你建好的表一致,仅做校验用)
  165. # CREATE_TABLE_SQL = """
  166. # CREATE TABLE IF NOT EXISTS yjj_medicine_data (
  167. # id INT AUTO_INCREMENT PRIMARY KEY COMMENT '自增主键',
  168. # product_title VARCHAR(500) COMMENT '商品标题',
  169. # product_url VARCHAR(1000) COMMENT '商品详情页链接',
  170. # purchase_price DECIMAL(10,2) DEFAULT 0.00 COMMENT '采购价格',
  171. # discount_price DECIMAL(10,2) DEFAULT 0.00 COMMENT '折扣价格',
  172. # spec VARCHAR(200) DEFAULT '未知规格' COMMENT '规格',
  173. # box_count INT DEFAULT 1 COMMENT '盒数',
  174. # store_name VARCHAR(200) DEFAULT '未知店铺' COMMENT '店铺名称',
  175. # company_name VARCHAR(200) DEFAULT '未知公司' COMMENT '公司名称',
  176. # validity_date VARCHAR(100) DEFAULT '无有效期' COMMENT '有效日期',
  177. # production_date VARCHAR(100) DEFAULT '无生产日期' COMMENT '生产日期',
  178. # approval_number VARCHAR(100) DEFAULT '无批准文号' COMMENT '批准文号',
  179. # keyword VARCHAR(100) DEFAULT '无搜素关键词' COMMENT '搜素关键词',
  180. # collect_time DATETIME COMMENT '采集时间'
  181. # ) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COMMENT='药九九采集数据';
  182. # """
  183. # ==================== 2. 反爬配置 ====================
  184. # 随机延迟范围(模拟真人操作间隔)
  185. MIN_CLICK_DELAY = 1.5 # 点击间隔最小秒数
  186. MAX_CLICK_DELAY = 3.5 # 点击间隔最大秒数
  187. MIN_INPUT_DELAY = 0.1 # 打字每个字符的最小延迟
  188. MAX_INPUT_DELAY = 0.3 # 打字每个字符的最大延迟
  189. MIN_PAGE_DELAY = 2.0 # 页面加载后最小等待秒数
  190. MAX_PAGE_DELAY = 4.0 # 页面加载后最大等待秒数
  191. # 关键词间的反爬长延迟(比单个商品更长)
  192. MIN_KEYWORD_DELAY = 8.0
  193. MAX_KEYWORD_DELAY = 15.0
  194. # 滚动配置(固定1400px±50px)
  195. SCROLL_TARGET_DISTANCE = 400 # 目标滚动距离
  196. SCROLL_OFFSET_RANGE = 50 # 随机偏移范围
  197. SCROLL_STEP = 50 # 每次滚动步长(越小越慢,越像真人)
  198. SCROLL_INTERVAL = 0.05 # 步长间隔(秒)
  199. # ==================== 3. Cookie & 登录配置 ====================
  200. COOKIE_FILE_PATH = "ybm_cookies.json" # Cookie保存路径
  201. # 需要登录后访问的验证页面(用于检测Cookie是否有效)
  202. LOGIN_VALIDATE_URL = "https://www.ybm100.com/new/"
  203. # 账号密码
  204. USERNAME = "18008650300"
  205. PASSWORD = "12345678"
  206. # USERNAME = "yjj112031"
  207. # PASSWORD = "123456"
  208. # 目标登录URL
  209. TARGET_LOGIN_URL = "https://www.ybm100.com/new/login"
  210. # "https://www.yyjzt.com/login?redirect=%2FgoodDetail%3FladderNum%26itemStoreId%3D124250306%26sourceProdetail%3D%252Fsearch%26is_store%3D0"
  211. # ==================== 4. 元素选择器配置 ====================
  212. # 基础选择器
  213. USERNAME_SELECTOR = "input[placeholder*=请输入账号]"
  214. PASSWORD_SELECTOR = "input[placeholder*=请输入密码]"
  215. LOGIN_BTN_SELECTOR = "button:has(span:text('登录'))"
  216. SEARCH_INPUT_SELECTOR = "input[placeholder*='药品名称/厂家名称']"
  217. SEARCH_INPUT_SELECTOR2 = "div.home-search-container-search-head"
  218. SEARCH_BTN_SELECTOR = 'div.home-search-container-search-head-btn[data-scmd="text-搜索"]'
  219. # 采集元素选择器(根据页面实际调整!)
  220. #这里得改
  221. PRODUCT_ITEM_SELECTOR = "div.product-list-item" # 商品项容器
  222. PRODUCT_TITLE_SELECTOR = "div.product-name" # 商品标题
  223. PRODUCT_PRICE_SELECTOR = "div.main-price" # 商品价格
  224. PRODUCT_STORE_SELECTOR = 'div[data-v-382008f5].shop-name' #店铺名称
  225. PRODUCT_COMPANY_SELECTOR = "div.product-manufacturer" # 公司名称
  226. PRODUCT_VALIDITY_SELECTOR = "div.product-period" # 有效期
  227. # ==================== 5. 等待时间配置(毫秒) ====================
  228. ELEMENT_TIMEOUT = 10000
  229. LOGIN_AFTER_CLICK = 5000
  230. SEARCH_BTN_TIMEOUT = 5000
  231. COLLECT_DELAY = 3000
  232. DETAIL_LOAD_TIMEOUT = 5000 # 点击商品后等待详情加载的时间
  233. # ==================== 6. 浏览器配置 ====================
  234. BROWSER_HEADLESS = False
  235. BROWSER_CHANNEL = "chrome"
  236. SLOW_MO_MIN = 50
  237. SLOW_MO_MAX = 100
  238. # ==================== 7. CSV配置 ====================
  239. CSV_HEADERS = [
  240. "商品标题", "商品采购价格", "商品折扣价格", "规格", "盒数",
  241. "店铺名称", "公司名称",
  242. "有效日期", "生产日期", "批准文号", "采集时间"
  243. ] # 表头
  244. # 注:CSV_FILE_PATH 因包含动态时间戳,保留在主文件中定义
  245. #存放营业执照图片路径
  246. # cropped_screenshot_path =
  247. #百度OCR配置
  248. request_url_config = "https://aip.baidubce.com/rest/2.0/ocr/v1/business_license"
  249. AppKey_config = "tRK2RhyItCSh6BzyT4CNVXQa"
  250. AppSecret_config = "TDgKiPo94i2mOM1sDqOuDnlcK1bG66jh"
  251. token_url_config = 'https://aip.baidubce.com/oauth/2.0/token'
  252. # ---------------------- OSS 配置项 ----------------------
  253. OSS_ACCESS_KEY_ID = 'LTAI5tDwjfteBvivYN41r8sJ'
  254. OSS_ACCESS_KEY_SECRET = 'yowuOGi2nYYnrqGpO3qcz94C4brcPp'
  255. OSS_ENDPOINT = "oss-cn-shenzhen.aliyuncs.com"
  256. OSS_BUCKET_NAME = "zhijiayun-jiansuo"
  257. OSS_PREFIX = "scrape_data/"
  258. # 本地截图配置
  259. LOCAL_SCREENSHOT_DIR = "local_screenshots" # 本地截图保存目录
  260. LOCAL_SCREENSHOT_NAME = None # 自动生成文件名,无需手动指定
  261. LOCAL_CROPPED_DIR = "./local_cropped_screenshots" # 裁剪后图片保存目录
  262. # 图片压缩配置
  263. IMAGE_COMPRESS_ENABLE = True # 是否开启图片压缩(True=开启,False=关闭)
  264. IMAGE_COMPRESS_QUALITY = 30 # jpg/jpeg格式压缩质量(1-95,数值越大画质越好,文件越大,推荐80-90)
  265. IMAGE_COMPRESS_PNG_LEVEL = 9 # png格式压缩级别(0-9,数值越大压缩率越高,速度越慢,推荐5-7)
  266. # ---------------------- 工具函数 ----------------------
  267. def init_local_screenshot_dir():
  268. """
  269. 初始化本地截图目录(如果不存在则创建)
  270. """
  271. if not os.path.exists(LOCAL_SCREENSHOT_DIR):
  272. os.makedirs(LOCAL_SCREENSHOT_DIR)
  273. print(f"本地截图目录【{LOCAL_SCREENSHOT_DIR}】创建成功")
  274. else:
  275. print(f"本地截图目录【{LOCAL_SCREENSHOT_DIR}】已存在")
  276. def init_oss_bucket():
  277. """
  278. 初始化OSS Bucket对象,用于后续上传操作
  279. """
  280. try:
  281. # 创建认证对象
  282. auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
  283. bucket = oss2.Bucket(auth, OSS_ENDPOINT, OSS_BUCKET_NAME)
  284. # 验证Bucket是否可访问(可选)
  285. bucket.get_bucket_info()
  286. print("OSS Bucket 初始化成功")
  287. return bucket
  288. except Exception as e:
  289. print(f"OSS Bucket 初始化失败:{str(e)}")
  290. raise
  291. def upload_local_screenshot_to_oss(bucket, local_file_path, oss_file_path=None):
  292. """
  293. 将截图内容上传到OSS
  294. :param bucket: 初始化好的OSS Bucket对象
  295. :param screenshot_content: 截图内容(字节流,或本地文件路径)
  296. :param oss_file_path: 上传到OSS后的文件路径(如screenshots/20260130_100000_target_page.jpg)
  297. :return: 上传后的OSS文件公网访问链接
  298. """
  299. # 1. 校验本地文件是否存在
  300. if not os.path.exists(local_file_path):
  301. raise FileNotFoundError(f"本地截图文件不存在:{local_file_path}")
  302. # 2. 生成默认的OSS文件路径(如果用户未指定)
  303. if not oss_file_path:
  304. # 提取本地文件名作为OSS文件名,保持一致性
  305. local_file_name = os.path.basename(local_file_path)
  306. oss_file_path = f"screenshots/{local_file_name}"
  307. try:
  308. # 3. 上传本地文件到OSS(核心修改:使用put_object_from_file)
  309. bucket.put_object_from_file(oss_file_path, local_file_path)
  310. # 4. 构造OSS文件的公网访问链接
  311. oss_file_url = f"https://{OSS_BUCKET_NAME}.{OSS_ENDPOINT}/{oss_file_path}"
  312. print(f"本地截图上传OSS成功,访问链接:{oss_file_url}")
  313. return oss_file_url
  314. except Exception as e:
  315. print(f"本地截图上传OSS失败:{str(e)}")
  316. raise
  317. # ---------------------- 补全/修改:裁剪函数(新增完整裁剪+删原图逻辑) ----------------------
  318. def crop_local_screenshot(local_file_path, cropped_file_path=None, crop_region=None):
  319. """
  320. 裁剪本地截图文件(完整实现:裁剪后图片压缩,裁剪+保存裁剪文件+删除原图)
  321. :param local_file_path: 原始本地截图文件路径
  322. :param cropped_file_path: 裁剪后图片的保存路径(可选)
  323. :param crop_region: 裁剪区域(元组,格式:(left, upper, right, lower)),可选
  324. :return: 裁剪后图片的本地路径
  325. """
  326. # 1. 校验原始文件是否存在
  327. if not os.path.exists(local_file_path):
  328. raise FileNotFoundError(f"原始截图文件不存在:{local_file_path}")
  329. # 2. 初始化裁剪后文件目录(自动创建)(你的原有逻辑,保持不变)
  330. os.makedirs(LOCAL_CROPPED_DIR, exist_ok=True)
  331. # 3. 生成默认裁剪后文件路径(避免重名,带_cropped标识)
  332. if not cropped_file_path:
  333. file_name = os.path.basename(local_file_path)
  334. file_name_no_ext, file_ext = os.path.splitext(file_name)
  335. cropped_file_name = f"{file_name_no_ext}_cropped{file_ext}"
  336. cropped_file_path = os.path.join(LOCAL_CROPPED_DIR, cropped_file_name)
  337. with Image.open(local_file_path) as img:
  338. img_width, img_height = img.size
  339. print(f"获取截图尺寸:宽={img_width},高={img_height}") # 打印尺寸,方便排查
  340. if not crop_region:
  341. left = 0
  342. upper = 0
  343. right = int(img_width)
  344. lower = int(img_height * 0.3)
  345. crop_region = (left, upper, right, lower)
  346. print(f"未指定裁剪区域,默认裁剪中间30%区域:{crop_region}")
  347. # 4.2 新增:校验裁剪区域合法性(避免超出图片尺寸)
  348. c_left, c_upper, c_right, c_lower = crop_region
  349. if c_right > img_width or c_lower > img_height or c_left < 0 or c_upper < 0:
  350. raise ValueError(f"裁剪区域超出图片尺寸!图片尺寸:({img_width}, {img_height}),裁剪区域:{crop_region}")
  351. # 4.3 执行裁剪并保存裁剪后的图片
  352. cropped_img = img.crop(crop_region)
  353. # 4.4 压缩并保存裁剪后的图片
  354. file_ext = os.path.splitext(cropped_file_path)[1].lower() # 获取文件后缀(小写,兼容JPG/Jpg等)
  355. try:
  356. if IMAGE_COMPRESS_ENABLE:
  357. # 区分图片格式,应用不同压缩策略
  358. if file_ext in ['.jpg', '.jpeg']:
  359. # JPG/JPEG格式:质量压缩(有损压缩,平衡画质和大小)
  360. cropped_img.save(
  361. cropped_file_path,
  362. format='JPEG', # 强制指定JPEG格式,确保压缩生效
  363. quality=IMAGE_COMPRESS_QUALITY, # 压缩质量(配置项中定义)
  364. optimize=True, # 开启优化,提升压缩效果(减小文件体积)
  365. progressive=True # 生成渐进式JPG,网页加载更友好(可选,不影响压缩效果)
  366. )
  367. print(f"JPG图片压缩保存成功,压缩质量:{IMAGE_COMPRESS_QUALITY},保存到:{cropped_file_path}")
  368. else:
  369. cropped_img.save(cropped_file_path, format='JPEG')
  370. print(f"未开启压缩,裁剪图片直接保存到:{cropped_file_path}")
  371. except Exception as e:
  372. # 压缩失败兜底:直接保存未压缩的JPG图片,不中断后续流程
  373. cropped_img.save(cropped_file_path, format='JPEG')
  374. print(f"JPG图片压缩失败,已直接保存未压缩版本:{str(e)}")
  375. # 5. 裁剪成功后,删除原始截图文件(带异常处理)
  376. try:
  377. if os.path.exists(cropped_file_path): # 确保裁剪文件生成成功,再删原图
  378. os.remove(local_file_path)
  379. print(f"原始截图文件已删除:{local_file_path}")
  380. else:
  381. print(f"裁剪文件未生成,暂不删除原始截图:{local_file_path}")
  382. except OSError as e:
  383. print(f"删除原始截图文件失败(文件可能被占用):{str(e)}")
  384. # 6. 返回裁剪+压缩后的文件路径
  385. return cropped_file_path
  386. def screenshot_target_page_to_local_then_oss(target_page, local_file_path=None, oss_file_path=None, full_page=True, crop_region=None):
  387. """
  388. 对target_page截图保存到本地→裁剪图片(删原图)→上传裁剪后的图片到OSS(修改后整合版)
  389. :param target_page: Playwright的Page对象(已加载目标页面)
  390. :param local_file_path: 本地截图文件的完整路径(可选)
  391. :param oss_file_path: OSS上的文件路径(可选)
  392. :param full_page: 是否截取全屏(True=全屏,False=当前可视区域)
  393. :param crop_region: 自定义裁剪区域(元组:(left, upper, right, lower)),可选
  394. :return: 裁剪后文件路径 + OSS文件访问链接
  395. """
  396. # 1. 初始化本地截图目录(不存在则创建,避免保存文件时报错)
  397. os.makedirs(LOCAL_SCREENSHOT_DIR, exist_ok=True)
  398. # 2. 生成默认的本地文件路径(如果用户未指定)
  399. if not local_file_path:
  400. current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
  401. local_file_name = f"{current_time}_target_page.jpg"
  402. local_file_path = os.path.join(LOCAL_SCREENSHOT_DIR, local_file_name)
  403. # 3. 对target_page截图并保存到本地(核心修改:指定path参数)
  404. print(f"正在对target_page截图,将保存到:{local_file_path}")
  405. target_page.screenshot(
  406. path=local_file_path, # 保存到本地文件的核心参数
  407. full_page=full_page, # 是否全屏截图
  408. omit_background=False, # 是否忽略背景
  409. timeout=10000 # 截图超时时间
  410. )
  411. print(f"本地截图保存成功")
  412. # 4. 调用裁剪函数,处理原图(裁剪+删原图)
  413. cropped_file_path = crop_local_screenshot(
  414. local_file_path=local_file_path,
  415. crop_region=crop_region
  416. )
  417. # 5. 初始化OSS Bucket
  418. bucket = init_oss_bucket()
  419. # 6. 修改:上传裁剪后的图片,而非原始截图
  420. oss_file_url = upload_local_screenshot_to_oss(bucket, cropped_file_path, oss_file_path)
  421. # 6. 返回本地文件路径和OSS链接,方便后续使用
  422. return cropped_file_path, oss_file_url