# oss_config.py
import os
from datetime import datetime
from urllib.parse import quote

import oss2
import pymysql
from dotenv import load_dotenv
from PIL import Image
  7. # ---------------------- OSS 配置项 ----------------------
  8. OSS_ACCESS_KEY_ID = 'LTAI5tDwjfteBvivYN41r8sJ'
  9. OSS_ACCESS_KEY_SECRET = 'yowuOGi2nYYnrqGpO3qcz94C4brcPp'
  10. OSS_ENDPOINT = "oss-cn-shenzhen.aliyuncs.com"
  11. OSS_BUCKET_NAME = "zhijiayun-jiansuo"
  12. OSS_PREFIX = "scrape_data/"
  13. # 本地截图配置
  14. LOCAL_SCREENSHOT_DIR = "local_screenshots" # 本地截图保存目录
  15. LOCAL_SCREENSHOT_NAME = None # 自动生成文件名,无需手动指定
  16. LOCAL_CROPPED_DIR = "./local_cropped_screenshots" # 裁剪后图片保存目录
  17. # 图片压缩配置
  18. IMAGE_COMPRESS_ENABLE = True # 是否开启图片压缩(True=开启,False=关闭)
  19. IMAGE_COMPRESS_QUALITY = 30 # jpg/jpeg格式压缩质量(1-95,数值越大画质越好,文件越大,推荐80-90)
  20. IMAGE_COMPRESS_PNG_LEVEL = 9 # png格式压缩级别(0-9,数值越大压缩率越高,速度越慢,推荐5-7)
# ---------------------- Utility functions ----------------------
  22. def init_local_screenshot_dir():
  23. """
  24. 初始化本地截图目录(如果不存在则创建)
  25. """
  26. if not os.path.exists(LOCAL_SCREENSHOT_DIR):
  27. os.makedirs(LOCAL_SCREENSHOT_DIR)
  28. print(f"本地截图目录【{LOCAL_SCREENSHOT_DIR}】创建成功")
  29. else:
  30. print(f"本地截图目录【{LOCAL_SCREENSHOT_DIR}】已存在")
  31. def init_oss_bucket():
  32. """
  33. 初始化OSS Bucket对象,用于后续上传操作
  34. """
  35. try:
  36. # 创建认证对象
  37. auth = oss2.Auth(OSS_ACCESS_KEY_ID, OSS_ACCESS_KEY_SECRET)
  38. bucket = oss2.Bucket(auth, OSS_ENDPOINT, OSS_BUCKET_NAME)
  39. # 验证Bucket是否可访问(可选)
  40. bucket.get_bucket_info()
  41. print("OSS Bucket 初始化成功")
  42. return bucket
  43. except Exception as e:
  44. print(f"OSS Bucket 初始化失败:{str(e)}")
  45. raise
  46. def upload_local_screenshot_to_oss(bucket, local_file_path, oss_file_path=None):
  47. """
  48. 将截图内容上传到OSS
  49. :param bucket: 初始化好的OSS Bucket对象
  50. :param screenshot_content: 截图内容(字节流,或本地文件路径)
  51. :param oss_file_path: 上传到OSS后的文件路径(如screenshots/20260130_100000_target_page.jpg)
  52. :return: 上传后的OSS文件公网访问链接
  53. """
  54. # 1. 校验本地文件是否存在
  55. if not os.path.exists(local_file_path):
  56. raise FileNotFoundError(f"本地截图文件不存在:{local_file_path}")
  57. # 2. 生成默认的OSS文件路径(如果用户未指定)
  58. if not oss_file_path:
  59. # 提取本地文件名作为OSS文件名,保持一致性
  60. local_file_name = os.path.basename(local_file_path)
  61. oss_file_path = f"screenshots/{local_file_name}"
  62. try:
  63. # 3. 上传本地文件到OSS(核心修改:使用put_object_from_file)
  64. bucket.put_object_from_file(oss_file_path, local_file_path)
  65. # 4. 构造OSS文件的公网访问链接
  66. oss_file_url = f"https://{OSS_BUCKET_NAME}.{OSS_ENDPOINT}/{oss_file_path}"
  67. print(f"本地截图上传OSS成功,访问链接:{oss_file_url}")
  68. return oss_file_url
  69. except Exception as e:
  70. print(f"本地截图上传OSS失败:{str(e)}")
  71. raise
# ---------------------- Cropping: crop, compress, save, then delete the original ----------------------
  73. def crop_local_screenshot(local_file_path, cropped_file_path=None, crop_region=None):
  74. """
  75. 裁剪本地截图文件(完整实现:裁剪后图片压缩,裁剪+保存裁剪文件+删除原图)
  76. :param local_file_path: 原始本地截图文件路径
  77. :param cropped_file_path: 裁剪后图片的保存路径(可选)
  78. :param crop_region: 裁剪区域(元组,格式:(left, upper, right, lower)),可选
  79. :return: 裁剪后图片的本地路径
  80. """
  81. # 1. 校验原始文件是否存在
  82. if not os.path.exists(local_file_path):
  83. raise FileNotFoundError(f"原始截图文件不存在:{local_file_path}")
  84. # 2. 初始化裁剪后文件目录(自动创建)(你的原有逻辑,保持不变)
  85. os.makedirs(LOCAL_CROPPED_DIR, exist_ok=True)
  86. # 3. 新增:生成默认裁剪后文件路径(避免重名,带_cropped标识)
  87. if not cropped_file_path:
  88. file_name = os.path.basename(local_file_path)
  89. file_name_no_ext, file_ext = os.path.splitext(file_name)
  90. cropped_file_name = f"{file_name_no_ext}_cropped{file_ext}"
  91. cropped_file_path = os.path.join(LOCAL_CROPPED_DIR, cropped_file_name)
  92. with Image.open(local_file_path) as img:
  93. img_width, img_height = img.size
  94. print(f"获取截图尺寸:宽={img_width},高={img_height}") # 打印尺寸,方便排查
  95. if not crop_region:
  96. left = int(img_width * 0.1)
  97. upper = 0
  98. right = int(img_width * 0.9)
  99. lower = int(img_height * 0.3)
  100. crop_region = (left, upper, right, lower)
  101. print(f"未指定裁剪区域,默认裁剪中间30%区域:{crop_region}")
  102. # 4.2 新增:校验裁剪区域合法性(避免超出图片尺寸)
  103. c_left, c_upper, c_right, c_lower = crop_region
  104. if c_right > img_width or c_lower > img_height or c_left < 0 or c_upper < 0:
  105. raise ValueError(f"裁剪区域超出图片尺寸!图片尺寸:({img_width}, {img_height}),裁剪区域:{crop_region}")
  106. # 4.3 执行裁剪并保存裁剪后的图片
  107. cropped_img = img.crop(crop_region)
  108. # 4.4 压缩并保存裁剪后的图片
  109. file_ext = os.path.splitext(cropped_file_path)[1].lower() # 获取文件后缀(小写,兼容JPG/Jpg等)
  110. try:
  111. if IMAGE_COMPRESS_ENABLE:
  112. # 区分图片格式,应用不同压缩策略
  113. if file_ext in ['.jpg', '.jpeg']:
  114. # JPG/JPEG格式:质量压缩(有损压缩,平衡画质和大小)
  115. cropped_img.save(
  116. cropped_file_path,
  117. format='JPEG', # 强制指定JPEG格式,确保压缩生效
  118. quality=IMAGE_COMPRESS_QUALITY, # 压缩质量(配置项中定义)
  119. optimize=True, # 开启优化,提升压缩效果(减小文件体积)
  120. progressive=True # 生成渐进式JPG,网页加载更友好(可选,不影响压缩效果)
  121. )
  122. print(f"JPG图片压缩保存成功,压缩质量:{IMAGE_COMPRESS_QUALITY},保存到:{cropped_file_path}")
  123. else:
  124. cropped_img.save(cropped_file_path, format='JPEG')
  125. print(f"未开启压缩,裁剪图片直接保存到:{cropped_file_path}")
  126. except Exception as e:
  127. # 压缩失败兜底:直接保存未压缩的JPG图片,不中断后续流程
  128. cropped_img.save(cropped_file_path, format='JPEG')
  129. print(f"JPG图片压缩失败,已直接保存未压缩版本:{str(e)}")
  130. # 5. 裁剪成功后,删除原始截图文件(带异常处理)
  131. try:
  132. if os.path.exists(cropped_file_path): # 确保裁剪文件生成成功,再删原图
  133. os.remove(local_file_path)
  134. print(f"原始截图文件已删除:{local_file_path}")
  135. else:
  136. print(f"裁剪文件未生成,暂不删除原始截图:{local_file_path}")
  137. except OSError as e:
  138. print(f"删除原始截图文件失败(文件可能被占用):{str(e)}")
  139. # 6. 返回裁剪+压缩后的文件路径
  140. return cropped_file_path
  141. def screenshot_target_page_to_local_then_oss(target_page, local_file_path=None, oss_file_path=None, full_page=True, crop_region=None):
  142. """
  143. 对target_page截图保存到本地→裁剪图片(删原图)→上传裁剪后的图片到OSS(修改后整合版)
  144. :param target_page: Playwright的Page对象(已加载目标页面)
  145. :param local_file_path: 本地截图文件的完整路径(可选)
  146. :param oss_file_path: OSS上的文件路径(可选)
  147. :param full_page: 是否截取全屏(True=全屏,False=当前可视区域)
  148. :param crop_region: 自定义裁剪区域(元组:(left, upper, right, lower)),可选
  149. :return: 裁剪后文件路径 + OSS文件访问链接
  150. """
  151. # 1. 初始化本地截图目录(不存在则创建,避免保存文件时报错)
  152. os.makedirs(LOCAL_SCREENSHOT_DIR, exist_ok=True)
  153. # 2. 生成默认的本地文件路径(如果用户未指定)
  154. if not local_file_path:
  155. current_time = datetime.now().strftime("%Y%m%d_%H%M%S")
  156. local_file_name = f"{current_time}_target_page.jpg"
  157. local_file_path = os.path.join(LOCAL_SCREENSHOT_DIR, local_file_name)
  158. # 3. 对target_page截图并保存到本地(核心修改:指定path参数)
  159. print(f"正在对target_page截图,将保存到:{local_file_path}")
  160. target_page.screenshot(
  161. path=local_file_path, # 保存到本地文件的核心参数
  162. full_page=full_page, # 是否全屏截图
  163. omit_background=False, # 是否忽略背景
  164. timeout=10000 # 截图超时时间
  165. )
  166. print(f"本地截图保存成功")
  167. # 4. 调用裁剪函数,处理原图(裁剪+删原图)
  168. cropped_file_path = crop_local_screenshot(
  169. local_file_path=local_file_path,
  170. crop_region=crop_region
  171. )
  172. # 5. 初始化OSS Bucket
  173. bucket = init_oss_bucket()
  174. # 6. 修改:上传裁剪后的图片,而非原始截图
  175. oss_file_url = upload_local_screenshot_to_oss(bucket, cropped_file_path, oss_file_path)
  176. # 6. 返回本地文件路径和OSS链接,方便后续使用
  177. return cropped_file_path, oss_file_url