# Runnable version.
# Fetches Taobao search data: https://s.taobao.com/
# The search endpoint redirects anonymous clients to the login page, so the
# cookie of a logged-in session is required.
#
# SECURITY: the session cookie must never be committed to source control.
# Supply it through the TAOBAO_COOKIE environment variable (the raw value of
# the browser's "Cookie" request header). Any cookie that was previously
# hard-coded in this file should be considered leaked and be invalidated.
import csv
import hashlib
import json
import os
import random
import re
import time
from pprint import pprint

try:
    import requests
except ImportError:  # keeps the signing helpers importable without the HTTP dependency
    requests = None

url = "https://h5api.m.taobao.com/h5/mtop.relationrecommend.wirelessrecommend.recommend/2.0/"

# Name of the environment variable holding the logged-in Cookie header value.
_COOKIE_ENV_VAR = "TAOBAO_COOKIE"
# appKey sent with every request and mixed into the signature.
_APP_KEY = "12574478"
# Percent-encoded UTF-8 search keyword used when the caller gives none
# (this is the keyword the original script searched for).
_DEFAULT_QUERY = "%E9%9D%9E%E5%A4%84%E6%96%B9%E8%8D%AF"
_LOG_DIR = './python/log'
_LOG_FILE = './python/log/taobao.log'
_CSV_FILE = './python/log/taobao.csv'
# Seconds to wait for the server before giving up on one attempt.
_REQUEST_TIMEOUT = 15
# Seconds to back off after a page that carried no item data.
_FAILURE_BACKOFF = 60 * 2
# Captures everything between the JSONP wrapper's "(" and its final ")".
_JSONP_RE = re.compile(r'mtopjsonp\d+\((.*)\)', re.S)
# NOTE(review): the original title clean-up was two str.replace calls whose
# search strings were empty (no-ops); presumably they removed highlight tags
# such as <span class=H>...</span> -- confirm against a real response.
_SPAN_TAG_RE = re.compile(r'</?span[^>]*>')

_USER_AGENT_POOL = [
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/114.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/108.0.5359.125 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/106.0.5249.119 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/115.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36",
    "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36",
]


def _cookie_value(cookie_header, name):
    """Return the value of cookie *name* in a Cookie header string, or ''."""
    for part in cookie_header.split(';'):
        key, sep, value = part.strip().partition('=')
        if sep and key == name:
            return value
    return ''


class Taobao(object):
    """Holds the session data needed to sign and send mtop search requests.

    Attributes:
        headers: Empty dict (kept for compatibility; not read by this file).
        cookies: Raw Cookie header value of a logged-in session.
        em: Signing token, i.e. the part of the ``_m_h5_tk`` cookie before
            the first underscore.
        proxies: Proxy URLs to pick from; '' means a direct connection.
    """

    # Without a valid signature the server answers, for example:
    #   mtopjsonp6({"api":"mtop.relationrecommend.wirelessrecommend.recommend",
    #               "data":{},"ret":["FAIL_SYS_ILLEGAL_ACCESS::非法请求"]
    # (i.e. "illegal request"). The sign parameter changes on every request,
    # so it has to be recomputed each time (reverse-engineered from the page):
    #   eE(em.token + "&" + eC + "&" + eS + "&" + ep.data)

    def __init__(self, cookies=None, token=None):
        """Create a session holder.

        Args:
            cookies: Cookie header value. When None it is read from the
                TAOBAO_COOKIE environment variable ('' if that is unset).
            token: Signing token. When None it is derived from the
                ``_m_h5_tk`` cookie so the two can never disagree.
        """
        self.headers = {}
        if cookies is None:
            cookies = os.environ.get(_COOKIE_ENV_VAR, "")
        self.cookies = cookies
        if token is None:
            token = _cookie_value(cookies, '_m_h5_tk').split('_')[0]
        self.em = token
        # Add entries such as 'http://host:port' to route through proxies.
        self.proxies = ['']

    def getSign(self, eC, page, q=_DEFAULT_QUERY):
        """Build the request payload and its MD5 signature.

        Args:
            eC: Millisecond timestamp as a string (also sent as ``t``).
            page: 1-based result page number.
            q: Percent-encoded search keyword.

        Returns:
            A ``(sign, data)`` tuple: the hex MD5 of
            ``token & eC & appKey & data`` and the compact JSON payload to
            send as the ``data`` query parameter.
        """
        sign_param = {
            "device": "HMA-AL00",
            "isBeta": "false",
            "grayHair": "false",
            "from": "nt_history",
            "brand": "HUAWEI",
            "info": "wifi",
            "index": "4",
            "rainbow": "",
            "schemaType": "auction",
            "elderHome": "false",
            "isEnterSrpSearch": "true",
            "newSearch": "false",
            "network": "wifi",
            "subtype": "",
            "hasPreposeFilter": "false",
            "prepositionVersion": "v2",
            "client_os": "Android",
            "gpsEnabled": "false",
            "searchDoorFrom": "srp",
            "debug_rerankNewOpenCard": "false",
            "homePageVersion": "v7",
            "searchElderHomeOpen": "false",
            "search_action": "initiative",
            "sugg": "_4_1",
            "sversion": "13.6",
            "style": "list",
            "ttid": "600000@taobao_pc_10.7.0",
            "needTabs": "true",
            "areaCode": "CN",
            "vm": "nw",
            "countryNum": "156",
            "m": "pc",
            "page": page,
            "n": 48,
            "q": q,
            "qSource": "url",
            "pageSource": "",
            "tab": "all",
            "pageSize": 48,
            "totalPage": 100,
            "totalResults": 4800,
            "sourceS": "0",
            "sort": "_coefp",
            "bcoffset": "",
            "ntoffset": "",
            "filterTag": "",
            "service": "",
            "prop": "",
            "loc": "",
            "start_price": "",
            "end_price": "",
            "startPrice": "",
            "endPrice": "",
            "itemIds": "",
            "p4pIds": "",
            "p4pS": "",
            "categoryp": "",
            "ha3Kvpairs": "",
            # Taken from the session's "cna" cookie rather than hard-coded.
            "myCNA": _cookie_value(self.cookies, 'cna'),
        }
        # Compact separators produce the space-free JSON the signature is
        # computed over, without deleting spaces that belong to values.
        compact = (",", ":")
        params_json = json.dumps(sign_param, separators=compact)
        n_data = json.dumps(
            {"appId": "34385", "params": params_json}, separators=compact
        )
        raw = "&".join((self.em, eC, _APP_KEY, n_data))
        sign = hashlib.md5(raw.encode("utf-8")).hexdigest()
        return sign, n_data


def _fetch(tb, params, headers):
    """GET one page, trying as many random proxies as are configured.

    Returns the response, or None when every attempt failed.
    """
    for _ in range(len(tb.proxies)):
        proxy = random.choice(tb.proxies)
        try:
            return requests.get(
                url,
                params=params,
                headers=headers,
                proxies={'http': proxy, 'https': proxy},
                timeout=_REQUEST_TIMEOUT,
            )
        except requests.exceptions.RequestException as e:
            print(f"使用代理 {proxy} 失败: {e}")
    print("所有代理都尝试过,仍然无法连接。")
    return None


def _parse_jsonp(text):
    """Strip the ``mtopjsonpN( ... )`` wrapper and decode the JSON inside.

    Returns the decoded object, or None when *text* is not such a response.
    """
    match = _JSONP_RE.search(text)
    if match is None:
        return None
    try:
        return json.loads(match.group(1))
    except ValueError:
        return None


def _extract_rows(payload):
    """Turn a decoded response into a list of row dicts.

    Returns None when the response has no ``data.itemsArray`` (for example an
    error or anti-bot reply), so the caller can back off.
    """
    try:
        items = payload['data']['itemsArray']
    except (KeyError, TypeError):
        return None
    return [
        {
            'title': _SPAN_TAG_RE.sub('', item.get('title', '')),
            'img': item.get('pic_path', ''),
            'price': item.get('price', ''),
            'procity': item.get('procity', ''),
            'realSales': item.get('realSales', ''),
            'shopName': item.get('nick', ''),
        }
        for item in items
    ]


def main():
    """Crawl result pages 1-99 and write every item to the CSV file."""
    if requests is None:
        raise SystemExit("The 'requests' package is required: pip install requests")
    tb = Taobao()
    if not tb.cookies:
        raise SystemExit(
            f"Set the {_COOKIE_ENV_VAR} environment variable to the Cookie "
            "header of a logged-in Taobao session."
        )

    # Make sure the output directory exists before anything is opened in it.
    os.makedirs(_LOG_DIR, exist_ok=True)

    # Open the CSV once: reopening it in write mode for every page would
    # discard the rows of all previous pages.
    with open(_CSV_FILE, mode="w", newline='', encoding="utf-8") as f:
        writer = csv.writer(f)
        # Header row.
        writer.writerow(['标题', '图片链接', '价格', '地区', '销量', '店铺'])

        for page in range(1, 100):
            # Random pause between pages to stay below rate limits.
            time.sleep(random.randint(10, 20))
            date_time = str(int(time.time() * 1000))
            sign, n = tb.getSign(eC=date_time, page=page)
            print(sign)
            params = {
                'jsv': '2.7.4',
                'appKey': _APP_KEY,
                't': date_time,
                'sign': sign,
                'api': 'mtop.relationrecommend.wirelessrecommend.recommend',
                'v': '2.0',
                'timeout': '10000',
                'type': 'jsonp',
                'dataType': 'jsonp',
                'callback': 'mtopjsonp' + str(random.randint(1, 30)),
                'data': n,
            }
            headers = {
                "User-Agent": random.choice(_USER_AGENT_POOL),
                "Referer": "https://s.taobao.com/",
                "cookie": tb.cookies,
                # "br" is not advertised: the body can only be decoded when
                # an optional Brotli package happens to be installed.
                "Accept-Encoding": "gzip, deflate",
                "Content-Type": "application/json; charset=utf-8",
            }

            resp = _fetch(tb, params, headers)
            if resp is None:
                continue

            resp.encoding = 'utf-8'
            html = resp.text
            print(html)
            if html:
                print('请求成功')
            elif os.path.exists(_LOG_FILE):
                # Best-effort fallback: reuse a previously saved response.
                with open(_LOG_FILE, 'r', encoding='utf-8') as log:
                    html = log.read()

            payload = _parse_jsonp(html)
            rows = None if payload is None else _extract_rows(payload)
            if rows is None:
                # No item data (malformed, error or blocked reply): back off.
                time.sleep(_FAILURE_BACKOFF)
                print('采集数据失败,继续')
                continue

            for dit in rows:
                writer.writerow(list(dit.values()))
                print(dit)
            # Persist each page immediately; the crawl runs for a long time.
            f.flush()


if __name__ == '__main__':
    main()