# https://www.nike.com/w/mens-jackets-vests-50r7yznik1
import requests, random, csv
from selenium import webdriver
from copy import deepcopy
from time import sleep
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
import json
class Nike:
    """Scrape Nike's men's jackets & vests listing into ``./nike.csv``.

    The listing is paged through the product-wall JSON API with ``requests``;
    every colour variant's detail page is then opened in headless Chrome to
    read its description, size labels and image list.  One CSV row is written
    per size, plus one row per additional image.  The column names appear to
    follow Shopify's product-import layout (NOTE(review): confirm).
    """

    # XPath of the product description paragraph on a detail page.
    _DESCRIPTION_XPATH = '//p[@class="nds-text css-pxxozx e1yhcai00 text-align-start appearance-body1 color-primary weight-regular"]'
    # XPath of the size labels on a detail page.
    _SIZE_LABEL_XPATH = '//div[@class="d-sm-flx flx-dir-sm-c flx-dir-lg-cr"]//label'
    # XPath of the listing header that carries the product count.
    _WALL_TITLE_XPATH = '//h1[@class="wall-header__title css-r2u0ax"]'

    def __init__(self):
        """Set up request headers/cookies, endpoints and the CSV row template."""
        self.headers = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "anonymousid": "DB9F77B467244D3380F46D77A949CA61",
            "nike-api-caller-id": "nike:dotcom:browse:wall.client:2.0",
            "origin": "https://www.nike.com",
            "priority": "u=1, i",
            "referer": "https://www.nike.com/",
            "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
        }
        # NOTE(review): these look like cookies captured from one browser
        # session; they presumably expire and need refreshing - confirm.
        self.cookies = {
            "ni_d": "F2BF6F37-898C-4D48-a34E-2735758BB01E",
            "anonymousId": "DB9F77B467244D3380F46D77A949CA61",
            "_gcl_au": "1.1.619564759.1744682120",
            "KP_UIDz": "0an3bv11lykiNG7ds31Par3jp36lAeuCHPmIAxyMWxTU89Pru1w0TW0TFX77pfthMP7FKp1efxaaVQvAxGUyosXvsW4icE527PPH5Hi3C3P6GWtvUzneizHWwEq4pFauffqBBdPYSZRrWjS9bG5CDNhRsFngmcw2xoiZtzF",
            "NIKE_COMMERCE_COUNTRY": "US",
            "NIKE_COMMERCE_LANG_LOCALE": "en_US",
            "CONSUMERCHOICE": "us/en_us",
            "CONSUMERCHOICE_SESSION": "t",
            "_fbp": "fb.1.1744684405289.451553942058",
            "_gid": "GA1.2.27798748.1744684406",
            "FPID": "FPID2.2.yZ4FjwYwvazCmV32PJZPzNfdSchRzMCrKM844HvBbeA%3D.1744684406",
            "FPLC": "G3KboO2KatJyvzc0KToZ%2FHDDOI2Ny1vs7tWWW7KraLC%2BO%2BhwEPRZ9icFNQmgxoiRp8fYqnQOHEkUwq7chpctA8iNtJM1op9z437T5aYsKGerPfGBISu%2B3B5kN%2FkHbg%3D%3D",
            "FPAU": "1.1.619564759.1744682120",
            "_scid": "gCxIFzllXSzu651WAMv30KsxBJV97vaI",
            "_tt_enable_cookie": "1",
            "_ttp": "01JRVMPP7R5GS0HV1A4S27CA4K_.tt.1",
            "_pin_unauth": "dWlkPU56TXpZbVEzTm1VdFltSTBNeTAwTVdRNUxXRTJZakl0T0RSak1tUm1aVEF6WVdabQ",
            "_ScCbts": "%5B%5D",
            "_clck": "1glsgic%7C2%7Cfv3%7C0%7C1931",
            "geoloc": "cc=CN,rc=JX,tp=vhigh,tz=GMT+8,la=28.68,lo=115.88",
            "KP_UIDz-ssn": "02HkJtSRKSRm1TrWpAUoyNrgtTnv19X6WtwWuPOFDABox4wHW9phR13pFhZ6LM38uHhiBSHlieeU40DvgiXY0FJdgxCy41NE6Asx6XiFNAD9hEAEuRAxMbiG5T9rqn78bQnrNae6IGI1KEdDGo738gcIRKPqBEtQlQsgV6xUA0",
            "ak_bmsc": "C29EB1E3627F40B86EFCE390E7E65931~000000000000000000000000000000~YAAQJArgeustYwuWAQAAHaMNOBuj9oS6/bFAuRYG7o+LkJOE92vQdRE9krsfJG3PnJNv+u9jzRs5hu9w+upxvBpNcODWOvRmreGwKhtXj+XwikbPqFHvDt5dw9nAuGNgF01ZzLp6n+RazpTRHzeWTray+b/a8Wp9J76LXYs8/Az+M9Ddseseka8ic4XfsDkG2TZ3mUqsGqU0FyK8JpuDcVfpMy9TCKJytEiqcLEPJYQJn1Ttng/uWCUws/Gj3iLh2e5f178BT4j1E2uoOjeLce0bZnBkxSNc5SSezibdDCTyu+Qjj1BrM02OMVQbg//shfcvqAkvHnTnt2XoGu/iE7IP60fbKG8i0OT6q+mlpIYrBwtr7oKyPtif3eUhcjnvMLXunGAnWnw=",
            "AKA_A2": "A",
            "FPGSID": "1.1744701395.1744701673.G-QTVTHYLBQS.I88c9y46yCBmQdTY-Vtsxw",
            "_scid_r": "qaxIFzllXSzu651WAMv30KsxBJV97vaIy7META",
            "_clsk": "110tng2%7C1744701682774%7C4%7C0%7Ci.clarity.ms%2Fcollect",
            "_ga": "GA1.2.1739174720.1744684406",
            "ttcsid_CCUOCJ3C77UDPV42FEH0": "1744695724762.3.1744701683304",
            "ttcsid": "1744695724762.3.1744701683304",
            "_rdt_uuid": "1744684405289.f50f1e4b-d292-4122-8c20-3e0fffb749b6",
            "_uetsid": "03079e1019a211f088bf0f48b719b208",
            "_uetvid": "0307c77019a211f0affb0ffca4af3cf3",
            "pixlee_analytics_cookie_legacy": "%7B%22CURRENT_PIXLEE_USER_ID%22%3A%22b21e33f4-4a33-3b75-cc8a-c25d5de98c8e%22%2C%22TIME_SPENT%22%3A12%7D",
            "_ga_QTVTHYLBQS": "GS1.1.1744695718.3.1.1744701735.60.0.0",
            "bm_sv": "074E6DBA46CBFD3C65DEEEFCCA154009~YAAQ1ONH0kjCzDSWAQAA7eZTOBuC/P137rlqicpRzKsDhJxgMVIEIRGuZUmklrII9jpfm+ipPtugMXFpD9v/wE9oNt+pbvjuyw4L4TnQPQuHuA1MpzPe/SVJBDok1FT3E5Fd9WyrmaXg78oCYMBHckShkrbCFXUnWjPI6cTdcjn3mzHQgYgOATKJL/ZIiohM1rCy4wx3EpqwMCijVgiY6a4z3N8hqLASBxyRlPoTVf17yxLq5qbgQro7quHTdNwW~1"
        }
        self.url = "https://api.nike.com/discover/product_wall/v1/marketplace/US/language/en/consumerChannelId/d9a5bc42-4b9c-4976-858a-f159cf99c647"
        self.index_url = 'https://www.nike.com/w/mens-jackets-vests-50r7yznik1'
        self.params = {
            "path": "/w/mens-jackets-vests-50r7yznik1",
            "attributeIds": "72ff5f48-1b58-4042-8d72-f8a1c057ee06,0f64ecc7-d624-4e91-b171-b83a03dd8550",
            "queryType": "PRODUCTS",
            "anchor": "0",
            "count": "24"
        }
        self.id = 0  # the id field is not written to the CSV file
        # CSV column order; also the single source of truth for the row template.
        self.field_names = ['Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags', 'Published', 'Option1 Name',
                            'Option1 Value', 'Option2 Name', 'Option2 Value', 'Option3 Name', 'Option3 Value',
                            'Variant SKU', 'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
                            'Variant Inventory Policy', 'Variant Fulfillment Service', 'Variant Price',
                            'Variant Compare At Price', 'Variant Requires Shipping', 'Variant Taxable',
                            'Variant Barcode', 'Image Src', 'Image Position', 'Image Alt Text', 'Gift Card',
                            'SEO Title', 'SEO Description', 'Variant Image', 'Status', 'Collection']
        # Blank row template: every column present and empty.
        self.init_data = dict.fromkeys(self.field_names, '')
        self.file = None
        self.writer = None
        self.total = 0

    def get_driver(self, url, xpath_txt):
        """Open *url* in a new headless Chrome and return the driver.

        The page is re-requested, without limit, until an element matching
        *xpath_txt* can be located.  The caller owns the returned driver and
        must dispose of it.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.page_load_strategy = "none"
        driver = webdriver.Chrome(options=options)
        try:
            driver.implicitly_wait(20)
            driver.maximize_window()
            while True:
                try:
                    print('正在获取', url, '的页面数据')
                    driver.get(url)
                    driver.find_element('xpath', xpath_txt)
                except Exception:
                    # Only ordinary errors trigger a retry, so Ctrl-C still
                    # breaks out of this otherwise endless loop.
                    print(url, '没定位到,重新请求...')
                    continue
                # Debugging aid: self.writer_to_file(driver.page_source, 'w', 'utf-8')
                return driver
        except BaseException:
            # Do not leave a browser running if we are interrupted or setup fails.
            driver.quit()
            raise

    def writer_to_file(self, data, mode, encoding=None):
        """Dump *data* to ``./text.html`` (debugging aid).

        *mode* is passed to ``open``; *encoding* is only applied for text
        modes, because ``open`` rejects an encoding in binary mode.
        """
        # Decide on the file mode, not on the encoding name: the encoding may
        # be None, and names such as 'gbk' contain a 'b' too.
        if 'b' in mode:
            with open('./text.html', mode) as fh:
                fh.write(data)
        else:
            with open('./text.html', mode, encoding=encoding) as fh:
                fh.write(data)
        print('写入文件成功!')

    def random_sleep(self):
        """Pause for a random 0.5-1.5 seconds between API requests."""
        sleep(random.random() + 0.5)

    def save_csv(self, data):
        """Write one CSV row from *data*.

        Only the columns in ``self.field_names`` are written; extra keys are
        ignored and a missing column raises ``KeyError``.
        """
        self.writer.writerow({name: data[name] for name in self.field_names})

    def __del__(self):
        """Close the CSV file if one was opened."""
        # self.file stays None until parse() runs, and the attribute may not
        # exist at all if __init__ failed part-way.
        file = getattr(self, 'file', None)
        if file is not None:
            file.close()

    def get_total_page(self, url):
        """Return the number in the listing header of *url* as an int.

        Despite the name, the caller treats this value as the total product
        count and derives the page count from it.
        """
        driver = self.get_driver(url, self._WALL_TITLE_XPATH)
        try:
            res = driver.find_element('xpath', self._WALL_TITLE_XPATH + '/span').text.strip('()')
        finally:
            driver.quit()
        return int(res)

    def get_raw_data_list(self, page):
        """Fetch the product groupings starting at offset *page*.

        *page* is stored as the ``anchor`` query parameter.  Raises
        ``requests.HTTPError`` on a non-success status and ``KeyError`` if the
        response has no ``productGroupings`` key.
        """
        self.params['anchor'] = str(page)
        response = requests.get(self.url, headers=self.headers, cookies=self.cookies,
                                params=self.params, timeout=30)
        response.raise_for_status()
        # A null grouping list (e.g. past the last page) is treated as empty.
        raw_data_list = response.json()['productGroupings'] or []
        self.random_sleep()
        return raw_data_list

    def switch_url(self, img_json):
        """Return the media URL for one ``contentImages`` entry.

        Image cards get their portrait URL with the third-from-last path
        segment replaced by the ``t_PDP_1728_v1`` rendition; video cards
        return their ``videoURL``.  Any other card type returns ``-1``.
        """
        card_type = img_json['cardType']
        if card_type == 'image':
            segments = img_json['properties']['portrait']['url'].split('/')
            segments[-3] = 't_PDP_1728_v1/f_auto,q_auto:eco'
            return '/'.join(segments).strip('/')
        if card_type == 'video':
            print(img_json['properties'].get('portrait'))
            return img_json['properties']['videoURL']
        print('错误!!!')
        return -1

    def _open_detail_page(self, url):
        """Return ``(driver, next_data)`` for a product detail page.

        Retries without limit until the ``__NEXT_DATA__`` script is present
        and parses as JSON.  The caller owns the returned driver.
        """
        while True:
            driver = None
            try:
                driver = self.get_driver(url, self._DESCRIPTION_XPATH)
                # Explicit wait: up to 10 seconds for the element to appear.
                element = WebDriverWait(driver, 10).until(
                    EC.presence_of_element_located((By.ID, '__NEXT_DATA__')))
                # Read the JSON text inside the <script> tag and parse it.
                return driver, json.loads(element.get_attribute('textContent'))
            except Exception:
                print('没解析到详情字典,现在重新解析')
                # Dispose of the failed browser before starting another one.
                if driver is not None:
                    driver.quit()

    def _write_color_product(self, color_product):
        """Write all CSV rows for one colour variant of a product."""
        data = dict(self.init_data)
        data['Handle'] = color_product['productCode']
        data['Title'] = color_product['copy']['title']
        data['Type'] = 'Clothing,Jackets & Vests'
        data['Published'] = 'TRUE'
        data['Option1 Name'] = 'Color'
        data['Option1 Value'] = color_product['displayColors']['colorDescription']
        data['Option2 Name'] = 'Size'
        data['Variant Inventory Tracker'] = 'Shopify'
        data['Variant Price'] = color_product['prices']['currentPrice']
        data['Variant Compare At Price'] = color_product['prices']['initialPrice']
        data['Collection'] = data['Type']

        driver, detail = self._open_detail_page(color_product['pdpUrl']['url'])
        try:
            data['Body (HTML)'] = driver.find_element('xpath', self._DESCRIPTION_XPATH).text
            size_labels = driver.find_elements('xpath', self._SIZE_LABEL_XPATH)
            print('尺寸个数:', len(size_labels))
            # Products without size labels still get a single variant row.
            sizes = [label.text.strip() for label in size_labels] or ['one size']
            images = detail['props']['pageProps']['selectedProduct']['contentImages']
            # NOTE(review): the source indentation was lost; the extra image
            # rows are assumed to be written once per size - confirm.
            for size in sizes:
                data['Option2 Value'] = size
                print('图片个数:', len(images))
                data['Image Src'] = self.switch_url(images[0])
                if -1 == data['Image Src']:
                    break
                data['Image Position'] = 1
                self.save_csv(data)
                print(data)
                for position, image in enumerate(images[1:], start=2):
                    image_row = dict(self.init_data)
                    image_row['Handle'] = data['Handle']
                    image_row['Image Src'] = self.switch_url(image)
                    if -1 == image_row['Image Src']:
                        break
                    image_row['Image Position'] = position
                    self.save_csv(image_row)
                    print(image_row)
        finally:
            # quit() ends the driver session; runs even if a row fails.
            driver.quit()

    def parse(self):
        """Crawl the whole listing and write ``./nike.csv``.

        The CSV file is closed when the crawl finishes or fails.
        """
        self.file = open('./nike.csv', 'w', newline='', encoding='utf-8-sig')
        try:
            self.writer = csv.DictWriter(self.file, fieldnames=self.field_names)
            self.writer.writeheader()
            self.total = self.get_total_page(self.index_url)
            print(self.total)
            page_size = int(self.params['count'])
            # Ceiling division: no extra empty page when the total is an
            # exact multiple of the page size.
            total_page = -(-self.total // page_size)
            for page in range(total_page):
                for page_data in self.get_raw_data_list(page * page_size):
                    self.id += 1
                    print(self.id, '开始')
                    for color_product in page_data['products']:
                        self._write_color_product(color_product)
                    print(self.id, '结束')
        finally:
            self.file.close()

    def run(self):
        """Entry point: run the full crawl."""
        self.parse()
if __name__ == '__main__':
    # Script entry point: build the scraper and run the full crawl.
    scraper = Nike()
    scraper.run()