import re
import os
import json
import requests
from lxml import etree
def save_to_json(product_data):
    """Append one product record to ``all_products.json`` in the working directory.

    The file holds a single JSON array. On every call the array is read,
    ``product_data`` is appended, and the whole array is written back.

    A missing file, an unparsable file, or a file whose top-level JSON value
    is not a list are all treated as an empty list, so previously stored
    content is discarded in the latter two cases.

    The new content is first written to ``all_products.json.tmp`` and then
    moved over the real file with ``os.replace``; if serialisation fails, the
    existing file is left untouched and the exception propagates.

    This function takes no lock: concurrent callers must serialise their
    calls themselves.

    Args:
        product_data: JSON-serialisable mapping; must contain the keys
            ``'Title'`` and ``'URL'``, which are used in the log line.

    Raises:
        TypeError: if ``product_data`` contains a value ``json`` cannot encode.
        KeyError: if ``'Title'`` or ``'URL'`` is missing (raised after the
            record has already been saved).
    """
    path = "all_products.json"
    tmp_path = path + ".tmp"

    try:
        with open(path, "r", encoding="utf-8") as f:
            existing_products = json.load(f)
    except (json.JSONDecodeError, FileNotFoundError):
        existing_products = []

    # Valid JSON that is not an array (e.g. an object) cannot be appended to;
    # handle it the same way as a corrupt file.
    if not isinstance(existing_products, list):
        existing_products = []

    # Append unconditionally (no de-duplication); assumes the file is cleared
    # before each run.
    existing_products.append(product_data)

    # Write to a sibling temp file first so a failure halfway through the dump
    # cannot truncate the records that are already stored.
    try:
        with open(tmp_path, "w", encoding="utf-8") as f:
            json.dump(existing_products, f, ensure_ascii=False, indent=2)
    except Exception:
        try:
            os.remove(tmp_path)
        except OSError:
            # Best-effort cleanup only; the original error is what matters.
            pass
        raise
    os.replace(tmp_path, path)

    print(f"Saved product: {product_data['Title']} (URL: {product_data['URL']})")
# Scrape listing pages 1..23 from the Boost product-filter endpoint and store
# every product found through save_to_json().

# Listing endpoint; only the ``page`` query parameter varies between requests.
FILTER_URL_TEMPLATE = (
    "https://services.mybcapps.com/bc-sf-filter/filter"
    "?t=1744857545229&_=pf"
    "&shop=apl-athletic-propulsion-labs.myshopify.com"
    "&page={page}&limit=23&sort=manual&display=grid"
    "&collection_scope=159088050249&tag="
    "&product_available=false&variant_available=false"
    "&build_filter_tree=false&check_cache=false"
    "&sort_first=available&callback=BoostPFSFilterCallback"
    "&event_type=page"
)
REQUEST_HEADERS = {
    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/122.0.6261.95 Safari/537.36"
}
# Without a timeout a stalled connection would block the script indefinitely.
REQUEST_TIMEOUT_SECONDS = 30
PRODUCT_URL_PREFIX = 'https://www.athleticpropulsionlabs.com/collections/core/products/'
# The response body is not parsed as a whole: the JSON array that follows
# "products": is cut out, up to the next ,"event_type" marker.
PRODUCTS_PATTERN = re.compile(r'"products":(.*?),"event_type"', re.DOTALL)

for page in range(1, 24):
    url = FILTER_URL_TEMPLATE.format(page=page)
    response = requests.get(url, headers=REQUEST_HEADERS, timeout=REQUEST_TIMEOUT_SECONDS)
    # Fail with a clear HTTP error rather than a confusing parse failure below.
    response.raise_for_status()

    match = PRODUCTS_PATTERN.search(response.text)
    if match is None:
        # No product payload in this response; skip it instead of aborting
        # the remaining pages.
        print(f"Page - {page}: no product payload found in response, skipping")
        continue
    products_list = json.loads(match.group(1))

    for products in products_list:
        mark = products["handle"]
        detail_url = PRODUCT_URL_PREFIX + mark

        # Prefer the USD price; fall back to the generic minimum price when
        # the USD field is absent.
        try:
            price = products["price_min_usd"]
        except KeyError:
            price = products["price_min"]

        title = products["title"]
        img_list = products["images"]
        body_html = products["body_html"]

        # Presumably option 0 is the colour and option 1 the sizes - this is
        # what the indexing relies on; verify against the API payload.
        options = products["options_with_values"]
        color = options[0]["values"][0]["title"].replace(' / ', '/')
        size_list = [size_option["title"] for size_option in options[1]["values"]]

        product_data = {
            "Mark": mark,
            "Title": title,
            "Color": color,
            "Price": price,
            "Body": body_html,
            "Img_list": img_list,
            "Size_list": size_list,
            "Other_link": [],
            "Category": "Bestsellers",
            "variant_image": '',
            "URL": detail_url,
        }
        save_to_json(product_data)

    print(f"Page - {page} 完成")