# 🛠️ Shopify 采集脚本 - lululemon
# 🌻 lululemon

import requests
import json
import time
import csv
from datetime import datetime

def get_product_images(primary_image, *, count=5):
    """Build numbered gallery image URLs from a swatch's primary image URL.

    Everything after the last underscore of ``primary_image`` is dropped and
    replaced with the indices ``1`` .. ``count``. For example
    ``https://images.lululemon.com/is/image/lululemon/LW3HQFS_038426_1``
    yields ``..._038426_1`` through ``..._038426_5``. The URLs are only
    constructed here; nothing checks that they exist on the server.

    Args:
        primary_image: URL of the primary image; may be ``None`` or empty.
        count: Number of URLs to generate. Defaults to 5, the amount this
            function always produced before the parameter existed.

    Returns:
        A list of ``count`` URL strings, or an empty list when
        ``primary_image`` is falsy.
    """
    if not primary_image:
        return []

    # Strip the trailing "_<index>" so a fresh index can be appended. A URL
    # without any underscore is kept whole and simply gets "_<i>" appended.
    base_url = primary_image.rsplit('_', 1)[0]
    return [f"{base_url}_{i}" for i in range(1, count + 1)]

def convert_to_shopify_format(product_data):
    """Convert one scraped product record into a Shopify-style product dict.

    Args:
        product_data: Mapping for a single product. The keys read here are
            ``displayName``, ``parentCategoryUnifiedId``, ``unifiedId``,
            ``listPrice`` (its first element becomes every variant's price),
            ``skuStyleOrder`` (items with ``colorName``, ``colorId`` and
            ``styleId``) and ``swatches`` (items with ``primaryImage`` and
            ``colorId``).

    Returns:
        A dict with Shopify product fields. ``variants`` holds one entry per
        (color, size) pair, where sizes are the fixed range 0, 2, ..., 20.
        ``images`` holds the generated gallery URLs of every swatch that has
        a primary image and whose ``colorId`` matches a color found in
        ``skuStyleOrder``; swatches with an unknown ``colorId`` are skipped.

    Raises:
        KeyError: If one of the keys listed above is missing.
        IndexError: If ``listPrice`` is empty while at least one color exists.
    """
    # Debug output.
    print(f"\n处理产品: {product_data['displayName']}")

    # Take the timestamp once so created_at and published_at are identical
    # instead of differing by a few microseconds.
    now = datetime.now().isoformat()

    # Sizes are a fixed even range; they are not read from the product data.
    sizes = [str(i) for i in range(0, 21, 2)]

    # color name -> ids of the first skuStyleOrder entry with that name.
    # Dict insertion order doubles as the order of the "Color" option values.
    colors = {}
    for sku_style in product_data["skuStyleOrder"]:
        color_name = sku_style["colorName"]
        if color_name not in colors:
            colors[color_name] = {
                "colorId": sku_style["colorId"],
                "styleId": sku_style["styleId"],
            }

    # Build one variant per (color, size) and remember each color's SKUs so
    # the image step does not have to rescan the whole variant list.
    variants = []
    skus_by_color = {}
    for color_name, color_info in colors.items():
        color_skus = []
        for size in sizes:
            sku = f"{color_info['styleId']}_{size}"
            color_skus.append(sku)
            variants.append({
                "sku": sku,
                "price": product_data["listPrice"][0],
                "compare_at_price": None,
                "option1": color_name,
                "option2": size,
                "available": True,
                "inventory_quantity": 1,
                "requires_shipping": True,
            })
        skus_by_color[color_name] = color_skus

    shopify_product = {
        "title": product_data["displayName"],
        "body_html": "",  # a product description could be added here
        "vendor": "lululemon",
        "product_type": product_data["parentCategoryUnifiedId"].replace("-", " ").title(),
        "created_at": now,
        "handle": product_data["unifiedId"],
        "published_at": now,
        "template_suffix": "",
        "status": "active",
        "published_scope": "web",
        "tags": [],
        "variants": variants,
        "options": [
            {
                "name": "Color",
                "position": 1,
                "values": list(colors),
            },
            {
                "name": "Size",
                "position": 2,
                "values": sizes,
            },
        ],
        "images": [],
    }

    print(f"\n创建的变体数量: {len(shopify_product['variants'])}")
    print(f"使用的尺码范围: {shopify_product['options'][1]['values']}")

    # colorId -> color name; when several names share an id the first wins.
    color_name_by_id = {}
    for color_name, color_info in colors.items():
        color_name_by_id.setdefault(color_info["colorId"], color_name)

    # Attach the generated gallery images of each swatch to the SKUs of the
    # matching color.
    images = shopify_product["images"]
    for swatch in product_data["swatches"]:
        primary_image = swatch["primaryImage"]
        if not primary_image:
            continue

        color_id = swatch["colorId"]
        if color_id not in color_name_by_id:
            # A swatch with no matching skuStyleOrder color has no variants
            # to attach to. Skip it rather than abort the whole product.
            continue

        variant_ids = skus_by_color[color_name_by_id[color_id]]
        for img_url in get_product_images(primary_image):
            images.append({
                "src": img_url,
                "position": len(images) + 1,
                "variant_ids": variant_ids,
            })

    return shopify_product

def write_products_to_csv(products, writer):
    """Write products as Shopify product-import CSV rows.

    For every product one row is written per variant. Each product image is
    written exactly once: the first not-yet-written image of a variant rides
    on that variant's row (``Image Src`` / ``Image Position``), and any
    further new images follow as image-only rows carrying just the handle,
    source and position. Later variants that share the same images still get
    ``Variant Image`` set but do not repeat the image rows.

    Args:
        products: Iterable of product dicts as produced by
            ``convert_to_shopify_format``. The keys read are ``handle``,
            ``title``, ``body_html``, ``vendor``, ``product_type``,
            ``variants`` and ``images``.
        writer: Object with a ``writerow(dict)`` method, e.g. a
            ``csv.DictWriter`` whose fieldnames cover the column names used
            here.
    """
    for product in products:
        handle = product['handle']
        base_row = {
            'Handle': handle,
            'Title': product['title'],
            'Body (HTML)': product['body_html'],
            'Vendor': product['vendor'],
            'Type': product['product_type'],
            'Tags': '',
            'Published': 'TRUE',
            'Option1 Name': 'Color',
            'Option2 Name': 'Size',
            'Option3 Name': '',
            'Option3 Value': '',
            'Variant Grams': '0',
            'Variant Inventory Tracker': 'shopify',
            'Variant Inventory Qty': '100',
            'Variant Inventory Policy': 'deny',
            'Variant Fulfillment Service': 'manual',
            'Variant Requires Shipping': 'TRUE',
            'Variant Taxable': 'TRUE',
            'Variant Barcode': '',
            'Image Alt Text': '',
            'Gift Card': 'FALSE',
            'SEO Title': product['title'],
            'SEO Description': '',
            'Google Shopping / Google Product Category': '',
            'Google Shopping / Gender': '',
            'Google Shopping / Age Group': '',
            'Google Shopping / MPN': '',
            'Google Shopping / AdWords Grouping': '',
            'Google Shopping / AdWords Labels': '',
            'Google Shopping / Condition': '',
            'Google Shopping / Custom Product': '',
            'Google Shopping / Custom Label 0': '',
            'Google Shopping / Custom Label 1': '',
            'Google Shopping / Custom Label 2': '',
            'Google Shopping / Custom Label 3': '',
            'Google Shopping / Custom Label 4': '',
            'Variant Weight Unit': 'g',
            'Variant Tax Code': '',
            'Cost per item': '',
            'Status': 'active'
        }

        # sku -> [(position, src), ...] in product image order, built once so
        # the image list is not rescanned for every variant.
        images_by_sku = {}
        for index, img in enumerate(product['images'], 1):
            # Fall back to the list index when an image carries no position.
            entry = (img.get('position', index), img['src'])
            for sku in img['variant_ids']:
                images_by_sku.setdefault(sku, []).append(entry)

        # Image sources already emitted for this product. Without this every
        # size of a color would repeat the color's image rows.
        written_srcs = set()

        for variant in product['variants']:
            row = base_row.copy()
            row.update({
                'Option1 Value': variant['option1'],
                'Option2 Value': variant['option2'],
                'Variant SKU': variant['sku'],
                'Variant Price': variant['price'],
                'Variant Compare At Price': variant.get('compare_at_price', ''),
            })

            variant_images = images_by_sku.get(variant['sku'], [])
            if variant_images:
                # Every variant points at the first image of its color.
                row['Variant Image'] = variant_images[0][1]

            # Images of this variant that have not been written yet.
            pending = []
            for position, src in variant_images:
                if src not in written_srcs:
                    written_srcs.add(src)
                    pending.append((position, src))

            if pending:
                first_position, first_src = pending[0]
                row['Image Src'] = first_src
                # Use the product-wide position so different colors do not
                # all claim positions 1..n.
                row['Image Position'] = str(first_position)

            writer.writerow(row)

            # Remaining new images go on image-only rows.
            for position, src in pending[1:]:
                writer.writerow({
                    'Handle': handle,
                    'Image Src': src,
                    'Image Position': str(position),
                })

def get_lululemon_products():
    """Scrape the lululemon "women-clothes" category into a Shopify CSV.

    Pages through the site's GraphQL endpoint (12 products per page),
    converts every product with ``convert_to_shopify_format`` and writes the
    rows through ``write_products_to_csv`` into a file named
    ``lululemon_products_<YYYYmmdd_HHMMSS>.csv`` in the current working
    directory.

    Paging stops when a page contains no products, when the page count
    reported by the server (``totalProductPages``) has been reached, or on
    the first error. Errors are printed, not raised; rows written before an
    error stay in the file.
    """
    url = "https://shop.lululemon.com/snb/graphql"
    # Seconds to wait for the server. Without a timeout a stalled connection
    # would block the script forever.
    request_timeout = 30

    csv_filename = f'lululemon_products_{datetime.now().strftime("%Y%m%d_%H%M%S")}.csv'
    fieldnames = [
        "Handle", "Title", "Body (HTML)", "Vendor", "Type", "Tags", "Published",
        "Option1 Name", "Option1 Value", "Option2 Name", "Option2 Value",
        "Option3 Name", "Option3 Value", "Variant SKU", "Variant Grams",
        "Variant Inventory Tracker", "Variant Inventory Qty", "Variant Inventory Policy",
        "Variant Fulfillment Service", "Variant Price", "Variant Compare At Price",
        "Variant Requires Shipping", "Variant Taxable", "Variant Barcode",
        "Image Src", "Image Position", "Image Alt Text", "Gift Card",
        "SEO Title", "SEO Description", "Google Shopping / Google Product Category",
        "Google Shopping / Gender", "Google Shopping / Age Group",
        "Google Shopping / MPN", "Google Shopping / AdWords Grouping",
        "Google Shopping / AdWords Labels", "Google Shopping / Condition",
        "Google Shopping / Custom Product", "Google Shopping / Custom Label 0",
        "Google Shopping / Custom Label 1", "Google Shopping / Custom Label 2",
        "Google Shopping / Custom Label 3", "Google Shopping / Custom Label 4",
        "Variant Image", "Variant Weight Unit", "Variant Tax Code",
        "Cost per item", "Status"
    ]

    # Request headers copied from a browser session on the category page.
    headers = {
        "accept": "application/graphql+json, application/json",
        "accept-language": "zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6",
        "content-type": "application/json",
        "origin": "https://shop.lululemon.com",
        "priority": "u=1, i",
        "referer": "https://shop.lululemon.com/c/women-clothes/n14uwk",
        "sec-ch-ua": '"Not A(Brand";v="8", "Chromium";v="132", "Microsoft Edge";v="132"',
        "sec-ch-ua-mobile": "?0",
        "sec-ch-ua-platform": '"Windows"',
        "sec-fetch-dest": "empty",
        "sec-fetch-mode": "cors",
        "sec-fetch-site": "same-origin",
        "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/132.0.0.0 Safari/537.36 Edg/132.0.0.0",
        "x-lll-referrer": "Channel=Web,Page=CDP",
        "x-lll-request-correlation-id": "029b94b5-e82d-463b-84e0-a787e2e72aea"
    }

    # The GraphQL document is the same for every page, so build it once.
    query = """query CategoryPageDataQuery($category:String!,$cid:String,$forceMemberCheck:Boolean,$nValue:String,$cdpHash:String,$sl:String!,$locale:String!,$Ns:String,$storeId:String,$pageSize:Int,$page:Int,$onlyStore:Boolean,$useHighlights:Boolean,$abFlags:[String],$styleboost:[String],$fusionExperimentVariant:String){categoryPageData(category:$category nValue:$nValue cdpHash:$cdpHash locale:$locale sl:$sl Ns:$Ns page:$page pageSize:$pageSize storeId:$storeId onlyStore:$onlyStore forceMemberCheck:$forceMemberCheck cid:$cid useHighlights:$useHighlights abFlags:$abFlags styleboost:$styleboost fusionExperimentVariant:$fusionExperimentVariant){activeCategory allLocaleNvalues{CA US __typename}categoryLabel fusionExperimentId fusionExperimentVariant fusionQueryId h1Title isBopisEnabled isFusionQuery isWMTM name results:totalProducts totalProductPages currentPage type bopisProducts{allAvailableSizes currencyCode defaultSku displayName listPrice parentCategoryUnifiedId productOnSale:onSale productSalePrice:salePrice pdpUrl productCoverage repositoryId:productId productId inStore unifiedId highlights{highlightLabel highlightIconWeb priority visibility subText abFlag{abFlagName showIcon showHighlight showSubText visibility __typename}__typename}skuStyleOrder{colorGroup colorId colorName inStore size sku skuStyleOrderId styleId01 styleId02 styleId __typename}swatches{primaryImage hoverImage url colorId inStore __typename}__typename}storeInfo{totalInStoreProducts totalInStoreProductPages storeId __typename}products{allAvailableSizes currencyCode defaultSku displayName intendedCupSize listPrice parentCategoryUnifiedId productOnSale:onSale productSalePrice:salePrice pdpUrl productCoverage repositoryId:productId productId inStore unifiedId highlights{highlightLabel highlightIconWeb priority visibility subText abFlag{abFlagName showIcon showHighlight showSubText visibility __typename}__typename}skuStyleOrder{colorGroup colorId colorName inStore size sku 
skuStyleOrderId styleId01 styleId02 styleId __typename}swatches{primaryImage hoverImage url colorId inStore __typename}__typename}seoLinks{next prev self __typename}__typename}}"""

    # "utf-8-sig" prepends a byte-order mark to the file.
    with open(csv_filename, 'w', newline='', encoding='utf-8-sig') as f:
        writer = csv.DictWriter(f, fieldnames=fieldnames)
        writer.writeheader()

        page = 1
        while True:
            payload = {
                "query": query,
                "operationName": "CategoryPageDataQuery",
                "variables": {
                    "nValue": None,
                    "cdpHash": "n14uwk",
                    "category": "women-clothes",
                    "locale": "en_US",
                    "sl": "US",
                    "page": page,
                    "pageSize": 12,
                    "forceMemberCheck": False,
                    "abFlags": ["cdpSeodsEnabled"],
                    "useHighlights": True
                }
            }

            try:
                print(f"正在获取第 {page} 页数据...")
                response = requests.post(
                    url, headers=headers, json=payload, timeout=request_timeout
                )
                response.raise_for_status()
                category_data = response.json()["data"]["categoryPageData"]

                # An empty page means there is nothing left to fetch.
                raw_products = category_data["products"]
                if not raw_products:
                    break

                # Convert the page and write it straight to the CSV file.
                products = [convert_to_shopify_format(product) for product in raw_products]
                write_products_to_csv(products, writer)
                print(f"第 {page} 页数据已写入CSV文件")

                # The query also asks for totalProductPages; use it as an
                # upper bound so paging ends even if the server keeps
                # returning products for out-of-range page numbers.
                total_pages = category_data.get("totalProductPages")
                if (
                    isinstance(total_pages, int)
                    and not isinstance(total_pages, bool)
                    and page >= total_pages
                ):
                    break

                page += 1
                time.sleep(1)  # pause between requests to avoid hammering the server

            except requests.exceptions.RequestException as e:
                print(f"请求失败: {e}")
                break
            except Exception as e:
                # Top-level boundary: report the failure and keep the rows
                # that were already written.
                print(f"处理数据时出错: {e}")
                break

    print(f"数据已保存到 {csv_filename}")

# Run the scraper only when this file is executed as a script, not on import.
if __name__ == "__main__":
    get_lululemon_products()
# 搜索结果