# NOTE: stray search-widget text from a page capture was here ("搜索结果" / "×" /
# "搜索结果将在这里显示。"); commented out so this file parses as Python.
# Source site: https://www.cupshe.com/

import random, csv, requests, logging, time, json, re
from selenium import webdriver
from copy import deepcopy
from time import sleep
from lxml import etree
from DrissionPage import Chromium, ChromiumOptions

class Cupshe:
    def __init__(self):
        self.index_url = 'https://bff-shopify.cupshe.com/service/col/page'
        self.detail_url = 'https://cfs.cupshe.com/commodity/selfbuild/detail'

        self.index_headers = {
            # "ab-types": "%5B%7B%22abTestId%22%3A100%2C%22abVarient%22%3A%22pdp_height%22%7D%2C%7B%22abTestId%22%3A138%2C%22abVarient%22%3A%22homepageclassic%22%7D%2C%7B%22abTestId%22%3A161%2C%22abVarient%22%3A%22collection_self_ranking_pure%22%7D%2C%7B%22abTestId%22%3A185%7D%2C%7B%22abTestId%22%3A190%2C%22abVarient%22%3A%22false%22%7D%2C%7B%22abTestId%22%3A208%2C%22abVarient%22%3A%22variant_1%22%7D%2C%7B%22abTestId%22%3A228%2C%22abVarient%22%3A%22test1%22%7D%2C%7B%22abTestId%22%3A358%2C%22abVarient%22%3A%221%22%7D%2C%7B%22abTestId%22%3A359%2C%22abVarient%22%3A%221%22%7D%2C%7B%22abTestId%22%3A370%2C%22abVarient%22%3A%22default%22%7D%2C%7B%22abTestId%22%3A391%2C%22abVarient%22%3A%22show%22%7D%5D", #
            "accept": "application/json, text/plain, */*",
            "accept-language": "zh-CN,zh;q=0.9",
            "authorization;": "",
            "btn-code;": "",
            "buried-language": "zh-CN",
            "cache-control": "no-cache",
            "lang;": "",
            "model": "undefined",
            "net-type": "3g",
            "origin": "https://www.cupshe.com",
            "os": "Windows",
            "os-ver": "10",
            "pragma": "no-cache",
            "priority": "u=1, i",
            "referer": "https://www.cupshe.com/",
            "sec-ch-ua": "\"Chromium\";v=\"136\", \"Google Chrome\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36",
            "userid;": ""
        }
        self.detail_headers = {
            # "ab-types": "%5B%7B%22abTestId%22%3A100%2C%22abVarient%22%3A%22pdp_height%22%7D%2C%7B%22abTestId%22%3A138%2C%22abVarient%22%3A%22homepageclassic%22%7D%2C%7B%22abTestId%22%3A190%2C%22abVarient%22%3A%22false%22%7D%2C%7B%22abTestId%22%3A208%2C%22abVarient%22%3A%22variant_1%22%7D%2C%7B%22abTestId%22%3A228%2C%22abVarient%22%3A%22test1%22%7D%2C%7B%22abTestId%22%3A284%2C%22abVarient%22%3A%22big_data_seriesup%22%7D%2C%7B%22abTestId%22%3A286%2C%22abVarient%22%3A%22big_data_seriesup%22%7D%2C%7B%22abTestId%22%3A291%2C%22abVarient%22%3A%22biw_a%22%7D%2C%7B%22abTestId%22%3A359%2C%22abVarient%22%3A%221%22%7D%2C%7B%22abTestId%22%3A370%2C%22abVarient%22%3A%22default%22%7D%2C%7B%22abTestId%22%3A391%2C%22abVarient%22%3A%22show%22%7D%5D",
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "no-cache",
            "origin": "https://www.cupshe.com",
            "pragma": "no-cache",
            "priority": "u=1, i",
            "referer": "https://www.cupshe.com/",
            "sec-ch-ua": "\"Chromium\";v=\"136\", \"Google Chrome\";v=\"136\", \"Not.A/Brand\";v=\"99\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "same-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/136.0.0.0 Safari/537.36"
        }

        self.index_params = {
            "visitorType": "0,4",
            "siteId": "1",
            "channelId": "1",
            "brandId": "1",
            "terminalId": "1",
            "loginMethod": "0",
            "lang": "en-GB",
            "langCode": "en-GB",
            "klarnaCode": "en-US",
            "klarnaCodeEn": "en-US",
            "nuveiLangCode": "en",
            "currency": "USD",
            "currencyCode": "$",
            "shopId": "1",
            "siteName": "us",
            "currencyPosition": "left",
            "numTypeLike": "en",
            "calcDiscountMethod": "calcDiscountWithoutOff",
            "subTerminal": "1",
            "userSelectLang": "1",
            "themeType": "vacation",
            "sortId": "1",
            "pageNum": '', #
            "pageSize": "48",
            "seoUrl": '', #
            "utmMedium": "",
            "utmSource": "",
            "utmCampaign": "",
            "utmTerm": "",
            "skcFeed": "",
            "abType": '', #
            "sceneId": '', #
            "distinctId": "01a859d3-2dc8-43c0-a371-8a17fcebd0d7",
            "peopleType": "0"
        }
        self.detail_params = {
            "subTerminal": "1",
            "siteId": "1",
            "channelId": "1",
            "brandId": "1",
            "terminalId": "1",
            "loginMethod": "0",
            "lang": "en-GB",
            "langCode": "en-GB",
            "klarnaCode": "en-US",
            "klarnaCodeEn": "en-US",
            "nuveiLangCode": "en",
            "currency": "USD",
            "currencyCode": "$",
            "shopId": "1",
            "siteName": "us",
            "currencyPosition": "left",
            "numTypeLike": "en",
            "calcDiscountMethod": "calcDiscountWithoutOff",
            "userSelectLang": "1",
            "themeType": "vacation",
            "skcCode": '', #
            "source": "1",
            "visitorType": "0,4",
            "peopleType": "0"
        }

        self.id = 0  # 保存到csv文件不用id字段
        self.init_data = {
            'Handle': '',
            'Title': '',
            'Body (HTML)': '',
            'Vendor': '',
            'Type': '',
            'Tags': '',
            'Published': 'TRUE',
            'Option1 Name': 'Color',
            'Option1 Value': '',
            'Option2 Name': 'Size',
            'Option2 Value': '',
            'Option3 Name': '',
            'Option3 Value': '',
            'Variant SKU': '',
            'Variant Grams': '',
            'Variant Inventory Tracker': 'Shopify',
            'Variant Inventory Qty': '99999',
            'Variant Inventory Policy': '',
            'Variant Fulfillment Service': '',
            'Variant Price': '',
            'Variant Compare At Price': '',
            'Variant Requires Shipping': '',
            'Variant Taxable': '',
            'Variant Barcode': '',
            'Image Src': '',
            'Image Position': '',
            'Image Alt Text': '',
            'Gift Card': '',
            'SEO Title': '',
            'SEO Description': '',
'Variant Image': '',
            'Status': '',
            'Collection': '',
        }
        self.empty_data = {
            'Handle': '',
            'Title': '',
            'Body (HTML)': '',
            'Vendor': '',
            'Type': '',
            'Tags': '',
            'Published': '',
            'Option1 Name': '',
            'Option1 Value': '',
            'Option2 Name': '',
            'Option2 Value': '',
            'Option3 Name': '',
            'Option3 Value': '',
            'Variant SKU': '',
            'Variant Grams': '',
            'Variant Inventory Tracker': '',
            'Variant Inventory Qty': '',
            'Variant Inventory Policy': '',
            'Variant Fulfillment Service': '',
            'Variant Price': '',
            'Variant Compare At Price': '',
            'Variant Requires Shipping': '',
            'Variant Taxable': '',
            'Variant Barcode': '',
            'Image Src': '',
            'Image Position': '',
            'Image Alt Text': '',
            'Gift Card': '',
            'SEO Title': '',
            'SEO Description': '',
            'Variant Image': '',
            'Status': '',
            'Collection': '',
        }
        self.field_names = ['Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags', 'Published', 'Option1 Name',
                            'Option1 Value', 'Option2 Name', 'Option2 Value', 'Option3 Name', 'Option3 Value',
                            'Variant SKU', 'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
                            'Variant Inventory Policy', 'Variant Fulfillment Service', 'Variant Price',
                            'Variant Compare At Price', 'Variant Requires Shipping', 'Variant Taxable',
                            'Variant Barcode', 'Image Src', 'Image Position', 'Image Alt Text', 'Gift Card',
                            'SEO Title', 'SEO Description', 'Variant Image', 'Status', 'Collection']
        self.file = None
        self.writer = None

        self.browser = None
        self.tab = None

        self.cnt = 0

        self.total_product_num = 0
        self.total_page = 0

        self.type_list = [
            {
                '_type': 'New Arrivals',
                'seo_url': 'new-arrivals',
                'ab_type': 'original_ranking_mix',
                'scene_id': 'original_ranking'
            },
            {
                '_type': 'Bikinis',
                'seo_url': 'bikinis',
                'ab_type': 'collection_self_ranking_pure',
                'scene_id': 'us_web_nmix_collect_v103'
            },
            {
                '_type': 'One-Pieces',
                'seo_url': 'one-piece',
                'ab_type': 'collection_self_ranking_pure',
                'scene_id': 'us_web_nmix_collect_v103'
            },
            {
                '_type': 'Dresses',
                'seo_url': 'vacationdress',
                'ab_type': 'collection_self_ranking_pure',
                'scene_id': 'us_web_nmix_collect_v103'
            },
            {
                '_type': 'Cover-Ups',
                'seo_url': 'cover-up-1',
                'ab_type': '',
                'scene_id': ''
            },
            {
                '_type': 'Rompers & Jumpsuits',
                'seo_url': 'jumpsuits-rompers',
                'ab_type': 'original_ranking_mix',
                'scene_id': 'original_ranking'
            },
            {
                '_type': 'Tops & Bottoms',
                'seo_url': 'alltopsvacaion-copy',
                'ab_type': '',
                'scene_id': ''
            },
            {
                '_type': 'Clothing',
                'seo_url': 'allbestsellersvacation',
                'ab_type': '',
                'scene_id': ''
            }
        ]

    def simulated_smooth_scroll(self, driver, step=1000, interval=0.5, timeout=30):
        """Smoothly scroll the page to the bottom in fixed-size steps.

        Re-reads the document height after every step so lazily loaded
        content keeps extending the scroll target, and gives up after
        ``timeout`` seconds.
        """
        deadline = time.time() + timeout
        page_height = driver.execute_script("return document.documentElement.scrollHeight")
        scrolled = 0

        while time.time() < deadline:
            remaining = page_height - scrolled
            step_size = min(step, remaining) if remaining > 0 else 0
            if step_size <= 0:
                break

            driver.execute_script(f"window.scrollBy(0, {step_size})")
            scrolled += step_size

            # Pause proportionally to how far we actually moved.
            time.sleep(interval * (step_size / step))

            # Pick up any height growth caused by lazy loading.
            height_now = driver.execute_script(
                "return document.documentElement.scrollHeight"
            )
            if height_now > page_height:
                page_height = height_now

    def get_driver(self, url, xpath_txt=None, is_turn=False):
        """Launch a headless Chrome, load ``url`` and return the live driver.

        Retries the page load until ``xpath_txt`` (when given) can be located;
        when ``is_turn`` is True the page is first scrolled to the bottom to
        trigger lazy loading. The retry previously used a bare ``except:``,
        which also swallowed KeyboardInterrupt/SystemExit.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        # Don't wait for the full load event; we poll for elements instead.
        options.page_load_strategy = "none"

        driver = webdriver.Chrome(options=options)

        driver.implicitly_wait(10)
        driver.maximize_window()

        while True:
            try:
                print('正在获取', url, '的页面数据')
                driver.get(url)

                if is_turn:
                    sleep(1)
                    self.simulated_smooth_scroll(driver)

                if xpath_txt:
                    driver.find_element('xpath', xpath_txt)
                else:
                    self.random_sleep(5)

                break

            except Exception:  # narrowed from bare except so Ctrl-C still works
                print(url, '没定位到,重新请求...')

        return driver

    def driver_continue(self, driver, url, xpath_txt=None, is_turn=False):
        """Reuse an existing driver to load ``url``, retrying until ``xpath_txt`` is found.

        The retry previously used a bare ``except:`` (also swallowing
        KeyboardInterrupt); it is now narrowed to ``Exception``.
        """
        while True:
            try:
                print('正在获取', url, '的页面数据')
                driver.get(url)

                if is_turn:
                    self.random_sleep()
                    self.simulated_smooth_scroll(driver)

                driver.find_element('xpath', xpath_txt)
                return

            except Exception:  # narrowed from bare except
                print(url, '没定位到,重新请求...')

    def get_page_html(self, url, xpath_txt=None, is_turn=False):
        """Fetch ``url`` with a throwaway driver and return the parsed lxml tree.

        Uses ``driver.quit()`` instead of ``close()``: ``close()`` only closes
        the window and leaks the chromedriver process on every call.
        """
        driver = self.get_driver(url, xpath_txt, is_turn=is_turn)
        try:
            page_source = driver.page_source
        finally:
            driver.quit()

        return etree.HTML(page_source)

    def writer_to_file(self, data, mode, encoding=None):
        """Dump ``data`` to ./text.html (debug helper).

        The binary check is now on ``mode`` ('wb', 'ab', ...): the original
        tested ``'b' in encoding``, which raised TypeError with the default
        ``encoding=None``. Files are closed via ``with``.
        """
        if 'b' in mode:
            with open('./text.html', mode) as f:
                f.write(data)
        else:
            with open('./text.html', mode, encoding=encoding) as f:
                f.write(data)

        print('写入文件成功!')

    def driver_click(self, driver, timeout=2):
        # Click the element, then pause ~timeout seconds to let the page react.
        driver.click()
        self.random_sleep(timeout)

    def driver_back(self, driver, timeout=2):
        # Navigate back in browser history, then pause ~timeout seconds.
        driver.back()
        self.random_sleep(timeout)

    def driver_refresh(self, driver, timeout=2):
        # Reload the current page, then pause ~timeout seconds.
        driver.refresh()
        self.random_sleep(timeout)

    def tab_wait(self, tab, timeout=3):
        # Block for ``timeout`` seconds on the DrissionPage tab, returning it
        # so calls can be chained.
        tab.wait(timeout)
        return tab

    def tab_get(self, tab, url, xpath_txt='html', is_turn=False):
        print('正在获取', url, '的数据')

        while True:
            tab.get(url)
            self.tab_wait(tab)

            if is_turn:
                tab.scroll.to_bottom()
                self.tab_wait(tab)

            ele = tab.ele(f'x:{xpath_txt}')
            if ele:
                break

            print('没有请求到元素,重新请求中')
            tab.wait(2)

        return tab

    def get_dp_html(self, tab, url, xpath_txt='html', is_turn=False):
        """Fetch ``url`` via DrissionPage and return its HTML parsed as an lxml tree."""
        loaded_tab = self.tab_get(tab, url, xpath_txt=xpath_txt, is_turn=is_turn)
        return etree.HTML(loaded_tab.html)

    def random_sleep(self, timeout=2):
        """Sleep ``timeout`` seconds plus up to one random second (anti-bot jitter)."""
        jitter = random.random()
        sleep(timeout + jitter)

    def save_csv(self, data):
        self.writer.writerow({
            'Handle': data['Handle'],
            'Title': data['Title'],
            'Body (HTML)': data['Body (HTML)'],
            'Vendor': data['Vendor'],
            'Type': data['Type'],
            'Tags': data['Tags'],
            'Published': data['Published'],
            'Option1 Name': data['Option1 Name'],
            'Option1 Value': data['Option1 Value'],
            'Option2 Name': data['Option2 Name'],
            'Option2 Value': data['Option2 Value'],
            'Option3 Name': data['Option3 Name'],
            'Option3 Value': data['Option3 Value'],
            'Variant SKU': data['Variant SKU'],
            'Variant Grams': data['Variant Grams'],
            'Variant Inventory Tracker': data['Variant Inventory Tracker'],
            'Variant Inventory Qty': data['Variant Inventory Qty'],
            'Variant Inventory Policy': data['Variant Inventory Policy'],
            'Variant Fulfillment Service': data['Variant Fulfillment Service'],
            'Variant Price': data['Variant Price'],
            'Variant Compare At Price': data['Variant Compare At Price'],
            'Variant Requires Shipping': data['Variant Requires Shipping'],
            'Variant Taxable': data['Variant Taxable'],
            'Variant Barcode': data['Variant Barcode'],
            'Image Src': data['Image Src'],
            'Image Position': data['Image Position'],
            'Image Alt Text': data['Image Alt Text'],
            'Gift Card': data['Gift Card'],
            'SEO Title': data['SEO Title'],
            'SEO Description': data['SEO Description'],
            'Variant Image': data['Variant Image'],
            'Status': data['Status'],
            'Collection': data['Collection']
        })

    def get_response(self, url, headers, params, timeout=2):
        """GET ``url`` with retries until the request itself succeeds.

        ``timeout`` is the pause between attempts (and after success); the
        HTTP timeout itself is fixed at 10 s. The retry previously used a
        bare ``except:``, which also swallowed KeyboardInterrupt.
        """
        while True:
            try:
                response = requests.get(url, headers=headers, params=params, timeout=10)
                break
            except Exception:  # narrowed from bare except so Ctrl-C still works
                print('没有请求到,重新请求')
                self.random_sleep(timeout)

        # Polite delay between consecutive requests.
        self.random_sleep(timeout)

        return response

    def get_html(self, url, page, seo_url, ab_type, scene_id):
        """Unused stub kept for interface compatibility; always returns None."""
        return None

    def tab_run_js(self, tab, js_code):
        """Run ``js_code`` in ``tab``, retrying every second until it succeeds."""
        done = False
        while not done:
            try:
                tab.run_js(js_code)
                done = True
            except Exception as e:
                print('捕获tab_run_js方法的run_js:', e)
                tab.wait(1)

    def ele_click(self, tab, ele):
        """Click ``ele`` via the tab's action chain, retrying until it works."""
        while True:
            try:
                tab.actions.click(ele)
            except Exception as e:
                print('捕获ele_click方法的actions.click:', e)
                tab.wait(1)
            else:
                break

        # Let the page settle after the click.
        tab.wait(2)

    def dp_click_ad(self, tab, xpath_txt):
        ad_ele = tab.ele(f'x:{xpath_txt}', timeout=2)
        if ad_ele:
            print('有广告:', ad_ele)
            self.ele_click(tab, ad_ele)
            self.tab_wait(tab, 1)

    def infinite_scroll(self, tab):
        product_url_list = []
        w_cnt = 0
        turn_cnt = 0
        is_bottom = False

        while True:
            self.dp_click_ad(tab, '//button[@class="css-2vqtum"]')
            self.dp_click_ad(tab, '//button[@class="css-71t821"]')

            if is_bottom:
                break

            tab.scroll.to_bottom()
            self.tab_wait(tab, 1)
            self.tab_run_js(self.tab, 'window.scrollBy(0, -1000)')

            turn_cnt += 1
            print(f'翻页了{turn_cnt}次')

            n_cnt = 0

            self.tab_wait(tab, 5)
            product_url_ele_list = tab.eles('x://div[@class="product-thumbnail plpv3 css-1gasjii"]//a[@class="css-avqw6d"]')

            for product_url_ele in product_url_ele_list:
                product_url = product_url_ele.attr('href')
                if 'https:' not in product_url:
                    product_url = 'https://www.coach.com' + product_url

                if product_url in product_url_list:
                    continue

                n_cnt += 1
                product_url_list.append(product_url)

            print('产品个数:', len(product_url_ele_list))
            print('获取到的产品个数', len(product_url_list))

            if n_cnt == 0:
                w_cnt += 1
            else:
                w_cnt = 0

            if w_cnt >= 5:
                print('到底了')
                is_bottom = True

        return product_url_list

    def get_detail_tab(self, url, xpath_txt='html', backup_xpath_txt='html'):
        print('正在获取', url, '的数据')

        tab = self.browser.latest_tab

        while True:
            tab.get(url)
            self.tab_wait(tab)

            ele = tab.ele(f'x:{xpath_txt}')
            if ele:
                print('第1个xpath找到的')
                break
            else:
                t_ele = tab.ele(f'x:{backup_xpath_txt}')
                if t_ele:
                    print('第2个xpath找到的')
                    break

            print('没有请求到元素,重新请求中')
            tab.wait(2)

        return tab

    def get_product_img_url_list(self, tab, data):
        tab.wait(5)
        self.dp_click_ad(tab, '//button[@class="css-2vqtum"]')

        data['Option1 Value'] = tab.ele('x://p[contains(@class, "color-name")]').text

        product_color_img_url_list = []

        img_temp_ele = tab.ele('x://li[contains(@class, "is-prev")]')
        if img_temp_ele:
            product_color_img_url_cnt = int(img_temp_ele.attr('data-slide-index'))
        else:
            product_color_img_url_cnt = len(tab.eles('x://div[@class="css-1161qt5"]/div')) - 1

        print(data['Option1 Value'], '色的图片个数应为:', product_color_img_url_cnt)

        tab.wait(6)
        raw_product_color_img_ele_list = tab.eles('x://li[contains(@class, "splide__slide")]/div | //li[contains(@class, "splide__slide")]//video/source | //div[@class="css-1161qt5"]/div[@class="css-113el1s"]/div')

        for raw_product_color_img_ele in raw_product_color_img_ele_list:
            raw_product_color_img_url = raw_product_color_img_ele.attr('src')

if not raw_product_color_img_url:
                continue

            raw_product_color_img_url = raw_product_color_img_url.split('?')[0]

            if raw_product_color_img_url in product_color_img_url_list:
                continue

            product_color_img_url_list.append(raw_product_color_img_url)

        print(data['Option1 Value'], '色获取到的图片个数为:', len(product_color_img_url_list))

        return product_color_img_url_list

    def init_tab(self):
        """Start a DrissionPage Chromium browser and keep a handle to its latest tab."""
        options = ChromiumOptions()
        options.set_browser_path(r'C:\Program Files\Google\Chrome\Application\chrome.exe')
        options.auto_port()
        # Optional flags kept for reference:
        # options.headless()
        # options.set_argument('--no-sandbox')

        self.browser = Chromium(options)

        self.tab = self.browser.latest_tab
        self.tab.set.window.max()

    def handle_img_url(self, raw_img_url):
        """Return ``raw_img_url`` with any query string stripped."""
        base, _, _ = raw_img_url.partition('?')
        return base

    def save_all_data(self, product_data_list):
        for product_data in product_data_list:
            self.cnt += 1
            self.save_csv(product_data)
            print('第', self.cnt, '行保存完成!')

    # 取消翻译
    def cancel_translate(self, url):
        tab = self.tab_get(self.tab, url, '//div[@class="collection_item overflow-hidden"]')
        tab.actions.move_to(tab.ele('x://div[@class="gt-translate-btn-bg"]'))
        tab.wait(2)
        tab.actions.click(tab.ele('x://div[@class="gt-translate-switch"]'))
        tab.wait(2)

    def get_index_json(self, page, seo_url, ab_type, scene_id, timeout=2):
        params = deepcopy(self.index_params)

        params['pageNum'] = str(page)
        params['seoUrl'] = seo_url
        params['abType'] = ab_type
        params['sceneId'] = scene_id

        while True:
            print(f'正在获取 {params['seoUrl']} 的第 {params['pageNum']} 页的数据')

            try:
                res = self.get_response(self.index_url, self.index_headers, params, timeout).json()
                if 'success' == res['retInfo'] and len(res['data']['dataDetail']) > 0 and res['data']['dataDetail'][0]['skcs'][0]['title']:
                    break

                print('json数据获取失败,重新获取')
            except Exception as e:
                print('json数据获取失败,重新获取')
                print(e)

        return res['data']

    def get_total_page(self, seo_url, ab_type, scene_id):
        page_json = self.get_index_json(1, seo_url, ab_type, scene_id)

        page_size = page_json['pageSize']
        self.total_product_num = page_json['total']
        self.total_page = (self.total_product_num // page_size) + (self.total_product_num % page_size != 0)

        print('总产品个数为:', self.total_product_num)
        print('总页数为:', self.total_page)

    def get_detail_json(self, skc_code, timeout=2):
        params = deepcopy(self.detail_params)

        params['skcCode'] = skc_code

        while True:
            print(f'正在获取 {params['skcCode']} 的数据')

            try:
                res = self.get_response(self.detail_url, self.detail_headers, params, timeout).json()
                if 'success' == res['retInfo'] and res['data']['commodities'][0]['title']:
                    break

                print('json数据获取失败,重新获取')
            except Exception as e:
                print('json数据获取失败,重新获取')
                print(e)

        return res['data']

    def check_color(self, color_dict_filter, color_text):
        """Return True if ``color_text`` already appears as a 'name' in the filter list."""
        return any(entry['name'] == color_text for entry in color_dict_filter)

    def test_product(self, skc_code):
        self.file = open('./test.csv', 'w', newline='', encoding='utf-8-sig')
        self.writer = csv.DictWriter(self.file, fieldnames=self.field_names)
        self.writer.writeheader()

        logging.captureWarnings(True)

        skc_code = skc_code.upper()
        product_json = self.get_detail_json(skc_code)

        self.id += 1
        print(self.id, '开始')

        product_data_list = self.product_detail_parse('One', product_json, skc_code)

        self.save_all_data(product_data_list)

        print(self.id, '结束')

        if self.file:
            self.file.close()

        if self.browser:
            self.browser.quit()

    def product_detail_parse(self, _type, product_json, skc_code):
        product_data_list = []
        sku_id = 0
        product_color_dict_filter = []

        data = deepcopy(self.init_data)

        data['Type'] = _type
        data['Collection'] = data['Type']

        data['Handle'] = (_type + '-' + product_json['spuCode']).replace(' ', '-').lower()

        for product_color_json in product_json['commodities']:
            data['Title'] = product_color_json['title']
            data['Body (HTML)'] = product_color_json['description']

            # -----------------------------------------------
            # 获取颜色字段值
            product_color = product_color_json['color']
            if self.check_color(product_color_dict_filter, product_color):
                product_color_dict_id = 0
                for product_color_dict in product_color_dict_filter:
                    if product_color_dict['name'] == product_color:
                        break

                    product_color_dict_id += 1

                product_color_dict_filter[product_color_dict_id]['num'] += 1
                data['Option1 Value'] = product_color_json['color'] + '_' + str(product_color_dict_filter[product_color_dict_id]['num'])
            else:
                data['Option1 Value'] = product_color_json['color']
                product_color_dict_filter.append({'name': product_color_json['color'], 'num': 1})

            # -----------------------------------------------

            # -------------------------------------------
            # 获取图片

            product_img_url_list = []

            product_img_url_json_list = product_color_json['medias']

            for product_img_url_json in product_img_url_json_list:
                product_img_url_list.append(product_img_url_json['src'])

            product_img_url_len = len(product_img_url_list)

            # -------------------------------------------

            for product_color_size_json in product_color_json['skus']:
                _data = deepcopy(data)
                sku_id += 1

                _data['Option2 Value'] = product_color_size_json['localSize']

                _data['Variant SKU'] = _type + '-' + product_color_json['skcCode'] + '-' + _data['Title'] + '-' + _data['Option1 Value'] + '-' + _data['Option2 Value'] + '-' + str(sku_id)
                _data['Variant SKU'] = _data['Variant SKU'].replace(' ', '-').lower()

                _data['Variant Price'] = product_color_size_json['discountPriceStr']
                _data['Variant Compare At Price'] = product_color_size_json['retailPriceStr']

                _data['Image Src'] = product_img_url_list[0]
                _data['Image Position'] = 1
                _data['Variant Image'] = _data['Image Src']

                print(f'第{self.id}个产品 {skc_code} 的第{sku_id}个sku的颜色个数为{len(product_json['commodities'])},尺寸个数为:{len(product_color_json['skus'])},图片个数为:{product_img_url_len}')

product_data_list.append(_data)
                print(_data)

                for i in range(1, product_img_url_len):
                    temp_data = deepcopy(self.empty_data)

                    temp_data['Handle'] = _data['Handle']
                    temp_data['Published'] = 'TRUE'
                    temp_data['Image Src'] = product_img_url_list[i]
                    temp_data['Image Position'] = i + 1

                    product_data_list.append(temp_data)
                    print(temp_data)

        return product_data_list

    def product_parse(self, product_skc_code, _type):
        self.id += 1

        product_json = self.get_detail_json(product_skc_code)  # 第3个请求

        with open('./filter.txt', 'r', encoding='utf-8') as f:
            filter_txt = f.read()

        product_uuid = (_type + '-' + product_json['spuCode']).replace(' ', '-').lower()
        if product_uuid in filter_txt:
            print(self.id, '已完成')
            return

        print(self.id, '开始')

        product_data_list = self.product_detail_parse(_type, product_json, product_skc_code)

        self.save_all_data(product_data_list)

        print(self.id, '结束')

        with open('./filter.txt', 'a', encoding='utf-8') as f:
            f.write(product_uuid + '\n')

    def parse(self, _type, seo_url, ab_type, scene_id):
        self.get_total_page(seo_url, ab_type, scene_id) # 第1个请求

        for page in range(1, self.total_page + 1):

            product_json_list = self.get_index_json(page, seo_url, ab_type, scene_id)['dataDetail'] # 第2个请求

            product_skc_code_list = []
            for product_json in product_json_list:
                product_skc_code_list.append(product_json['skcs'][0]['skcCode'])

            for product_skc_code in product_skc_code_list:
                self.product_parse(product_skc_code, _type)

    def run(self, is_continue=False):
        if is_continue:
            self.file = open('./cupshe.csv', 'a', newline='', encoding='utf-8-sig')
            self.writer = csv.DictWriter(self.file, fieldnames=self.field_names)
        else:
            self.file = open('./cupshe.csv', 'w', newline='', encoding='utf-8-sig')
            self.writer = csv.DictWriter(self.file, fieldnames=self.field_names)
            self.writer.writeheader()

            with open('./filter.txt', 'w', encoding='utf-8') as f:
                f.write('')

        logging.captureWarnings(True)

        for _type in self.type_list:
            self.parse(_type['_type'], _type['seo_url'], _type['ab_type'], _type['scene_id'])

        if self.file:
            self.file.close()

if __name__ == '__main__':
    cupshe = Cupshe()
    cupshe.run()

    # Sample skc codes covering the tricky parsing cases:
    skc_code_1 = 'CAA12E5E075AA' # single colour, no sale price # https://www.cupshe.com/products/licorice-twist-black-one-piece-swimsuit-CAA12E5E075AA
    skc_code_2 = 'CAA12D4M084AB' # multiple colours; one colour without and one with a sale price # https://www.cupshe.com/products/checker-chic-black-tankini-set-CAA12D4M084AB
    skc_code_3 = 'caa05a3d123hh' # has duplicate colour names # https://www.cupshe.com/products/backless-v-neck-mini-dress
    skc_code_4 = 'caa11d5d001gg' # additional sample
    skc_code_5 = 'CAA12E5E011AA' # no colour, has sizes
    # cupshe.test_product(skc_code_1)

# https://www.cupshe.com/

# 接口:https://bff-shopify.cupshe.com/service/col/page
# 详情接口:https://cfs.cupshe.com/commodity/selfbuild/detail

# page从1开始,一页有48个产品

# 上传代码的时候把True给去掉 √
# 试试全部产品数据 √
# 试试部分产品数据 √