# 🛠️ Shopify scraping script - https://www.ae.com/us/en/c/aerie/swimsuits/cat7030095?pagetype=plp&_gl=1*1cku7np*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp
# 🌻 https://www.ae.com/us/en/c/aerie/swimsuits/cat7030095?pagetype=plp&_gl=1*1cku7np*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp

import random, csv, requests, logging, time, json, re
from selenium import webdriver
from copy import deepcopy
from time import sleep
from lxml import etree
from DrissionPage import Chromium, ChromiumOptions

class Ae:
    """Scrape Aerie (ae.com) category listings into Shopify product-import CSV rows.

    The path started by ``run`` drives a DrissionPage Chromium tab
    (``init_tab`` -> ``parse`` -> ``product_detail_parse``).  The Selenium and
    ``requests`` helpers are standalone utilities that ``run`` does not call.
    """

    def __init__(self):
        """Set up the category tables, CSV row templates and empty runtime handles."""
        # url_list, file_name_list and type_list are parallel lists: the same
        # index selects a category's listing URL, output file stem and type.
        self.url_list = [
            'https://www.ae.com/us/en/c/aerie/bras/cat4840012?pagetype=plp&_gl=1*e39ufq*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp',
            'https://www.ae.com/us/en/c/aerie/undies/cat6460002?pagetype=plp&_gl=1*e39ufq*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp',
            'https://www.ae.com/us/en/c/aerie/activewear/cat1090003?pagetype=plp&_gl=1*xk9an1*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp',
            'https://www.ae.com/us/en/c/aerie/clothing-accessories/pajamas/cat6460089?pagetype=plp&_gl=1*1cku7np*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp',
            'https://www.ae.com/us/en/c/aerie/swimsuits/cat7030095?pagetype=plp&_gl=1*1cku7np*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp'
        ]

        self.file_name_list = [
            'ea_3_bras',
            'ea_4_womens_underwear',
            'ea_5_offline_by_aerie',
            'ea_6_womens_pajamas',
            'ea_7_womens_swimsuits_&_swimwear'
        ]

        self.type_list = [
            "Bras",
            "Women's Underwear",
            "OFFLINE By Aerie",
            "Women's Pajamas",
            "Women's Swimsuits & Swimwear"
        ]

        self.headers = {}

        self.id = 0  # running product counter for log output; never written to the CSV

        # CSV column order; both row templates below are derived from it so
        # the three can never drift apart.
        self.field_names = ['Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags', 'Published', 'Option1 Name',
                            'Option1 Value', 'Option2 Name', 'Option2 Value', 'Option3 Name', 'Option3 Value',
                            'Variant SKU', 'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
                            'Variant Inventory Policy', 'Variant Fulfillment Service', 'Variant Price',
                            'Variant Compare At Price', 'Variant Requires Shipping', 'Variant Taxable',
                            'Variant Barcode', 'Image Src', 'Image Position', 'Image Alt Text', 'Gift Card',
                            'SEO Title', 'SEO Description', 'Variant Image', 'Status', 'Collection']

        # Template for the extra image-only rows: every column blank.
        self.empty_data = dict.fromkeys(self.field_names, '')

        # Template for variant rows: blank columns plus the fixed defaults.
        self.init_data = {
            **self.empty_data,
            'Published': 'TRUE',
            'Option1 Name': 'Color',
            'Option2 Name': 'Size',
            'Variant Inventory Tracker': 'Shopify',
            'Variant Inventory Qty': '99999',
        }

        self.file = None
        self.writer = None

        self.browser = None
        self.tab = None

        self.cnt = 0  # number of CSV rows written so far

    def simulated_smooth_scroll(self, driver, step=1000, interval=0.5, timeout=30):
        """Scroll a Selenium page towards the bottom in steps of at most ``step`` px.

        The target height is re-read after every step so content that grows
        the page while scrolling is followed.  Stops when the tracked position
        reaches the page height or after ``timeout`` seconds.
        """
        start_time = time.time()
        last_height = driver.execute_script("return document.documentElement.scrollHeight")
        current_position = 0

        while time.time() - start_time < timeout:
            # Distance still to scroll, capped to one step.
            remaining = last_height - current_position
            current_step = min(step, remaining) if remaining > 0 else 0

            if current_step <= 0:
                break

            driver.execute_script(f"window.scrollBy(0, {current_step})")
            current_position += current_step

            # Shorter final steps wait proportionally less.
            time.sleep(interval * (current_step / step))

            new_height = driver.execute_script(
                "return document.documentElement.scrollHeight"
            )

            # The page grew (dynamic loading): extend the target.
            if new_height > last_height:
                last_height = new_height

    def get_driver(self, url, xpath_txt=None, is_turn=False):
        """Start a headless Selenium Chrome, load ``url`` and return the driver.

        Retries indefinitely until the page loads and, when ``xpath_txt`` is
        given, the element is located.  With ``is_turn`` the page is scrolled
        to the bottom first.  The caller owns the returned driver.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.page_load_strategy = "none"

        driver = webdriver.Chrome(options=options)

        driver.implicitly_wait(10)
        driver.maximize_window()

        while True:
            try:
                print('正在获取', url, '的页面数据')
                driver.get(url)

                if is_turn:
                    sleep(1)
                    self.simulated_smooth_scroll(driver)

                if xpath_txt:
                    driver.find_element('xpath', xpath_txt)
                else:
                    self.random_sleep(5)

                break

            except Exception:
                # Not a bare except, so Ctrl-C can still abort the retry loop.
                print(url, '没定位到，重新请求...')

        return driver

    def driver_continue(self, driver, url, xpath_txt=None, is_turn=False):
        """Load ``url`` in an existing Selenium driver, retrying until it succeeds.

        Mirrors ``get_driver``: the element check only runs when ``xpath_txt``
        is given; otherwise a fixed pause is used instead.
        """
        while True:
            try:
                print('正在获取', url, '的页面数据')
                driver.get(url)

                if is_turn:
                    self.random_sleep()
                    self.simulated_smooth_scroll(driver)

                if xpath_txt:
                    driver.find_element('xpath', xpath_txt)
                else:
                    # Without this guard a missing xpath made every attempt
                    # fail and the loop never ended.
                    self.random_sleep(5)

                break

            except Exception:
                print(url, '没定位到，重新请求...')

    def get_page_html(self, url, xpath_txt=None, is_turn=False):
        """Fetch ``url`` with a throw-away Selenium driver and return the lxml tree."""
        driver = self.get_driver(url, xpath_txt, is_turn=is_turn)

        try:
            page_source = driver.page_source
        finally:
            # quit() ends the whole driver session, not just the window.
            driver.quit()

        return etree.HTML(page_source)

    def writer_to_file(self, data, mode, encoding=None):
        """Write ``data`` to ``./text.html`` (debug helper).

        ``encoding`` is only passed to ``open`` for text modes; binary modes
        (a ``'b'`` in ``mode``) do not accept one.
        """
        if 'b' in mode:
            with open('./text.html', mode) as f:
                f.write(data)
        else:
            with open('./text.html', mode, encoding=encoding) as f:
                f.write(data)

        print('写入文件成功！')

    def driver_click(self, driver, timeout=2):
        """Call ``click()`` on the given object, then pause."""
        driver.click()
        self.random_sleep(timeout)

    def driver_back(self, driver, timeout=2):
        """Navigate back, then pause."""
        driver.back()
        self.random_sleep(timeout)

    def driver_refresh(self, driver, timeout=2):
        """Refresh the page, then pause."""
        driver.refresh()
        self.random_sleep(timeout)

    def tab_wait(self, tab, timeout=3):
        """Wait ``timeout`` seconds on ``tab`` and return it."""
        tab.wait(timeout)
        return tab

    def get_dp_html(self, tab, url, xpath_txt='html', is_turn=False):
        """Load ``url`` via ``tab_get`` and return the page as an lxml tree.

        ``is_turn`` is accepted for signature parity with the Selenium helpers
        but is not used.
        """
        tab = self.tab_get(tab, url, xpath_txt=xpath_txt)
        return etree.HTML(tab.html)

    def random_sleep(self, timeout=2):
        """Sleep for ``timeout`` seconds plus a random fraction of a second."""
        sleep(random.random() + timeout)

    def save_csv(self, data):
        """Write one row to the open CSV writer.

        Only the columns in ``field_names`` are written; a missing column
        raises ``KeyError``.
        """
        self.writer.writerow({name: data[name] for name in self.field_names})

    def get_response(self, url):
        """GET ``url`` with ``requests``, retrying until a response is received."""
        while True:
            print('正在获取', url, '的数据')
            try:
                # A timeout stops a stalled connection from blocking forever;
                # a timed-out attempt is retried like any other failure.
                response = requests.get(url, headers=self.headers, timeout=30)
                break
            except Exception:
                print('没有请求到，重新请求')
                self.random_sleep()

        self.random_sleep()

        return response

    def get_html(self, url):
        """GET ``url`` and return the response body as an lxml tree."""
        response = self.get_response(url)
        return etree.HTML(response.text)

    def tab_run_js(self, tab, js_code, timeout=2):
        """Run ``js_code`` in ``tab``, retrying until it does not raise, then wait."""
        while True:
            try:
                tab.run_js(js_code)
                break
            except Exception as e:
                print('捕获tab_run_js方法的run_js:', e)
                tab.wait(timeout)

        tab.wait(timeout)

    def ele_click(self, tab, ele, timeout=2):
        """Click ``ele`` through the action chain, falling back to a JS click."""
        try:
            tab.actions.click(ele)
        except Exception as e:
            print('捕获ele_click方法的actions.click:', e)
            tab.wait(timeout)

            ele.click('js')

        tab.wait(timeout)

    def dp_click_ad(self, tab, xpath_txt):
        """Click the element at ``xpath_txt`` if it shows up within two seconds."""
        ad_ele = tab.ele(f'x:{xpath_txt}', timeout=2)
        if ad_ele:
            print('有广告:', ad_ele)
            self.ele_click(tab, ad_ele)
            self.tab_wait(tab, 1)

    def infinite_scroll(self, tab, timeout=2):
        """Keep scrolling ``tab`` and clicking its "more" button until it is gone."""
        turn_cnt = 0

        while True:
            tab.scroll.to_bottom()
            self.tab_wait(tab, timeout)
            # Scroll the tab that was passed in (previously self.tab was used).
            self.tab_run_js(tab, 'window.scrollBy(0, -3000)')
            self.tab_wait(tab, timeout)
            next_button_ele = tab.ele('x://button[contains(@class, "more")]')
            if next_button_ele:
                self.ele_click(tab, next_button_ele, timeout)
            else:
                break

            turn_cnt += 1
            print(f'翻页了{turn_cnt}次')

            self.tab_wait(tab, 5)

    def tab_get(self, tab, url, xpath_txt='html', backup_xpath_txt='html', timeout=3, ip_timeout=60):
        """Load ``url`` in the browser's latest tab until an expected element appears.

        The ``tab`` argument is ignored; ``self.browser.latest_tab`` is always
        used.  The loop ends when:

        * the page title contains ``error-404``;
        * ``xpath_txt`` matches;
        * a captcha message containing ``机器人`` was shown and has since
          disappeared (polled every ``ip_timeout`` seconds); or
        * a non-default ``backup_xpath_txt`` matches.

        Otherwise the URL is requested again.
        """
        tab = self.browser.latest_tab

        while True:
            print('正在获取', url, '的数据')
            tab.get(url)
            self.tab_wait(tab, timeout)

            title_ele = tab.ele('x://title')
            if title_ele and ('error-404' in title_ele.text):
                print('没有这个页面')
                break

            if tab.ele(f'x:{xpath_txt}'):
                print('第1个xpath找到的')
                break

            # Separate names for the backup and captcha elements: sharing one
            # variable made the backup check below test the captcha element.
            backup_ele = tab.ele(f'x:{backup_xpath_txt}')
            if backup_ele:
                captcha_ele = tab.ele('x://div[@class="px-captcha-message"]')
                if captcha_ele and ('机器人' in captcha_ele.text):
                    while True:
                        captcha_ele = tab.ele('x://div[@class="px-captcha-message"]')
                        if not captcha_ele or ('机器人' not in captcha_ele.text):
                            break

                        print(f'还没解开人机验证等， {ip_timeout} 秒后尝试')
                        tab.wait(ip_timeout)

                    break

            if backup_xpath_txt != 'html' and backup_ele:
                print('第2个xpath找到的')
                break

            print('没有请求到元素，重新请求中')

        return tab

    def init_tab(self):
        """Start a headless DrissionPage Chromium and keep its latest tab."""
        co = ChromiumOptions()
        # Hard-coded Windows install path of Chrome.
        co.set_browser_path(r'C:\Program Files\Google\Chrome\Application\chrome.exe')
        co.auto_port()
        co.headless()

        self.browser = Chromium(co)

        self.tab = self.browser.latest_tab
        self.tab.set.window.max()

    def handle_img_url(self, raw_img_url):
        """Return ``raw_img_url`` unchanged (hook for per-site URL rewriting)."""
        return raw_img_url

    def save_all_data(self, product_data_list):
        """Write every row in ``product_data_list`` to the CSV, counting rows."""
        for product_data in product_data_list:
            self.cnt += 1
            self.save_csv(product_data)
            print('第', self.cnt, '行保存完成！')

    def cancel_translate(self, url):
        """Open ``url`` and switch off the page's translate toggle."""
        tab = self.tab_get(self.tab, url, '//div[@class="collection_item overflow-hidden"]')
        tab.actions.move_to(tab.ele('x://div[@class="gt-translate-btn-bg"]'))
        tab.wait(2)
        tab.actions.click(tab.ele('x://div[@class="gt-translate-switch"]'))
        tab.wait(2)

    def click_accept(self, tab=None, url=None, xpath_txt='html'):
        """Click the policy "accept" button if present.

        When no ``tab`` is given, ``url`` is loaded first via ``tab_get``.
        """
        if not tab and url:
            tab = self.tab_get(tab, url, xpath_txt=xpath_txt)

        temp_ele = tab.ele('x://div[contains(@class, "policy_acceptBtn")]')
        if temp_ele:
            self.ele_click(tab, temp_ele)

    def get_img_url_list(self, tab, timeout=2):
        """Return the image URLs of the product gallery currently shown in ``tab``.

        If any gallery slot has no ``<img>`` yet (or an ``<img>`` without a
        ``src``), the method sleeps and re-reads the whole gallery.  URLs that
        do not contain ``https:`` get that scheme prepended.
        """
        while True:
            img_url_list = []

            for container in tab.eles('x://div[contains(@class, "item-image")]'):
                img_ele = container.ele('x:./picture/img')
                # A missing src is treated like a missing <img>: not loaded yet.
                img_url = img_ele.attr('src') if img_ele else None

                if not img_url:
                    print(f'图片未加载完成，等待 {timeout} 秒重新获取图片')
                    self.random_sleep(timeout)
                    break

                if 'https:' not in img_url:
                    img_url = 'https:' + img_url

                img_url_list.append(img_url)
            else:
                # Every slot yielded a URL (or there were no slots at all).
                return img_url_list

    def tab_eles(self, tab, xpath_txt, timeout=3):
        """Return all elements matching ``xpath_txt``, retrying while the lookup raises."""
        while True:
            try:
                # Honour the caller's xpath (previously a fixed one was used).
                return tab.eles(f'x:{xpath_txt}')
            except Exception as e:
                print(str(e))
                self.random_sleep(timeout)

    def get_size_list(self, tab):
        """Return the distinct size labels from the size dropdown, in page order."""
        size_ele_list = self.tab_eles(tab, '//ul[contains(@class, "dropdown-menu")]/li/a')
        # dict.fromkeys drops duplicates while keeping first-seen order.
        return list(dict.fromkeys(size_ele.text.strip() for size_ele in size_ele_list))

    def color_dict_index(self, color_dict_filter, _color):
        """Return the index of the entry whose ``'name'`` equals ``_color``, or -1."""
        for color_dict_id, color_dict in enumerate(color_dict_filter):
            if _color == color_dict['name']:
                return color_dict_id

        return -1

    @staticmethod
    def _make_handle(_type, url):
        """Build the product key: type + second-to-last URL path segment, slugified."""
        return (_type + '-' + url.split('?')[0].split('/')[-2]).replace(' ', '-').lower()

    def _get_prices(self, tab):
        """Return ``(price, compare_at_price)`` as strings without ``$`` or commas.

        When a sale price is shown it is the price and the list price becomes
        the compare-at price; otherwise the list price is the price and the
        compare-at price is empty.
        """
        sale_ele = tab.ele('x://div[contains(@class, "product-sale-price")]')
        list_price = tab.ele('x://*[contains(@class, "list-price")]').text.replace('$', '').replace(',', '').strip()

        if sale_ele:
            sale_price = sale_ele.text.replace('Now', '').replace('$', '').replace(',', '').strip()
            return sale_price, list_price

        return list_price, ''

    def product_detail_parse(self, url, _type):
        """Scrape one product page into a list of Shopify CSV row dicts.

        Each colour swatch is clicked in turn.  For every (colour, size) pair
        one variant row is produced, carrying the first gallery image,
        followed by one image-only row per remaining gallery image.  A colour
        name seen more than once gets a ``_<n>`` suffix.
        """
        # NOTE(review): str.title() also upper-cases the letter after an
        # apostrophe ("Women's" -> "Women'S") and lower-cases "OFFLINE";
        # kept as-is because Type, Collection and SKU values depend on it.
        _type = _type.replace('-', ' ').title()
        product_data_list = []
        sku_id = 0
        product_color_dict_filter = []

        data = deepcopy(self.init_data)
        data['Type'] = _type
        data['Collection'] = data['Type']
        data['Handle'] = self._make_handle(_type, url)

        details_body_xpath = '//div[contains(@class, "collapsible-list")]/div[./div[contains(@data-track-args, "details:clicked")]]/div[contains(@class, "accordion-item-body")]'
        swatch_xpath = 'x://div[contains(@class, "product-swatches")]/div[contains(@class, "swatch")]'

        tab = self.tab_get(self.tab, url, details_body_xpath)  # request no. 2

        # Click the "details" accordion header, then read its body as the description.
        body_click_ele = tab.ele('x://div[contains(@class, "collapsible-list")]/div/div[contains(@data-track-args, "details:clicked")]')
        self.ele_click(tab, body_click_ele, 3)
        data['Body (HTML)'] = tab.ele(f'x:{details_body_xpath}').html

        product_color_click_len = len(tab.eles(swatch_xpath))
        for product_color_click_id in range(product_color_click_len):
            # Swatches are looked up again on every pass rather than reusing
            # element handles obtained before the previous click.
            try:
                product_color_click_ele = tab.eles(swatch_xpath)[product_color_click_id]
            except IndexError as e:
                print(str(e))
                break

            self.ele_click(tab, product_color_click_ele, 3)

            product_img_url_list = self.get_img_url_list(tab)
            product_img_url_len = len(product_img_url_list)

            product_size_list = self.get_size_list(tab)
            product_size_len = len(product_size_list)

            product_color = tab.ele('x://span[contains(@class, "product-color")]').text.strip()
            product_color_dict_id = self.color_dict_index(product_color_dict_filter, product_color)

            if product_color_dict_id == -1:
                product_color_dict_filter.append({'name': product_color, 'num': 1})
            else:
                product_color_dict_filter[product_color_dict_id]['num'] += 1
                product_color = product_color + '_' + str(product_color_dict_filter[product_color_dict_id]['num'])

            data['Option1 Value'] = product_color

            if not product_size_list:
                # No sizes means no rows for this colour; skip the page reads.
                continue

            # Title and prices do not depend on the size, so read them once
            # per colour instead of once per size.
            product_title = tab.ele('x://h1[contains(@class, "product-name")]').text.strip()
            product_price, product_compare_price = self._get_prices(tab)
            # Guard against an empty gallery instead of raising IndexError.
            first_img_url = product_img_url_list[0] if product_img_url_list else ''

            for product_size in product_size_list:
                sku_id += 1
                _data = deepcopy(data)

                _data['Option2 Value'] = product_size
                _data['Title'] = product_title
                _data['Variant SKU'] = (_type + '-' + _data['Title'] + '-' + _data['Option1 Value'] + '-' + _data['Option2 Value'] + str(sku_id)).replace(' ', '-').strip()

                _data['Variant Price'] = product_price
                _data['Variant Compare At Price'] = product_compare_price

                _data['Image Src'] = first_img_url
                _data['Image Position'] = 1
                _data['Variant Image'] = _data['Image Src']

                # Double quotes inside the f-string keep this valid before Python 3.12.
                print(f'第{self.id}个产品 {url} 的第{sku_id}个sku的 {_data["Option1 Name"]} 的个数为：{product_color_click_len}，{_data["Option2 Name"]} 的个数为：{product_size_len}，图片个数为：{product_img_url_len}')

                product_data_list.append(_data)
                print(_data)

                # Remaining gallery images become image-only rows on the same handle.
                for position, img_url in enumerate(product_img_url_list[1:], start=2):
                    temp_data = deepcopy(self.empty_data)

                    temp_data['Handle'] = _data['Handle']
                    temp_data['Published'] = 'TRUE'
                    temp_data['Image Src'] = img_url
                    temp_data['Image Position'] = position

                    product_data_list.append(temp_data)
                    print(temp_data)

        return product_data_list

    def parse(self, url, _type):
        """Collect product links from a category page and scrape each product.

        The listing is scrolled until at least 100 distinct products are found
        or the count has stayed the same for 15 consecutive checks.  Finished
        products are recorded one key per line in ``./filter.txt`` and skipped
        on later runs.
        """
        link_xpath = '//div[contains(@class, "results-list")]//div[contains(@class, "tile-media")]/a[contains(@class, "_tile-link")]'

        tab = self.tab_get(self.tab, url, link_xpath)  # request no. 1

        # -------------------------------------------------------
        # Collect the URLs of all products on the listing.

        last_product_url_len = 0
        load_cnt = 0

        while True:
            product_url_list = []
            product_uuid_filter = set()

            for product_url_ele in tab.eles(f'x:{link_xpath}'):
                product_url = product_url_ele.attr('href')

                product_uuid = self._make_handle(_type, product_url)
                if product_uuid in product_uuid_filter:
                    continue

                product_url_list.append(product_url)
                product_uuid_filter.add(product_uuid)

            product_url_len = len(product_url_list)
            print(f'加载了 {product_url_len} 个产品')

            if product_url_len >= 100:
                break

            if last_product_url_len == product_url_len:
                load_cnt += 1
                print(f'重复获取产品url {product_url_len} 个数 {load_cnt} 次')
                if load_cnt >= 15:
                    break
            else:
                last_product_url_len = product_url_len
                load_cnt = 0

            self.tab_run_js(tab, 'window.scrollBy(0, 2000)')

        # -------------------------------------------------------

        # Load finished keys once, as whole lines: a substring test against the
        # raw file text would also match keys that merely contain this one.
        finished_uuids = set()
        try:
            with open('./filter.txt', 'r', encoding='utf-8') as f:
                finished_uuids = {line.strip() for line in f if line.strip()}
        except FileNotFoundError:
            # First run or deleted file: nothing has been finished yet.
            pass

        for product_url in product_url_list:
            self.id += 1

            product_uuid = self._make_handle(_type, product_url)

            if product_uuid in finished_uuids:
                print(self.id, '已完成')
                continue

            print(f'第{self.id}个产品开始')

            product_data_list = self.product_detail_parse(product_url, _type)

            self.save_all_data(product_data_list)

            print(f'第{self.id}个产品结束')

            with open('./filter.txt', 'a', encoding='utf-8') as f:
                f.write(product_uuid + '\n')
            finished_uuids.add(product_uuid)

    def _close_resources(self):
        """Close the CSV file and quit the browser, whichever of them is open."""
        try:
            if self.file:
                self.file.close()
        finally:
            if self.browser:
                self.browser.quit()

    def one_product_test(self, url, _type='One'):
        """Scrape a single product page into ``./test.csv`` (manual test helper)."""
        self.file = open('./test.csv', 'w', newline='', encoding='utf-8-sig')
        try:
            self.writer = csv.DictWriter(self.file, fieldnames=self.field_names)
            self.writer.writeheader()

            logging.captureWarnings(True)

            self.init_tab()

            self.id += 1
            print(self.id, '开始')

            product_data_list = self.product_detail_parse(url, _type)

            self.save_all_data(product_data_list)

            print(self.id, '结束')
        finally:
            # Release the file and browser even when scraping raises.
            self._close_resources()

    def multi_product_run(self, url, file_name, _type, is_continue=False):
        """Scrape a whole category into ``./<file_name>.csv``.

        With ``is_continue`` the CSV is appended to and ``./filter.txt`` is
        kept, so finished products are skipped.  Otherwise the CSV is
        rewritten with a header and ``./filter.txt`` is emptied.
        """
        self.file = open(f'./{file_name}.csv', 'a' if is_continue else 'w', newline='', encoding='utf-8-sig')
        try:
            self.writer = csv.DictWriter(self.file, fieldnames=self.field_names)

            if not is_continue:
                self.writer.writeheader()

                with open('./filter.txt', 'w', encoding='utf-8') as f:
                    f.write('')
            elif self.file.tell() == 0:
                # Resuming into a new or empty file: it still needs a header.
                self.writer.writeheader()

            self.init_tab()

            logging.captureWarnings(True)

            self.parse(url, _type)
        finally:
            # Release the file and browser even when scraping raises.
            self._close_resources()

    def run(self, is_continue=False):
        """Scrape the category selected by the hard-coded ``type_id``."""
        type_id = 4
        self.multi_product_run(self.url_list[type_id], self.file_name_list[type_id], self.type_list[type_id], is_continue)

        url_1 = ''

        # self.one_product_test(url_1, "Women's Clothing: Tops, Bottoms & Dresses")

if __name__ == '__main__':
    # Script entry point: build the scraper and start the configured category run.
    Ae().run()

#0 https://www.ae.com/us/en/c/aerie/bras/cat4840012?pagetype=plp&_gl=1*e39ufq*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp # 3

#1 https://www.ae.com/us/en/c/aerie/undies/cat6460002?pagetype=plp&_gl=1*e39ufq*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp # 4

#2 https://www.ae.com/us/en/c/aerie/activewear/cat1090003?pagetype=plp&_gl=1*xk9an1*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp # 5

#3 https://www.ae.com/us/en/c/aerie/clothing-accessories/pajamas/cat6460089?pagetype=plp&_gl=1*1cku7np*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp # 6

#4 https://www.ae.com/us/en/c/aerie/swimsuits/cat7030095?pagetype=plp&_gl=1*1cku7np*_up*MQ..*_gs*MQ..&gclid=Cj0KCQjwxdXBBhDEARIsAAUkP6jO5JRCLi05MS7cwXUZf8jo-TiKM-oqM05V9F--ZfsKjYdbmOOYlXEaAq1rEALw_wcB&gbraid=0AAAAADovoqIZXp-cUdvcul1lf1rQtszYp # 7

# 改url
# 改文件名
# 改类型
# 改init_data的字段名

# 上传代码的时候把True给去掉 √
# 试试全部产品数据 √
# 试试部分产品数据 √

"""
'ea_3_bras',
'ea_4_womens_underwear',
'ea_5_offline_by_aerie',
'ea_6_womens_pajamas',
'ea_7_womens_swimsuits_&_swimwear'
"""
# Published: 2025-05-30 09:59
# Search results