# Search results
#
# ×
#
# Search results will be displayed here.
#
# 🎹 https://www.wybotpool.com/collections/best-seller?page=1

import random, csv, requests, logging, time, json
from selenium import webdriver
from copy import deepcopy
from time import sleep
from lxml import etree

class Wybotpool:
    """Scrape the wybotpool.com "Best Seller" collection into a product CSV.

    Product data is read from the JSON filter endpoint in ``self.url`` and
    written to ``wybotpool.csv`` with the columns listed in
    ``self.field_names``: one main row per product followed by one row for
    each additional product image.  The Selenium helpers (``get_driver``,
    ``driver_continue``, ``get_page_html``) are available for pages that need
    a real browser but are not used by ``run()`` itself.
    """

    def __init__(self):
        # JSON filter endpoint; the two ``{}`` slots are filled by
        # get_index_json() with a millisecond timestamp and the page number.
        self.url = 'https://services.mybcapps.com/bc-sf-filter/filter?t={}&_=pf&shop=wybotpool.myshopify.com&page={}&limit=24&sort=manual&locale=en&event_type=collection&build_filter_tree=true&sid=fdaf53b8-89aa-4672-b23a-85c7e426853f&pg=collection_page&zero_options=true&product_available=false&variant_available=false&sort_first=available&urlScheme=2&collection_scope=435527647509'
        # Headers sent with the JSON filter requests.
        self.index_headers = {
            "accept": "*/*",
            "accept-language": "zh-CN,zh;q=0.9",
            "origin": "https://www.wybotpool.com",
            "priority": "u=1, i",
            "referer": "https://www.wybotpool.com/",
            "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "empty",
            "sec-fetch-mode": "cors",
            "sec-fetch-site": "cross-site",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36"
        }
        # Headers for HTML product pages (not referenced by the methods below).
        self.detail_headers = {
            "accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7",
            "accept-language": "zh-CN,zh;q=0.9",
            "cache-control": "max-age=0",
            "if-none-match": "\"cacheable:a54c17b792eaa6c60a5d927ae41c17f1\"",
            "priority": "u=0, i",
            "sec-ch-ua": "\"Google Chrome\";v=\"135\", \"Not-A.Brand\";v=\"8\", \"Chromium\";v=\"135\"",
            "sec-ch-ua-mobile": "?0",
            "sec-ch-ua-platform": "\"Windows\"",
            "sec-fetch-dest": "document",
            "sec-fetch-mode": "navigate",
            "sec-fetch-site": "same-origin",
            "sec-fetch-user": "?1",
            "upgrade-insecure-requests": "1",
            "user-agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/135.0.0.0 Safari/537.36",
            # Captured session cookie.  It is written as two adjacent literals
            # (joined by the compiler) so the value stays on valid source lines.
            'Cookie': (
                'secure_customer_sig=; localization=US; cart_currency=USD; _shopify_y=3B8B3179-7b52-464B-bd4b-fac7ec09965c; _tracking_consent=%7B%22con%22%3A%7B%22CMP%22%3A%7B%22a%22%3A%22%22%2C%22m%22%3A%22%22%2C%22p%22%3A%22%22%2C%22s%22%3A%22%22%7D%7D%2C%22v%22%3A%222.1%22%2C%22region%22%3A%22USCA%22%2C%22reg%22%3A%22%22%2C%22purposes%22%3A%7B%22a%22%3Atrue%2C%22p%22%3Atrue%2C%22m%22%3Atrue%2C%22t%22%3Atrue%7D%2C%22display_banner%22%3Afalse%2C%22sale_of_data_region%22%3Atrue%2C%22consent_id%22%3A%223B2ED241-81f2-45A0-9d20-bea9711added%22%7D; _orig_referrer=; _landing_page=%2Fcollections%2Fbest-seller%3Fpage%3D1; _gcl_au=1.1.702472632.1745209070; ex_id=VI5QtXqBAa; section_order_id=HVn9yMwS41; OT_FBPLID=fb.1.1745209072852.1745209146890; _ga=GA1.1.1795863811.1745209073; _ttp=01JSB929JQBW44WS9EK3GDH7YZ_.tt.0; _fbp=fb.1.1745209078243.95121229751165592; _clck=19e6ku2%7C2%7Cfv9%7C0%7C1937; _ama=1795863811.1745209073; _hjSessionUser_4938866=eyJpZCI6ImZkYTliMTBlLWNiZmEtNTNiOC05MmZlLTM1NzM1MGFhZmIzZCIsImNyZWF0ZWQiOjE3NDUyMDkwNzc5OTQsImV4aXN0aW5nIjp0cnVlfQ==; obj_fbp_fbc={"fbc":null,"fbp":"fb.1.1745209078243.95121229751165592"}; uid=mk898f9559-902b-44d4-a76d-beca48df7250; _mk_sync=1745234293186; _shopify_sa_p=; _hjSession_4938866=eyJpZCI6ImFjZGQxZDVhLWU0Y2UtNDcxNC1iN2JjLTE1MzIwNmFiZWIyMCIsImMiOjE3NDUyMjYzODY5NjEsInMiOjAsInIiOjAsInNiIjowLCJzciI6MCwic2UiOjAsImZzIjowLCJzcCI6MX0=; shopify_pay_redirect=pending; _shopify_s=BF685AEA-51a7-475F-9e64-5f1406f70bb7; _shopify_sa_t=2025-04-21T10%3A12%3A42.052Z; _ga_EDZ21YSM97=GS1.1.1745226379.3.1.1745230362.0.0.0; _ga_1PW4FTN3H3=GS1.1.1745226386.3.1.1745230364.0.0.0; ttcsid_CVPO6QJC77UAIFNB4L2G=1745226381110.3.1745230363897; '
                '__kla_id=eyJjaWQiOiJPVFUzWlRVMU1tRXRPV0kzTWkwME5ETTJMVGhtTVRJdE0ySTFZV016WVdNNE16SmkiLCIkcmVmZXJyZXIiOnsidHMiOjE3NDUyMDkwNzQsInZhbHVlIjoiIiwiZmlyc3RfcGFnZSI6Imh0dHBzOi8vd3d3Lnd5Ym90cG9vbC5jb20vY29sbGVjdGlvbnMvYmVzdC1zZWxsZXI/cGFnZT0xIn0sIiRsYXN0X3JlZmVycmVyIjp7InRzIjoxNzQ1MjMwMzY0LCJ2YWx1ZSI6Imh0dHBzOi8vd3d3Lnd5Ym90cG9vbC5jb20vY29sbGVjdGlvbnMvYmVzdC1zZWxsZXI/cGFnZT0xIiwiZmlyc3RfcGFnZSI6Imh0dHBzOi8vd3d3Lnd5Ym90cG9vbC5jb20vcHJvZHVjdHMvd3lib3QtYTEtY29yZGxlc3Mtcm9ib3RpYy1wb29sLWNsZWFuZXIifX0=; ttcsid=1745226381133.3.1745230364081; _ga_650TKPNTV2=GS1.1.1745226379.3.1.1745230366.0.0.0; kalles_shown_pages=14; _ga_DSVMTPXYY8=GS1.1.1745226379.3.1.1745230368.53.0.600900298; _uetsid=996d10301e6711f08c2779907bd9a513; _uetvid=996d35501e6711f0ba536b675c3df293; pt_79aglo4g=deviceId%3D0ab5db63-031d-41f7-92f5-7462afb704a3%26sessionId%3D68eac1d5-1ca6-4c26-b12e-8628afe00ae7%26accountId%3D%26vn%3D3%26pvn%3D9%26lastActionTime%3D1745230370460%26; keep_alive=eyJ2IjoxLCJ0cyI6MTc0NTIzMDU4NDUyOCwiZW52Ijp7IndkIjowLCJ1YSI6MSwiY3YiOjEsImJyIjoxfSwiYmh2Ijp7Im1hIjoxNSwiY2EiOjAsImthIjowLCJzYSI6MywidCI6MjIxLCJubSI6MSwidmMiOjB9LCJzZXMiOnsicCI6NiwicyI6MTc0NTIyMzQ4OTg2MSwiZCI6Njk0NH19'
            ),
        }

        # Running product counter used for progress output only; there is no
        # id column in the CSV.
        self.id = 0

        # CSV column order; also the key set of every row dict.
        self.field_names = ['Handle', 'Title', 'Body (HTML)', 'Vendor', 'Type', 'Tags', 'Published', 'Option1 Name',
                            'Option1 Value', 'Option2 Name', 'Option2 Value', 'Option3 Name', 'Option3 Value',
                            'Variant SKU', 'Variant Grams', 'Variant Inventory Tracker', 'Variant Inventory Qty',
                            'Variant Inventory Policy', 'Variant Fulfillment Service', 'Variant Price',
                            'Variant Compare At Price', 'Variant Requires Shipping', 'Variant Taxable',
                            'Variant Barcode', 'Image Src', 'Image Position', 'Image Alt Text', 'Gift Card',
                            'SEO Title', 'SEO Description', 'Variant Image', 'Status', 'Collection']
        # Template for the extra image rows: every column blank.
        self.empty_data = dict.fromkeys(self.field_names, '')
        # Template for the main row of a product: blank columns plus fixed defaults.
        self.init_data = {
            **self.empty_data,
            'Published': 'TRUE',
            'Option1 Name': 'Color',
            'Option2 Name': 'Size',
            'Variant Inventory Tracker': 'Shopify',
            'Variant Inventory Qty': '99999',
        }

        # Set by run(): the open CSV file object and its DictWriter.
        self.file = None
        self.writer = None

    def simulated_smooth_scroll(self, driver, step=1000, interval=0.5, timeout=30):
        """Scroll the page down in increments until the bottom or ``timeout``.

        Args:
            driver: Object exposing ``execute_script(js)``.
            step: Maximum number of pixels scrolled per iteration.
            interval: Seconds to wait after a full ``step``; shorter steps wait
                proportionally less.
            timeout: Upper bound, in seconds, on the whole scroll.
        """
        start_time = time.time()
        last_height = driver.execute_script("return document.documentElement.scrollHeight")
        current_position = 0

        while time.time() - start_time < timeout:
            # Distance still to cover, based on the latest known page height.
            remaining = last_height - current_position

            # The final step is shortened so we do not overshoot the bottom.
            current_step = min(step, remaining) if remaining > 0 else 0

            if current_step <= 0:
                break

            driver.execute_script(f"window.scrollBy(0, {current_step})")
            current_position += current_step

            # Give the scroll (and any content it triggers) time to settle.
            time.sleep(interval * (current_step / step))

            new_height = driver.execute_script(
                "return document.documentElement.scrollHeight"
            )

            # The page grew while scrolling, so extend the target accordingly.
            if new_height > last_height:
                last_height = new_height

    def _load_page(self, driver, url, xpath_txt=None, is_turn=False):
        """Load ``url`` in ``driver``, retrying until the load succeeds.

        A load counts as successful when no exception is raised by the
        navigation, the optional scroll, and the ``xpath_txt`` lookup.  When
        ``xpath_txt`` is falsy a short random pause is used instead of a
        lookup.  ``KeyboardInterrupt``/``SystemExit`` are not caught, so the
        retry loop can be interrupted.
        """
        while True:
            try:
                print('正在获取', url, '的页面数据')
                driver.get(url)

                if is_turn:
                    sleep(1)
                    self.simulated_smooth_scroll(driver)

                if xpath_txt:
                    driver.find_element('xpath', xpath_txt)
                else:
                    self.random_sleep(2)
            except Exception:
                print(url, '没定位到,重新请求...')
            else:
                return

    def get_driver(self, url, xpath_txt=None, is_turn=False):
        """Start a headless Chrome session, load ``url`` and return the driver.

        Args:
            url: Page to open.
            xpath_txt: Optional XPath that must be locatable for the load to
                count as successful.
            is_turn: When true, scroll to the bottom of the page after loading.

        Returns:
            The live driver; the caller is responsible for quitting it.
        """
        options = webdriver.ChromeOptions()
        options.add_argument('--headless')
        options.add_argument('--disable-gpu')
        options.page_load_strategy = "none"

        driver = webdriver.Chrome(options=options)
        try:
            driver.implicitly_wait(10)
            driver.maximize_window()
            self._load_page(driver, url, xpath_txt, is_turn)
        except BaseException:
            # Do not leave a browser process behind if setup is aborted.
            driver.quit()
            raise

        return driver

    def driver_continue(self, driver, url, xpath_txt, is_turn=False):
        """Navigate an existing ``driver`` to ``url``, retrying until it loads.

        Takes the same ``xpath_txt`` / ``is_turn`` arguments as get_driver().
        """
        self._load_page(driver, url, xpath_txt, is_turn)

    def get_page_html(self, url, xpath_txt, is_turn=False):
        """Render ``url`` in a throwaway browser and return it parsed by lxml."""
        driver = self.get_driver(url, xpath_txt, is_turn=is_turn)
        try:
            page_source = driver.page_source
        finally:
            # quit() ends the whole session; close() only closes the window.
            driver.quit()

        return etree.HTML(page_source)

    def writer_to_file(self, data, mode, encoding=None):
        """Dump ``data`` to ``./text.html`` (debug helper).

        Args:
            data: ``str`` for text modes, ``bytes`` for binary modes.
            mode: ``open()`` mode; a mode containing ``'b'`` writes binary.
            encoding: Text encoding; ignored for binary modes.
        """
        # Binary-ness is a property of the mode, and open() rejects an
        # encoding argument in binary mode.
        file_encoding = None if 'b' in mode else encoding
        with open('./text.html', mode, encoding=file_encoding) as out:
            out.write(data)

        print('写入文件成功!')

    def random_sleep(self, timeout=1):
        """Sleep for ``timeout`` seconds plus a random extra in [0, 1)."""
        sleep(random.random() + timeout)

    def save_csv(self, data):
        """Write one row to the CSV, taking every column in ``field_names`` from ``data``.

        Raises:
            KeyError: If ``data`` lacks one of the columns.
        """
        self.writer.writerow({name: data[name] for name in self.field_names})

    def get_response(self, url, header, timeout=30):
        """GET ``url`` with the given headers, pause briefly, and return the response.

        Args:
            url: Address to request.
            header: Mapping of HTTP request headers.
            timeout: Seconds passed to ``requests.get`` so a stalled
                connection fails instead of hanging forever.
        """
        print('正在获取', url, '的数据')

        response = requests.get(url, headers=header, timeout=timeout)
        self.random_sleep()

        return response

    def get_index_json(self, page):
        """Return the decoded JSON listing for ``page`` (1-based page number)."""
        t = str(int(time.time() * 1000))
        url = self.url.format(t, page)

        return self.get_response(url, self.index_headers).json()

    def get_total_page(self):
        """Return the number of listing pages parse() should request.

        NOTE(review): this is the API's ``total_page`` value plus one; what
        ``total_page`` counts is not visible from this file — confirm against
        the endpoint before changing the offset.
        """
        index_json = self.get_index_json(1)
        print('商品的总数:', index_json['total_product'])

        return index_json['total_page'] + 1

    def parse(self):
        """Fetch every listing page and write the CSV rows for its products.

        Each product yields a main row (title, handle, body, first SKU,
        prices, first image) and then one row per remaining image carrying
        only the handle, the published flag, the image URL and its position.
        Products without SKUs or images keep those columns blank.
        """
        base = deepcopy(self.init_data)
        base['Type'] = 'Best Seller'
        base['Collection'] = base['Type']

        total_page = self.get_total_page()
        print('总页数为:', total_page)

        for page in range(1, total_page + 1):
            index_json = self.get_index_json(page)

            for product_json in index_json['products']:
                self.id += 1
                print(self.id, '开始')

                # Start from a fresh copy so nothing leaks from the previous product.
                data = deepcopy(base)
                data['Title'] = product_json['title']
                data['Handle'] = data['Title'].lower().replace(' ', '-')
                data['Body (HTML)'] = product_json['body_html']
                skus = product_json['skus']
                data['Variant SKU'] = skus[0] if skus else ''
                data['Variant Price'] = product_json['price_min']
                data['Variant Compare At Price'] = product_json['compare_at_price_min']

                image_json_list = product_json['images_info']
                print('图片的个数为:', len(image_json_list))

                if image_json_list:
                    data['Image Src'] = image_json_list[0]['src']
                    data['Image Position'] = 1
                    data['Variant Image'] = data['Image Src']

                self.save_csv(data)
                print(data)

                # The remaining images go on their own rows, positions 2, 3, ...
                for position, image_json in enumerate(image_json_list[1:], start=2):
                    temp_data = deepcopy(self.empty_data)

                    temp_data['Handle'] = data['Handle']
                    temp_data['Published'] = 'TRUE'
                    temp_data['Image Src'] = image_json['src']
                    temp_data['Image Position'] = position

                    self.save_csv(temp_data)
                    print(temp_data)

                print(self.id, '结束')

    def run(self):
        """Create ``wybotpool.csv``, write the header and scrape all products.

        The file is closed when the scrape finishes or fails.
        """
        with open('wybotpool.csv', 'w', newline='', encoding='utf-8-sig') as csv_file:
            self.file = csv_file
            self.writer = csv.DictWriter(csv_file, fieldnames=self.field_names)
            self.writer.writeheader()

            logging.captureWarnings(True)

            self.parse()

if __name__ == '__main__':
    # Script entry point: build the scraper and write wybotpool.csv.
    Wybotpool().run()