示例#1
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return the products offered at one of the two supported URLs.

        ``cls.prepago_url`` yields a single constant prepaid-plan entry.
        ``cls.equipos_url`` is downloaded, and every published item of the
        JavaScript ``catalog`` variable embedded in the page becomes a
        product.

        Raises:
            Exception: if ``url`` is neither of the two supported URLs.
        """
        if url == cls.prepago_url:
            # Prepaid plan: a fixed entry, nothing is downloaded.
            # NOTE(review): the sixth argument reads 'Claro Prepago' while
            # the name is 'GTD Prepago' - possibly a copy/paste leftover;
            # confirm before changing it.
            return [
                Product('GTD Prepago', cls.__name__, category, url, url,
                        'Claro Prepago', -1, Decimal(0), Decimal(0), 'CLP')
            ]

        if url != cls.equipos_url:
            raise Exception('Invalid URL: ' + url)

        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        # The capture runs to the end of the line; its last character is
        # dropped before the remainder is parsed as JSON.
        (captured,) = re.search(r'var catalog = (.+)', page_source).groups()
        catalog = json.loads(captured[:-1])

        products = []
        for entry in catalog['products']:
            if not entry['published']:
                continue

            name = entry['name']
            sku = entry['id']
            price = Decimal(remove_words(entry['leasing_price']))
            description = html_to_markdown(entry['description'])

            picture_urls = []
            for image in entry['images']:
                picture_urls.append(
                    'https://nuevo.gtdmanquehue.com' + image['options']['url'])

            products.append(
                Product(name, cls.__name__, 'Cell', url, url, sku, -1,
                        price, price, 'CLP',
                        sku=sku,
                        cell_plan_name='GTD Prepago',
                        description=description,
                        picture_urls=picture_urls))

        return products
示例#2
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Dispatch ``url`` to the matching scraper and return its products.

        * ``cls.prepago_url`` -> one constant prepaid-plan entry.
        * URLs containing ``entel.cl/planes/`` -> ``cls._plans``.
        * URLs containing ``miportal.entel.cl`` -> ``cls._celular_postpago``.

        Raises:
            Exception: if ``url`` matches none of the cases above.
        """
        print(url)

        if url == cls.prepago_url:
            # Prepaid plan: a fixed entry, nothing is downloaded.
            return [
                Product('Entel Prepago', cls.__name__, category, url, url,
                        'Entel Prepago', -1, Decimal(0), Decimal(0), 'CLP')
            ]

        if 'entel.cl/planes/' in url:
            # Postpaid plans
            return list(cls._plans(url, extra_args))

        if 'miportal.entel.cl' in url:
            # Postpaid handsets
            return list(cls._celular_postpago(url, extra_args))

        raise Exception('Invalid URL: ' + url)
示例#3
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Fetch the JSON listing at ``url``; return one product per variant.

        Each entry of ``product_listing -> variants`` becomes a Product
        whose stock is -1 when the entry is flagged ``available`` and 0
        otherwise. ``cls.url`` is used as the product URL of every entry.
        """
        # NOTE(review): this Basic credential is hard-coded in the source.
        basic_credential = 'Basic OGRiZDViZGY4M2Y5NzA3MTlkY' \
                           'jE2NmRiODdhZDZhNWQ='

        session = session_with_proxy(extra_args)
        session.headers['Authorization'] = basic_credential
        listing = json.loads(session.get(url).text)

        products = []
        for variant in listing['product_listing']['variants']:
            stock = -1 if variant['available'] else 0
            name = variant['title'].strip()
            sku = str(variant['id'])
            price = Decimal(variant['price'])

            products.append(
                Product(name, cls.__name__, category, cls.url, url, sku,
                        stock, price, price, 'CLP', sku=sku))

        return products
示例#4
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Walk the paginated JSON listing behind ``url`` and collect products.

        Pages of 50 entries are requested by appending ``&_from=..&_to=..``
        to ``url`` until an empty page is returned.

        Raises:
            Exception: if the very first page is empty.
        """
        print(url)
        session = session_with_proxy(extra_args)

        page_size = 50
        offset = 0
        products = []

        while True:
            # e.g. &_from=0&_to=49 for the first page
            target_url = '{}&_from={}&_to={}'.format(
                url, offset, offset + page_size - 1)
            entries = json.loads(session.get(target_url).text)

            if not entries:
                if offset == 0:
                    raise Exception('Empty category: ' + target_url)
                break

            for entry in entries:
                name = entry['productName']
                sku = entry['productReference']
                product_url = entry['link']

                # Stock, price and pictures are all taken from the first
                # item (and its first seller) only.
                first_item = entry['items'][0]
                offer = first_item['sellers'][0]['commertialOffer']
                stock = offer['AvailableQuantity']
                price = Decimal(offer['Price'])

                picture_urls = [
                    image['imageUrl'] for image in first_item['images']
                ]
                description = html_to_markdown(entry['description'])

                products.append(
                    Product(name, cls.__name__, category, product_url, url,
                            sku, stock, price, price, 'CLP',
                            sku=sku,
                            part_number=sku,
                            description=description,
                            picture_urls=picture_urls))

            offset += page_size

        return products
示例#5
0
 def products_for_url(cls, url, category=None, extra_args=None):
     """Dispatch ``url`` to the matching scraper and return its products.

     * ``cls.prepago_url`` -> one constant prepaid-plan entry.
     * ``cls.planes_url`` -> ``cls._plans``.
     * URLs containing ``/equipos/`` -> ``cls._celular_postpago``.

     Raises:
         Exception: if ``url`` matches none of the cases above.
     """
     if url == cls.prepago_url:
         # Prepaid plan: a fixed entry, nothing is downloaded.
         return [
             Product('WOM Prepago', cls.__name__, category, url, url,
                     'WOM Prepago', -1, Decimal(0), Decimal(0), 'CLP')
         ]

     if url == cls.planes_url:
         # Postpaid plans
         return list(cls._plans(url, extra_args))

     if '/equipos/' in url:
         # Postpaid handsets
         return list(cls._celular_postpago(url, extra_args))

     raise Exception('Invalid URL: ' + url)
示例#6
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Name, SKU, price, description and pictures are read from the HTML;
        the stock is always reported as -1 and the currency as ARS.
        """
        session = session_with_proxy(extra_args)
        page = BeautifulSoup(session.get(url).text, 'html.parser')

        name = page.find('div', 'product-name').text.strip()
        sku = page.find('input', {'name': 'product'})['value'].strip()

        # Remove dots and the currency sign, then turn the comma into a
        # dot (presumably the page uses "1.234,56" formatting) so that
        # Decimal can parse the amount.
        raw_price = page.find('span', 'price').text
        for old, new in (('.', ''), ('$', ''), (',', '.')):
            raw_price = raw_price.replace(old, new)
        price = Decimal(raw_price)

        collateral = page.find('div', 'product-collateral')
        description = html_to_markdown(str(collateral))

        picture_urls = []
        for image in page.findAll('img', {'id': 'image'}):
            picture_urls.append(image['src'])

        product = Product(name, cls.__name__, category, url, url, sku, -1,
                          price, price, 'ARS',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)
        return [product]
示例#7
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one catalogue product page and query its stock via AJAX.

        Returns an empty list when the page answers 404 or lacks either a
        ``<body>`` or the ``h1#nombre-producto`` title. Otherwise a
        one-element list is returned; the condition is flagged as
        refurbished when the description contains 'seminuevo'.
        """
        session = session_with_proxy(extra_args)
        session.headers['user-agent'] = 'python-requests/2.21.0'
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')
        title_tag = soup.find('h1', {'id': 'nombre-producto'})

        if not (soup.find('body') and title_tag):
            return []

        name = title_tag.text.strip()
        sku = soup.find('div', {'itemprop': 'sku'}).text.strip()

        # The stock comes from a separate form-encoded POST, issued with
        # its own session.
        ajax_session = session_with_proxy(extra_args)
        ajax_headers = (
            ('user-agent', 'python-requests/2.21.0'),
            ('x-requested-with', 'XMLHttpRequest'),
            ('content-type', 'application/x-www-form-urlencoded'),
        )
        for header_name, header_value in ajax_headers:
            ajax_session.headers[header_name] = header_value

        stock_response = ajax_session.post(
            'https://catalogo.movistar.cl/fullprice/stockproducto/validar/',
            'sku=' + sku)
        stock = json.loads(stock_response.text)['respuesta']['cantidad']

        price_tag = soup.find('span', 'special-price').find('p')
        price = Decimal(remove_words(price_tag.text))

        description = html_to_markdown(
            str(soup.find('div', 'detailed-desktop')))

        condition = ('https://schema.org/RefurbishedCondition'
                     if 'seminuevo' in description
                     else 'https://schema.org/NewCondition')

        main_picture = soup.find('meta', {'property': 'og:image'})['content']

        product = Product(name, cls.__name__, category, url, url, sku,
                          stock, price, price, 'CLP',
                          condition=condition,
                          sku=sku,
                          description=description,
                          picture_urls=[main_picture])
        return [product]
示例#8
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page whose data lives in a JS ``value_product``.

        The array literal assigned to ``value_product`` is cut out of the
        page source, parsed as JSON, and its first element supplies name,
        SKU, stock and both prices. Pictures come from the ``a.fancybox``
        links in the HTML.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        soup = BeautifulSoup(page_source, 'html.parser')

        # The lazy capture stops right before the closing "];", so the
        # bracket is put back to obtain a valid JSON array.
        (captured,) = re.search(r'value_product = ([\s\S]+?)\];',
                                page_source).groups()
        entry = json.loads(captured + ']')[0]

        name = entry['descripcion'].strip()
        sku = entry['idproducto'].strip()
        stock = round(float(entry['stock']))
        offer_price = Decimal(entry['precioweb1'])
        normal_price = Decimal(entry['precioweb2'])

        long_description = entry['long_descrip']
        description = (html_to_markdown(long_description)
                       if long_description else None)

        picture_urls = []
        for link in soup.findAll('a', 'fancybox'):
            picture_urls.append(link['href'])

        product = Product(name, cls.__name__, category, url, url, sku,
                          stock, normal_price, offer_price, 'CLP',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)
        return [product]
示例#9
0
    def _get_product(cls, container, category):
        """Build a Product from one listing tile.

        The product URL is the tile's first link without its query string
        (prefixed with the site host when 'https' does not appear in it).
        The remaining fields come from the JSON stored in the
        ``data-product`` attribute of the ``div.product-tile`` element.
        """
        product_url = container.find('a')['href'].split('?')[0]
        if 'https' not in product_url:
            product_url = 'https://www.paris.cl' + product_url

        tile = container.find('div', 'product-tile')
        data = json.loads(tile['data-product'])

        name = data['name']
        sku = data['variant']
        normal_price = Decimal(data['price'])

        # 'dimension20' holds the offer price when it is not empty.
        raw_offer = data['dimension20']
        offer_price = Decimal(raw_offer) if raw_offer else normal_price

        return Product(name, cls.__name__, category, product_url,
                       product_url, sku, -1, normal_price, offer_price,
                       'CLP', sku=sku)
示例#10
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The stock is -1 only when 'LG' appears as a whole space-separated
        word of the upper-cased name; any other product gets stock 0.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1', 'product_title').text.strip()
        sku = soup.find('span', 'sku').text.strip()

        name_words = name.upper().split(' ')
        stock = -1 if 'LG' in name_words else 0

        # Strip the currency prefix first, then the remaining dots.
        amount_text = soup.find('p', 'price').find('span', 'amount').text
        price = Decimal(amount_text.replace('₲.', '').replace('.', ''))

        main_picture = soup.find('meta', {'name': 'og:image'})['content']

        product = Product(name, cls.__name__, category, url, url, sku,
                          stock, price, price, 'PYG',
                          sku=sku,
                          picture_urls=[main_picture])
        return [product]
示例#11
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Stock rules: no ``p.stock`` element -> -1; first word 'Agotado'
        -> 0; otherwise the first word is parsed as the integer quantity.
        The price inside ``<ins>`` is preferred when that tag is present.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1', 'product_title').text
        sku = soup.find('div', 'wd-wishlist-btn').find('a')['data-product-id']

        stock_tag = soup.find('p', 'stock')
        if not stock_tag:
            stock = -1
        else:
            first_word = stock_tag.text.split(' ')[0]
            stock = 0 if first_word == 'Agotado' else int(first_word)

        part_number_tag = soup.find('span', 'sku')
        part_number = (part_number_tag.text.strip()
                       if part_number_tag else None)

        price_tag = soup.find('p', 'price')
        discounted_tag = price_tag.find('ins')
        price_text = discounted_tag.text if discounted_tag else price_tag.text
        price = Decimal(price_text.replace('$', '').replace('.', ''))

        picture_urls = []
        for wrapper in soup.findAll('div', 'product-image-wrap'):
            picture_urls.append(wrapper.find('a')['href'])

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        product = Product(name, cls.__name__, category, url, url, sku,
                          stock, price, price, 'CLP',
                          sku=sku,
                          picture_urls=picture_urls,
                          description=description,
                          part_number=part_number)
        return [product]
示例#12
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The name comes from the ``Title`` meta tag, the SKU and price from
        form inputs, and the pictures from the ``data-zoom-image``
        attribute of the images inside the ``owl-carousel`` container.
        Stock is always reported as -1 and the currency as ARS.
        """
        session = session_with_proxy(extra_args)
        page = BeautifulSoup(session.get(url).text, 'html.parser')

        name = page.find('meta', {'name': 'Title'})['content'].strip()
        sku = page.find('input', {'name': 'id'})['value'].strip()
        price = Decimal(page.find('input', {'id': 'product_price'})['value'])

        specs = page.find('div', {'id': 'especificaciones-container'})
        description = html_to_markdown(str(specs))

        picture_urls = []
        for image in page.find('div', 'owl-carousel').findAll('img'):
            picture_urls.append(image['data-zoom-image'])

        product = Product(name, cls.__name__, category, url, url, sku, -1,
                          price, price, 'ARS',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)
        return [product]
示例#13
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The name is read from ``h1[itemprop=name]`` (falling back to the
        ``<p>`` variant). The stock is the number preceding 'pzas.' inside
        the block headed by an ``<h6>`` containing 'EXISTENCIA'. It is 0
        when that block is missing or holds no 'pzas.' text. The condition
        is refurbished when the name contains 'reacondicionado'.
        """
        print(url)
        session = session_with_proxy(extra_args)

        page_source = session.get(url).text
        soup = BeautifulSoup(page_source, 'html.parser')

        name_tag = soup.find('h1', {'itemprop': 'name'})
        if not name_tag:
            name_tag = soup.find('p', {'itemprop': 'name'})

        name = name_tag.text
        sku = soup.find('span', 'variant-sku').text

        # When several <h6> match, the last one wins.
        stock_container = None
        for heading in soup.findAll('h6'):
            if 'EXISTENCIA' in heading.text:
                stock_container = heading.parent.find('div').contents

        # Start from 0 so that ``stock`` is always bound: previously a
        # stock block without any 'pzas.' text raised UnboundLocalError
        # when the Product was built.
        stock = 0
        for item in stock_container or []:
            # Only text nodes can carry the quantity; nested tags are
            # skipped since they have no str.replace.
            if isinstance(item, str) and 'pzas.' in item:
                stock = int(item.replace('pzas.', ''))
                break

        price_text = soup.find('span', 'gf_product-price money').text
        price = Decimal(price_text.replace('$', '').replace(',', ''))

        images = soup.findAll('meta', {'property': 'og:image:secure_url'})
        picture_urls = [image['content'] for image in images]

        description = html_to_markdown(
            str(soup.find('div', 'product-description')))

        if 'reacondicionado' in name.lower():
            condition = 'https://schema.org/RefurbishedCondition'
        else:
            condition = 'https://schema.org/NewCondition'

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'MXN',
                    sku=sku,
                    picture_urls=picture_urls,
                    description=description,
                    condition=condition)

        return [p]
示例#14
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        The listed price is multiplied by 1.19 and rounded to an integer
        before being used (presumably to add a 19% tax - confirm).
        """
        session = session_with_proxy(extra_args)
        page = BeautifulSoup(session.get(url).text, 'html.parser')

        name = page.find('h1', 'entry-title').text.strip()
        sku = page.find('input', {'name': 'product_id'})['value'].strip()
        description = html_to_markdown(
            str(page.find('div', 'product_description')))

        picture_urls = []
        for link in page.findAll('a', 'thickbox'):
            picture_urls.append(link['href'])

        listed_price = Decimal(
            remove_words(page.find('span', 'currentprice').text))
        price = (listed_price * Decimal('1.19')).quantize(0)

        product = Product(name, cls.__name__, category, url, url, sku, -1,
                          price, price, 'CLP',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)
        return [product]
示例#15
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page and return one Product per SKU.

        Pricing data is taken from the ``vtex.events.addData(...)`` call
        and the SKU list from the ``skuJson_0`` variable, both embedded in
        the page source. SKUs sold by 'lojamultilaser' get a 5% discount
        on the page price; every other SKU keeps the page price.

        The first EAN (left-padded with '0' when 12 digits long) is kept
        only if ``check_ean13`` accepts it.
        """
        print(url)
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text

        pricing_data = re.search(r'vtex.events.addData\(([\S\s]+?)\);',
                                 page_source).groups()[0]
        pricing_data = json.loads(pricing_data)

        skus_data = re.search(r'var skuJson_0 = ([\S\s]+?);CATALOG',
                              page_source).groups()[0]
        skus_data = json.loads(skus_data)
        name = '{} {}'.format(pricing_data['productBrandName'],
                              pricing_data['productName'])
        base_price = Decimal(pricing_data['productPriceTo'])

        soup = BeautifulSoup(page_source, 'html.parser')

        picture_urls = [
            tag['rel'][0] for tag in soup.findAll('a', {'id': 'botaoZoom'})
        ]

        description = html_to_markdown(
            str(soup.find('section', 'product-specs')))

        # A missing or empty EAN list simply means "no EAN" instead of an
        # IndexError on the first element.
        eans = pricing_data.get('productEans') or []
        ean = eans[0] if eans else None
        if ean is not None:
            if len(ean) == 12:
                ean = '0' + ean
            if not check_ean13(ean):
                ean = None

        products = []

        for sku_data in skus_data['skus']:
            sku = str(sku_data['sku'])
            stock = pricing_data['skuStocks'][sku]

            # Always derive the price from the untouched page price: the
            # discount used to be applied to the running value, so it
            # compounded (0.95 ** n) across SKUs and leaked to later
            # SKUs of other sellers.
            if sku_data['sellerId'] == 'lojamultilaser':
                price = (base_price * Decimal('0.95')).quantize(
                    Decimal('0.01'))
            else:
                price = base_price

            p = Product(name,
                        cls.__name__,
                        category,
                        url,
                        url,
                        sku,
                        stock,
                        price,
                        price,
                        'COP',
                        sku=sku,
                        ean=ean,
                        description=description,
                        picture_urls=picture_urls)
            products.append(p)

        return products
示例#16
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Returns an empty list on a 404 response. The stock is the number
        inside ``div.availability > strong`` (0 when that block is
        missing), and the part number is the ``<td>`` next to a ``<th>``
        whose text is exactly 'mpn'.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html5lib')

        name = soup.find('h1', 'name').text
        sku = soup.find('div', {'itemprop': 'sku'}).text

        availability = soup.find('div', 'availability')
        stock = int(availability.find('strong').text) if availability else 0

        raw_price = soup.find('span', 'price').text
        price = Decimal(raw_price.replace('$', '').replace(',', ''))

        # Use the gallery when there is one; otherwise fall back to the
        # first 'img-fluid' image of the page.
        gallery = soup.find('div', {'id': 'owl-carousel-gallery'})
        if gallery:
            picture_urls = []
            for image in gallery.findAll('img', 'img-fluid'):
                picture_urls.append(image['src'])
        else:
            picture_urls = [soup.find('img', 'img-fluid')['src']]

        description = html_to_markdown(str(soup.find('div', 'description')))

        # No early exit: if several rows are labelled 'mpn', the last wins.
        part_number = None
        for header_cell in soup.findAll('th'):
            if header_cell.text == "mpn":
                part_number = header_cell.parent.find('td').text

        product = Product(name, cls.__name__, category, url, url, sku,
                          stock, price, price, 'MXN',
                          sku=sku,
                          picture_urls=picture_urls,
                          description=description,
                          part_number=part_number)
        return [product]
示例#17
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape a product page and return one Product per dataLayer entry.

        Pictures are shared by all entries: each ``li.owl-item`` link
        contributes its ``data-zoom`` path, or ``data-normal`` when the
        former is empty. ``picture_urls`` is None when nothing was found.
        Product data is decoded from the first ``dataLayer`` assignment
        in the page source.
        """
        session = session_with_proxy(extra_args)
        page_source = session.get(url).text
        soup = BeautifulSoup(page_source, 'html.parser')

        collected = []
        for item in soup.findAll('li', 'owl-item'):
            link = item.find('a')
            path = link['data-zoom'].replace(' ', '%20').strip()
            if not path:
                path = link['data-normal'].replace(' ', '%20').strip()
            if path:
                # Paths lack a scheme, so 'https:' is prepended.
                collected.append('https:' + path)

        picture_urls = collected or None

        (raw_layer,) = re.search(r'dataLayer = ([\S\s]+?);dataLayer',
                                 page_source).groups()
        pricing_data = demjson.decode(raw_layer)[0]

        products = []
        for entry in pricing_data['product']:
            name = entry['productName']
            sku = str(entry['productSku'])
            price = Decimal(entry['productDiscount'])
            stock = -1 if entry['productAvailable'] else 0
            description = html_to_markdown(
                html.unescape(entry['productDescription']))

            products.append(
                Product(name, cls.__name__, category, url, url, sku, stock,
                        price, price, 'BRL',
                        sku=sku,
                        description=description,
                        picture_urls=picture_urls))

        return products
示例#18
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Stock is -1 when the availability link points to schema.org
        InStock and 0 otherwise. The offer price is read from
        ``span.t_boleto_price`` only for in-stock products; out-of-stock
        products reuse the normal price.
        """
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1').text.strip()

        availability = soup.find('link', {'itemprop': 'availability'})['href']
        in_stock = availability == 'http://schema.org/InStock'
        stock = -1 if in_stock else 0

        sku = soup.find('div', 'product-name').find('span').text.strip()

        # Both panels are rendered and joined by a blank line.
        rendered_panels = []
        for panel_id in ('description', 'additional'):
            panel = soup.find('div', {'id': panel_id})
            rendered_panels.append(html_to_markdown(str(panel)))
        description = '\n\n'.join(rendered_panels)

        normal_text = soup.find('p', {'itemprop': 'price'}).text
        normal_price = Decimal(
            normal_text.replace('R$', '').replace('.', '').replace(',', '.'))

        if in_stock:
            offer_text = soup.find('span', 't_boleto_price').text
            offer_price = Decimal(
                offer_text.split('$')[1].replace('.', '').replace(',', '.'))
        else:
            offer_price = normal_price

        slider = soup.find('ul', 'bxslider')
        if slider:
            picture_urls = []
            for link in slider.findAll('a'):
                picture_urls.append(link['href'])
        else:
            picture_urls = [soup.find('a', 'cloud-zoom-gallery')['href']]

        product = Product(name, cls.__name__, category, url, url, sku,
                          stock, normal_price, offer_price, 'BRL',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)
        return [product]
示例#19
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape every variant of the product at ``url``.

        The variant list comes from the ``var meta = ...`` JSON embedded
        in the page. Each variant page (``?variant=<id>``) is then fetched
        for its details. Variants whose price text is '-' are skipped.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response_text = session.get(url).text

        (meta_json,) = re.search(r'var meta = ([\S\s]+?);\n',
                                 response_text).groups()
        variants = json.loads(meta_json)['product']['variants']

        products = []
        for variant in variants:
            variant_id = variant['id']
            sku = variant['sku']
            color = variant['public_title']

            variant_url = '{}?variant={}'.format(url, variant_id)
            soup = BeautifulSoup(session.get(variant_url).text,
                                 'html.parser')

            base_name = soup.find('h1', 'product_name').text
            name = base_name + " ({})".format(color)

            availability = soup.find(
                'link', {'itemprop': 'availability'})['href']
            stock = -1 if availability == 'http://schema.org/InStock' else 0

            price_text = soup.find('span', 'current_price').text.strip()
            price_text = price_text.replace('$', '').replace('.', '')
            if price_text == '-':
                continue
            price = Decimal(price_text)

            # Image sources lack a scheme, so 'http:' is prepended.
            picture_urls = []
            for holder in soup.findAll('div', 'image__container'):
                picture_urls.append('http:' + holder.find('img')['data-src'])

            description = html_to_markdown(
                str(soup.find('div', {'data-et-handle': 'tabs-descripcion'})))

            products.append(
                Product(name, cls.__name__, category, variant_url, url, sku,
                        stock, price, price, 'CLP',
                        sku=sku,
                        picture_urls=picture_urls,
                        description=description))

        return products
示例#20
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Returns an empty list on a 404 response. The offer price is the
        ``span.efectivo`` price and the normal price is that amount times
        1.034. Pictures come from the ``mage/gallery/gallery`` JSON init
        scripts. The SKU is truncated to 50 characters and also used as
        the part number.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('span', {'itemprop': 'name'}).text
        sku = soup.find('div', {'itemprop': 'sku'}).text

        stock = 0
        stock_container = soup.find('div', 'product-stock')

        if stock_container:
            # The quantity is the second space-separated token.
            stock = int(stock_container.text.strip().split(' ')[1])

        offer_price = Decimal(
            soup.find('span', 'efectivo').find('span', 'price').text.replace(
                '$', '').replace('.', ''))
        # Build the factor from a string: Decimal(1.034) would inherit the
        # binary float error (1.03399999999999998578...) and taint the
        # resulting price.
        normal_price = offer_price * Decimal('1.034')

        image_scripts = soup.findAll('script', {'type': 'text/x-magento-init'})
        picture_urls = []

        for script in image_scripts:
            if 'mage/gallery/gallery' in script.text:
                image_data = json.loads(
                    script.text)['[data-gallery-role=gallery-placeholder]'][
                        'mage/gallery/gallery']['data']
                picture_urls.extend(data['img'] for data in image_data)

        description = html_to_markdown(str(soup.find('div', 'description')))

        # Slicing is a no-op for SKUs of 50 characters or fewer.
        sku = sku[:50]

        p = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            part_number=sku,
            picture_urls=picture_urls,
            description=description,
        )

        return [p]
示例#21
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape the product page at ``url`` into a one-element list.

        Returns an empty list when the request ended on a different URL
        than the one asked for, or when the page has no ``span.sku``.
        Stock is 0 if a ``p.out-of-stock`` element exists, else -1. The
        amount inside ``<ins>`` is preferred when that tag is present.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.url != url:
            return []

        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('h1', 'product_title').text

        sku_tag = soup.find('span', 'sku')
        if not sku_tag:
            return []
        sku = sku_tag.text

        stock = 0 if soup.find('p', 'out-of-stock') else -1

        price_box = soup.find('p', 'price')
        discounted = price_box.find('ins')
        amount_holder = discounted if discounted else price_box
        price_text = amount_holder.find('span', 'amount').text
        price = Decimal(price_text.replace('$', '').replace(',', ''))

        gallery = soup.find('figure', 'woocommerce-product-gallery__wrapper')
        picture_urls = []
        for image in gallery.findAll('img'):
            picture_urls.append(image['src'])

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        product = Product(name, cls.__name__, category, url, url, sku,
                          stock, price, price, 'MXN',
                          sku=sku,
                          picture_urls=picture_urls,
                          description=description)
        return [product]
示例#22
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        Returns an empty list when the page has no quantity selector input.
        Stock comes from the selector's ``max`` attribute (-1 when that
        attribute is empty) and is forced to 0 when the name does not
        contain the standalone word "LG". Prices are reported in PYG.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h1', 'product_title').text.strip()
        sku_meta = soup.find('meta', {'property': 'product:retailer_item_id'})
        sku = sku_meta['content']

        quantity_input = soup.find('input', {'id': 'the-cantidad-selector'})
        if not quantity_input:
            return []

        max_quantity = quantity_input['max']
        stock = int(max_quantity) if max_quantity else -1

        # Names without the word "LG" are always reported with zero stock.
        if 'LG' not in name.upper().split(' '):
            stock = 0

        price_tag = soup.find('p', 'price')

        normal_text = price_tag.find('span', 'amount').text
        normal_price = Decimal(
            normal_text.replace('Gs.', '').replace('.', '').strip())

        offer_text = price_tag.find(
            'span', {'id': 'elpreciocentralPorta'}).text
        offer_text = offer_text.split('Gs.')[-1]
        offer_price = Decimal(
            offer_text.replace('.', '').replace('!', '').strip())

        # The offer price is never allowed to exceed the normal price.
        offer_price = min(offer_price, normal_price)

        description_tag = soup.find('div', {'itemprop': 'description'})
        description = html_to_markdown(str(description_tag))

        picture_urls = [
            thumbnail.find('a')['href']
            for thumbnail in soup.findAll(
                'div', 'thumbnails-single owl-carousel')
        ]

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          normal_price,
                          offer_price,
                          'PYG',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
示例#23
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        The page is fetched with a desktop Chrome User-Agent and a 30 second
        timeout. Normal price, stock icon and SKU are read, in that order,
        from the first three ``div.textOtrosPrecios`` elements. Prices are
        reported in CLP.
        """
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = (
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 '
            '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36')

        response = session.get(url, timeout=30)
        soup = BeautifulSoup(response.text, 'html.parser')

        info_cells = soup.findAll('div', 'textOtrosPrecios')

        normal_price = Decimal(remove_words(info_cells[0].text))

        # Only these two icons map to -1; any other icon maps to 0.
        stock_icon = info_cells[1].find('img')['src']
        available_icons = [
            'images/imagenes/ico_normal.jpg',
            'images/imagenes/ico_bajo.jpg',
        ]
        stock = -1 if stock_icon in available_icons else 0

        sku = info_cells[2].text.strip()
        name = soup.find('div', 'textTituloProducto').text.strip()

        offer_text = soup.find('div', 'textPrecioContado').text
        offer_price = Decimal(remove_words(offer_text))

        description = html_to_markdown(str(soup.find('div', 'p7TPcontent')))

        layout_tables = soup.findAll('table', {'id': 'table20'})
        first_picture = layout_tables[1].findAll('img')[2]['src']

        picture_paths = [first_picture] + [
            img['src'] for img in soup.findAll('img', 'Imagen')]

        # Each picture URL is rebuilt from the value after the last "=" in
        # the scraped path, with spaces percent-encoded.
        picture_base = 'http://www.ttchile.cl/images/imgproductos/' \
                       'imgImagenMarco.php?imagen='
        picture_urls = [
            (picture_base + path.split('=')[-1]).replace(' ', '%20')
            for path in picture_paths
        ]

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          normal_price,
                          offer_price,
                          'CLP',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
示例#24
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        Returns an empty list when the server answers 404 or when the page
        has no ``span.priceText`` element. Stock is the integer shown inside
        ``span.stockFlag`` (0 when that element is missing). ``picture_urls``
        is ``None`` when the page has no ``emslider2_items`` gallery. The
        price is reported in MXN and used for both price fields.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        # Parse the response that was just checked instead of requesting the
        # page a second time: this halves the network traffic and guarantees
        # the parsed body belongs to the response whose status was inspected.
        soup = BeautifulSoup(response.text, 'html.parser')

        name = soup.find('h1', 'detailsInfo_right_title').text
        sku = soup.find('div', 'detailsInfo_right_artnum')\
            .text.replace('SKU:', '').strip()

        stock_flag = soup.find('span', 'stockFlag')
        if not stock_flag:
            stock = 0
        else:
            stock = int(stock_flag.find('span').text)

        price_tag = soup.find('span', 'priceText')
        if not price_tag:
            return []

        price = Decimal(price_tag.text.replace('$', '').replace(',', ''))

        slider = soup.find('div', 'detailsInfo_left_picture_morepictures')\
            .find('div', 'emslider2_items')
        if slider:
            picture_urls = [
                item.find('a')['data-src'] for item in slider.findAll('li')
            ]
        else:
            picture_urls = None

        description = html_to_markdown(
            str(soup.find('div', 'cpattributes-box')))

        p = Product(name,
                    cls.__name__,
                    category,
                    url,
                    url,
                    sku,
                    stock,
                    price,
                    price,
                    'MXN',
                    sku=sku,
                    picture_urls=picture_urls,
                    description=description,
                    part_number=sku)

        return [p]
示例#25
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        Returns an empty list on a 404 response. The SKU is read from the
        first row of the ``div.listing`` table and the price from its last
        row. Stock is accumulated from the "Cantidad" rows in between. The
        price is reported in USD and used for both price fields.
        """
        print(url)
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.status_code == 404:
            return []

        soup = BeautifulSoup(response.text, 'html5lib')

        name = soup.find('h1', 'name').text.strip()
        rows = soup.find('div', 'listing').findAll('tr')

        sku = rows[0].find('td', 'td_right').text.strip()

        # Sum the quantities of every "Cantidad" row except the last table
        # row; a "+" in any quantity switches stock to -1 and stops the scan.
        stock = 0
        for row in rows[:-1]:
            label = row.find('td', 'td_left').text
            if 'Cantidad' not in label:
                continue
            quantity = row.find('td', 'td_right').text
            if '+' in quantity:
                stock = -1
                break
            if 'Agotado' not in quantity:
                stock += int(quantity)

        price_text = rows[-1].find('td', 'td_right').text
        price = Decimal(price_text.split('$')[-1].replace(',', ''))

        description = html_to_markdown(str(soup.find('div', 'description')))

        picture_urls = [
            wrapper.find('a')['href'].replace(' ', '%20')
            for wrapper in soup.findAll('li', 'wrapper_pic_div')
        ]

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          price,
                          price,
                          'USD',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
示例#26
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        Stock is the ``max`` attribute of the quantity input (0 when the
        input is absent). When the price box holds a struck-through price,
        the reported price is 90% of that value; otherwise it is the price
        box text itself. The price is reported in CLP and used for both
        price fields.
        """
        print(url)
        session = session_with_proxy(extra_args)
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('div', 'product-info__description').text.strip()

        # The SKU is the text after the first ":" in the title's span.
        title_span = soup.find('div', 'product-info__title').find('span')
        sku = title_span.text.split(':')[1].strip()

        quantity_input = soup.find('input', {'id': 'producto_cantidad'})
        stock = int(quantity_input['max']) if quantity_input else 0

        price_box = soup.find('span', 'price-box__new')
        struck_price = price_box.find('s')

        if struck_price:
            list_price = Decimal(remove_words(struck_price.text))
            price = (list_price * Decimal('0.9')).quantize(0)
        else:
            price = Decimal(remove_words(price_box.text))

        description = html_to_markdown(
            str(soup.find('div', 'tab-content')), 'http://www.eglo.cl')

        # Use the slider pictures when present, else the single main image.
        slides = soup.findAll('a', 'swiper-slide')
        if slides:
            picture_urls = [slide.find('img')['src'] for slide in slides]
        else:
            main_image = soup.find('div', 'product-main-image__item')
            picture_urls = [main_image.img['src']]

        product = Product(name,
                          cls.__name__,
                          category,
                          url,
                          url,
                          sku,
                          stock,
                          price,
                          price,
                          'CLP',
                          sku=sku,
                          description=description,
                          picture_urls=picture_urls)

        return [product]
示例#27
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        Returns an empty list when the final response URL differs from
        ``url``. Name, SKU, stock and prices come from the ``digitalData``
        object embedded in the page source. Both prices are zero when that
        object has no ``cashPrice`` entry. Prices are reported in BRL.
        """
        session = session_with_proxy(extra_args)
        response = session.get(url)

        if response.url != url:
            return []

        page_source = response.text

        data_match = re.search(r'digitalData = ([\S\s]+?); </script',
                               page_source)
        raw_data = data_match.group(1)

        # Drop these entries (up to end of line) before decoding.
        # NOTE(review): presumably their values are not parseable literals;
        # confirm against a live page.
        dropped_keys = ['domain', 'fullName', 'protocol', 'pathname',
                        'referrer']
        for key in dropped_keys:
            raw_data = re.sub("'{}': .+".format(key), '', raw_data)

        pricing_data = demjson.decode(raw_data)['page']['product']

        name = pricing_data['title']
        sku = pricing_data['idSku']

        stock = -1 if pricing_data['stockAvailability'] else 0

        if 'cashPrice' in pricing_data:
            normal_price = Decimal(pricing_data['salePrice'])
            offer_price = Decimal(pricing_data['cashPrice'])
        else:
            normal_price = offer_price = Decimal(0)

        soup = BeautifulSoup(page_source, 'html.parser')

        description = html_to_markdown(str(soup.find('div', 'description')))

        carousel_images = soup.findAll('img', 'carousel-product__item-img')
        picture_urls = [image['data-src'] for image in carousel_images]

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'BRL',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )

        return [product]
示例#28
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Return every product listed for the category encoded in ``url``.

        The category id is the ``_id`` parameter of the URL fragment. The
        home page is requested first, then the listing is obtained by
        POSTing a JSON payload to the site's search service. Prices are
        reported in ARS, the same value being used for both price fields.
        """
        fragment = urllib.parse.urlparse(url).fragment
        category_id = urllib.parse.parse_qs(fragment)['_id'][0]
        session = session_with_proxy(extra_args)

        payload = {
            'IdMenu': category_id,
            'textoBusqueda': "",
            'producto': "",
            'marca': "",
            'pager': "",
            'ordenamiento': 0,
            'precioDesde': "",
            'precioHasta': ""
        }

        # NOTE(review): this request's result is unused; presumably it only
        # primes session cookies before the POST. Confirm before removing.
        session.get('https://www.jumbo.com.ar/')
        session.headers.update(
            {'Content-Type': 'application/json; charset=UTF-8'})

        endpoint = 'https://www.jumbo.com.ar/Comprar/HomeService.aspx/' \
                   'ObtenerArticulosPorDescripcionMarcaFamiliaLevex'
        response = session.post(endpoint, json.dumps(payload))

        # The "d" field of the reply is itself a JSON-encoded string.
        envelope = json.loads(response.text)
        entries = json.loads(envelope['d'])['ResultadosBusquedaLevex']

        products = []

        for entry in entries:
            name = entry['DescripcionArticulo'].strip()
            price = Decimal(entry['Precio'])
            sku = entry['IdArticulo']
            stock = int(entry['Stock'])
            picture_urls = [
                'https://images.jumbo.com.ar/JumboComprasArchivos/'
                'Archivos/' + entry['IdArchivoBig']
            ]

            products.append(Product(name,
                                    cls.__name__,
                                    category,
                                    url,
                                    url,
                                    sku,
                                    stock,
                                    price,
                                    price,
                                    'ARS',
                                    sku=sku,
                                    picture_urls=picture_urls))

        return products
示例#29
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        Returns an empty list when the price paragraph has no text. Stock is
        0 when the stock label reads "Agotado", otherwise the leading integer
        of that label. When the price paragraph has both ``<ins>`` and
        ``<del>`` tags they give the offer and normal price respectively;
        otherwise the first price span is used for both. Prices are reported
        in CLP.
        """
        print(url)
        session = session_with_proxy(extra_args)
        session.headers['User-Agent'] = \
            'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 ' \
            '(KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36'
        soup = BeautifulSoup(session.get(url).text, 'html.parser')

        name = soup.find('h2', 'product_title').text.strip()
        sku = soup.find('span', 'sku').text.strip()

        stock_text = soup.find('span', 'stock').text.strip()
        stock = 0
        if stock_text != 'Agotado':
            stock = int(stock_text.split(' ')[0])

        price_container = soup.find('p', 'price')

        if not price_container.text.strip():
            return []

        offer_tag = price_container.find('ins')
        normal_tag = price_container.find('del')

        if offer_tag is not None and normal_tag is not None:
            offer_price = Decimal(
                remove_words(offer_tag.find('span').text))
            normal_price = Decimal(
                remove_words(normal_tag.find('span').text))
        else:
            # Without the <ins>/<del> pair there is a single price; the
            # previous code raised AttributeError here instead.
            single_price = Decimal(
                remove_words(price_container.find('span').text))
            offer_price = single_price
            normal_price = single_price

        picture_urls = []

        for picture in soup.findAll('div', 'img-thumbnail'):
            image = picture.find('img')
            # Skip thumbnails with no <img> or no "content" attribute;
            # previously a missing <img> raised an uncaught TypeError.
            if image is None or not image.has_attr('content'):
                continue
            picture_urls.append(image['content'])

        description = html_to_markdown(
            str(soup.find('div', {'id': 'tab-description'})))

        p = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            sku,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            description=description,
            picture_urls=picture_urls
        )

        return [p]
示例#30
0
    def products_for_url(cls, url, category=None, extra_args=None):
        """Scrape one product page and return it as a single-item list.

        The product is keyed by its part number, and the SKU is passed
        separately. Stock is 0 when the "sin stock" icon is on the page,
        otherwise the leading integer of the stock cell. ``picture_urls`` is
        ``None`` when no usable picture is found. Prices are reported in CLP.
        """
        session = session_with_proxy(extra_args)

        soup = BeautifulSoup(session.get(url).text, 'html.parser')
        info_cells = soup.findAll('td', 'texto-precio-ahorro')
        name = info_cells[1].text.strip()

        if soup.find('img', {'src': 'images/ficha/ico_sin_stock.gif'}):
            stock = 0
        else:
            stock_cell = soup.find('td', 'stock-product')
            stock = int(stock_cell.text.split()[0])

        sku = soup.find('td', 'sku').text.split()[-1]

        part_number_text = info_cells[2].find('td').text
        part_number = part_number_text.split(':')[1].strip()

        # Both prices are the text between the first "$" and "IVA".
        offer_cell = soup.find('td', 'lowPrice')
        offer_text = offer_cell.contents[0].split('$')[1].split('IVA')[0]
        offer_price = Decimal(remove_words(offer_text))

        normal_cell = offer_cell.parent.parent.find('td', 'price-normal')
        normal_text = normal_cell.contents[0].split('$')[1].split('IVA')[0]
        normal_price = Decimal(remove_words(normal_text))

        collected_urls = []
        for link in soup.findAll('a', {'rel': 'lightbox[roadtrip]'}):
            image = link.find('img')
            if not image:
                continue
            source = image['src'].replace(' ', '%20')
            # A bare photos directory URL carries no actual picture.
            if source == 'http://www.clie.cl/photos/':
                continue
            collected_urls.append(source)

        picture_urls = collected_urls or None

        product = Product(
            name,
            cls.__name__,
            category,
            url,
            url,
            part_number,
            stock,
            normal_price,
            offer_price,
            'CLP',
            sku=sku,
            part_number=part_number,
            picture_urls=picture_urls
        )

        return [product]