Пример #1
0
def parse(url):
    """Scrape a product page and return its name, image and prices.

    The URL is normalised with ``pat``, the page is fetched, and the JSON-LD
    object that starts with ``{"@context"`` is decoded. The first offer's
    price is used for every price field; ``price`` is then overridden when a
    value can be scraped from the inline ``data:`` blob of the page.

    Args:
        url: Product URL as supplied by the caller.

    Returns:
        A dict with the keys ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``, or the result of
        ``message(code='invalid_url')`` when no JSON-LD could be decoded.
    """
    # NOTE(review): presumably ``pat`` is a compiled regex; if it finds no
    # match the ``[0]`` below raises IndexError -- confirm callers expect it.
    clean_url = pat.findall(url)[0]
    with get_session() as s:
        data = s.get(clean_url).text

    try:
        lds = json.loads('{"@context"' +
                         find_str(data, '{"@context"', '</script'))
    except (JSONDecodeError, TypeError):
        # TypeError covers find_str handing back a non-string (e.g. None)
        # when the marker is missing -- TODO confirm find_str's contract.
        return message(code='invalid_url')

    price = price_sale = int(lds['offers'][0]['price'])
    try:
        blob = find_str(data, 'data:[[null,null,[[[[null,[', ']\n]')
        # Take the last quoted field, keep what follows the non-breaking
        # space, drop the trailing character and the thousands separators.
        scraped = blob.split('","')[-1].split('\xa0')[-1][:-1].replace(',', '')
        # ``price`` is only rebound once the conversion succeeded, so any
        # failure below leaves the JSON-LD price in place.
        price = int(scraped) if scraped else 0
    except (IndexError, AttributeError, ValueError):
        # Best-effort scrape: a missing marker or an unexpected format must
        # not abort the parser, the JSON-LD price is a valid fallback.
        pass

    return dict(url=clean_url,
                name=lds['name'],
                price=price,
                price_sale=price_sale,
                price_card=price_sale,
                image=lds['image'],
                raw=lds)
Пример #2
0
def vtex_parser(content, url):
    """Parse a product page from a store built on the Vtex e-commerce platform.

    Vtex-based sites expose the product data in the same way, so a single
    parser is shared between them.

    Args:
        content: Page source to search for the embedded product JSON.
        url: URL of the product page, copied into the result.

    Returns:
        A ``StoreItem`` on success, the result of
        ``message(code='out_of_stock')`` when the product is not flagged as
        available, or ``None`` when the embedded JSON cannot be decoded.
    """
    try:
        data = json.loads(find_str(content, vtex_str, ';'))
    except (JSONDecodeError, TypeError):
        # TypeError: json.loads was given a non-string (e.g. None) because
        # the marker was not found -- TODO confirm find_str's contract.
        return None

    if not data.get('available', False):
        return message(code='out_of_stock')

    # Tolerate a missing, null or empty SKU list instead of raising.
    skus = data.get('skus') or [{}]
    sku = skus[0]
    price = sku.get('spotPrice', 0)
    price_sale = sku.get('bestPrice', price)

    # A missing ``listPrice`` must not be compared against 0 directly:
    # ``None > 0`` raises TypeError in Python 3.
    list_price = sku.get('listPrice') or 0
    if list_price > 0:
        # In testing, Ilko does not use this field, but Casa de la Cerveza
        # does (regular price).
        price = list_price

    # The integer price shows up as "1000000" instead of "10000.00", so the
    # formatted value is inspected to find how many decimals there are,
    # assuming the decimal separator is a comma.
    price_fmt = (sku.get('listPriceFormated') or '').split(',')
    if len(price_fmt) > 1 and price_fmt[-1]:
        scale = 10 ** len(price_fmt[-1])
        price /= scale
        price_sale /= scale

    return StoreItem(url=url,
                     name=data.get('name', ''),
                     image=sku.get('image', ''),
                     price=price,
                     price_sale=price_sale,
                     price_card=price_sale,
                     raw=data)
Пример #3
0
def parse(url):
    """Fetch a product page and read its prices from the ``__NEXT_DATA__`` JSON.

    Args:
        url: Product page URL.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``, or ``None`` when the embedded
        JSON cannot be decoded.
    """
    with get_session() as session:
        response = session.get(url)

    try:
        raw_json = find_str(response.text,
                            '__NEXT_DATA__" type="application/json">',
                            '</script>')
        data = json.loads(raw_json.strip())
        # Remove these sections from the payload before it is returned as
        # ``raw``.
        for section in ('features', 'footer', 'header'):
            del data['props'][section]
    except json.decoder.JSONDecodeError:
        return None

    product = data['props']['pageProps']['productProps']['result']

    # Map each price type of the first variant to its numeric value.
    by_type = {}
    for entry in product['variants'][0]['price']:
        kind = entry['type']
        by_type[kind] = only_numbers(entry['price'])

    price = by_type['NORMAL']
    # Sale price: 'AB' if present, else 'INTERNET', else the normal price.
    internet_or_normal = by_type.get('INTERNET', price)
    price_sale = by_type.get('AB', internet_or_normal)
    # Card price: 'CMR' if present, else the sale price.
    price_card = by_type.get('CMR', price_sale)

    return {
        'url': url,
        'name': product['name'],
        'price': price,
        'price_sale': price_sale,
        'price_card': price_card,
        'image': media_url + product['id'],
        'raw': data,
    }
Пример #4
0
def parse(url):
    """Fetch a product page plus the promotions feed and build the price record.

    The product data comes from the ``__renderData`` script of the page (a
    JSON string that itself contains JSON). The card price starts as the
    seller's ``Price`` and is lowered by any matching promotion flagged with
    ``tcenco`` whose value is smaller.

    Args:
        url: Product page URL.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``, or the result of
        ``message(code='product_not_found')`` when the page JSON cannot be
        decoded.
    """
    # NOTE(review): the API key is hard-coded here; consider moving it to
    # configuration.
    promo_headers = {'x-api-key': 'IuimuMneIKJd3tapno2Ag1c1WcAES97j'}
    with get_session() as session:
        page = session.get(url)
        promos = session.get(url_promos, headers=promo_headers).json()

    try:
        render_data = find_str(page.text, '__renderData = ', ';</script>')
        # The payload is double-encoded, hence the two decoding passes.
        data = json.loads(json.loads(render_data))
    except json.decoder.JSONDecodeError:
        return message(code='product_not_found')

    del data['menu']
    item = data['pdp']['product'][0]['items'][0]
    offer_info = item['sellers'][0]['commertialOffer']

    price_card = offer_info['Price']
    product_id = data['pdp']['product'][0]['productId']
    if product_id in promos['products']:
        for promo_id in promos['products'][product_id]:
            if promo_id not in promos['promotions']:
                continue
            promo = promos['promotions'][promo_id]
            if promo['tcenco'] and promo['value'] < price_card:
                price_card = promo['value']

    # TODO: where is the card price?
    return {
        'url': url,
        'name': item['name'],
        'price': offer_info['ListPrice'],
        'price_sale': offer_info['Price'],
        'price_card': price_card,
        'image': item['images'][0]['imageUrl'],
        'raw': data,
    }
Пример #5
0
def parse(url):
    """Fetch a product page and read prices from its embedded ``variants`` JSON.

    Args:
        url: Product page URL.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``; the result of
        ``message(code='out_of_stock')`` when the page contains the
        schema.org out-of-stock marker; or the result of
        ``message(code='invalid_url')`` when the variants JSON cannot be
        read.
    """
    with get_session() as session:
        response = session.get(url)

    body = response.text
    if 'https://schema.org/OutOfStock' in body:
        return message(code='out_of_stock')

    try:
        variants = json.loads(find_str(body, '"variants":', ',"layoutType"'))
        data_prod = variants[0]
    except (json.decoder.JSONDecodeError, TypeError):
        return message(code='invalid_url')

    # Map each price type to an int, dropping the '.' separators from the
    # first element of its 'price' list.
    prices = {}
    for entry in data_prod['prices']:
        kind = entry['type']
        prices[kind] = int(entry['price'][0].replace('.', ''))

    internet_or_zero = prices.get('internetPrice', 0)
    price = prices.get('normalPrice', internet_or_zero)
    price_sale = prices.get('internetPrice', price)
    price_card = prices.get('cmrPrice', price_sale)

    return {
        'url': url,
        'name': data_prod['name'],
        'price': price,
        'price_sale': price_sale,
        'price_card': price_card,
        'image': media_url.format(data_prod['id']),
        'raw': data_prod,
    }
Пример #6
0
def parse(url):
    """Fetch a product page and read prices from its ``priceModule`` data.

    The URL is normalised with ``pat`` and requested with the ``conf``
    cookies; the product data is the JSON found after ``data: `` in the page.

    Args:
        url: Product URL as supplied by the caller.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``, or the result of
        ``message(code='product_not_found')`` when the JSON cannot be decoded
        or carries no ``priceModule``.
    """
    clean_url = pat.findall(url)[0]
    with get_session() as session:
        page = session.get(clean_url, cookies=conf).text

    try:
        page_data = json.loads(find_str(page, 'data: ', ',\n'))
    except JSONDecodeError:
        return message(code='product_not_found')

    if not (page_data and 'priceModule' in page_data):
        return message(code='product_not_found')

    price_module = page_data['priceModule']
    price = price_module['formatedPrice']
    # The activity price, when present, replaces the regular one for both
    # the sale and the card price.
    if 'formatedActivityPrice' in price_module:
        price_offer = price_module['formatedActivityPrice']
    else:
        price_offer = price

    page_module = page_data['pageModule']
    return {
        'url': clean_url,
        'name': page_module['title'],
        'price': price,
        'price_sale': price_offer,
        'price_card': price_offer,
        'image': page_module['imagePath'],
        'raw': page_data,
    }
Пример #7
0
def parse(url):
    """Fetch a product page and read its prices from the ``__NEXT_DATA__`` JSON.

    Args:
        url: Product page URL.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``, or ``None`` when the embedded
        JSON cannot be decoded.
    """
    with get_session() as session:
        response = session.get(url)

    try:
        raw_json = find_str(response.text,
                            '__NEXT_DATA__" type="application/json">',
                            '</script>')
        data = json.loads(raw_json.strip())
        # Remove these sections from the payload before it is returned as
        # ``raw``.
        for section in ('categories', 'settings'):
            del data['props'][section]
    except json.decoder.JSONDecodeError:
        return None

    product = data['props']['pageProps']['data']
    product_prices = product['prices']
    price = product_prices['regularPrice']
    # A missing or falsy discount/card price falls back to the previous tier.
    price_sale = product_prices.get('discountPrice') or price
    price_card = product_prices.get('cmrPrice') or price_sale

    return {
        'url': url,
        'name': product['name'],
        'price': price,
        'price_sale': price_sale,
        'price_card': price_card,
        'image': product['images'][0]['url'],
        'raw': data,
    }
Пример #8
0
def parse(url):
    """Fetch a product page and derive prices from JSON-LD offers and markup.

    The regular price is scraped from the crossed-out ``<s>$...`` markup when
    available; the JSON-LD offers supply the sale and card prices.

    Args:
        url: Product page URL.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``, or the result of
        ``message(code='invalid_url')`` when no JSON-LD block can be decoded.
    """
    with get_session() as session:
        page = session.get(url).text

    try:
        ld_text = find_str(page, '<script type="application/ld+json">',
                           '</script>')
        if not ld_text:
            raise ValueError()
        data = json.loads(ld_text.strip())
    except (JSONDecodeError, ValueError):
        return message(code='invalid_url')

    try:
        crossed_out = find_str(page, '"price"><s>$', '<')
        price_normal = int(crossed_out.replace('.', ''))
    except AttributeError:
        # No crossed-out price was found in the markup.
        price_normal = 0

    offers = sorted(int(offer['price']) for offer in data['offers'])
    lowest = offers[0]

    if price_normal == 0:
        # Without a crossed-out price, the lowest offer is used everywhere.
        price_normal = price_sale = price_card = lowest
    elif len(offers) > 1:
        # Two or more offers: lowest is the card price, next one the sale price.
        price_card = lowest
        price_sale = offers[1]
    else:
        price_sale = price_card = price_normal
        if lowest != price_normal:
            price_card = lowest
            # The sale price only drops too when the page lacks the
            # 'img-tc' marker.
            if 'class="img-tc"' not in page:
                price_sale = lowest

    return {
        'url': url,
        'name': html.unescape(data['name']),
        'price': price_normal,
        'price_sale': price_sale,
        'price_card': price_card,
        'image': data['image'],
        'raw': data,
    }
Пример #9
0
def retailrocket_parser(content, url):
    """Parse a product page from a RetailRocket-based store.

    Apparently AudioMúsica is one such store. The single ``price`` found in
    the embedded JSON is used for the regular, sale and card prices alike.

    Args:
        content: Page source to search for the embedded product JSON.
        url: URL of the product page, copied into the result.

    Returns:
        A ``StoreItem``, or ``None`` when the embedded JSON cannot be decoded.
    """
    try:
        payload = find_str(content, rr_str, ');')
        data = json.loads(payload)
    except JSONDecodeError:
        return None

    price = data.get('price', 0)
    fields = dict(
        url=url,
        name=data.get('name', ''),
        image=data.get('pictureUrl', ''),
        price=price,
        price_sale=price,
        price_card=price,
        raw=data,
    )
    return StoreItem(**fields)
Пример #10
0
def parse(url):
    """Fetch a deal page and read its prices from ``__APP_INITIAL_STATE__``.

    Args:
        url: Deal page URL.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``. The sale and card prices are
        both taken from the summary's ``price`` amount.
    """
    with get_session() as session:
        response = session.get(url)

    state_json = find_str(response.text, '__APP_INITIAL_STATE__ = ', '</script>')
    data = json.loads(state_json)

    deal = data['deal']
    summary = deal['priceSummary']
    price = only_numbers(summary['value']['formattedAmount'])
    price_offer = only_numbers(summary['price']['formattedAmount'])

    return {
        'url': url,
        'name': deal['title'],
        'price': price,
        'price_sale': price_offer,
        'price_card': price_offer,
        'image': deal['largeImageUrl'],
        'raw': data,
    }
Пример #11
0
def parse(url):
    """Fetch a product page, resolve its API path and return the prices.

    The page is searched for the ``android-app://`` alternate link; the text
    that follows it (up to the closing quote) is appended to ``api_url`` and
    the first element of the JSON response is used as the product record.

    Args:
        url: Product page URL.

    Returns:
        A dict with ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``; ``None`` when the page has no
        alternate link; or the result of ``message(code='out_of_stock')``
        when the record carries no list price.
    """
    with get_session() as s:
        req = s.get(url)

    ind = 'alternate" href="android-app://cl.baytex.ripley/product/'
    # ``str.index`` raises ValueError instead of returning -1, so a
    # membership test is used to detect the missing marker.
    if ind not in req.text:
        return None

    part = find_str(req.text, ind, '"')
    # The API is queried with a fresh session.
    with get_session() as s:
        data = s.get(api_url + part).json()[0]

    if 'prices' not in data or 'listPrice' not in data['prices']:
        return message(code='out_of_stock')

    prices = data['prices']
    list_price = int(prices['listPrice'])
    # Without an offer price the list price is the sale price; without a
    # card price the sale price is the card price.
    offer_price = int(prices.get('offerPrice', list_price))
    card_price = int(prices.get('cardPrice', offer_price))

    return dict(url=url,
                name=data['name'],
                price=list_price,
                price_sale=offer_price,
                price_card=card_price,
                image='https:' + data['fullImage'],
                raw=data)