def parse(url):
    """Scrape a product page and return its name, image and prices.

    The first match of ``pat`` against ``url`` is used as the canonical URL.
    The sale price comes from the first offer of the page's JSON-LD block.
    The regular price is then looked up, best-effort, in an embedded
    ``data:`` array; an empty value there yields 0, and an ``IndexError``
    during the lookup leaves the regular price equal to the sale price.

    Returns:
        ``message(code='invalid_url')`` when the JSON-LD block cannot be
        decoded, otherwise a dict with the keys ``url``, ``name``, ``price``,
        ``price_sale``, ``price_card``, ``image`` and ``raw``.
    """
    clean_url = pat.findall(url)[0]
    with get_session() as session:
        page = session.get(clean_url).text

    try:
        # find_str's result is re-prefixed with the marker before decoding.
        ld_text = '{"@context"' + find_str(page, '{"@context"', '</script')
        lds = json.loads(ld_text)
    except JSONDecodeError:
        return message(code='invalid_url')

    price_sale = int(lds['offers'][0]['price'])
    price = price_sale
    try:
        blob = find_str(page, 'data:[[null,null,[[[[null,[', ']\n]')
        last_field = blob.split('","')[-1]
        # Keep what follows the last non-breaking space, minus the final
        # character, and drop thousands separators.
        digits = last_field.split('\xa0')[-1][:-1].replace(',', '')
        price = int(digits) if digits != '' else 0
    except IndexError:
        pass

    return dict(
        url=clean_url,
        name=lds['name'],
        price=price,
        price_sale=price_sale,
        price_card=price_sale,
        image=lds['image'],
        raw=lds,
    )
def vtex_parser(content, url):
    """Parse product data for sites built on the Vtex e-commerce platform.

    These sites expose the product data in the same way, so one parser
    serves all of them.

    Args:
        content: Text of the product page.
        url: Product URL, echoed back in the result.

    Returns:
        ``None`` when the embedded JSON cannot be decoded,
        ``message(code='out_of_stock')`` when the product is not flagged as
        available, otherwise a ``StoreItem``.
    """
    try:
        data = json.loads(find_str(content, vtex_str, ';'))
    except (JSONDecodeError, TypeError):
        # TypeError covers a non-string result from find_str (presumably
        # None when the marker is missing -- confirm against find_str).
        return None
    if not data.get('available', False):
        return message(code='out_of_stock')

    # A missing or empty SKU list falls back to an empty SKU so that the
    # defaults below apply instead of raising IndexError.
    skus = data.get('skus') or [{}]
    sku = skus[0]

    price = sku.get('spotPrice', 0)
    price_sale = sku.get('bestPrice', price)

    # 'listPrice' may be absent; comparing None with 0 would raise TypeError.
    list_price = sku.get('listPrice') or 0
    if list_price > 0:
        # In testing, Ilko does not use this field, but Casa de la Cerveza
        # does (regular price).
        price = list_price

    # The integer price shows up as "1000000" instead of "10000.00", so the
    # formatted value is inspected to find out how many decimals there are,
    # assuming the decimal separator is a comma.
    price_fmt = (sku.get('listPriceFormated') or '').split(',')
    decimals = len(price_fmt[-1]) if len(price_fmt) > 1 else 0
    if decimals > 0:
        scale = 10 ** decimals
        price /= scale
        price_sale /= scale

    return StoreItem(
        url=url,
        name=data.get('name', ''),
        image=sku.get('image', ''),
        price=price,
        price_sale=price_sale,
        price_card=price_sale,
        raw=data,
    )
def parse(url):
    """Fetch a product page and read its prices from the ``__NEXT_DATA__`` JSON.

    Prices are taken from the first variant of the product and keyed by
    their ``type``: ``NORMAL`` is the regular price, ``AB`` (then
    ``INTERNET``) the sale price, and ``CMR`` the card price. Each one falls
    back to the previous tier when absent.

    Returns:
        ``None`` when the embedded JSON is missing or cannot be decoded,
        otherwise a dict with the keys ``url``, ``name``, ``price``,
        ``price_sale``, ``price_card``, ``image`` and ``raw``.

    Raises:
        KeyError: If the decoded payload lacks the expected product
            structure or has no ``NORMAL`` price.
    """
    with get_session() as s:
        req = s.get(url)

    data_json = find_str(
        req.text, '__NEXT_DATA__" type="application/json">', '</script>')
    if not data_json:
        # Nothing was extracted (assumes find_str yields an empty/None value
        # when the marker is absent -- confirm against find_str).
        return None
    try:
        data = json.loads(data_json.strip())
    except json.decoder.JSONDecodeError:
        return None

    # These sections are dropped from the stored raw payload. They are not
    # needed below, so their absence must not abort the parse.
    props = data['props']
    for unused in ('features', 'footer', 'header'):
        props.pop(unused, None)

    prod = props['pageProps']['productProps']['result']
    prices = {
        entry['type']: only_numbers(entry['price'])
        for entry in prod['variants'][0]['price']
    }
    price = prices['NORMAL']
    price_sale = prices.get('AB', prices.get('INTERNET', price))
    price_card = prices.get('CMR', price_sale)
    return dict(
        url=url,
        name=prod['name'],
        price=price,
        price_sale=price_sale,
        price_card=price_card,
        image=media_url + prod['id'],
        raw=data,
    )
def parse(url):
    """Fetch a product page plus the promotions feed and return its prices.

    Product data is read from the doubly JSON-encoded ``__renderData``
    assignment in the page. The card price starts at the seller's ``Price``
    and is lowered by any promotion listed for the product whose ``tcenco``
    flag is set and whose ``value`` is lower.

    Returns:
        ``message(code='product_not_found')`` when the embedded data cannot
        be decoded, otherwise a dict with the keys ``url``, ``name``,
        ``price``, ``price_sale``, ``price_card``, ``image`` and ``raw``.
    """
    with get_session() as s:
        req = s.get(url)
        # NOTE(review): hard-coded API key; consider moving it to
        # configuration instead of keeping it in source.
        req2 = s.get(url_promos, headers={
            'x-api-key': 'IuimuMneIKJd3tapno2Ag1c1WcAES97j'
        }).json()

    try:
        data_json = find_str(req.text, '__renderData = ', ';</script>')
        # The payload is a JSON string that itself contains JSON.
        data = json.loads(json.loads(data_json))
    except (json.decoder.JSONDecodeError, TypeError):
        # TypeError: nothing string-like was extracted, or the inner value
        # was not a JSON-encoded string.
        return message(code='product_not_found')

    # The menu is dropped from the stored raw payload. It is not needed
    # below, so its absence must not abort the parse.
    data.pop('menu', None)

    product = data['pdp']['product'][0]
    prod = product['items'][0]
    prod_sell = prod['sellers'][0]['commertialOffer']
    price_card = prod_sell['Price']
    prod_id = product['productId']

    if prod_id in req2['products']:
        for offer_id in req2['products'][prod_id]:
            if offer_id in req2['promotions']:
                offer = req2['promotions'][offer_id]
                if offer['tcenco'] and offer['value'] < price_card:
                    price_card = offer['value']
    # TODO: where is the card price actually exposed?
    return dict(
        url=url,
        name=prod['name'],
        price=prod_sell['ListPrice'],
        price_sale=prod_sell['Price'],
        price_card=price_card,
        image=prod['images'][0]['imageUrl'],
        raw=data,
    )
def parse(url):
    """Fetch a product page and read its prices from the ``"variants"`` JSON.

    Only the first variant is used. Its prices are keyed by ``type``: the
    regular price is ``normalPrice`` (else ``internetPrice``, else 0), the
    sale price is ``internetPrice`` (else the regular price) and the card
    price is ``cmrPrice`` (else the sale price).

    Returns:
        ``message(code='out_of_stock')`` when the page carries the
        schema.org OutOfStock marker, ``message(code='invalid_url')`` when
        the variants JSON cannot be decoded, otherwise a dict with the keys
        ``url``, ``name``, ``price``, ``price_sale``, ``price_card``,
        ``image`` and ``raw``.
    """
    with get_session() as session:
        response = session.get(url)

    if 'https://schema.org/OutOfStock' in response.text:
        return message(code='out_of_stock')

    try:
        variants_json = find_str(response.text, '"variants":', ',"layoutType"')
        data_prod = json.loads(variants_json)[0]
    except (json.decoder.JSONDecodeError, TypeError):
        return message(code='invalid_url')

    # Each price entry holds a list whose first item is a dot-grouped amount.
    prices = {}
    for entry in data_prod['prices']:
        amount = entry['price'][0].replace('.', '')
        prices[entry['type']] = int(amount)

    internet_or_zero = prices.get('internetPrice', 0)
    price = prices.get('normalPrice', internet_or_zero)
    price_sale = prices.get('internetPrice', price)
    price_card = prices.get('cmrPrice', price_sale)

    return dict(
        url=url,
        name=data_prod['name'],
        price=price,
        price_sale=price_sale,
        price_card=price_card,
        image=media_url.format(data_prod['id']),
        raw=data_prod,
    )
def parse(url):
    """Fetch a product page and return the prices from its ``priceModule``.

    The first match of ``pat`` against ``url`` is used as the canonical URL
    and the page is requested with the ``conf`` cookies. Prices are returned
    exactly as found in the page data; no numeric conversion is applied
    here.

    Returns:
        ``message(code='product_not_found')`` when the page data cannot be
        decoded, is empty, or has no ``priceModule``; otherwise a dict with
        the keys ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``.
    """
    clean_url = pat.findall(url)[0]
    with get_session() as session:
        html_text = session.get(clean_url, cookies=conf).text

    try:
        page_data = json.loads(find_str(html_text, 'data: ', ',\n'))
    except JSONDecodeError:
        return message(code='product_not_found')

    if not (page_data and 'priceModule' in page_data):
        return message(code='product_not_found')

    price_module = page_data['priceModule']
    price = price_module['formatedPrice']
    # The activity price, when present, is both the sale and the card price.
    if 'formatedActivityPrice' in price_module:
        price_offer = price_module['formatedActivityPrice']
    else:
        price_offer = price

    page_module = page_data['pageModule']
    return dict(
        url=clean_url,
        name=page_module['title'],
        price=price,
        price_sale=price_offer,
        price_card=price_offer,
        image=page_module['imagePath'],
        raw=page_data,
    )
def parse(url):
    """Fetch a product page and read its prices from the ``__NEXT_DATA__`` JSON.

    The regular price is ``regularPrice``. The sale price is
    ``discountPrice`` and the card price is ``cmrPrice``; each falls back to
    the previous tier when missing or falsy.

    Returns:
        ``None`` when the embedded JSON is missing or cannot be decoded,
        otherwise a dict with the keys ``url``, ``name``, ``price``,
        ``price_sale``, ``price_card``, ``image`` and ``raw``.

    Raises:
        KeyError: If the decoded payload lacks the expected product
            structure.
    """
    with get_session() as s:
        req = s.get(url)

    data_json = find_str(
        req.text, '__NEXT_DATA__" type="application/json">', '</script>')
    if not data_json:
        # Nothing was extracted (assumes find_str yields an empty/None value
        # when the marker is absent -- confirm against find_str).
        return None
    try:
        data = json.loads(data_json.strip())
    except json.decoder.JSONDecodeError:
        return None

    # These sections are dropped from the stored raw payload. They are not
    # needed below, so their absence must not abort the parse.
    props = data['props']
    for unused in ('categories', 'settings'):
        props.pop(unused, None)

    prod = props['pageProps']['data']
    prod_prices = prod['prices']
    price = prod_prices['regularPrice']
    # ``or`` also covers keys that are present but empty/zero.
    price_sale = prod_prices.get('discountPrice', price) or price
    price_card = prod_prices.get('cmrPrice', price_sale) or price_sale
    return dict(
        url=url,
        name=prod['name'],
        price=price,
        price_sale=price_sale,
        price_card=price_card,
        image=prod['images'][0]['url'],
        raw=data,
    )
def parse(url):
    """Fetch a product page and derive its prices from JSON-LD offers.

    The regular price is the struck-through amount in the page markup (0
    when it cannot be read). The offer prices from the JSON-LD block are
    sorted ascending and combined with it:

    * no struck-through price: every price is the lowest offer;
    * several offers: the lowest is the card price, the next one the sale
      price;
    * a single offer that differs from the regular price: it is the card
      price, and also the sale price unless the page contains
      ``class="img-tc"``.

    Returns:
        ``message(code='invalid_url')`` when the JSON-LD block is missing or
        cannot be decoded, otherwise a dict with the keys ``url``, ``name``,
        ``price``, ``price_sale``, ``price_card``, ``image`` and ``raw``.
    """
    with get_session() as session:
        page = session.get(url).text

    try:
        ld_json = find_str(
            page, '<script type="application/ld+json">', '</script>')
        if not ld_json:
            return message(code='invalid_url')
        data = json.loads(ld_json.strip())
    except (JSONDecodeError, ValueError):
        return message(code='invalid_url')

    try:
        struck = find_str(page, '"price"><s>$', '<')
        price_normal = int(struck.replace('.', ''))
    except AttributeError:
        # No struck-through price could be extracted from the markup.
        price_normal = 0

    offers = [int(offer['price']) for offer in data['offers']]
    offers.sort()
    lowest = offers[0]

    price_sale = price_card = price_normal
    if price_normal == 0:
        price_normal = price_sale = price_card = lowest
    elif len(offers) > 1:
        price_card, price_sale = lowest, offers[1]
    elif lowest != price_normal:
        price_card = lowest
        if 'class="img-tc"' not in page:
            price_sale = lowest

    return dict(
        url=url,
        name=html.unescape(data['name']),
        price=price_normal,
        price_sale=price_sale,
        price_card=price_card,
        image=data['image'],
        raw=data,
    )
def retailrocket_parser(content, url):
    """Parse product data for stores based on RetailRocket.

    AudioMusica appears to be one such store.

    Args:
        content: Text of the product page.
        url: Product URL, echoed back in the result.

    Returns:
        ``None`` when the embedded JSON cannot be decoded, otherwise a
        ``StoreItem`` whose three prices all equal the payload's ``price``
        (0 when absent).
    """
    raw_json = find_str(content, rr_str, ');')
    try:
        data = json.loads(raw_json)
    except JSONDecodeError:
        return None

    single_price = data.get('price', 0)
    return StoreItem(
        url=url,
        name=data.get('name', ''),
        image=data.get('pictureUrl', ''),
        price=single_price,
        price_sale=single_price,
        price_card=single_price,
        raw=data,
    )
def parse(url):
    """Fetch a deal page and read its prices from ``__APP_INITIAL_STATE__``.

    The regular price is the deal's ``priceSummary.value`` and the offer
    price its ``priceSummary.price``. Both are reduced with
    ``only_numbers`` from their formatted amounts. The offer price is also
    used as the card price.

    Returns:
        A dict with the keys ``url``, ``name``, ``price``, ``price_sale``,
        ``price_card``, ``image`` and ``raw``.

    Note:
        No error handling is done here; decoding or lookup failures
        propagate to the caller.
    """
    with get_session() as session:
        response = session.get(url)

    state_json = find_str(
        response.text, '__APP_INITIAL_STATE__ = ', '</script>')
    data = json.loads(state_json)

    deal = data['deal']
    summary = deal['priceSummary']
    price = only_numbers(summary['value']['formattedAmount'])
    price_offer = only_numbers(summary['price']['formattedAmount'])

    return dict(
        url=url,
        name=deal['title'],
        price=price,
        price_sale=price_offer,
        price_card=price_offer,
        image=deal['largeImageUrl'],
        raw=data,
    )
def parse(url):
    """Fetch a product page and resolve its prices through the product API.

    The product identifier is taken from the page's Android app deep-link
    (``android-app://.../product/<id>``) and appended to ``api_url``. The
    first element of the API's JSON response is used.

    Returns:
        ``None`` when the page has no deep-link marker,
        ``message(code='out_of_stock')`` when the API data carries no list
        price, otherwise a dict with the keys ``url``, ``name``, ``price``,
        ``price_sale``, ``price_card``, ``image`` and ``raw``.
    """
    with get_session() as s:
        req = s.get(url)

    ind = 'alternate" href="android-app://cl.baytex.ripley/product/'
    # A membership test is used because str.index raises ValueError for a
    # missing marker and never returns -1.
    if ind not in req.text:
        return None
    part = find_str(req.text, ind, '"')

    with get_session() as s:
        data = s.get(api_url + part).json()[0]

    if 'prices' not in data or 'listPrice' not in data['prices']:
        return message(code='out_of_stock')

    prices = data['prices']
    list_price = prices['listPrice']
    # With no separate offer price, the sale price is the list price.
    offer_price = prices.get('offerPrice', list_price)
    return dict(
        url=url,
        name=data['name'],
        price=int(list_price),
        price_sale=int(offer_price),
        price_card=int(prices.get('cardPrice', offer_price)),
        # The API returns a protocol-relative image URL.
        image='https:' + data['fullImage'],
        raw=data,
    )