from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = "wine_com"
root_url = "http://www.wine.com/"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "USA"
searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Convert a scraped price string into an integer amount in cents.

    Three layouts are tried in order: ``"$D.CC"``, ``"<n>p"`` and a whole
    amount ``"$D"``.

    Args:
        pricestr: Raw price text, e.g. ``"$12.99"``.

    Returns:
        The amount in cents as an int, ``pricestr`` itself when it is the
        empty string, or ``None`` when no layout matches.
    """
    print(pricestr)
    if pricestr == '':
        return pricestr
    # Keep only digits, the decimal point and the dollar sign.
    pricestr = re.sub("[^0-9.$]", "", pricestr)

    price = parse('${pound:d}.{pence:d}', pricestr)
    if price is not None:
        # A full "$D.CC" price must combine both parts; returning only the
        # cents part (or nothing at all) loses the dollars.
        return price.named['pound'] * 100 + price.named['pence']

    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']

    price = parse('${dol:d}', pricestr)
    if price is not None:
        return price.named['dol'] * 100

    # parse() returns None on a mismatch; dereferencing it raised
    # AttributeError, so report "no price" explicitly instead.
    return None
from ers import all_keywords_fr as keywords
from ers import clean_xpathd_text
from ers import fpath_namer, mh_brands, clean_url
from matcher import BrandMatcher
from validators import validate_raw_files, check_products_detection

parser = etree.HTMLParser()

# Init variables and assets
shop_id = 'auchan_drive'
root_url = 'https://www.auchandrive.fr'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'FR'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=True)
brm = BrandMatcher()


def getprice(pricestr):
    """Convert a ``"<euros>,<cents>€"`` price string into integer cents.

    Args:
        pricestr: Raw price text; may be empty or ``None``.

    Returns:
        ``euros * 100 + cents`` as an int, or ``None`` when the input is
        falsy or does not match the expected layout.
    """
    if not pricestr:
        return None
    # Keep only digits, the decimal comma and the euro sign.
    cleaned = re.sub("[^0-9,€]", "", pricestr)
    match = parse('{euro:d},{cent:d}€', cleaned)
    if match is None:
        return None
    fields = match.named
    return fields['euro'] * 100 + fields['cent']


###################
#
# CTG page xpathing
#
###################
from validators import validate_raw_files from create_csvs import create_csvs from ers import all_keywords_jp as keywords, fpath_namer, mh_brands, clean_url, headers from ers import TEST_PAGES_FOLDER_PATH from matcher import BrandMatcher from custom_browser import CustomDriver from parse import parse import re from ers import clean_xpathd_text # Init variables and assets shop_id = "seijoishii" root_url = "https://www.seijoishii.com/" country = "JP" searches, categories, products = {}, {}, {} driver = CustomDriver(headless=False) def getprice(pricestr): if pricestr == '': return pricestr pricestr = re.sub("[^0-9]", "", pricestr) price = parse('{pound:d}', pricestr) if price: return price.named['pound'] * 100 ################### # # CTG page xpathing # ################### exple_ctg_page_path = op.join(TEST_PAGES_FOLDER_PATH, "seijoishii",
import shutil from custom_browser import CustomDriver from parse import parse from validators import validate_raw_files from create_csvs import create_csvs # Init variables and assets shop_id = "mondovino" root_url = "https://www.mondovino.ch" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = "CH" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=False, download_images=True) def getprice(pricestr): if pricestr == '': return pricestr pricestr = re.sub("[^0-9.chf]", "", pricestr.lower()) price = parse('chf{pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = {
from validators import validate_raw_files from create_csvs import create_csvs from custom_browser import CustomDriver from ers import all_keywords_uk as keywords, fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from parse import parse # Init variables and assets shop_id = 'harrods' root_url = 'https://www.harrods.com/' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'UK' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, firefox=True, download_images=True) urls_ctgs_dict = { 'champagne': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}', 'sparkling': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/champagne-and-sparkling?view=Product&list=List&viewAll=False&pageNumber={page}', 'still_wines': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/white-wine?view=Product&list=List&viewAll=False&pageNumber={page}', 'whisky': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=34605', 'cognac': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23692', 'vodka': 'https://www.harrods.com/en-gb/food-and-wine/wine-and-spirits/spirits?view=Product&list=List&viewAll=False&pageNumber={page}&categoryFilterIds=23688', 'white_wine':
import shutil
from custom_browser import CustomDriver
from parse import parse
import re

# Init variables and assets
shop_id = "pogos_wine_spirits"
root_url = "https://www.pogoswine.com/"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "USA"
searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Convert a ``"$<dollars>.<cents>"`` price string into integer cents.

    When the text holds two or more ``$`` amounts, only the first one is
    used.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` as an int, the bare number for a
        ``"<n>p"`` string, ``pricestr`` itself when it is empty, or
        ``None`` when no supported layout matches.
    """
    if pricestr == '':
        return pricestr
    if pricestr.count('$') >= 2:
        # Keep the text between the first and second '$' only.
        pricestr = "$" + pricestr.split('$')[1]
    # Keep only digits, the decimal point and the dollar sign.
    pricestr = re.sub("[^0-9.$]", "", pricestr)
    price = parse('${pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    # NOTE(review): the cleanup regex strips 'p', so this fallback cannot
    # match as written.
    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']
    # parse() returns None on a mismatch; dereferencing it used to raise
    # AttributeError.
    return None
import requests_cache, imghdr
from validators import validate_raw_files
from create_csvs import create_csvs
from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import re

shop_id = 'vicampo'
root_url = 'https://www.vicampo.de'
# ('GET') is just the string 'GET'; a one-element tuple needs the trailing
# comma so the option is a real collection of method names.
requests_cache.install_cache(allowable_methods=('GET',))
country = 'DE'
searches, categories, products = {}, {}, {}
driver = CustomDriver(firefox=True)
from parse import parse


def getprice(pricestr):
    """Convert a ``"€<euros>,<cents>"`` price string into integer cents.

    Args:
        pricestr: Raw price text.

    Returns:
        ``euros * 100 + cents`` as an int, the bare number for a
        ``"<n>p"`` string, or ``None`` when the cleaned text is empty or
        matches no supported layout.
    """
    # Keep only digits, the decimal comma and the euro sign.
    pricestr = re.sub("[^0-9,€]", "", pricestr)
    if pricestr == '':
        return None
    print(pricestr)
    price = parse('€{dol:d},{pence:d}', pricestr)
    if price is not None:
        return price.named['dol'] * 100 + price.named['pence']
    # NOTE(review): the cleanup regex strips 'p', so this fallback cannot
    # match as written.
    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']
    # parse() returns None on a mismatch; dereferencing it used to raise
    # AttributeError.
    return None
from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = "selfridges" root_url = "http://www.selfridges.com/GB/en/" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = "UK" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=False) def getprice(pricestr): if pricestr == '': return pricestr pricestr = re.sub("[^0-9.£]", "", pricestr) price = parse('£{pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = {
# from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = "bevmo" root_url = "http://shop.bevmo.com/" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = "USA" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=False, download_images=True) def getprice(pricestr): pricestr = re.sub("[^0-9.$]", "", pricestr) if pricestr == '': return pricestr price = parse('${pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] def init_bevmo(driver):
from create_csvs import create_csvs from ers import all_keywords_aus as keywords, fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from parse import parse from custom_browser import CustomDriver # Init variables and assets shop_id = 'goodygoody' root_url = 'https://www.goodygoody.com/' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'AUS' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, download_images=True) def getprice(pricestr): if pricestr == '': return pricestr pricestr = pricestr.replace(',', '').strip() price = parse('${dol:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['dol'] * 100 + price.named['pence'] def init_goodygoody(driver):
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = "dan_murphy"
root_url = "http://www.danmurphys.com.au/"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "AUS"
searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=True)


def getprice(pricestr):
    """Convert a ``"$<dollars>.<cents>"`` price string into integer cents.

    Args:
        pricestr: Raw price text.

    Returns:
        ``dollars * 100 + cents`` as an int, the bare number for a
        ``"<n>p"`` string, ``pricestr`` itself when it is empty, or
        ``None`` when no supported layout matches.
    """
    if pricestr == '':
        return pricestr
    # Keep only digits, the decimal point and the dollar sign.
    pricestr = re.sub("[^0-9.$]", "", pricestr)
    price = parse('${pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    # NOTE(review): the cleanup regex strips 'p', so this fallback cannot
    # match as written.
    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']
    # parse() returns None on a mismatch; dereferencing it used to raise
    # AttributeError.
    return None
from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = 'vinatis' root_url = 'https://www.vinatis.com' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'FR' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, download_images=False) def getprice(pricestr): pricestr = pricestr.replace(' ', '') if pricestr == '': return pricestr price = parse('{dol:d},{pence:d}€', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['dol'] * 100 + price.named['pence'] urls_ctgs_dict = {
from create_csvs import create_csvs
from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import re

# Init variables and assets
shop_id = 'asda'
root_url = 'https://groceries.asda.com/'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False, download_images=True)


def getprice(pricestr):
    """Convert a ``"£<pounds>.<pence>"`` or ``"<n>p"`` string into pence.

    Args:
        pricestr: Raw price text.

    Returns:
        ``pounds * 100 + pence`` as an int, the bare number for a
        ``"<n>p"`` string, ``pricestr`` itself when it is empty, or
        ``None`` when neither layout matches.
    """
    if pricestr == '':
        return pricestr
    # Keep only digits, the decimal point, the pound sign and 'p'.
    pricestr = re.sub("[^0-9.£p]", "", pricestr)
    price = parse('£{pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']
    # parse() returns None on a mismatch; dereferencing it used to raise
    # AttributeError.
    return None


print(getprice('£40.00'))
from validators import validate_raw_files from create_csvs import create_csvs from ers import all_keywords_fr as keywords, fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = 'grandsvins_prives' root_url = 'https://grandsvins-prives.com' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'FR' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, download_images=False) urls_ctgs_dict = { 'champagne': 'https://grandsvins-prives.com/10-les-bulles#region=0&aoc=CHAMPAGNE&producteur=0&prix=0&couleur=0', 'sparkling': 'https://grandsvins-prives.com/10-les-bulles#region=0&aoc=CRÉMANT-DE-BORDEAUX&producteur=0&prix=0&couleur=0', 'still_wines': 'https://grandsvins-prives.com/18-nos-blancs/', 'red_wine': 'https://grandsvins-prives.com/20-bordeaux-grands-crus', 'whisky': 'https://grandsvins-prives.com/recherche?controller=search&orderby=position&orderway=desc&search_query=whisky&submit_search=', 'cognac': 'https://grandsvins-prives.com/83-spiritueux#region=0&aoc=COGNAC&producteur=0&prix=0&couleur=0', 'vodka':
from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = "argonaut" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) root_url = "https://www.argonautliquor.com/" country = "USA" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=True, download_images=True) def getprice(pricestr): if pricestr == '': return pricestr price = parse('${pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = {'vodka': 'https://www.argonautliquor.com/search/result_size/96/categories/Vodka/page/{page}',
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = 'kroger' root_url = 'https://www.kroger.com/' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'USA' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=False, download_images=True, firefox=True) brm = BrandMatcher() urls_ctgs_dict = { 'champagne': 'https://www.kroger.com/pl/champagne/0818000004?page={page}&tab=0', 'sparkling': 'https://www.kroger.com/pl/sparkling-wine/0818000003?page={page}&tab=0', 'still_wines': 'https://www.kroger.com/pl/white-wine/08122?page={page}&tab=0', 'whisky': 'https://www.kroger.com/pl/scotch-whiskey/0812100579?page={page}&tab=0', 'cognac': 'https://www.kroger.com/pl/brandy-cognac/0812100572?page={page}&tab=0', 'vodka': 'https://www.kroger.com/pl/vodka/0812100582?page={page}&tab=0', 'red_wine': 'https://www.kroger.com/pl/red-wine/08120?page={page}&tab=0', 'white_wine': 'https://www.kroger.com/pl/white-wine/08122?page={page}&tab=0', 'gin': 'https://www.kroger.com/pl/gin/0812100575?page={page}&tab=0', 'tequila': 'https://www.kroger.com/pl/tequila/0812100581?page={page}&tab=0', 'brandy': 'https://www.kroger.com/pl/brandy-cognac/0812100572?page={page}&tab=0', 'rum': 'https://www.kroger.com/pl/rum/0812100578?page={page}&tab=0',
from ers import all_keywords_aus as keywords, mh_brands, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer, fpath_namer import shutil from helpers.random_user_agent import randomua import requests from custom_browser import CustomDriver from time import sleep # Init variables and assets shop_id = 'my_bottle_shop' root_url = 'https://www.mybottleshop.com.au' session = requests_cache.CachedSession(fpath_namer(shop_id, 'requests_cache')) session.headers = {'User-Agent': randomua()} driver = CustomDriver(headless=False, download_images=True) with session.cache_disabled(): session.get('https://www.mybottleshop.com.au/directory/currency/switch/currency/AUD/uenc/') # print(session.cookies) country = 'AUS' searches, categories, products = {}, {}, {} from parse import parse def getprice(pricestr): if not pricestr: return price = parse('{pound:d}', pricestr) if price: return price.named['pound'] * 100 price = parse('{pound:d}.{pence:d}', pricestr)
from ers import all_keywords_de as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
import requests

# Init variables and assets
shop_id = 'hawesko'
root_url = 'https://www.hawesko.de'
country = 'DE'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)
from parse import parse


def getprice(pricestr):
    """Convert a whole-number price string into an integer (amount * 100).

    Args:
        pricestr: Raw price text expected to be a bare integer.

    Returns:
        The parsed integer multiplied by 100, or ``None`` when the input
        is falsy or is not a bare integer.
    """
    if not pricestr:
        return
    price = parse('{poundandcent:d}', pricestr)
    if price is None:
        # parse() returns None on a mismatch; dereferencing it used to
        # raise AttributeError.
        return None
    return price.named['poundandcent'] * 100


def getpromoprice(pricestr):
    pricestr = pricestr.replace(' ', '')
    if pricestr == '':
        return pricestr
from validators import validate_raw_files from create_csvs import create_csvs from ers import all_keywords_jp as keywords, fpath_namer, mh_brands, clean_url from ers import TEST_PAGES_FOLDER_PATH from matcher import BrandMatcher from custom_browser import CustomDriver from parse import parse from ers import clean_xpathd_text import re # Init variables and assets shop_id = "aeon_dewine" root_url = "https://www.aeondewine.com" country = "JP" searches, categories, products = {}, {}, {} driver = CustomDriver(headless=False) def getprice(pricestr): if pricestr == '': return pricestr pricestr = re.sub("[^0-9]", "", pricestr) price = parse('{pound:d}', pricestr) if price: return price.named['pound'] * 100 ################### # # CTG page xpathing # ################### exple_ctg_page_path = op.join(TEST_PAGES_FOLDER_PATH, "aeon_dewine",
from ers import all_keywords_uk as keywords
from ers import fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver

# Init variables and assets
shop_id = 'waitrose'
root_url = 'https://www.waitrose.com'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'UK'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=False, download_images=False)


def getprice(pricestr):
    """Convert a ``"£<pounds>.<pence>"`` or ``"<n>p"`` string into pence.

    Args:
        pricestr: Raw price text; an ``"Itemprice"`` label is removed
            before parsing.

    Returns:
        ``pounds * 100 + pence`` as an int, the bare number for a
        ``"<n>p"`` string, or ``None`` when the input is empty or matches
        neither layout.
    """
    if pricestr == '':
        return None
    pricestr = pricestr.replace('Itemprice', '')
    # Keep only digits, the decimal point, the pound sign and 'p'.
    pricestr = re.sub("[^0-9.£p]", "", pricestr)
    price = parse('£{pound:d}.{pence:d}', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    price = parse('{pence:d}p', pricestr)
    print("?? price", pricestr)
    if price is None:
        # parse() returns None on a mismatch; dereferencing it used to
        # raise AttributeError.
        return None
    return price.named['pence']
from create_csvs import create_csvs from ers import all_keywords_uk as keywords from ers import fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver # Init variables and assets shop_id = 'waitrose_cellar' root_url = 'http://www.waitrosecellar.com' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'UK' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, download_images=False) def getprice(pricestr): if pricestr == '': return pricestr pricestr = pricestr.replace(',', '').strip() price = parse('£{pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = {
from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = "nicks_wine_merchant" root_url = "https://www.nicks.com.au/" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = "AUS" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=True) def getprice(pricestr): if pricestr == '': return pricestr pricestr = re.sub("[^0-9.$]", "", pricestr) price = parse('${pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = {
from create_csvs import create_csvs
from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, headers
from matcher import BrandMatcher
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse

# Init variables and assets
shop_id = 'twin_liquors'
root_url = 'http://www.twinliquors.com'
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = 'USA'
searches, categories, products = {}, {}, {}
driver = CustomDriver(headless=True, download_images=False)

# Category name -> listing or search URL on the shop site.
urls_ctgs_dict = {
    'champagne': 'http://twinliquors.com/shop/catalogsearch/result/?q=champagne',
    'sparkling': 'http://twinliquors.com/shop/catalogsearch/result/?q=Sparkling+wine',
    'still_wines': 'http://twinliquors.com/shop/wine.html',
    'whisky': 'http://twinliquors.com/shop/catalogsearch/result/?q=whisky',
    'cognac': 'http://twinliquors.com/shop/catalogsearch/result/?q=cognac',
    'vodka': 'http://twinliquors.com/shop/catalogsearch/result/?q=vodka',
    'red_wine': 'http://twinliquors.com/shop/catalogsearch/result/?q=red+wine',
    # This entry used to repeat the red-wine query, so the white-wine
    # category would have collected red-wine results.
    'white_wine': 'http://twinliquors.com/shop/catalogsearch/result/?q=white+wine',
    'tequila': 'http://twinliquors.com/shop/catalogsearch/result/?q=tequila',
    'gin': 'http://twinliquors.com/shop/catalogsearch/result/?q=gin',
    'rum': 'http://twinliquors.com/shop/catalogsearch/result/?q=rum',
    'liquor': 'http://twinliquors.com/shop/catalogsearch/result/?q=liquor',
}
from create_csvs import create_csvs from ers import all_keywords_usa as keywords, fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from parse import parse from custom_browser import CustomDriver # Init variables and assets shop_id = 'reservebar' root_url = "https://www.reservebar.com" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'USA' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, download_images=True) def getprice(pricestr): if pricestr == '': return pricestr pricestr = pricestr.replace(',', '').strip() price = parse('${dol:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['dol'] * 100 + price.named['pence'] urls_ctgs_dict = {
from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = "freshdirect" root_url = "http://www.freshdirect.com" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = "USA" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=False) def getprice(pricestr): if pricestr == '': return pricestr if "/ea" in pricestr: pricestr = pricestr.split('/ea')[0] print(pricestr.count('$'), pricestr) if pricestr.count('$') == 2: pricestr = pricestr.split(' ')[0] print(pricestr) pricestr = re.sub("[^0-9.$]", "", pricestr) price = parse('${pound:d}.{pence:d}', pricestr) if price is None: price = parse('${pound:d}', pricestr)
from lxml import etree

parser = etree.HTMLParser()
from custom_browser import CustomDriver
import random

# Init variables and assets
driver = CustomDriver(headless=False, firefox=True, download_images=True)
random.choice([1, 2, 3])
count = 0
driver.get('https://www.leparisien.fr')

# Endless random walk over the site's links: prefer on-site '.php' links,
# then any on-site link, and go back to the home page when a page offers
# neither.
while True:
    print('Looping', count)
    hrefs = [
        anchor.get_attribute('href')
        for anchor in driver.driver.find_elements_by_xpath("//a[@href]")
    ]
    on_site = [href for href in hrefs if "www.leparisien.fr" in href]
    php_pages = [href for href in on_site if '.php' in href]

    candidates = php_pages or on_site
    if not candidates:
        driver.get('https://www.leparisien.fr')
        continue

    url = random.choice(candidates)
    count += 1
    print(count, url)
    driver.get(url)
from ers import COLLECTION_DATE, file_hash, img_path_namer
import shutil
from custom_browser import CustomDriver
from parse import parse
from validators import validate_raw_files
from create_csvs import create_csvs

# Init variables and assets
shop_id = "bodeboca"
root_url = "https://bodeboca.com"
requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache'))
country = "ES"
searches, categories, products = {}, {}, {}
# If necessary
driver = CustomDriver(headless=False, download_images=True, firefox=True)


def getprice(pricestr):
    """Convert a ``"<euros>,<cents>€"`` price string into integer cents.

    Only the text before the first ``€`` is considered.

    Args:
        pricestr: Raw price text.

    Returns:
        ``euros * 100 + cents`` as an int, the bare number for a
        ``"<n>p"`` string, ``pricestr`` itself when it is empty, or
        ``None`` when no supported layout matches.
    """
    if pricestr == '':
        return pricestr
    # Keep only digits, the decimal comma and the euro sign.
    pricestr = re.sub("[^0-9,€]", "", pricestr)
    # Drop anything after the first euro sign, then restore the sign.
    pricestr = pricestr.split('€')[0] + '€'
    price = parse('{pound:d},{pence:d}€', pricestr)
    if price is not None:
        return price.named['pound'] * 100 + price.named['pence']
    # NOTE(review): the cleanup regex strips 'p', so this fallback cannot
    # match as written.
    price = parse('{pence:d}p', pricestr)
    if price is not None:
        return price.named['pence']
    # parse() returns None on a mismatch; dereferencing it used to raise
    # AttributeError.
    return None
from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse # Init variables and assets shop_id = "wallys" root_url = "http://www.wallywine.com/" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = "USA" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=True) def getprice(pricestr): if pricestr == '': return pricestr pricestr = re.sub("[^0-9.$]", "", pricestr) price = parse('${pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = { "vodka": "https://www.wallywine.com/spirits-brews/spirits/type/vodka.html",
from create_csvs import create_csvs from ers import all_keywords_uk as keywords from ers import fpath_namer, mh_brands, clean_url, headers from matcher import BrandMatcher from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver # Init variables and assets shop_id = 'sainsbury' root_url = 'https://www.sainsburys.co.uk' requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = 'UK' searches, categories, products = {}, {}, {} driver = CustomDriver(headless=True, download_images=False) def getprice(pricestr): if pricestr == '': return pricestr pricestr = pricestr.replace(',', '').strip() price = parse('£{pound:d}.{pence:d}', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = {
from ers import COLLECTION_DATE, file_hash, img_path_namer import shutil from custom_browser import CustomDriver from parse import parse from validators import validate_raw_files from create_csvs import create_csvs # Init variables and assets shop_id = "lavinia_es" root_url = "http://www.lavinia.es" requests_cache.install_cache(fpath_namer(shop_id, 'requests_cache')) country = "ES" searches, categories, products = {}, {}, {} # If necessary driver = CustomDriver(headless=False) def getprice(pricestr): if pricestr == '': return pricestr pricestr = re.sub("[^0-9,€]", "", pricestr) price = parse('{pound:d},{pence:d}€', pricestr) if price is None: price = parse('{pence:d}p', pricestr) return price.named['pence'] else: return price.named['pound'] * 100 + price.named['pence'] urls_ctgs_dict = {