def create_single_wine(unparsed_wine, title, url, article, tags):
    """Parse a single-wine article and save it as one ``Wine`` record.

    NOTE: this module defines ``create_single_wine`` a second time further
    down; at import time that later definition replaces this one.

    Args:
        unparsed_wine: Iterable of ``(key, value)`` HTML fragments describing
            the wine (eyes, nose, price, ...). May be empty or ``None``.
        title: Article title; HTML entities are unescaped before use.
        url: Article URL, used to look up or create the ``History`` row.
        article: Article markup. ``str(article)`` is scanned for ``<p>``
            paragraphs to build the description and is stored verbatim as
            ``harvest_data``.
        tags: Iterable of tag strings. A ``white`` or ``red`` tag (any case)
            sets the colour; ``red`` wins if both are present.

    Returns:
        ``False`` if a wine with the same name/colour was already harvested
        from this URL and has since been modified or deleted (nothing is
        written in that case); ``True`` once the wine has been saved.
    """
    color = 'N/A'
    fields = {
        'eyes': 'N/A',
        'nose': 'N/A',
        'mouth': 'N/A',
        'overall': 'N/A',
        'producer': 'N/A',
        'price': 0,
        'region': '',
        'sub_region': '',
        'variety': '',
        'vintage': '',
        'abv': '',
    }

    # Wine description: join the text of every <p>...</p> in the article.
    single_wine_descrip = re.compile(r"""<p><strong>|<p.*?>(.*?)<\/p>""",
                                     re.M | re.S)
    descrip_results = single_wine_descrip.findall(str(article))
    description_soup = BeautifulSoup(' '.join(descrip_results), 'html.parser')
    description = description_soup.getText()

    title = html.unescape(title)

    # Wine colour
    lowered_tags = [tag.lower() for tag in tags]
    if 'white' in lowered_tags:
        color = 'White'
    if 'red' in lowered_tags:
        color = 'Red'

    # Substring looked for in the (lower-cased) key -> field it fills.
    # Order matters: "sub-region" must be tested before "region".
    key_to_field = (
        ('eyes', 'eyes'),
        ('nose', 'nose'),
        ('mouth', 'mouth'),
        ('all in all', 'overall'),
        ('producer', 'producer'),
        ('price', 'price'),
        ('sub-region', 'sub_region'),
        ('region', 'region'),
        ('variety', 'variety'),
        ('vintage', 'vintage'),
        ('abv', 'abv'),
    )

    # Process single-wine results
    for key, value in unparsed_wine or ():
        value = BeautifulSoup(value, 'html.parser').getText()
        key = BeautifulSoup(key, 'html.parser').getText().lower()
        for needle, field in key_to_field:
            if needle in key:
                if field == 'price':
                    value = value.replace("$", "")
                elif field == 'variety':
                    value = str(value)
                fields[field] = value
                break

    # Fall back to guessing the variety from the title. The comparison is
    # case-insensitive so that "Chardonnay" in a title is recognised, and the
    # stored value uses the canonical spelling/capitalisation.
    known_varieties = (
        'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
        'Cabernet Sauvignon', 'Merlot', 'Pinot Noir',
    )
    if not fields['variety']:
        lowered_title = title.lower()
        fields['variety'] = next(
            (name for name in known_varieties
             if name.lower() in lowered_title),
            'N/A')

    # Search for previous history
    previous_history = History.objects.filter(url=url)
    if previous_history:
        h = History.objects.get(url=url)
        h.date = datetime.datetime.now(datetime.timezone.utc)
        h.save()
        previous_wine = Wine.wines.filter(name=title, color=color,
                                          harvested_from=h)
        if previous_wine:
            p_wine = Wine.wines.get(name=title, color=color, harvested_from=h)
            if p_wine.was_modified or p_wine.deleted:
                # Never overwrite manual edits or resurrect deleted wines.
                print("Wine has been modified or deleted, not updating!")
                return False
            print("Removing wine")
            p_wine.harvest_delete()
    else:
        # Create History
        h = History(url=url, wine_count=1,
                    date=datetime.datetime.now(datetime.timezone.utc))
        h.save()

    # Save the wine to the database
    w = Wine(name=title,
             color=color,
             description=description,
             tags=tags,
             harvest_data=str(article),
             harvested_from=h,
             harvested_date=datetime.datetime.now(datetime.timezone.utc),
             **fields)
    w.harvest_save()
    return True
def create_multi_wines_exception_a(url, article):
    """Harvest every wine from an article that uses the underlined-span layout.

    Each wine is the text between one
    ``<span style="text-decoration: underline;">`` and the next ``<span``.
    Inside it, ``<strong>label</strong>value`` pairs supply the fields.

    NOTE: this module defines ``create_multi_wines_exception_a`` a second
    time further down; at import time that later definition replaces this one.

    Args:
        url: Article URL, used (together with the number of wines found) to
            look up or create the ``History`` row.
        article: Article markup; ``str(article)`` is parsed and also stored
            verbatim as ``harvest_data`` on every wine.

    Returns:
        ``False`` as soon as a previously harvested wine is found to be
        modified or deleted (remaining wines are not processed). Otherwise
        the function falls off the end and returns ``None``.
    """
    wines_regex = re.compile(
        r"""<span style="text-decoration: underline;">(.*?)(?=<span)""",
        re.M | re.S | re.I)
    wines = wines_regex.findall(str(article))

    title_regex = re.compile(r"""<strong>(.*?) –""")
    wine_info_regex = re.compile(r"""<strong>(.*?)<\/strong>(.*?)(?=<)""",
                                 re.I | re.M | re.S)

    # Checked in this order; the first name found in the title wins.
    known_varieties = (
        'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
        'Cabernet Sauvignon', 'Merlot', 'Pinot Noir',
    )

    # Create history
    previous_history = History.objects.filter(url=url, wine_count=len(wines))
    if previous_history:
        h = History.objects.get(url=url, wine_count=len(wines))
        h.date = datetime.datetime.now(datetime.timezone.utc)
        h.save()
    else:
        h = History(url=url, wine_count=len(wines),
                    date=datetime.datetime.now(datetime.timezone.utc))
        h.save()

    for wine in wines:
        info = wine_info_regex.findall(wine)

        color, eyes, nose, mouth, overall, producer = ('N/A', ) * 6
        price = 0
        region, sub_region, variety, vintage, description, abv = ('', ) * 6

        title = title_regex.findall(wine)
        if title:
            title = title[0]
        else:
            print("Couldn't determine title!")
            continue

        # Guess the variety from the title. Each name is tested against the
        # title; a bare string literal as a condition is always true and
        # would label every unmatched wine "Syrah".
        for variety_name in known_varieties:
            if variety_name in title:
                variety = variety_name
                break

        for key, value in info:
            value_soup = BeautifulSoup(value, 'html.parser')
            value = value_soup.getText()
            key_soup = BeautifulSoup(key, 'html.parser')
            key = key_soup.getText()

            if ("Eyes" in key) or ("eyes" in key):
                eyes = value
            elif ("Nose" in key) or ("nose" in key):
                nose = value
            elif ("Mouth" in key) or ("mouth" in key):
                mouth = value
            elif ("All in all" in key) or ("all in all" in key):
                overall = value
            elif ("Producer" in key) or ("producer" in key):
                producer = value
            elif ("Price" in key) or ("price" in key):
                price = value.replace("$", "")
            # "Sub-Region" must be tested before "Region" (substring match).
            elif ("Sub-Region" in key) or ("sub-region" in key):
                sub_region = value
            elif ("Region" in key) or ("region" in key):
                region = value
            elif ("Variety" in key) or ("variety" in key):
                variety = value
            elif ("Vintage" in key) or ("vintage" in key):
                vintage = value
            elif ("ABV" in key) or ("abv" in key):
                abv = value

        # Search for previous history
        if previous_history:
            previous_wine = Wine.wines.filter(name=title, color=color,
                                              harvested_from=h)
            if previous_wine:
                p_wine = Wine.wines.get(name=title, color=color,
                                        harvested_from=h)
                if p_wine.was_modified or p_wine.deleted:
                    # NOTE(review): this aborts the whole article, not just
                    # this wine - confirm that is intended.
                    print("Wine has been modified or deleted, not updating!")
                    return False
                print("Removing wine and updating")
                p_wine.harvest_delete()

        w = Wine(name=title,
                 color=color,
                 eyes=eyes,
                 nose=nose,
                 mouth=mouth,
                 overall=overall,
                 producer=producer,
                 price=price,
                 region=region,
                 sub_region=sub_region,
                 variety=variety,
                 vintage=vintage,
                 abv=abv,
                 description=description,
                 tags='',
                 harvest_data=str(article),
                 harvested_from=h,
                 harvested_date=datetime.datetime.now(datetime.timezone.utc))
        w.harvest_save()
def create_multi_wines(wines, url, article, tags):
    """Save every wine of a multi-wine article as its own ``Wine`` record.

    NOTE: this module defines ``create_multi_wines`` a second time further
    down; at import time that later definition replaces this one.

    Args:
        wines: Sequence of 3-item sequences
            ``[title_html, "(sub-region, region) $price", description]``.
        url: Article URL, used (together with ``len(wines)``) to look up or
            create the ``History`` row.
        article: Article markup, stored as ``str(article)`` in
            ``harvest_data`` on every wine.
        tags: Article tags, stored as ``str(tags)`` on every wine.

    Returns:
        ``False`` as soon as a previously harvested wine is found to be
        modified or deleted (remaining wines are not processed); ``True``
        after all wines have been handled.
    """
    # Regex
    title_regex = re.compile(r"""<strong>(.*)""", re.M | re.S | re.I)
    alt_title_regex = re.compile(r"""<span .*>(.*?)</span>""")
    # Compiled once here rather than on every loop iteration.
    region_price_regex = re.compile(r"""\((.*?)\) \$(.+)|\((.*?)\)""",
                                    re.M | re.S | re.I)

    # Checked in this order; the first name found in the title wins.
    known_varieties = (
        'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
        'Cabernet Sauvignon', 'Merlot', 'Pinot Noir',
    )

    # Create history
    previous_history = History.objects.filter(url=url, wine_count=len(wines))
    if previous_history:
        h = History.objects.get(url=url, wine_count=len(wines))
        h.date = datetime.datetime.now(datetime.timezone.utc)
        h.save()
    else:
        h = History(url=url, wine_count=len(wines),
                    date=datetime.datetime.now(datetime.timezone.utc))
        h.save()

    print("Length of wines:" + str(len(wines)))
    for wine in wines:
        # ['title', 'subregion/region price', 'description']
        title = title_regex.findall(wine[0]) or alt_title_regex.findall(wine[0])
        if not title:
            # No title means no wine: shrink the recorded count and move on.
            print("No wine??")
            h.wine_count -= 1
            h.save()
            continue
        # findall() returns a list; use the first match in both cases
        # (passing the whole list to strip_tags would stringify the list).
        title = html.unescape(strip_tags(title[0]))

        color, eyes, nose, mouth, overall, producer = ('N/A', ) * 6
        price = 0
        region, sub_region, vintage, abv = ('', ) * 4

        variety = next(
            (name for name in known_varieties if name in title), '')

        # The colour is inferred from the CSS colour used in the title markup.
        if '#ff99cc' in wine[0]:
            color = 'Rosé'
        if '#339966' in wine[0]:
            color = 'White'
        if '#800000' in wine[0]:
            color = 'Red'

        # Either "(regions) $price" (groups 0 and 1) or "(regions)" (group 2).
        total_region = ''
        region_price = region_price_regex.findall(wine[1])
        if region_price:
            with_price_region, found_price, bare_region = region_price[0]
            if with_price_region:
                total_region = with_price_region
            if found_price:
                price = found_price
            if bare_region:
                total_region = bare_region

        if total_region:
            region_list = [part.strip() for part in total_region.split(',')]
            region_count = len(region_list)
            # Compare by value; "is" on ints tests identity, not equality.
            if region_count == 1:
                region = region_list[0]
            elif region_count == 2:
                region = region_list[1]
                sub_region = region_list[0]
            elif region_count > 2:
                # Last item is the region; the two before it together form
                # the sub-region.
                region = region_list[-1]
                sub_region = region_list[-3] + ", " + region_list[-2]

        description = str(wine[2].strip())

        # Search for previous history
        if previous_history:
            previous_wine = Wine.wines.filter(name=title, color=color,
                                              harvested_from=h)
            if previous_wine:
                p_wine = Wine.wines.get(name=title, color=color,
                                        harvested_from=h)
                if p_wine.was_modified or p_wine.deleted:
                    # NOTE(review): this aborts the whole article, not just
                    # this wine - confirm that is intended.
                    print("Wine has been modified or deleted, not updating!")
                    return False
                print("Removing wine and updating")
                p_wine.harvest_delete()

        w = Wine(name=title,
                 color=color,
                 eyes=eyes,
                 nose=nose,
                 mouth=mouth,
                 overall=overall,
                 producer=producer,
                 price=price,
                 region=region,
                 sub_region=sub_region,
                 variety=variety,
                 vintage=vintage,
                 abv=abv,
                 description=str(description),
                 tags=str(tags),
                 harvest_data=str(article),
                 harvested_from=h,
                 harvested_date=datetime.datetime.now(datetime.timezone.utc))
        w.harvest_save()
    return True
def create_single_wine(unparsed_wine, title, url, article, tags):
    """Build one ``Wine`` from a single-wine article and persist it.

    NOTE: an earlier definition of ``create_single_wine`` exists in this
    module; being the later one, this definition is the one in effect.

    Args:
        unparsed_wine: Iterable of ``(key, value)`` HTML fragments with the
            wine's attributes; may be empty or ``None``.
        title: Article title (HTML entities are unescaped).
        url: Article URL used to find or create the ``History`` row.
        article: Article markup; ``str(article)`` feeds the description and
            is stored as ``harvest_data``.
        tags: Tags of the article; ``white``/``White`` and ``red``/``Red``
            select the colour, with red taking precedence.

    Returns:
        ``False`` if the matching, previously harvested wine was modified or
        deleted (nothing is saved); ``True`` after the wine is saved.
    """
    color = 'N/A'
    details = {
        'eyes': 'N/A',
        'nose': 'N/A',
        'mouth': 'N/A',
        'overall': 'N/A',
        'producer': 'N/A',
        'price': 0,
        'region': '',
        'sub_region': '',
        'variety': '',
        'vintage': '',
        'abv': '',
    }

    # Description: text of all <p> paragraphs, joined with spaces.
    paragraph_pattern = re.compile(r"""<p><strong>|<p.*?>(.*?)<\/p>""",
                                   re.M | re.S)
    paragraphs = paragraph_pattern.findall(str(article))
    description = BeautifulSoup(' '.join(paragraphs),
                                'html.parser').getText()

    title = html.unescape(title)

    # Try to figure out the colour
    if any(word in tags for word in ('white', 'White')):
        color = 'White'
    if any(word in tags for word in ('red', 'Red')):
        color = 'Red'

    # (accepted spellings, field) pairs, tested in order. "Sub-Region" has
    # to come before "Region" because the test is a substring match.
    label_table = (
        (("Eyes", "eyes"), 'eyes'),
        (("Nose", "nose"), 'nose'),
        (("Mouth", "mouth"), 'mouth'),
        (("All in all", "all in all"), 'overall'),
        (("Producer", "producer"), 'producer'),
        (("Price", "price"), 'price'),
        (("Sub-Region", "sub-region"), 'sub_region'),
        (("Region", "region"), 'region'),
        (("Variety", "variety"), 'variety'),
        (("Vintage", "vintage"), 'vintage'),
        (("ABV", "abv"), 'abv'),
    )

    # Process single-wine results
    if unparsed_wine:
        for raw_key, raw_value in unparsed_wine:
            text = BeautifulSoup(raw_value, 'html.parser').getText()
            label = BeautifulSoup(raw_key, 'html.parser').getText()
            for spellings, field in label_table:
                if not any(spelling in label for spelling in spellings):
                    continue
                if field == 'price':
                    details[field] = text.replace("$", "")
                elif field == 'variety':
                    details[field] = str(text)
                else:
                    details[field] = text
                break

    # No explicit variety: take the first known grape named in the title.
    if not details['variety']:
        grapes = ('Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah',
                  'Shiraz', 'Cabernet Sauvignon', 'Merlot', 'Pinot Noir')
        details['variety'] = next(
            (grape for grape in grapes if grape in title), 'N/A')

    # Search for previous history
    known_history = History.objects.filter(url=url)
    if not known_history:
        # Create History
        history = History(url=url, wine_count=1,
                          date=datetime.datetime.now(datetime.timezone.utc))
        history.save()
    else:
        history = History.objects.get(url=url)
        history.date = datetime.datetime.now(datetime.timezone.utc)
        history.save()

        earlier = Wine.wines.filter(name=title, color=color,
                                    harvested_from=history)
        if earlier:
            stored = Wine.wines.get(name=title, color=color,
                                    harvested_from=history)
            if stored.was_modified or stored.deleted:
                print("Wine has been modified or deleted, not updating!")
                return False
            print("Removing wine")
            stored.harvest_delete()

    # Save the wine to the database
    new_wine = Wine(
        name=title,
        color=color,
        eyes=details['eyes'],
        nose=details['nose'],
        mouth=details['mouth'],
        overall=details['overall'],
        producer=details['producer'],
        price=details['price'],
        region=details['region'],
        sub_region=details['sub_region'],
        variety=details['variety'],
        vintage=details['vintage'],
        abv=details['abv'],
        description=description,
        tags=tags,
        harvest_data=str(article),
        harvested_from=history,
        harvested_date=datetime.datetime.now(datetime.timezone.utc),
    )
    new_wine.harvest_save()
    return True
def create_multi_wines_exception_a(url, article):
    """Harvest all wines from an article laid out with underlined spans.

    A wine is the markup between one
    ``<span style="text-decoration: underline;">`` and the following
    ``<span``; its fields come from ``<strong>label</strong>value`` pairs.

    NOTE: an earlier definition of ``create_multi_wines_exception_a`` exists
    in this module; being the later one, this definition is the one in effect.

    Args:
        url: Article URL, used with the number of wines found to look up or
            create the ``History`` row.
        article: Article markup; ``str(article)`` is parsed and stored as
            ``harvest_data`` on each wine.

    Returns:
        ``False`` as soon as a previously harvested wine turns out to be
        modified or deleted (the rest of the article is skipped). Otherwise
        the function ends without an explicit return, i.e. ``None``.
    """
    wines_regex = re.compile(
        r"""<span style="text-decoration: underline;">(.*?)(?=<span)""",
        re.M | re.S | re.I)
    wines = wines_regex.findall(str(article))

    title_regex = re.compile(r"""<strong>(.*?) –""")
    wine_info_regex = re.compile(r"""<strong>(.*?)<\/strong>(.*?)(?=<)""",
                                 re.I | re.M | re.S)

    # Tested in order; the first variety named in the title is used.
    title_varieties = (
        'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
        'Cabernet Sauvignon', 'Merlot', 'Pinot Noir',
    )

    # Create history
    previous_history = History.objects.filter(url=url, wine_count=len(wines))
    if previous_history:
        h = History.objects.get(url=url, wine_count=len(wines))
        h.date = datetime.datetime.now(datetime.timezone.utc)
        h.save()
    else:
        h = History(url=url, wine_count=len(wines),
                    date=datetime.datetime.now(datetime.timezone.utc))
        h.save()

    for wine in wines:
        info = wine_info_regex.findall(wine)

        color, eyes, nose, mouth, overall, producer = ('N/A',) * 6
        price = 0
        region, sub_region, variety, vintage, description, abv = ('',) * 6

        title = title_regex.findall(wine)
        if not title:
            print("Couldn't determine title!")
            continue
        title = title[0]

        # Every candidate is tested against the title. A bare string literal
        # used as a condition is always true, which would tag every wine
        # that is not Riesling/Chardonnay/Sauvignon Blanc as "Syrah".
        variety = next(
            (name for name in title_varieties if name in title), variety)

        for key, value in info:
            value_soup = BeautifulSoup(value, 'html.parser')
            value = value_soup.getText()
            key_soup = BeautifulSoup(key, 'html.parser')
            key = key_soup.getText()

            if ("Eyes" in key) or ("eyes" in key):
                eyes = value
            elif ("Nose" in key) or ("nose" in key):
                nose = value
            elif ("Mouth" in key) or ("mouth" in key):
                mouth = value
            elif ("All in all" in key) or ("all in all" in key):
                overall = value
            elif ("Producer" in key) or ("producer" in key):
                producer = value
            elif ("Price" in key) or ("price" in key):
                price = value.replace("$", "")
            # Substring matching: "Sub-Region" has to precede "Region".
            elif ("Sub-Region" in key) or ("sub-region" in key):
                sub_region = value
            elif ("Region" in key) or ("region" in key):
                region = value
            elif ("Variety" in key) or ("variety" in key):
                variety = value
            elif ("Vintage" in key) or ("vintage" in key):
                vintage = value
            elif ("ABV" in key) or ("abv" in key):
                abv = value

        # Search for previous history
        if previous_history:
            previous_wine = Wine.wines.filter(name=title, color=color,
                                              harvested_from=h)
            if previous_wine:
                p_wine = Wine.wines.get(name=title, color=color,
                                        harvested_from=h)
                if p_wine.was_modified or p_wine.deleted:
                    # NOTE(review): returning here stops processing of all
                    # remaining wines - confirm that is intended.
                    print("Wine has been modified or deleted, not updating!")
                    return False
                print("Removing wine and updating")
                p_wine.harvest_delete()

        w = Wine(name=title,
                 color=color,
                 eyes=eyes,
                 nose=nose,
                 mouth=mouth,
                 overall=overall,
                 producer=producer,
                 price=price,
                 region=region,
                 sub_region=sub_region,
                 variety=variety,
                 vintage=vintage,
                 abv=abv,
                 description=description,
                 tags='',
                 harvest_data=str(article),
                 harvested_from=h,
                 harvested_date=datetime.datetime.now(datetime.timezone.utc))
        w.harvest_save()
def create_multi_wines(wines, url, article, tags):
    """Persist each wine of a multi-wine article as a separate ``Wine``.

    NOTE: an earlier definition of ``create_multi_wines`` exists in this
    module; being the later one, this definition is the one in effect.

    Args:
        wines: Sequence of 3-item sequences
            ``[title_html, "(sub-region, region) $price", description]``.
        url: Article URL, used with ``len(wines)`` to look up or create the
            ``History`` row.
        article: Article markup, stored as ``str(article)`` in
            ``harvest_data`` on each wine.
        tags: Article tags, stored as ``str(tags)`` on each wine.

    Returns:
        ``False`` as soon as a previously harvested wine turns out to be
        modified or deleted (the remaining wines are skipped); ``True`` once
        every wine has been handled.
    """
    # Regex
    title_regex = re.compile(r"""<strong>(.*)""", re.M | re.S | re.I)
    alt_title_regex = re.compile(r"""<span .*>(.*?)</span>""")
    # Loop-invariant, so compiled once up front.
    region_price_regex = re.compile(r"""\((.*?)\) \$(.+)|\((.*?)\)""",
                                    re.M | re.S | re.I)

    # Tested in order; the first variety named in the title is used.
    title_varieties = (
        'Riesling', 'Chardonnay', 'Sauvignon Blanc', 'Syrah', 'Shiraz',
        'Cabernet Sauvignon', 'Merlot', 'Pinot Noir',
    )

    # Create history
    previous_history = History.objects.filter(url=url, wine_count=len(wines))
    if previous_history:
        h = History.objects.get(url=url, wine_count=len(wines))
        h.date = datetime.datetime.now(datetime.timezone.utc)
        h.save()
    else:
        h = History(url=url, wine_count=len(wines),
                    date=datetime.datetime.now(datetime.timezone.utc))
        h.save()

    print("Length of wines:" + str(len(wines)))
    for wine in wines:
        # ['title', 'subregion/region price', 'description']
        title_matches = title_regex.findall(wine[0])
        if not title_matches:
            title_matches = alt_title_regex.findall(wine[0])
        if not title_matches:
            # Without a title there is no wine: lower the recorded count
            # and skip this entry.
            print("No wine??")
            h.wine_count -= 1
            h.save()
            continue
        # Always reduce the findall() list to its first match; handing the
        # list itself to strip_tags would yield the list's repr as the name.
        title = html.unescape(strip_tags(title_matches[0]))

        color, eyes, nose, mouth, overall, producer = ('N/A',) * 6
        price = 0
        region, sub_region, variety, vintage, abv = ('',) * 5

        for variety_name in title_varieties:
            if variety_name in title:
                variety = variety_name
                break

        # Colour is inferred from the CSS colour found in the title markup.
        if '#ff99cc' in wine[0]:
            color = 'Rosé'
        if '#339966' in wine[0]:
            color = 'White'
        if '#800000' in wine[0]:
            color = 'Red'

        # Groups: (regions-with-price, price, regions-without-price).
        total_region = ''
        region_price = region_price_regex.findall(wine[1])
        if region_price:
            first_match = region_price[0]
            if first_match[0]:
                total_region = first_match[0]
            if first_match[1]:
                price = first_match[1]
            if first_match[2]:
                total_region = first_match[2]

        if total_region:
            region_list = total_region.split(',')
            region_count = len(region_list)
            # Value comparison; "is" would compare object identity.
            if region_count == 1:
                region = region_list[0].strip()
            elif region_count == 2:
                region = region_list[1].strip()
                sub_region = region_list[0].strip()
            elif region_count > 2:
                # The last entry is the region; the two entries before it
                # are combined into the sub-region.
                region = region_list[-1].strip()
                sub_region = (region_list[-3].strip() + ", " +
                              region_list[-2].strip())

        description = str(wine[2].strip())

        # Search for previous history
        if previous_history:
            previous_wine = Wine.wines.filter(name=title, color=color,
                                              harvested_from=h)
            if previous_wine:
                p_wine = Wine.wines.get(name=title, color=color,
                                        harvested_from=h)
                if p_wine.was_modified or p_wine.deleted:
                    # NOTE(review): returning here stops processing of all
                    # remaining wines - confirm that is intended.
                    print("Wine has been modified or deleted, not updating!")
                    return False
                print("Removing wine and updating")
                p_wine.harvest_delete()

        w = Wine(name=title,
                 color=color,
                 eyes=eyes,
                 nose=nose,
                 mouth=mouth,
                 overall=overall,
                 producer=producer,
                 price=price,
                 region=region,
                 sub_region=sub_region,
                 variety=variety,
                 vintage=vintage,
                 abv=abv,
                 description=str(description),
                 tags=str(tags),
                 harvest_data=str(article),
                 harvested_from=h,
                 harvested_date=datetime.datetime.now(datetime.timezone.utc))
        w.harvest_save()
    return True