def get_met_office() -> dict:
    """Get today's Met Office forecast (min/max temperature and summary).

    Returns:
        dict with keys 'met_min', 'met_max', 'met_summary'. Values are
        empty strings when the expected markers are absent from the page.
    """
    raw_html = scrape.simple_get(
        'https://www.metoffice.gov.uk/weather/forecast/gcqqnw58n')
    soup = BeautifulSoup(raw_html, 'html.parser')
    tab_today = soup.find(id="tabDay0")
    # The first <a> inside today's tab holds the text we parse.
    str_today = str(tab_today.find('a'))

    def _between(start_marker: str, end_marker: str, offset: int = 0):
        """Return (text, end_index) for the span start_marker..end_marker.

        Searches from `offset`. Handles the previously-unchecked case of
        str.find returning -1 by yielding ('', offset) so later slicing
        stays safe.
        """
        start = str_today.find(start_marker, offset)
        if start == -1:
            return '', offset
        finish = str_today.find(end_marker, start)
        if finish == -1:
            return '', offset
        finish += 1  # include the 'C' of the temperature unit
        return str_today[start:finish], finish

    max_text, max_end = _between('Maximum daytime temperature: ', 'C;')
    min_text, min_end = _between(
        'Minimum nighttime temperature: ', 'C.', max_end)

    # The free-text overview sits between the minimum temperature
    # and the 'Sunrise' section.
    sunrise = str_today.find('Sunrise', min_end)
    overview_text = str_today[min_end + 1:sunrise] if sunrise != -1 else ''

    return {
        'met_min': min_text,
        'met_max': max_text,
        'met_summary': overview_text,
    }
def get_top8_url(event):
    """Return the absolute URL of an event's top-8 decklist page.

    Args:
        event: URL of the event coverage page to search.

    Returns:
        'https://magic.wizards.com' + path of the first link whose href
        contains 'top-8-decks', or None when no such link exists.
    """
    response = simple_get(event)
    soup = BeautifulSoup(response, 'html.parser')
    # href=True replaces the original attrs={'href': re.compile('')},
    # which was just a roundabout "anchors that have an href at all".
    links = [a.get('href') for a in soup.find_all('a', href=True)]
    # Use a distinct loop variable: the original comprehension shadowed
    # the `event` parameter.
    top8 = [href for href in links if "top-8-decks" in href]
    if not top8:
        return None
    url = 'https://magic.wizards.com' + top8[0]
    print(url)
    return url
def get_bbc() -> dict:
    """Get forecast for today"""
    page = scrape.simple_get('https://www.bbc.co.uk/weather/2655708')
    soup = BeautifulSoup(page, 'html.parser')
    # Today's forecast lives under the first day link's body container.
    body = soup.find(id="daylink-0").find('div', class_='wr-day__body')
    summary = body.find(
        'div', class_='wr-day__details__weather-type-description').text
    low = body.find('div', class_='wr-day-temperature__low').text
    high = body.find('div', class_='wr-day-temperature__high').text
    return {
        'bbc_min': low,
        'bbc_max': high,
        'bbc_summary': summary,
    }
from bs4 import BeautifulSoup from scrape import simple_get from requests import post from adventcode_sessiondata import cookie import collections raw_html = simple_get('https://adventofcode.com/2018/day/2/input', cookie) html = BeautifulSoup(raw_html, 'html.parser') global stripped_html stripped_html = html.text.split('\n') # pop off the last element, which is an empty string stripped_html.pop() # stripped_html = ['abcd', 'abdc', 'bbdc', 'ahct', 'thys', 'usyd'] def find_them(): for i in range(len(stripped_html)): for j in range(i + 1, len(stripped_html)): count = 0 for k in range(len(stripped_html[1])): if stripped_html[i][k] != stripped_html[j][k]: count += 1 if count > 1: break else: pass if k == 25 and count <= 1:
#'/name/nm0000136/'] # Johnny Depp #'/name/nm0000152/', # Richard Gere #'/name/nm0001557/'] # Viggo Mortesen #'/name/nm0000093/', # Brad Pitt #'/name/nm0000115/', # Nicolas Cage #'/name/nm0000018/', # Kirk Douglas #'/name/nm0000142/', # Clint Eastwood #'/name/nm0000886/'] # Warren Beaty # Robert Redford? For comedy? ] # Update target gender sc.target_gender = sc.TargetGender('m') for a in act: raw_html = sc.simple_get(glob_url + a) actor_name = get_actor_name(raw_html) final = trim_movie_list(related_movies(raw_html)) print(str(len(final)) + ' items') print("Working on actor: " + actor_name) sc.scrape_movies(final, (actor_name, 'm')) fact = [ #'/name/nm0000098/' # Jennifer Aniston '/name/nm0000402/' # Carrie Fisher '/name/nm0000235/' # Uma Thurman ] sc.target_gender = sc.TargetGender('f') for f in fact: