def test_generate_headline_draw():
    """Check that the headline generated for a drawn match contains "drew"."""
    drawn_match = match.Match(date.today(), "PREM", "1", "MAN U", 'H')
    drawn_match.set_result_data('D', (1, 1))

    # The drawn match is the only candidate handed to the random chooser.
    candidates = [drawn_match]
    picked = match.choose_random_match(candidates)

    headline = onthisday.generate_headline(picked)
    assert headline.find("drew") != -1
def parse_and_save_dataset(fname_in, fname_out):
    """Load a results dataset and save config.MY_TEAM's matches in normalised form.

    The input is read one line at a time and split on commas. Columns used,
    as read by this function:

    * 0 -- match date, formatted ``YYYY-MM-DD``
    * 2 -- passed to ``match.Match`` as the competition
    * 4 -- when equal to ``"TRUE"`` the match is recorded as home ('H') or
      away ('A'); any other value records it as neutral ('N')
    * 5 / 6 -- home team / away team
    * 7 / 8 -- home score / away score

    Lines that do not involve ``config.MY_TEAM`` are skipped.

    Args:
        fname_in: path of the comma-separated dataset to read.
        fname_out: path handed to ``match.save_matches_to_file``.

    Raises:
        ValueError: if a date or a score cannot be parsed.
        IndexError: if a line has fewer columns than the ones listed above.
    """
    matches = []

    # The context manager closes the input even when a line fails to parse.
    with open(fname_in, "r") as file_in:
        for line in file_in:
            match_details = line.split(',')
            match_date = datetime.strptime(match_details[0], "%Y-%m-%d")

            home_team = match_details[5]
            away_team = match_details[6]
            if home_team == config.MY_TEAM:
                my_team_is_home = True
                opponent = away_team
            elif away_team == config.MY_TEAM:
                my_team_is_home = False
                opponent = home_team
            else:
                # Not one of our matches.
                continue

            home_score = int(match_details[7])
            away_score = int(match_details[8])
            if my_team_is_home:
                my_team_score, opponent_score = home_score, away_score
                venue = 'H'
            else:
                my_team_score, opponent_score = away_score, home_score
                venue = 'A'
            place = venue if match_details[4] == "TRUE" else 'N'

            if my_team_score > opponent_score:
                result = 'W'
            elif my_team_score < opponent_score:
                result = 'L'
            else:
                result = 'D'

            loaded_match = match.Match(match_date, match_details[2], '',
                                       opponent, place)
            # tuple(a, b) raises TypeError (tuple() takes one iterable), so
            # build the pair with a literal. Order is home, away as before.
            # NOTE(review): scrape_worldfootball appears to store the score
            # with the team's own goals first -- confirm which order
            # save_matches_to_file and the headline code expect.
            loaded_match.set_result_data(result, (home_score, away_score))
            matches.append(loaded_match)

    match.save_matches_to_file(fname_out, matches)
def test_cupfinal_winners():
    """Check that a won FA Cup final yields an "are champions!" headline."""
    cup_final = match.Match(date(2019, 1, 1), "FA Cup", "Final", "MAN U", 'H')
    cup_final.set_result_data('W', (5, 1))

    headline = onthisday.generate_headline(cup_final)
    assert headline.find("are champions!") != -1
def test_format_competition_round_headline_noround():
    """Check that "round" is absent when the match has an empty round."""
    fixture = match.Match(date(2019, 1, 1), "PREM", "", "MAN U", 'H')
    fixture.set_result_data('L', (5, 1))

    text = onthisday.format_competition_round_headline(fixture)
    assert text.find("round") == -1
def test_format_intro_headline_roundnotnil():
    """Check that the intro headline mentions "round" when a round is set."""
    fixture = match.Match(date(2019, 1, 1), "PREM", "2", "MAN U", 'H')
    fixture.set_result_data('L', (5, 1))

    intro = onthisday.format_intro_headline(fixture)
    assert intro.find("round") != -1
def test_format_intro_headline_date():
    """Check that the intro headline renders the match date as "01 Jan, 2019"."""
    fixture = match.Match(date(2019, 1, 1), "PREM", "1", "MAN U", 'H')
    fixture.set_result_data('L', (5, 1))

    intro = onthisday.format_intro_headline(fixture)
    assert intro.find("01 Jan, 2019") != -1
def test_generate_headline_aet():
    """Check that an "AET" result produces a headline with "after extra time"."""
    fixture = match.Match(date(2019, 1, 1), "PREM", "", "MAN U", 'H')
    fixture.set_result_data('W', (4, 3), "AET", "")

    headline = onthisday.generate_headline(fixture)
    position = headline.find("after extra time")
    assert position >= 0
def test_generate_headline_pst():
    """Check that a "PST" result produces a headline mentioning "penalties"."""
    fixture = match.Match(date(2019, 1, 1), "PREM", "", "MAN U", 'H')
    fixture.set_result_data('W', (4, 3), "PST", "")

    headline = onthisday.generate_headline(fixture)
    position = headline.find("penalties")
    assert position >= 0
def scrape_worldfootball(team, year):
    """Scrape a team's results for one season from the worldfootball site.

    Requests ``WF_URL + "/teams/<team>/<year>/3/"``, walks every results table
    found inside ``div.box > div.data`` and builds one ``match.Match`` per
    result row. Progress is echoed to stdout with ``print``.

    Args:
        team: team identifier substituted into the URL.
        year: season identifier substituted into the URL.

    Returns:
        list of ``match.Match`` objects, in page order.
    """

    def heading_cells(node):
        # Competition headings are the full-width ("colspan 8") cells.
        return node.find_all("td", {"colspan": "8", "class": "hell"})

    season_url = (WF_URL + "/teams/{}/{}/3/").format(team, year)
    page = requests.get(season_url)
    soup = BeautifulSoup(page.text, 'html.parser')

    competition = ""
    matches = []

    for box in soup.find_all("div", {"class": "box"}):
        data_div = box.find("div", {"class": "data"})
        if not data_div:
            continue

        result_tables = data_div.find_all(
            "table", {
                "class": "standard_tabelle",
                "cellpadding": "3",
                "cellspacing": "1"
            })
        for table in result_tables:
            # Only a table that carries a competition heading holds results.
            if not heading_cells(table):
                continue

            for row in table.find_all("tr"):
                if heading_cells(row):
                    # Heading row: remember the competition for the result
                    # rows that follow. Text is of the form "name 1984/1985".
                    print("competition found")
                    competition = row.find("td").find("a").find(
                        "b").contents[0].strip()
                    print(competition)
                    continue

                cols = row.find_all("td")
                if not cols:
                    # Row without any data cells; nothing to record.
                    continue

                print(competition)
                competition_round = cols[0].find("a").contents[0].strip()
                print(competition_round)
                match_date = datetime.strptime(
                    cols[1].find("a").contents[0].strip(), "%d/%m/%Y")
                print(match_date)
                location = cols[3].contents[0]
                print(location)
                opponent = cols[5].find("a").contents[0].strip()
                print(opponent)

                # Per the original notes: cell 6 holds the score with Everton
                # first, e.g. "2:1 (0:1,1:1) aet". The final score comes
                # before the brackets; the brackets may show the half-time
                # score and the score at the end of normal time, and "aet"
                # marks a match decided after extra time. The half-time score
                # is not always shown, so the final score runs up to the
                # first space or the end of the string.
                # Some matches link the score to a match report page (needed
                # to get scorers); that link is not always present.
                score_link = cols[6].find("a")
                if score_link:
                    score, normal_time = parse_score(
                        score_link.contents[0].strip())
                    match_report = WF_URL + score_link.get("href")
                else:
                    score, normal_time = parse_score(
                        cols[6].contents[0].strip())
                    match_report = ""

                new_match = match.Match(match_date, competition,
                                        competition_round, opponent, location)
                new_match.set_result_data(
                    match.calc_result_myteam_first(score), score,
                    normal_time, match_report)
                matches.append(new_match)

    print(len(matches))
    return matches