示例#1
0
def test_generate_headline_draw():
    """A drawn match picked from a one-element list yields a "drew" headline."""
    drawn_match = match.Match(date.today(), "PREM", "1", "MAN U", 'H')
    drawn_match.set_result_data('D', (1, 1))
    # Only one candidate exists, so the random choice is deterministic.
    chosen = match.choose_random_match([drawn_match])
    headline = onthisday.generate_headline(chosen)
    assert "drew" in headline
def parse_and_save_dataset(fname_in, fname_out):
    """Extract config.MY_TEAM's matches from a raw results file and save them.

    The input is read one line at a time and split on commas.  Columns used:

    * 0 - match date in ``%Y-%m-%d`` format
    * 2 - competition
    * 4 - flag; when it is not ``"TRUE"`` the match is recorded with place
      ``'N'`` (presumably a neutral venue - confirm against the dataset)
    * 5 / 6 - home team / away team
    * 7 / 8 - home score / away score

    Rows in which neither team is ``config.MY_TEAM`` are skipped.  Every
    remaining row becomes a ``match.Match`` and the collected list is handed
    to ``match.save_matches_to_file``.

    Args:
        fname_in: path of the raw comma-separated dataset.
        fname_out: path passed on to ``match.save_matches_to_file``.

    Raises:
        ValueError: if a date or score field cannot be parsed.
        IndexError: if a line has fewer columns than expected.
    """
    matches = []
    # The context manager guarantees the input is closed even when a row
    # fails to parse part-way through the file.
    with open(fname_in, "r") as file_in:
        for line in file_in:
            match_details = line.split(',')
            match_date = datetime.strptime(match_details[0], "%Y-%m-%d")

            if match_details[5] == config.MY_TEAM:  # MY_TEAM is the home team
                my_team_is_home = True
            elif match_details[6] == config.MY_TEAM:  # MY_TEAM is the away team
                my_team_is_home = False
            else:
                continue  # a match between two other teams

            home_score = int(match_details[7])
            away_score = int(match_details[8])
            if my_team_is_home:
                opponent = match_details[6]
                my_team_score, opponent_score = home_score, away_score
                venue = 'H'
            else:
                opponent = match_details[5]
                my_team_score, opponent_score = away_score, home_score
                venue = 'A'
            place = venue if match_details[4] == "TRUE" else 'N'

            if my_team_score > opponent_score:
                result = 'W'
            elif my_team_score < opponent_score:
                result = 'L'
            else:
                result = 'D'

            loaded_match = match.Match(match_date, match_details[2], '',
                                       opponent, place)
            # tuple(a, b) raises TypeError (tuple() accepts one iterable), so
            # build the pair with a literal.  The score is kept in file order
            # (column 7, column 8) as integers.
            # NOTE(review): confirm whether Match expects home-first or
            # my-team-first ordering here.
            loaded_match.set_result_data(result, (home_score, away_score))
            matches.append(loaded_match)

    match.save_matches_to_file(fname_out, matches)
示例#3
0
def test_cupfinal_winners():
    """A won FA Cup final yields a headline containing "are champions!"."""
    cup_final = match.Match(date(2019, 1, 1), "FA Cup", "Final", "MAN U", 'H')
    cup_final.set_result_data('W', (5, 1))
    headline = onthisday.generate_headline(cup_final)
    assert "are champions!" in headline
示例#4
0
def test_format_competition_round_headline_noround():
    """With an empty round, the competition headline must not say "round"."""
    roundless_match = match.Match(date(2019, 1, 1), "PREM", "", "MAN U", 'H')
    roundless_match.set_result_data('L', (5, 1))
    headline = onthisday.format_competition_round_headline(roundless_match)
    assert "round" not in headline
示例#5
0
def test_format_intro_headline_roundnotnil():
    """With a round set, the intro headline must mention "round"."""
    second_round_match = match.Match(
        date(2019, 1, 1), "PREM", "2", "MAN U", 'H')
    second_round_match.set_result_data('L', (5, 1))
    intro = onthisday.format_intro_headline(second_round_match)
    assert "round" in intro
示例#6
0
def test_format_intro_headline_date():
    """The intro headline renders the match date as "01 Jan, 2019"."""
    new_years_match = match.Match(date(2019, 1, 1), "PREM", "1", "MAN U", 'H')
    new_years_match.set_result_data('L', (5, 1))
    intro = onthisday.format_intro_headline(new_years_match)
    assert "01 Jan, 2019" in intro
示例#7
0
def test_generate_headline_aet():
    """An "AET" result yields a headline mentioning "after extra time"."""
    extra_time_match = match.Match(date(2019, 1, 1), "PREM", "", "MAN U", 'H')
    extra_time_match.set_result_data('W', (4, 3), "AET", "")
    headline = onthisday.generate_headline(extra_time_match)
    assert "after extra time" in headline
示例#8
0
def test_generate_headline_pst():
    """A "PST" result yields a headline mentioning "penalties"."""
    shootout_match = match.Match(date(2019, 1, 1), "PREM", "", "MAN U", 'H')
    shootout_match.set_result_data('W', (4, 3), "PST", "")
    headline = onthisday.generate_headline(shootout_match)
    assert "penalties" in headline
def scrape_worldfootball(team, year):
    """Scrape the results listed on a team's season page into Match objects.

    Requests ``WF_URL + "/teams/<team>/<year>/3/"`` and inspects every
    ``div.box > div.data`` results table on the page.  Inside a table, a row
    holding a ``td`` with ``colspan="8"`` and class ``hell`` is a competition
    heading; every other row containing ``td`` cells is treated as a result
    and converted to a ``match.Match``.  Progress is printed to stdout.

    Args:
        team: team identifier substituted into the URL.
        year: season identifier substituted into the URL.

    Returns:
        The list of matches in the order they appear on the page.
    """
    page = requests.get((WF_URL + "/teams/{}/{}/3/").format(team, year))
    soup = BeautifulSoup(page.text, 'html.parser')

    # Attribute filters reused for every lookup below.
    results_table_attrs = {
        "class": "standard_tabelle",
        "cellpadding": "3",
        "cellspacing": "1"
    }
    heading_cell_attrs = {"colspan": "8", "class": "hell"}

    competition = ""
    matches = []
    for box in soup.find_all("div", {"class": "box"}):
        data_div = box.find("div", {"class": "data"})
        if not data_div:
            continue

        for table in data_div.find_all("table", results_table_attrs):
            # Only tables that carry a competition heading hold result data.
            if not table.find_all("td", heading_cell_attrs):
                continue

            for row in table.find_all("tr"):
                if row.find_all("td", heading_cell_attrs):
                    # Heading row: remember the competition for the result
                    # rows that follow it.
                    print("competition found")  # of form name 1984/1985
                    competition = row.find("td").find("a").find(
                        "b").contents[0].strip()
                    print(competition)
                    continue

                if not row.find("td"):
                    continue

                cells = row.find_all("td")
                print(competition)
                competition_round = cells[0].find("a").contents[0].strip()
                print(competition_round)
                played_on = datetime.strptime(
                    cells[1].find("a").contents[0].strip(), "%d/%m/%Y")
                print(played_on)
                location = cells[3].contents[0]
                print(location)
                opponent = cells[5].find("a").contents[0].strip()
                print(opponent)

                # Cell 6 is the score with my team first, e.g.
                # "2:1 (0:1,1:1) aet": the final score precedes the brackets,
                # which may show half-time and end-of-normal-time scores, and
                # "aet" marks after extra time.  The half-time score is not
                # always shown, so the final score runs up to the first space
                # or the end of the string.
                # Some matches link the score to a results page; scorers are
                # only available by following that link.
                score_link = cells[6].find("a")
                if score_link:
                    score, normal_time = parse_score(
                        score_link.contents[0].strip())
                    match_report = WF_URL + score_link.get("href")
                else:
                    score, normal_time = parse_score(
                        cells[6].contents[0].strip())
                    match_report = ""

                scraped_match = match.Match(played_on, competition,
                                            competition_round, opponent,
                                            location)
                scraped_match.set_result_data(
                    match.calc_result_myteam_first(score), score, normal_time,
                    match_report)
                matches.append(scraped_match)

    print(len(matches))
    return matches