Exemplo n.º 1
0
def create_proxied_browser_instance(proxy=None, use_proxy=False, headless=False, use_data_dir=False) -> webdriver.Chrome:
    """Build a Chrome WebDriver configured with this module's options.

    Args:
        proxy: Accepted but not read by this function.
        use_proxy: Accepted but not read by this function.
        headless: When true, run Chrome headless and set the image
            content-setting preference to 2.
        use_data_dir: When true, pass ``BROWSER_DATA`` as Chrome's
            ``user-data-dir``.

    Returns:
        The newly started ``webdriver.Chrome`` instance.

    Raises:
        Exception: If ``get_chrome_driver()`` returns a falsy driver path.
    """
    options = webdriver.ChromeOptions()
    caps = webdriver.DesiredCapabilities.CHROME

    preferences = {'disk-cache-size': 4096}
    if headless:
        options.headless = True
        preferences["profile.managed_default_content_settings.images"] = 2
    options.add_experimental_option('prefs', preferences)

    # Collect the command-line switches first, then apply them in order.
    launch_args = ["--start-maximized", "log-level=3", "ignore-certificate-errors"]
    if use_data_dir:
        launch_args.append('user-data-dir={D}'.format(D=BROWSER_DATA))
    for launch_arg in launch_args:
        options.add_argument(launch_arg)

    # The extension is optional: a failure to build it is printed and ignored.
    try:
        packed_extension = create_firefox_extension()
    except Exception as err:
        print(str(err))
        packed_extension = None
    if packed_extension:
        options.add_extension(packed_extension)

    chromedriver_path = get_chrome_driver()
    if not chromedriver_path:
        raise Exception("Chrome driver not found")

    return webdriver.Chrome(
        executable_path=chromedriver_path,
        options=options,
        desired_capabilities=caps,
    )
Exemplo n.º 2
0
def scrap_player_name_flashscore(flash_id, flash_url):
    """Return the player name shown on a Flashscore player page.

    Args:
        flash_id: Second path component of the player URL.
        flash_url: First path component of the player URL.

    Returns:
        The text of the element with class ``teamHeader__name``.

    Raises:
        Whatever the driver raises when navigation or the element lookup
        fails; the browser is closed before the exception propagates.
    """
    driver = get_chrome_driver()
    try:
        match_url = "https://www.flashscore.com/player/{0}/{1}/".format(
            flash_url, flash_id)
        driver.get(match_url)
        time.sleep(1)
        return driver.find_element_by_class_name("teamHeader__name").text
    finally:
        # Always release the browser, including when the lookup above fails.
        driver.quit()
Exemplo n.º 3
0
def scrap_matches_at_date(matches_date):
    """Walk the Flashscore tennis page for ``matches_date`` and process its rows.

    Rows with the ``event__header`` class are passed to
    ``get_tournament_from_row`` and its result becomes the current tournament.
    Every other row is passed to ``process_match_row`` only while the current
    tournament is not ``None``.

    Args:
        matches_date: Date forwarded to ``navigate_to_date`` and the row helpers.

    Returns:
        None. The browser is closed even if a helper raises.
    """
    driver = get_chrome_driver()
    try:
        driver.get("https://www.flashscore.com/tennis")
        navigate_to_date(driver, matches_date)

        tournament = None
        rows = driver.find_elements_by_xpath("//div[@class='sportName tennis']/div")
        for row in rows:
            if element_has_class(row, "event__header"):
                # Tournament header: it decides whether the rows below are kept.
                # NOTE(review): presumably get_tournament_from_row returns None
                # for tournaments that must be skipped - confirm.
                tournament = get_tournament_from_row(driver, row, matches_date)
            elif tournament is not None:
                # Match row belonging to a tournament that is being retrieved.
                process_match_row(row, matches_date)
    finally:
        driver.quit()
Exemplo n.º 4
0
def scrap_player_id(player_name):
    """Look up a player on the atptour search endpoint.

    When the search returns no item and the name has more than two words, the
    lookup is retried once per call with only the first and last word.

    Args:
        player_name: Name used as the ``searchTerm`` query value.

    Returns:
        A ``(atptour_name, atptour_id)`` tuple; both are ``None`` when nothing
        was found (that case is logged to ``PLAYER_LOGS``).

    Raises:
        AttributeError: If the selected item's ``Value`` URL does not end in
            ``/<id>/overview``.
        Driver or JSON errors propagate; the browser is closed first.
    """
    atptour_name = atptour_id = None
    retry_name = None

    driver = get_chrome_driver()
    try:
        match_url = 'https://www.atptour.com/en/-/ajax/playersearch/PlayerUrlSearch?searchTerm={}'.format(
            player_name)
        driver.get(match_url)
        time.sleep(1)

        # The JSON response is read from the page's <pre> element.
        html = driver.find_element_by_tag_name("pre").get_attribute('innerHTML')
        elements = json.loads(html)["items"]

        if not elements:
            names = player_name.split()
            if len(names) > 2:
                retry_name = names[0] + " " + names[-1]
            else:
                msg = "'{0}' not found on atptour website".format(player_name)
                log_to_file(msg, PLAYER_LOGS)
                log("players", msg)
        else:
            # Prefer a case-insensitive exact match, else take the first result.
            wanted = player_name.lower()
            player_element = next(
                (item for item in elements if item["Key"].lower() == wanted),
                elements[0])

            atptour_name = player_element["Key"]
            href_regex = re.search(".+/(.*)/overview$", player_element["Value"])
            atptour_id = href_regex.group(1)
    finally:
        driver.quit()

    if retry_name is not None:
        # Retry only after this browser is closed, so two are never open at once.
        return scrap_player_id(retry_name)

    return atptour_name, atptour_id
Exemplo n.º 5
0
def scrap_match_flashscore(match_id, status):
    """Scrape one Flashscore match page into a pandas Series.

    Args:
        match_id: Flashscore match identifier, appended to the match URL.
        status: ``MatchStatus`` member supplied by the caller. Its ``name`` is
            stored, and scores/statistics are scraped only for Finished,
            Retired, Live, Awarded and Interrupted. The status is not inferred
            from the page.

    Returns:
        The populated Series, or ``None`` when any step raised; the failure is
        logged to ``MATCHES_ERROR_LOGS`` with the exception type name.
    """
    match = pd.Series([match_id], index=["match_id"])
    driver = get_chrome_driver()

    try:
        driver.get("https://www.flashscore.com/match/" + match_id)
        time.sleep(1)

        tournament_elem = driver.find_element_by_xpath(
            "//div[contains(@class, 'tournamentHeaderDescription')]/div[1]/span[3]/a"
        )

        tournament_regex = re.search("atp-singles/(.*)/", tournament_elem.get_attribute("href"))
        match["tournament_id"] = tournament_regex.group(1)
        add_tournament_info(match)

        # The round is the text after the last "- "; without it, "Group" is used.
        round_regex = re.search(",.*- (.*)$", tournament_elem.text)
        match["round"] = round_regex.group(1) if round_regex else "Group"

        match["p1_id"], match["p1_url"], match["p2_id"], match["p2_url"] = scrap_player_ids(driver)
        add_player_info(match)
        # Series.drop ignores ``columns=``; ``labels=`` is what removes the
        # entries. errors="ignore" tolerates a helper having removed them already.
        match.drop(labels=["p1_url", "p2_url"], inplace=True, errors="ignore")

        match["datetime"] = _flashscore_match_datetime(driver, match_id)
        match["status"] = status.name

        if status in (MatchStatus.Finished, MatchStatus.Retired, MatchStatus.Live,
                      MatchStatus.Awarded, MatchStatus.Interrupted):

            if status != MatchStatus.Live:
                # Set match winner only if match has already finished.
                participant_elems = driver.find_elements_by_xpath(
                    "//a[starts-with(@class, 'participantName___')]")
                # p1 loses exactly when the last participant link holds one
                # <strong> child (presumably the winner highlight - confirm).
                match["p1_wins"] = len(participant_elems[-1].find_elements_by_xpath("strong")) != 1

            duration_elem = driver.find_element_by_xpath("//div[contains(@class, 'time--overall')]").text
            duration_regex = re.search("([0-9]+):([0-9]+)", duration_elem)
            match["minutes"] = int(duration_regex.group(1)) * 60 + int(duration_regex.group(2))

            # Games and tie-break score for sets 1-5, player 1 first.
            for player in (1, 2):
                for set_no in range(1, 6):
                    games, tiebreak = find_gms_value(player, set_no, driver)
                    match["p{0}_s{1}_gms".format(player, set_no)] = games
                    match["p{0}_tb{1}_score".format(player, set_no)] = tiebreak

            _flashscore_match_statistics(driver, match)

    except Exception as ex:
        msg = "Error while scraping match id '{}'".format(match_id)
        log_to_file(msg, MATCHES_ERROR_LOGS)
        log("scrap_match", msg, type(ex).__name__)
        match = None
    finally:
        driver.quit()

    return match


# (label on the statistics tab, total key, won key, ratio key) - keys are
# prefixed with "p1_" / "p2_" when stored.
_FLASHSCORE_RATIO_STATS = (
    ("Service Points Won", "svpt", "svpt_won", "svpt_ratio"),
    ("1st Serve Points Won", "1st_in", "1st_won", "1st_won_ratio"),
    ("2nd Serve Points Won", "2nd_pts", "2nd_won", "2nd_won_ratio"),
    ("Break Points Saved", "bp_faced", "bp_saved", "bp_saved_ratio"),
)
_FLASHSCORE_SERVICE_GAMES = ("Service Games Won", "sv_gms", "sv_gms_won", "sv_gms_won_ratio")


def _flashscore_match_datetime(driver, match_id):
    """Parse the "dd.mm.yyyy hh:mm" match date; log and raise Exception on failure."""
    try:
        date_text = driver.find_element_by_xpath("//div[@id='detail']/div[4]/div[1]").text
        date_regex = re.search(r"^([0-9]+)\.([0-9]+)\.([0-9]+) ([0-9]+):([0-9]+)$", date_text)
        day, month, year, hour, minute = (int(part) for part in date_regex.groups())
        return pd.to_datetime("{0} {1} {2} {3} {4}".format(year, month, day, hour, minute),
                              format='%Y %m %d %H %M', utc=True)
    except Exception as ex:
        msg = "Error with date format - scraping match '{}'".format(match_id)
        log_to_file(msg, MATCHES_ERROR_LOGS)
        log("scrap_match", msg, type(ex).__name__)
        raise Exception from ex


def _flashscore_stat_cell(stats, label, player):
    """Return the raw text of statistic ``label`` for ``player`` ("p1" or "p2")."""
    return stats[stats["label"] == label].iloc[0][player]


def _flashscore_store_ratio_stat(match, stats, player, label, total_key, won_key, ratio_key):
    """Parse a "NN% (won/total" cell and store total, won and ratio on ``match``."""
    parsed = re.search(r"([0-9]+)% \(([0-9]+)/([0-9]+)", _flashscore_stat_cell(stats, label, player))
    match["{0}_{1}".format(player, total_key)] = int(parsed.group(3))
    match["{0}_{1}".format(player, won_key)] = int(parsed.group(2))
    match["{0}_{1}".format(player, ratio_key)] = int(parsed.group(1)) / 100


def _flashscore_match_statistics(driver, match):
    """Open the Statistics tab and copy both players' serve statistics onto ``match``."""
    driver.find_element_by_link_text("Statistics").click()
    time.sleep(0.5)

    row_elements = driver.find_elements_by_xpath("//div[starts-with(@class, 'statRow___')]")

    # In each row the label is the middle cell; p1 is cell 1 and p2 is cell 3.
    stat_labels = []
    p1_stats = []
    p2_stats = []
    for row_elem in row_elements:
        stat_labels.append(row_elem.find_element_by_xpath("div[1]/div[2]").text)
        p1_stats.append(row_elem.find_element_by_xpath("div[1]/div[1]").text)
        p2_stats.append(row_elem.find_element_by_xpath("div[1]/div[3]").text)

    stats = pd.DataFrame({"label": stat_labels, "p1": p1_stats, "p2": p2_stats})

    for player in ("p1", "p2"):
        match[player + "_ace"] = int(_flashscore_stat_cell(stats, "Aces", player))
        match[player + "_df"] = int(_flashscore_stat_cell(stats, "Double Faults", player))
        for label, total_key, won_key, ratio_key in _FLASHSCORE_RATIO_STATS:
            _flashscore_store_ratio_stat(match, stats, player, label, total_key, won_key, ratio_key)

    for player in ("p1", "p2"):
        _flashscore_store_ratio_stat(match, stats, player, *_FLASHSCORE_SERVICE_GAMES)

    # 1st serves in / service points; None when there were no service points.
    for player in ("p1", "p2"):
        service_points = match[player + "_svpt"]
        match[player + "_1st_serve_ratio"] = (
            match[player + "_1st_in"] / service_points if service_points > 0 else None)
Exemplo n.º 6
0
def scrap_player(atp_id):
    """Scrape an atptour player overview page into a pandas Series.

    Optional fields (birth date, turned-pro year, weight, height, birth place,
    residence, handedness) are stored as ``None`` when they are missing or
    cannot be parsed.

    Args:
        atp_id: Player identifier inserted into the overview URL.

    Returns:
        The populated Series, or ``None`` when a mandatory lookup (names, flag
        code) or any other unexpected step raised; that case is logged to
        ``PLAYER_LOGS``.

    Raises:
        Errors from ``driver.get`` propagate; the browser is closed first.
    """
    driver = get_chrome_driver()
    try:
        match_url = 'https://www.atptour.com/en/players/player/{}/overview'.format(
            atp_id)
        driver.get(match_url)
        time.sleep(0.5)

        player = pd.Series(dtype='float64')
        try:
            player["first_name"] = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-name']/div[1]").text
            player["last_name"] = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-name']/div[2]").text

            player["first_initial"] = player["first_name"][0] if player["first_name"] else None
            player["full_name"] = "{0} {1}".format(player["last_name"],
                                                   player["first_initial"])

            # Birth date is displayed as "(yyyy.mm.dd)".
            birth_date = None
            try:
                birth_text = driver.find_element_by_xpath(
                    "//span[@class='table-birthday']").text
                birth_regex = re.search(r"^\(([0-9]*)\.([0-9]*)\.([0-9]*)\)$",
                                        birth_text)
                birth_year, birth_month, birth_day = birth_regex.groups()
                birth_date = datetime(int(birth_year), int(birth_month),
                                      int(birth_day))
            except Exception:
                print("problem date")
            player["birth_date"] = birth_date

            player["turned_pro"] = _atptour_optional_int(
                driver,
                "//div[@class='player-profile-hero-overflow']/div[2]/div[1]/table/tbody/tr[1]/td[2]/div/div[2]"
            )

            weight = _atptour_optional_int(driver, "//span[@class='table-weight-lbs']")

            # Height is displayed as "(NNNcm)".
            height = None
            try:
                height_str = driver.find_element_by_xpath(
                    "//span[@class='table-height-cm-wrapper']").text
                height_regex = re.search(r"^\(([0-9]*)cm\)$", height_str)
                if height_regex:
                    height = int(height_regex.group(1))
            except (NoSuchElementException, ValueError, TypeError):
                pass

            player["weight"] = weight
            player["height"] = height

            flag_code = driver.find_element_by_xpath(
                "//div[@class='player-flag-code']").text
            player["flag_code"] = flag_code

            birth_city = birth_country = None
            try:
                birth_place = driver.find_element_by_xpath(
                    "//div[@class='player-profile-hero-overflow']/div[2]/div["
                    "1]/table/tbody/tr[2]/td[1]/div/div[2]").text
                birth_city, birth_country = _atptour_first_and_last(birth_place)
                if birth_country is None or not country_exists(birth_country):
                    # Reuse the "element missing" path for the flag-code fallback.
                    raise NoSuchElementException("birth_country_not_found")
            except NoSuchElementException:
                # Couldn't find player birth place, setting birth_country with flag_code.
                birth_country = find_country_with_flag_code(flag_code)
                if birth_country is None:
                    msg = "Couldn't find birth country for player '{0}'".format(
                        atp_id)
                    log_to_file(msg, PLAYER_LOGS)
                    log("players", msg)

            player["birth_city"] = birth_city
            player["birth_country"] = birth_country

            residence_city = residence_country = None
            try:
                residence = driver.find_element_by_xpath(
                    "//div[@class='player-profile-hero-overflow']/div[2]/div["
                    "1]/table/tbody/tr[2]/td[2]/div/div[2]").text
                residence_city, residence_country = _atptour_first_and_last(residence)
            except NoSuchElementException:
                pass

            player["residence_city"] = residence_city
            player["residence_country"] = residence_country

            handedness = backhand = None
            try:
                hands = driver.find_element_by_xpath(
                    "//div[@class='player-profile-hero-overflow']/div[2]/div["
                    "1]/table/tbody/tr[2]/td[3]/div/div[2]").text
                handedness, backhand = _atptour_first_and_last(hands)
            except NoSuchElementException:
                pass

            player["handedness"] = handedness
            player["backhand"] = backhand

        except Exception as ex:
            player = None
            msg = "Couldn't scrap player : atp_id= '{}'".format(atp_id)
            log_to_file(msg, PLAYER_LOGS)
            log("players", msg)
            print(type(ex))

        return player
    finally:
        # Runs on every exit path, including a failed driver.get().
        driver.quit()


def _atptour_optional_int(driver, xpath):
    """Return the element's text as an int, or None if it is missing or not numeric."""
    try:
        return int(driver.find_element_by_xpath(xpath).text)
    except (NoSuchElementException, ValueError):
        return None


def _atptour_first_and_last(text):
    """Split "a, ..., z" into (a, z); return (None, None) when there is no ", "."""
    parts = text.split(", ")
    if len(parts) > 1:
        return parts[0], parts[-1]
    return None, None
def main():
    """Process every file yielded for ``RAW_HTML_DIR``, then call ``_remove_acl``.

    Each path is handed to ``_get_text_and_save_it`` together with one shared
    driver created with ``show=False``. The driver is quit once the loop ends,
    including when a file raises.
    """
    driver = get_chrome_driver(show=False)
    try:
        for fp in yield_filelist(RAW_HTML_DIR):
            _get_text_and_save_it(driver, fp)
    finally:
        # NOTE(review): assumes the returned object is a Selenium WebDriver
        # exposing quit() - confirm against get_chrome_driver.
        driver.quit()
    _remove_acl()
def scrap_all_player_ranks(log_file_path, pickle_db_path):
    """Scrape the atptour singles ranking and record it when it is dated today.

    Nothing is scraped when the ranking date shown on the site differs from
    today's date. On success the ranking date is stored in the pickledb file
    under ``update_player_ranks_date``.

    Args:
        log_file_path: File passed to ``log_to_file`` for status messages.
        pickle_db_path: Path of the pickledb database to update.

    Returns:
        None. Failures, including parsing errors, are logged rather than
        raised.
    """
    driver = get_chrome_driver()
    try:
        driver.get("https://www.atptour.com/en/rankings/singles")

        date_str = driver.find_element_by_xpath(
            "//div[@class='dropdown-wrapper']/div[1]/div/div").text
        last_ranking_date = datetime.strptime(date_str, '%Y.%m.%d').date()

        if last_ranking_date != date.today():
            # The site has not published today's ranking: skip quietly. An
            # explicit return (not a ValueError signal) keeps genuine
            # ValueErrors below from being mistaken for this case.
            return

        # NOTE(review): presumably re-initialises or reuses the given driver -
        # confirm against get_chrome_driver.
        driver = get_chrome_driver(driver)
        driver.get(
            "https://www.atptour.com/en/rankings/singles?rankDate={0}&rankRange=1-5000"
            .format(date_str.replace(".", "-")))

        # Some low-level players have a rank suffixed with T because they are tied.
        ranks = [
            int(rank_elem.text.replace("T", ""))
            for rank_elem in driver.find_elements_by_class_name("rank-cell")
        ]

        points_elems = driver.find_elements_by_xpath(
            "//td[@class='points-cell']/a")
        rank_points = [int(points.text.replace(",", "")) for points in points_elems]

        player_elems = driver.find_elements_by_xpath(
            "//td[@class='player-cell']/span[1]/a[1]")
        player_ids = [
            re.search("players/.*/(.*)/overview", elem.get_attribute("href")).group(1)
            for elem in player_elems
        ]

        player_ranks = pd.DataFrame({
            "rank": ranks,
            "player_id": player_ids,
            "rank_points": rank_points
        })

        if not record_all_player_ranks(player_ranks):
            raise Exception('Player ranks not recorded')

        log_to_file("Player ranks successfully updated", log_file_path)
        db = pickledb.load(pickle_db_path, True)
        db.set("update_player_ranks_date", date_str)

    except Exception as ex:
        log_to_file("player_ranks update error", log_file_path)
        log("Player_ranks", str(ex))
    finally:
        driver.quit()
def scrap_tournament(tournament, date):
    """Complete a tournament record with details from its atptour overview page.

    Args:
        tournament: Record forwarded to ``search_tournament_atptour``.
        date: Date forwarded to ``search_tournament_atptour``.

    Returns:
        ``None`` when ``search_tournament_atptour`` finds nothing. Otherwise
        the found record, with name, city, country, start date, level,
        ``best_of``, number of competitors and surface filled in as far as the
        page allowed. Scraping stops at the first field that fails and the
        record is returned as it stands.

    Raises:
        Errors from opening the page propagate; the browser is closed first.
    """
    tournament = search_tournament_atptour(tournament, date)
    if tournament is None:
        return None

    tournament_id = tournament["atp_id"]
    tournament_formatted_name = tournament["atp_formatted_name"]

    driver = get_chrome_driver()
    try:
        driver.maximize_window()
        driver.get('https://www.atptour.com/en/tournaments/{0}/{1}/overview'.format(
            tournament_formatted_name, tournament_id))
        time.sleep(1)  # Wait 1 sec to avoid IP being banned for scraping

        try:
            name = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-name']/div[1]").text
            if name == "":
                name = tournament_formatted_name
            tournament["tourney_name"] = name

            location = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-dash']/div/div[2]").text
            matched_location = location.split(", ")
            tournament["city"] = matched_location[0]
            tournament["country"] = matched_location[-1]

            # The start date is rebuilt from the text before " - " plus the
            # last word of the text after it.
            date_elem = driver.find_element_by_xpath(
                "//div[@class='player-profile-hero-dash']/div/div[3]").text
            date_regex = re.search("^(.*) - .* (.*)$", date_elem)
            try:
                # NOTE: this changes the process-wide locale.
                locale.setlocale(locale.LC_ALL, 'en_US.utf8')
                full_date = "{0} {1}".format(date_regex.group(1),
                                             date_regex.group(2))
                tournament["start_date"] = pd.to_datetime(full_date,
                                                          format='%B %d %Y',
                                                          utc=True)
            except Exception as ex:
                print(ex)

            if "tourney_level" not in tournament.index:
                # Find tourney level from the category stamp image name.
                img = driver.find_element_by_xpath(
                    "//div[@class='tournmanet-logo']/img")
                img_src = img.get_attribute("src")
                level_matched = re.search("categorystamps_(.+)_", img_src)
                level = level_matched.group(1) if level_matched else None
                tournament["tourney_level"] = "M" if level == "1000" else "A"

            # Maximum number of sets.
            if "best_of" not in tournament.index:
                tournament["best_of"] = 3

            try:
                number_of_competitors = int(
                    driver.find_element_by_xpath(
                        "//div[@class='bracket-sgl']/div[2]").text)
                tournament["number_of_competitors"] = number_of_competitors
            except ValueError:
                pass

            tournament["surface"] = driver.find_element_by_xpath(
                "//div[@class='surface-bottom']/div[2]").text

        except Exception as ex:
            # Best effort: keep what was scraped so far, but report the failure.
            print("scrap_tournament: incomplete data for tournament '{0}' ({1}: {2})".format(
                tournament_id, type(ex).__name__, ex))
    finally:
        driver.quit()

    return tournament
def search_all_tournaments_atptour():
    """Scrape the atptour tournaments list into a DataFrame.

    Returns:
        A DataFrame with columns ``atp_id``, ``atp_name``,
        ``atp_formatted_name``, ``city``, ``country``, ``start_date`` and
        ``end_date`` (the listed end date plus one day). Returns ``None`` when
        the scrape failed; that failure is logged to ``TOURNAMENT_LOGS``.

    Raises:
        Errors from opening the page propagate; the browser is closed first.
    """
    tournaments_atptour = None
    driver = get_chrome_driver()
    try:
        driver.get("https://www.atptour.com/en/tournaments")
        time.sleep(3)
        try:
            atp_names = []
            atp_formatted_names = []
            atp_ids = []
            elements = driver.find_elements_by_xpath(
                "//tr[@class='tourney-result']/td[2]/a")

            for elem in elements:
                try:
                    url = elem.get_attribute("href")
                    url_regex = re.search("/tournaments/(.*)/(.*)/overview$", url)
                    atp_formatted_name = url_regex.group(1)
                    atp_id = int(url_regex.group(2))
                    atp_name = elem.text

                    atp_formatted_names.append(atp_formatted_name)
                    atp_ids.append(atp_id)
                    atp_names.append(atp_name)
                except Exception as ex:
                    # Keep a placeholder row so the three lists stay the same length.
                    atp_formatted_names.append(None)
                    atp_ids.append(None)
                    atp_names.append(None)
                    msg = "atp tournaments retrieval error, tournament '{0}'".format(
                        elem.text)
                    log_to_file(msg, TOURNAMENT_LOGS)
                    log("tournaments", msg, type(ex).__name__)

            cities = []
            countries = []
            elements = driver.find_elements_by_xpath(
                "//tr[@class='tourney-result']/td[2]/span[1]")

            for elem in elements:
                # str.split always yields at least one part, so indexing
                # the first and last part cannot fail.
                matched_location = elem.text.split(", ")
                cities.append(matched_location[0])
                countries.append(matched_location[-1])

            start_dates = []
            end_dates = []
            elements = driver.find_elements_by_xpath(
                "//tr[@class='tourney-result']/td[2]/span[2]")

            for elem in elements:
                date_elem = elem.text
                try:
                    date_regex = re.search("^(.*) - (.*)$", date_elem)
                    start_date = datetime.strptime(date_regex.group(1), '%Y.%m.%d')
                    end_date = datetime.strptime(date_regex.group(2), '%Y.%m.%d')
                    end_date += timedelta(days=1)

                    start_dates.append(start_date)
                    end_dates.append(end_date)
                except Exception:
                    # Unparseable date ranges are kept as None, without logging.
                    start_dates.append(None)
                    end_dates.append(None)

            tournaments_atptour = pd.DataFrame({
                "atp_id": atp_ids,
                "atp_name": atp_names,
                "atp_formatted_name": atp_formatted_names,
                "city": cities,
                "country": countries,
                "start_date": start_dates,
                "end_date": end_dates
            })

        except Exception as ex:
            msg = "Tournament header retrieval error"
            log_to_file(msg, TOURNAMENT_LOGS)
            log("tournaments", msg, type(ex).__name__)
    finally:
        driver.quit()

    return tournaments_atptour