import re import time from selenium.common.exceptions import NoSuchElementException from datetime import datetime, timedelta from log import log, log_to_file, get_file_log from classes.match_status import MatchStatus from managers.player_manager import add_player_info from managers.tournament_manager import scrap_tournament, add_tournament_info, update_tournament, create_tournament from queries.match_queries import q_find_match_by_id, q_update_match, q_create_match, q_delete_match, \ get_embedded_matches_json from queries.tournament_queries import find_tournament_by_name from utils import element_has_class, get_chrome_driver MATCHES_LOGS = get_file_log("scrap_matches") MATCHES_ERROR_LOGS = get_file_log("scrap_matches_error") TOURNAMENT_LOGS = get_file_log("tournament_updates") def get_match_dtypes(matches): all_dtypes = { "draw_size": "Int16", "best_of": "object", "minutes": "Int16", "p1_ht": "Int16", "p2_ht": "Int16", "p1_weight": "Int16", "p2_weight": "Int16", "p1_ace": "Int16", "p2_ace": "Int16", "p1_df": "Int16", "p2_df": "Int16", "p1_svpt": "Int16", "p2_svpt": "Int16", "p1_1st_in": "Int16", "p2_1st_in": "Int16", "p1_1st_won": "Int16", "p2_1st_won": "Int16", "p1_2nd_won": "Int16", "p2_2nd_won": "Int16", "p1_sv_gms": "Int16", "p2_sv_gms": "Int16", "p1_bp_saved": "Int16", "p2_bp_saved": "Int16", "p1_bp_faced": "Int16", "p2_bp_faced": "Int16", "p1_rank": "Int16", "p2_rank": "Int16", "p1_rank_points": "Int16", "p2_rank_points": "Int16", "p1_s1_gms": "Int16", "p2_s1_gms": "Int16", "p1_tb1_score": "Int16", "p2_tb1_score": "Int16", "p1_s2_gms": "Int16", "p2_s2_gms": "Int16", "p1_tb2_score": "Int16",
import time import re import pandas as pd import json from datetime import datetime from selenium.common.exceptions import NoSuchElementException from log import log, get_file_log, log_to_file from managers.player_rank_manager import retrieve_player_rank_info from queries.country_queries import country_exists, find_country_with_flag_code from queries.player_queries import find_player_by_id, q_create_player, q_update_player from utils import get_chrome_driver PLAYER_LOGS = get_file_log("scrap_player") def scrap_player_id(player_name): atptour_name = atptour_id = None driver = get_chrome_driver() match_url = 'https://www.atptour.com/en/-/ajax/playersearch/PlayerUrlSearch?searchTerm={}'.format( player_name) driver.get(match_url) time.sleep(1) html = driver.find_element_by_tag_name("pre").get_attribute('innerHTML') json_obj = json.loads(html) elements = json_obj["items"] player_element = None if len(elements) == 0:
import re from datetime import datetime, date import pandas as pd import pickledb from log import log, log_to_file, get_file_log from queries.player_rank_queries import record_all_player_ranks, retrieve_all_player_ranks from utils import get_chrome_driver RANKS_LOGS = get_file_log("update_player_ranks") def scrap_all_player_ranks(log_file_path, pickle_db_path): driver = get_chrome_driver() try: driver.get("https://www.atptour.com/en/rankings/singles") date_str = driver.find_element_by_xpath( "//div[@class='dropdown-wrapper']/div[1]/div/div").text last_ranking_date = datetime.strptime(date_str, '%Y.%m.%d').date() today = date.today() if last_ranking_date != today: # Check whether the last ranking date on atptour matches the current date. If not, do not scrape raise ValueError() driver = get_chrome_driver(driver) driver.get( "https://www.atptour.com/en/rankings/singles?rankDate={0}&rankRange=1-5000"
from sklearn.compose import ColumnTransformer from sklearn.impute import SimpleImputer from sklearn.model_selection import train_test_split from sklearn.pipeline import Pipeline from sklearn.preprocessing import OneHotEncoder from sklearn.ensemble import RandomForestClassifier from joblib import dump, load from log import log_to_file, get_file_log from managers.match_manager import get_match_dtypes from queries.match_queries import q_get_past_matches, q_update_match, get_embedded_matches_json, \ get_matches_collection, get_matches_from_created_date, q_get_unfeatured_matches, q_get_unpredicted_matches from model_deployment.feature_engineering import get_categorical_cols, get_numerical_cols, add_features PREDICT_LOGS = get_file_log("predict_matches") def build_model(): # past_matches: all previous matches including when one player retired past_matches = q_get_past_matches() past_matches = past_matches.astype(get_match_dtypes(past_matches)) # finished_matches: matches that were played entirely finished_matches = past_matches[past_matches["status"] == "Finished"].copy() # matches = matches.replace({np.nan: None}) finished_matches = finished_matches[get_categorical_cols() + get_numerical_cols() + ["p1_wins"]]
import time import locale import pandas as pd import re import configparser from datetime import datetime, timedelta from log import log, log_to_file, get_file_log from queries.tournament_queries import find_tournament_by_id, q_update_tournament, q_create_tournament from utils import get_chrome_driver, get_dataframe_json config = configparser.ConfigParser() config.read("config.ini") TOURNAMENT_LOGS = get_file_log("tournament_updates") def search_all_tournaments_atptour(): tournaments_atptour = None driver = get_chrome_driver() driver.get("https://www.atptour.com/en/tournaments") time.sleep(3) try: atp_names = [] atp_formatted_names = [] atp_ids = [] elements = driver.find_elements_by_xpath( "//tr[@class='tourney-result']/td[2]/a") for elem in elements: try: url = elem.get_attribute("href")