示例#1
0
import re
import time

from selenium.common.exceptions import NoSuchElementException
from datetime import datetime, timedelta
from log import log, log_to_file, get_file_log

from classes.match_status import MatchStatus
from managers.player_manager import add_player_info
from managers.tournament_manager import scrap_tournament, add_tournament_info, update_tournament, create_tournament
from queries.match_queries import q_find_match_by_id, q_update_match, q_create_match, q_delete_match, \
    get_embedded_matches_json
from queries.tournament_queries import find_tournament_by_name
from utils import element_has_class, get_chrome_driver

# Module-level log targets, one per log name, resolved once at import time.
# NOTE(review): get_file_log comes from the project's `log` module; whether it
# returns a path, a handle or a logger is not visible here -- confirm.
MATCHES_LOGS = get_file_log("scrap_matches")
MATCHES_ERROR_LOGS = get_file_log("scrap_matches_error")
TOURNAMENT_LOGS = get_file_log("tournament_updates")


def get_match_dtypes(matches):
    all_dtypes = {
        "draw_size": "Int16", "best_of": "object", "minutes": "Int16", "p1_ht": "Int16", "p2_ht": "Int16",
        "p1_weight": "Int16",
        "p2_weight": "Int16", "p1_ace": "Int16", "p2_ace": "Int16", "p1_df": "Int16", "p2_df": "Int16",
        "p1_svpt": "Int16", "p2_svpt": "Int16", "p1_1st_in": "Int16", "p2_1st_in": "Int16", "p1_1st_won": "Int16",
        "p2_1st_won": "Int16", "p1_2nd_won": "Int16", "p2_2nd_won": "Int16", "p1_sv_gms": "Int16",
        "p2_sv_gms": "Int16", "p1_bp_saved": "Int16", "p2_bp_saved": "Int16", "p1_bp_faced": "Int16",
        "p2_bp_faced": "Int16", "p1_rank": "Int16", "p2_rank": "Int16", "p1_rank_points": "Int16",
        "p2_rank_points": "Int16", "p1_s1_gms": "Int16", "p2_s1_gms": "Int16", "p1_tb1_score": "Int16",
        "p2_tb1_score": "Int16", "p1_s2_gms": "Int16", "p2_s2_gms": "Int16", "p1_tb2_score": "Int16",
示例#2
0
import time
import re
import pandas as pd
import json

from datetime import datetime
from selenium.common.exceptions import NoSuchElementException

from log import log, get_file_log, log_to_file
from managers.player_rank_manager import retrieve_player_rank_info
from queries.country_queries import country_exists, find_country_with_flag_code
from queries.player_queries import find_player_by_id, q_create_player, q_update_player
from utils import get_chrome_driver

# Log target for the "scrap_player" log name, resolved once at import time.
# NOTE(review): the return type of get_file_log is not visible here -- confirm.
PLAYER_LOGS = get_file_log("scrap_player")


def scrap_player_id(player_name):
    atptour_name = atptour_id = None
    driver = get_chrome_driver()
    match_url = 'https://www.atptour.com/en/-/ajax/playersearch/PlayerUrlSearch?searchTerm={}'.format(
        player_name)
    driver.get(match_url)
    time.sleep(1)

    html = driver.find_element_by_tag_name("pre").get_attribute('innerHTML')
    json_obj = json.loads(html)
    elements = json_obj["items"]
    player_element = None

    if len(elements) == 0:
import re
from datetime import datetime, date

import pandas as pd
import pickledb

from log import log, log_to_file, get_file_log
from queries.player_rank_queries import record_all_player_ranks, retrieve_all_player_ranks
from utils import get_chrome_driver

# Log target for the "update_player_ranks" log name, resolved once at import
# time.
# NOTE(review): the return type of get_file_log is not visible here -- confirm.
RANKS_LOGS = get_file_log("update_player_ranks")


def scrap_all_player_ranks(log_file_path, pickle_db_path):
    driver = get_chrome_driver()
    try:
        driver.get("https://www.atptour.com/en/rankings/singles")

        date_str = driver.find_element_by_xpath(
            "//div[@class='dropdown-wrapper']/div[1]/div/div").text

        last_ranking_date = datetime.strptime(date_str, '%Y.%m.%d').date()
        today = date.today()

        if last_ranking_date != today:
            # Check if last ranking date on atptour match current date. If not, do not scrap
            raise ValueError()

        driver = get_chrome_driver(driver)
        driver.get(
            "https://www.atptour.com/en/rankings/singles?rankDate={0}&rankRange=1-5000"
示例#4
0
from sklearn.compose import ColumnTransformer
from sklearn.impute import SimpleImputer
from sklearn.model_selection import train_test_split
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import OneHotEncoder
from sklearn.ensemble import RandomForestClassifier
from joblib import dump, load

from log import log_to_file, get_file_log
from managers.match_manager import get_match_dtypes
from queries.match_queries import q_get_past_matches, q_update_match, get_embedded_matches_json, \
    get_matches_collection, get_matches_from_created_date, q_get_unfeatured_matches, q_get_unpredicted_matches
from model_deployment.feature_engineering import get_categorical_cols, get_numerical_cols, add_features

# Log target for the "predict_matches" log name, resolved once at import time.
# NOTE(review): the return type of get_file_log is not visible here -- confirm.
PREDICT_LOGS = get_file_log("predict_matches")


def build_model():
    """Load past matches and reduce them to the finished-match feature frame.

    Steps performed here:
      1. Fetch all past matches with ``q_get_past_matches()``.
      2. Cast the columns using the mapping from ``get_match_dtypes()``.
      3. Keep only rows whose ``"status"`` equals ``"Finished"``.
      4. Restrict the columns to the categorical columns, the numerical
         columns and ``"p1_wins"``.

    NOTE(review): ``"p1_wins"`` is presumably the prediction target and the
    other columns the model features -- confirm against the training code.
    """
    # past_matches: all previous matches including when one player retired
    past_matches = q_get_past_matches()
    # astype() is given a column -> dtype mapping built from the frame itself.
    past_matches = past_matches.astype(get_match_dtypes(past_matches))

    # finished_matches: matches that were played entirely
    # .copy() detaches the filtered rows from past_matches.
    finished_matches = past_matches[past_matches["status"] ==
                                    "Finished"].copy()

    # matches = matches.replace({np.nan: None})

    # Column order is: categorical columns, numerical columns, then "p1_wins".
    finished_matches = finished_matches[get_categorical_cols() +
                                        get_numerical_cols() + ["p1_wins"]]
import time
import locale
import pandas as pd
import re
import configparser

from datetime import datetime, timedelta
from log import log, log_to_file, get_file_log
from queries.tournament_queries import find_tournament_by_id, q_update_tournament, q_create_tournament
from utils import get_chrome_driver, get_dataframe_json

# Parse config.ini once, at import time. The path is relative, so it is
# resolved against the process's current working directory rather than this
# file's location.
# ConfigParser.read() silently ignores files it cannot open, so a missing
# config.ini leaves `config` empty instead of raising here.
config = configparser.ConfigParser()
config.read("config.ini")
# Log target for the "tournament_updates" log name.
# NOTE(review): the return type of get_file_log is not visible here -- confirm.
TOURNAMENT_LOGS = get_file_log("tournament_updates")


def search_all_tournaments_atptour():
    tournaments_atptour = None
    driver = get_chrome_driver()
    driver.get("https://www.atptour.com/en/tournaments")
    time.sleep(3)
    try:
        atp_names = []
        atp_formatted_names = []
        atp_ids = []
        elements = driver.find_elements_by_xpath(
            "//tr[@class='tourney-result']/td[2]/a")

        for elem in elements:
            try:
                url = elem.get_attribute("href")