def train_model(args):
    """Orchestrates the training of model from commandline arguments.

    Loads the YAML configuration, reads and prepares the feature table,
    splits it into train/test sets, writes the four splits to CSV files and
    fits the model on the training split.

    Args:
        args: Parsed command-line arguments. Uses ``args.config`` (path to
            the YAML configuration file) and ``args.savedatapath``
            (directory the split CSV files are written to).

    Returns:
        Whatever ``fit_xgboost`` returns for the training split.
    """
    import os  # function-scope import keeps this block self-contained

    # The context manager closes the file; no explicit close() is required.
    with open(args.config, "r") as f:
        config = yaml.load(f, Loader=yaml.SafeLoader)
    settings = config['train_model']

    atp_data = read_raw(**settings['read_features'])
    atp_matches_p = choose_features(atp_data, **settings['choose_features'])
    # Return value is ignored; presumably the frame is modified in place
    # -- TODO confirm against setFeatureType.
    setFeatureType(atp_matches_p, **settings['setFeatureType'])

    data = split_train_test(atp_matches_p, **settings['split_train_test'])
    train_features = data[0]
    test_features = data[1]
    train_labels = data[2]
    test_labels = data[3]

    # (file name, array, savetxt format) -- features keep the padded '%5s'
    # format and labels the plain '%s' format.
    outputs = (
        ("train_features.csv", train_features, '%5s'),
        ("test_features.csv", test_features, '%5s'),
        ("train_labels.csv", train_labels, '%s'),
        ("test_labels.csv", test_labels, '%s'),
    )
    for filename, values, fmt in outputs:
        # os.path.join avoids the hand-written "//" separator, which pointed
        # at the filesystem root when savedatapath was empty.
        target = os.path.join(args.savedatapath, filename)
        np.savetxt(target, values, fmt=fmt, delimiter=",")

    return fit_xgboost(train_features, train_labels, **settings['fit_xgboost'])
def run_features(args):
    """Orchestrates the generating of features from commandline arguments.

    Reads the main table plus the head-to-head and surface record tables,
    joins the extra features onto the selected columns, flips the records
    and saves the resulting dataset.

    Args:
        args: Parsed command-line arguments. Only ``args.config`` (path to
            the YAML configuration file) is used.

    Returns:
        The dataset returned by ``flip_records``, which is also passed to
        ``save_dataset``.
    """
    # The context manager closes the file; no explicit close() is required.
    # NOTE: yaml.BaseLoader does not resolve scalar types, so every scalar
    # in the config arrives as a string.
    with open(args.config, "r") as f:
        config = yaml.load(f, Loader=yaml.BaseLoader)
    settings = config["run_features"]

    df = read_raw(**settings['read_main'])
    data = select_columns(df, **settings['select_columns'])

    h2h_record = read_raw(**settings['read_h2h'])
    surface_record = read_raw(**settings['read_surface'])

    df_h2h = add_h2h(data, h2h_record)
    df_allfeatures = add_surface_winpct(df_h2h, surface_record)

    matches = flip_records(df_allfeatures, **settings['flip_records'])
    save_dataset(matches, **settings['save_dataset'])
    return matches
def run_h2h_record(args):
    """Orchestrates the generating of h2h records table from commandline arguments.

    Args:
        args: Parsed command-line arguments. Only ``args.config`` (path to
            the YAML configuration file) is used.

    Returns:
        The table returned by ``calculate_h2h``, which is also passed to
        ``save_dataset``.
    """
    # The context manager closes the file; no explicit close() is required.
    # NOTE: yaml.BaseLoader does not resolve scalar types, so every scalar
    # in the config arrives as a string.
    with open(args.config, "r") as f:
        config = yaml.load(f, Loader=yaml.BaseLoader)
    settings = config["run_h2h_record"]

    df = read_raw(**settings['read_raw'])
    h2h_record = calculate_h2h(df)
    save_dataset(h2h_record, **settings['save_dataset'])
    return h2h_record
def run_rankingstable(args):
    """Orchestrates the generation of rankings table from commandline arguments.

    Args:
        args: Parsed command-line arguments. Only ``args.config`` (path to
            the YAML configuration file) is used.

    Returns:
        The table returned by ``gen_rankings_static``, which is also passed
        to ``save_dataset``.
    """
    # The context manager closes the file; no explicit close() is required.
    # NOTE: yaml.BaseLoader does not resolve scalar types, so every scalar
    # in the config arrives as a string.
    with open(args.config, "r") as f:
        config = yaml.load(f, Loader=yaml.BaseLoader)
    settings = config["run_rankingstable"]

    df = read_raw(**settings['read_raw'])
    srank = gen_rankings_static(df)
    save_dataset(srank, **settings['save_dataset'])
    return srank
def run_trimdata(args):
    """Orchestrates the trim data functionalities from commandline arguments.

    Args:
        args: Parsed command-line arguments. Only ``args.config`` (path to
            the YAML configuration file) is used.

    Returns:
        The table returned by ``trim_columns``, which is also passed to
        ``save_dataset``.
    """
    # The context manager closes the file; no explicit close() is required.
    # NOTE: yaml.BaseLoader does not resolve scalar types, so every scalar
    # in the config arrives as a string.
    with open(args.config, "r") as f:
        config = yaml.load(f, Loader=yaml.BaseLoader)
    settings = config["run_trimdata"]

    df = read_raw(**settings['read_raw'])
    df_trim = trim_columns(df, **settings['trim_columns'])
    save_dataset(df_trim, **settings['save_dataset'])
    return df_trim
def df_to_db(args):
    """Orchestrates the writing of csv files to database from commandline arguments.

    Args:
        args: Parsed command-line arguments. Uses ``args.config`` (path to
            the YAML configuration file), ``args.rds`` (the string ``'True'``
            selects ``conf.rds_engine_string``; anything else selects
            ``conf.local_engine_string``) and ``args.option`` (table to
            write: ``'Ranking'``, ``'H2H'`` or ``'SurfaceWinPct'``).

    Returns:
        None.

    Raises:
        ValueError: If ``args.option`` is not one of the supported tables.
    """
    # NOTE: yaml.BaseLoader does not resolve scalar types, so every scalar
    # in the config arrives as a string.
    with open(args.config, "r") as f:
        config = yaml.load(f, Loader=yaml.BaseLoader)

    if args.rds == 'True':
        # Setting up the database in RDS
        engine_string = conf.rds_engine_string
    else:
        # Setting up the database in local using sqlite
        engine_string = conf.local_engine_string

    if args.option not in ['Ranking', 'H2H', 'SurfaceWinPct']:
        # logger.error() returns None, so it cannot be raised directly (that
        # produced an unrelated TypeError); log first, then raise properly.
        logger.error("%s is not a valid table option", args.option)
        raise ValueError("%s is not a valid table option" % args.option)

    engine = sql.create_engine(engine_string, echo=True)
    Base.metadata.create_all(engine)

    # The config section is named 'create_db_local' for both targets.
    table_settings = config['create_db_local'][args.option]
    df = read_raw(**table_settings['read_main'])
    # Return value is ignored; presumably the frame is modified in place
    # -- TODO confirm against setFeatureType.
    setFeatureType(df, **table_settings['setFeatureType'])

    df.to_sql(args.option, con=engine, if_exists='replace', index=False)
    logger.info("Database created for %s", args.option)