def load(self) -> None:
    """Persist the replacement-level and league offense totals.

    Both frames go to the same kind of destination, chosen by
    ``self.load_db``: the database (through ``utils.db_load_data`` with
    ``if_exists="append"``) or a CSV file under ``self.CSV_DIR`` named
    after the table. Table names carry ``self.split`` as a suffix. The
    frame index is written in both cases.
    """

    def _store(frame, name: str) -> None:
        # Write one frame either to the database table or to "<name>.csv".
        if self.load_db:
            logging.info("Loading data into database")
            utils.db_load_data(
                frame, name, self.conn, if_exists="append", index=True
            )
            return
        logging.info("Dumping to csv")
        frame.to_csv(os.path.join(self.CSV_DIR, f"{name}.csv"), index=True)

    # Replacement-level totals are written first, then the league totals.
    _store(self.replacement_totals, f"replacement_level_{self.split}")
    _store(self.totals, f"league_offense_{self.split}")
def load(self) -> None:
    """Persist ``self.data`` to the game log table or CSV file.

    The target is ``game_log``, or ``game_log_inseason`` when
    ``self.inseason`` is truthy. With ``self.load_db`` set the frame is
    handed to ``utils.db_load_data`` with ``if_exists="append"``;
    otherwise it is written to ``<target>.csv`` under ``self.CSV_DIR``.
    The frame index is not written in either case.
    """
    target = "game_log_inseason" if self.inseason else "game_log"

    if not self.load_db:
        logging.info("Dumping to csv")
        csv_path = os.path.join(self.CSV_DIR, f"{target}.csv")
        self.data.to_csv(csv_path, index=False)
        return

    logging.info("Loading data into database")
    utils.db_load_data(
        self.data, target, self.conn, if_exists="append", index=False
    )
def load(self) -> None:
    """Persist the league pitching totals for ``self.split``.

    With ``self.load_db`` set, ``self.totals`` is handed to
    ``utils.db_load_data`` with ``if_exists="append"``; otherwise it is
    written to ``league_pitching_<split>.csv`` under ``self.CSV_DIR``.
    The frame index is written in both cases.
    """
    target = f"league_pitching_{self.split}"

    if not self.load_db:
        logging.info("Dumping to csv")
        csv_path = os.path.join(self.CSV_DIR, f"{target}.csv")
        self.totals.to_csv(csv_path, index=True)
        return

    logging.info("Loading data into database")
    utils.db_load_data(
        self.totals, target, self.conn, if_exists="append", index=True
    )
def cli(load: bool, clear: bool, season: Optional[int], dir: str) -> None:
    """Script entry point: build the player id data and load or dump it.

    Reads the ``raw_batters_overall``, ``raw_pitchers_overall`` and
    ``name_corrections`` tables, combines batters and pitchers into one
    player list, applies the name corrections, generates ids, and then
    either appends the result to the ``player_id`` table or writes it to
    ``player_id.csv``.

    Args:
        load: When true, write to the database; otherwise write a CSV file.
        clear: When true (and ``load`` is true), delete all rows from
            ``player_id`` before loading.
        season: When truthy, keep only rows for this season in the output.
        dir: Directory that receives ``player_id.csv`` when not loading.
            (The name shadows the builtin but is kept so existing callers
            and option bindings keep working.)
    """
    config = utils.init_config()
    utils.init_logging(config["LOGGING"])
    conn = utils.connect_db(config["DB"])

    # Everything after the connection is opened runs under try/finally so
    # the connection is released even if a read, merge or write raises.
    try:
        batters = pd.read_sql_table("raw_batters_overall", conn)
        pitchers = pd.read_sql_table("raw_pitchers_overall", conn)
        corrections = pd.read_sql_table("name_corrections", conn)
        duplicates = get_duplicates(conn)

        batters = CleanFunctions.normalize_names(batters)
        pitchers = CleanFunctions.normalize_names(pitchers)

        key_columns = ["lname", "fname", "team", "season"]
        batters = batters[key_columns]
        pitchers = pitchers[key_columns]

        # All batters and pitchers
        # (remove duplicates where a player batted and pitched in the same season)
        data = pd.merge(
            batters,
            pitchers,
            on=["fname", "lname", "team", "season"],
            how="outer",
        )
        data = data.sort_values(by=key_columns)
        data = CleanFunctions.apply_corrections(data, corrections)
        data = generate_ids(data, duplicates)

        # The season filter is applied after id generation, so ids are
        # generated from the full data set rather than the filtered one.
        if season:
            data = data[data["season"] == season]

        if load:
            if clear:
                print("Clearing database table")
                conn.execute("DELETE FROM player_id")
            print("Loading data into database")
            utils.db_load_data(
                data, "player_id", conn, if_exists="append", index=False
            )
        else:
            print("Dumping to csv")
            data.to_csv(os.path.join(dir, "player_id.csv"), index=False)
    finally:
        conn.close()