def modify_cf_df(self, bat, bowl):
    """Clean the batting and bowling frames and join them per player.

    Both frames go through ``__replace_zero`` and ``fillna(0)``; the batting
    ``TEXT_HS`` column is passed through ``DataUtils.remove_asterisk_all``
    and the bowling frame gains a ``TEXT_WICKET_HAUL`` column. Which set of
    drop-columns is applied depends on whether ``TEXT_SPAN`` is a batting
    column.

    Args:
        bat: Batting DataFrame.
        bowl: Bowling DataFrame.

    Returns:
        The inner merge of both frames on ``TEXT_PLAYER`` with a fresh
        0..n-1 index.

    Raises:
        RuntimeError: If either frame is ``None`` or empty.
    """
    either_missing = bat is None or bowl is None
    if either_missing or bat.empty or bowl.empty:
        raise RuntimeError(self.EX_MODIFY_CF.format(bat, bowl))

    batting = self.__replace_zero(bat)
    bowling = self.__replace_zero(bowl)
    batting, bowling = batting.fillna(0), bowling.fillna(0)

    batting[self.TEXT_HS] = DataUtils.remove_asterisk_all(
        batting[self.TEXT_HS])
    bowling[self.TEXT_WICKET_HAUL] = self.__wicket_haul_col(bowling)

    # A span column selects the "consistency" drop lists; otherwise the
    # "form" drop lists are used.
    if self.TEXT_SPAN in batting.columns:
        bat_drop = self.CONSISTENCY_BAT_DROP_COLS
        bowl_drop = self.CONSISTENCY_BOWL_DROP_COLS
    else:
        bat_drop = self.FORM_BAT_DROP_COLS
        bowl_drop = self.FORM_BOWL_DROP_COLS
    batting = DataUtils.drop_cols(batting, bat_drop)
    bowling = DataUtils.drop_cols(bowling, bowl_drop)

    merged = pd.merge(batting, bowling, on=self.TEXT_PLAYER)
    return merged.reset_index(drop=True)
def __form(self, soup, playing_eleven):
    """Build the playing-eleven frame from the first table found in ``soup``.

    Args:
        soup: Parsed page handed to ``DataUtils.get_players``.
        playing_eleven: Players passed on to
            ``DataUtils.playing_eleven_df``.

    Returns:
        The result of ``DataUtils.playing_eleven_df`` for the first table,
        or ``None`` when ``DataUtils.get_players`` yields nothing.
    """
    tables = DataUtils.get_players(soup)
    if len(tables) == 0:
        return None
    rows = tables[0].find_all(self.TEXT_TR)
    frame = DataUtils.create_df(rows)
    return DataUtils.playing_eleven_df(frame, playing_eleven)
def __consistency(self, alter_url, team, playing_eleven):
    """Fetch a team's page and build its playing-eleven frame.

    Args:
        alter_url: Object whose ``team_url(team_id)`` gives the page URL.
        team: Team identifier resolved through ``TeamsDAO.get_team_id``.
        playing_eleven: Players passed on to
            ``DataUtils.playing_eleven_df``.

    Returns:
        The result of ``DataUtils.playing_eleven_df`` for the first table
        on the page, or ``None`` when the page yields no tables.
    """
    team_id = TeamsDAO.get_team_id(team)
    url = alter_url.team_url(team_id)
    soup = BSoup(url).get_soup()
    players = DataUtils.get_players(soup)
    # Mirror __form: an empty result means "no data" (None) rather than an
    # IndexError from players[0]. modify_cf_df already rejects None frames
    # with a descriptive RuntimeError.
    if len(players) == 0:
        return None
    table_rows = players[0].find_all(self.TEXT_TR)
    df = DataUtils.create_df(table_rows)
    return DataUtils.playing_eleven_df(df, playing_eleven)
def create_total_consistency(self, playing_eleven):
    """Assemble the total-consistency frame for the given players.

    For every id returned by ``PlayersDAO.get_player_id`` the player's
    table is fetched via ``__process_tc``, transposed and stacked onto the
    accumulated frame. Players with no id or no table get the rows produced
    by ``__default_values_total_consistency`` instead. The stacked frame is
    then cleaned by ``__modify_tc`` and gains ``'Player'`` and
    ``'InnBowls'`` columns.

    Args:
        playing_eleven: List of player names. NOTE: it is sorted in place,
            so the caller's list is reordered.

    Returns:
        The assembled DataFrame with a fresh 0..n-1 index.
    """
    playing_eleven.sort()
    total_consistency_df = pd.DataFrame()
    players_id_list = PlayersDAO.get_player_id(playing_eleven)
    for player_id in players_id_list:
        if player_id is None:
            total_consistency_df = self.__default_values_total_consistency(
                total_consistency_df)
            continue

        consistency = self.__process_tc(player_id)
        if len(consistency) > 0:
            table_rows = consistency[0].find_all("tr")
            df = DataUtils.create_df(table_rows)
            # DataFrame.append was deprecated in pandas 1.4 and removed in
            # 2.0; pd.concat is the documented replacement and stacks the
            # rows the same way.
            total_consistency_df = pd.concat(
                [total_consistency_df, df.transpose()])
        else:
            total_consistency_df = self.__default_values_total_consistency(
                total_consistency_df)

    total_consistency_df = self.__modify_tc(
        total_consistency_df.reset_index(drop=True))
    total_consistency_df['Player'] = playing_eleven
    total_consistency_df['InnBowls'] = self.__bowl_inn_tc(
        total_consistency_df)
    return total_consistency_df.reset_index(drop=True)
def __modify_tc(self, df):
    """Clean the stacked total-consistency frame.

    Blank cells and NaN become 0, the frame is split at column label 16,
    the row following every 'Fielding' / 'Wicket Keeping' marker (as found
    in column 16) is zeroed in the right-hand part, the parts are re-joined,
    columns are named via ``__set_columns``, ``TOTAL_CON_DROP_COLS`` are
    dropped and rows whose 'Innings:' cell contains "Innings:" are removed.

    Args:
        df: Frame to clean; assumes integer column labels (``df[16]`` and
            label slices are used) — TODO confirm against ``create_df``.

    Returns:
        The cleaned DataFrame (index is not reset here).
    """
    # Whitespace-only / empty strings and NaN are both normalised to 0.
    df = df.replace(r'^\s*$', 0, regex=True)
    df = df.fillna(0)
    # .loc slices are label-based and inclusive: labels up to 15, and 16 on.
    df_bat = df.loc[:, :15]
    df_bowl = df.loc[:, 16:]
    # Zero the row *after* each 'Fielding' marker in the right-hand part.
    fielding_row = df.loc[df[16] == 'Fielding'].index.tolist()
    for row in fielding_row:
        row += 1
        df_bowl.loc[row] = 0
    # Same for 'Wicket Keeping'. NOTE(review): this mask is read from ``df``
    # after ``df_bowl`` (a slice of ``df``) was written to; whether those
    # writes are visible here depends on pandas view/copy semantics.
    wk_row = df.loc[df[16] == 'Wicket Keeping'].index.tolist()
    for row in wk_row:
        row += 1
        df_bowl.loc[row] = 0
    # ignore_index=True on axis=1 renumbers the columns 0..n-1.
    df = pd.concat([df_bat, df_bowl], axis=1, ignore_index=True)
    df = self.__set_columns(df)
    df = DataUtils.drop_cols(df, self.TOTAL_CON_DROP_COLS)
    # Earlier attempts, kept for reference:
    # df = df.drop(df.columns[[7, 13, 14, 15, 16, 17]], axis=1)
    # df = df.drop_duplicates(keep=False)
    # df = df[~df['Innings:'].str.contains("Innings:")]
    # Only entries that are exactly True are dropped, so any non-boolean
    # result of str.contains (presumably NaN for non-string cells) keeps
    # its row.
    duplicates = df['Innings:'].str.contains("Innings:")
    for index, value in duplicates.items():
        if value is True:
            df.drop(index, inplace=True)
    return df
def modify_total_consistency(self, df):
    """Normalise the total-consistency frame for scoring.

    Blank cells and NaN become 0, ``__replace_zero`` and ``__replace_na``
    are applied, ``TEXT_HIGH_SCORE`` is passed through
    ``DataUtils.remove_asterisk_all``, ``TEXT_WICKET_HAUL`` is set to the
    numeric sum of ``TEXT_FOUR_WICKETS`` and ``TEXT_FIVE_WICKETS``, and
    ``TOTAL_CONSISTENCY_DROP_COLS`` are dropped.

    Args:
        df: Total-consistency DataFrame.

    Returns:
        The cleaned DataFrame with a fresh 0..n-1 index.

    Raises:
        RuntimeError: If ``df`` is ``None`` or empty.
    """
    # Test for None first: evaluating ``df.empty`` on None would raise
    # AttributeError before the intended RuntimeError could be raised.
    if df is None or df.empty:
        raise RuntimeError(self.EX_MODIFY_TC.format(df))
    df = df.replace(r'^\s*$', 0, regex=True)
    df = df.fillna(0)
    df = self.__replace_zero(df)
    df = self.__replace_na(df)
    df[self.TEXT_HIGH_SCORE] = DataUtils.remove_asterisk_all(
        df[self.TEXT_HIGH_SCORE])
    df[self.TEXT_WICKET_HAUL] = (
        pd.to_numeric(df[self.TEXT_FOUR_WICKETS])
        + pd.to_numeric(df[self.TEXT_FIVE_WICKETS])
    )
    df = DataUtils.drop_cols(df, self.TOTAL_CONSISTENCY_DROP_COLS)
    return df.reset_index(drop=True)
def __prepare_data(self, url, playing_eleven):
    """Fetch ``url`` and build the two recent-form frames.

    Args:
        url: Page to fetch through ``BSoup``.
        playing_eleven: Players passed on to ``__process_rf``.

    Returns:
        A ``(first_table_frame, second_table_frame)`` tuple built from
        tables 0 and 1 of the page, or ``None`` when the page does not
        provide both tables.
    """
    soup = BSoup(url).get_soup()
    players = DataUtils.get_players(soup)
    # Both index 0 and index 1 are read below, so a page with fewer than
    # two tables is treated as "no data" instead of raising IndexError.
    if len(players) < 2:
        return None
    # Can add thread to run both simultaneously
    recent_form_bat = self.__process_rf(players, 0, playing_eleven)
    recent_form_bowl = self.__process_rf(players, 1, playing_eleven)
    return recent_form_bat, recent_form_bowl
def __bat_sr_venue(self, player_id_list, stadium_venue):
    """Compute one strike-rate figure per player for a given ground.

    For each player the innings table is fetched, rows whose 'Ground'
    contains ``stadium_venue`` are selected (skipping rows whose 'Runs'
    contains 'DNB'), and ``runs / balls * 100`` rounded to 2 places is
    reported.

    Args:
        player_id_list: Iterable of player ids; ``None`` entries yield 0.
        stadium_venue: Substring looked for in the 'Ground' column.

    Returns:
        A list with one entry per id: the rounded strike rate, or 0 when
        the id is ``None``, no table was found, or either total is 0.
    """
    header = (
        '0', 'Date', 'Versus', 'Ground', 'D/N', 'How Dismissed', 'Runs',
        'B/F', 'S/R', '9', 'Aggr', 'Avg', 'S/R Pro',
    )
    strike_rates = []
    for player_id in player_id_list:
        if player_id is None:
            strike_rates.append(0)
            continue

        table = self.__find_table(player_id, ConstantUrl.CAREER_INN_BAT_URL)
        if len(table) == 0:
            strike_rates.append(0)
            continue

        innings = DataUtils.create_df(table[0].find_all("tr")).dropna()
        innings.columns = list(header)
        innings = innings.drop(['0', '9'], axis=1).reset_index(drop=True)

        runs_sum = 0
        balls_sum = 0
        for ground, runs, balls in zip(innings['Ground'], innings['Runs'],
                                       innings['B/F']):
            if stadium_venue not in ground:
                continue
            if 'DNB' in runs:
                continue
            runs_sum += pd.to_numeric(DataUtils.remove_asterisk(runs))
            balls_sum += pd.to_numeric(DataUtils.remove_asterisk(balls))

        if runs_sum == 0 or balls_sum == 0:
            strike_rates.append(0)
        else:
            strike_rates.append(round(runs_sum / balls_sum * 100, 2))
    return strike_rates
def modify_ov_df(self, bat, bowl):
    """Clean the batting and bowling frames and join them per player.

    Blank cells and NaN become 0 in both frames and the batting
    ``TEXT_HS`` column is passed through ``DataUtils.remove_asterisk_all``.
    If both frames carry ``TEXT_VERSUS`` the opposition drop lists are
    applied; otherwise, if both carry ``TEXT_GROUND``, the venue drop lists
    are applied; otherwise no columns are dropped.

    Args:
        bat: Batting DataFrame.
        bowl: Bowling DataFrame.

    Returns:
        The inner merge of both frames on ``TEXT_PLAYER`` with a fresh
        0..n-1 index.

    Raises:
        RuntimeError: If either frame is ``None`` or empty.
    """
    either_missing = bat is None or bowl is None
    if either_missing or bat.empty or bowl.empty:
        raise RuntimeError(self.EX_MODIFY_OV.format(bat, bowl))

    blank_cell = r'^\s*$'
    batting = bat.replace(blank_cell, 0, regex=True).fillna(0)
    bowling = bowl.replace(blank_cell, 0, regex=True).fillna(0)
    batting[self.TEXT_HS] = DataUtils.remove_asterisk_all(
        batting[self.TEXT_HS])

    has_versus = (self.TEXT_VERSUS in batting.columns
                  and self.TEXT_VERSUS in bowling.columns)
    if has_versus:
        batting = DataUtils.drop_cols(batting, self.OPPOSITION_BAT_DROP_COLS)
        bowling = DataUtils.drop_cols(bowling,
                                      self.OPPOSITION_BOWL_DROP_COLS)
    elif (self.TEXT_GROUND in batting.columns
          and self.TEXT_GROUND in bowling.columns):
        batting = DataUtils.drop_cols(batting, self.VENUE_BAT_DROP_COLS)
        bowling = DataUtils.drop_cols(bowling, self.VENUE_BOWL_DROP_COLS)

    merged = pd.merge(batting, bowling, on=self.TEXT_PLAYER)
    return merged.reset_index(drop=True)
def __get_zeros_df(self, player_id):
    """Fetch a player's innings table as a DataFrame with a header row.

    The row labelled 2 is promoted to column names, the row at position 2
    is removed, and rows containing NaN are dropped.

    Args:
        player_id: Player id, or ``None``.

    Returns:
        The prepared DataFrame, or ``None`` when ``player_id`` is ``None``
        or no table was found.
    """
    if player_id is None:
        return None

    table = self.__find_table(player_id, ConstantUrl.CAREER_INN_BAT_URL)
    if len(table) == 0:
        return None

    innings = DataUtils.create_df(table[0].find_all("tr"))
    innings.columns = innings.loc[2]
    innings = innings.drop(innings.index[2])
    return innings.dropna()
def __process_rf(self, players, index, playing_eleven):
    """Turn the table at ``players[index]`` into a playing-eleven frame.

    Args:
        players: Sequence of table elements exposing ``find_all``.
        index: Position of the table to use.
        playing_eleven: Players passed on to
            ``DataUtils.playing_eleven_df``.

    Returns:
        Whatever ``DataUtils.playing_eleven_df`` returns for that table.
    """
    rows = players[index].find_all(self.TEXT_TR)
    frame = DataUtils.create_df(rows)
    result = DataUtils.playing_eleven_df(frame, playing_eleven)
    return result
def __get_df(table_rows):
    """Build a DataFrame from ``table_rows`` using its first row as header.

    Args:
        table_rows: Rows handed to ``DataUtils.create_df``.

    Returns:
        The frame with the row labelled 0 promoted to column names and the
        row at position 0 removed.
    """
    frame = DataUtils.create_df(table_rows)
    header = frame.loc[0]
    frame.columns = header
    return frame.drop(frame.index[0])
def venue_range(self, venue_range):
    """Compute weighted batting and bowling scores from ``self.venue``.

    Every cell is graded with ``DataUtils.check_range`` against one entry
    of ``venue_range`` and the grades are combined with fixed weights.

    Args:
        venue_range: Indexable of ranges; positions 0-9 are read.

    Returns:
        ``(batting, bowling)`` dicts keyed by the ``COL_PLAYER`` value of
        each row. Both are empty when ``self.venue`` has no rows.
    """
    table = self.venue

    def graded(slot, column, row):
        # Grade one cell of the table against the range stored at ``slot``.
        return DataUtils.check_range(venue_range[slot], table[column][row])

    batting = {}
    bowling = {}
    for row in table.index:
        player = table[self.COL_PLAYER][row]
        # Terms are added left to right in the original order so the
        # floating-point result is unchanged.
        batting[player] = (
            0.4262 * graded(1, self.COL_BAT_AVG, row)
            + 0.2566 * graded(0, self.COL_NO_OF_INN_BAT, row)
            + 0.1510 * graded(2, self.COL_BAT_SR, row)
            + 0.0787 * graded(3, self.COL_CENTURIES, row)
            + 0.0556 * graded(4, self.COL_FIFTIES, row)
            + 0.0328 * graded(5, self.COL_HS, row)
        )
        bowling[player] = (
            0.3018 * graded(6, self.COL_OVERS, row)
            + 0.2783 * graded(0, self.COL_NO_OF_INN_BOWL, row)
            + 0.1836 * graded(8, self.COL_BOWL_SR, row)
            + 0.1391 * graded(7, self.COL_BOWL_AVG, row)
            + 0.0972 * graded(9, self.COL_WICKET_HAUL, row)
        )
    return batting, bowling
def opposition_range(self, opposition_range):
    """Compute weighted batting and bowling scores from ``self.opposition``.

    Every cell is graded with ``DataUtils.check_range`` against one entry
    of ``opposition_range`` and the grades are combined with fixed weights;
    the ``COL_ZEROS`` grade is subtracted in the batting score.

    Args:
        opposition_range: Indexable of ranges; positions 0-9 are read.

    Returns:
        ``(batting, bowling)`` dicts keyed by the ``COL_PLAYER`` value of
        each row. Both are empty when ``self.opposition`` has no rows.
    """
    table = self.opposition

    def graded(slot, column, row):
        # Grade one cell of the table against the range stored at ``slot``.
        return DataUtils.check_range(opposition_range[slot],
                                     table[column][row])

    batting = {}
    bowling = {}
    for row in table.index:
        player = table[self.COL_PLAYER][row]
        # Terms are combined left to right in the original order so the
        # floating-point result is unchanged.
        batting[player] = (
            0.4262 * graded(1, self.COL_BAT_AVG, row)
            + 0.2566 * graded(0, self.COL_NO_OF_INN_BAT, row)
            + 0.1510 * graded(2, self.COL_BAT_SR, row)
            + 0.0787 * graded(3, self.COL_CENTURIES, row)
            + 0.0556 * graded(4, self.COL_FIFTIES, row)
            - 0.0328 * graded(5, self.COL_ZEROS, row)
        )
        # NOTE(review): these five weights add up to 1.0695, whereas the
        # bowling weights in venue_range, recent_form_range and
        # total_consistency_range each add up to 1.0 — confirm that the
        # repeated 0.3177 is intended.
        bowling[player] = (
            0.3177 * graded(6, self.COL_OVERS, row)
            + 0.3177 * graded(0, self.COL_NO_OF_INN_BOWL, row)
            + 0.1933 * graded(8, self.COL_BOWL_SR, row)
            + 0.1465 * graded(7, self.COL_BOWL_AVG, row)
            + 0.0943 * graded(9, self.COL_WICKET_HAUL, row)
        )
    return batting, bowling
def recent_form_range(self, form_range):
    """Compute weighted batting and bowling scores from ``self.recent_form``.

    Every cell is graded with ``DataUtils.check_range`` against one entry
    of ``form_range`` and the grades are combined with fixed weights; the
    ``COL_ZEROS`` grade is subtracted in the batting score.

    Args:
        form_range: Indexable of ranges; positions 0-9 are read.

    Returns:
        ``(batting, bowling)`` dicts keyed by the ``COL_PLAYER`` value of
        each row. Both are empty when ``self.recent_form`` has no rows.
    """
    table = self.recent_form

    def graded(slot, column, row):
        # Grade one cell of the table against the range stored at ``slot``.
        return DataUtils.check_range(form_range[slot], table[column][row])

    batting = {}
    bowling = {}
    for row in table.index:
        player = table[self.COL_PLAYER][row]
        # Terms are combined left to right in the original order so the
        # floating-point result is unchanged.
        batting[player] = (
            0.4262 * graded(1, self.COL_BAT_AVG, row)
            + 0.2566 * graded(0, self.COL_NO_OF_INN_BAT, row)
            + 0.1510 * graded(2, self.COL_BAT_SR, row)
            + 0.0787 * graded(3, self.COL_CENTURIES, row)
            + 0.0556 * graded(4, self.COL_FIFTIES, row)
            - 0.0328 * graded(5, self.COL_ZEROS, row)
        )
        bowling[player] = (
            0.3269 * graded(6, self.COL_OVERS, row)
            + 0.2846 * graded(0, self.COL_NO_OF_INN_BOWL, row)
            + 0.1877 * graded(8, self.COL_BOWL_SR, row)
            + 0.1210 * graded(7, self.COL_BOWL_AVG, row)
            + 0.0798 * graded(9, self.COL_WICKET_HAUL, row)
        )
    return batting, bowling
def total_consistency_range(self, consistency_range):
    """Compute weighted scores from ``self.total_consistency``.

    Every cell is graded with ``DataUtils.check_range`` against one entry
    of ``consistency_range`` and the grades are combined with fixed
    weights; the ``COL_ZEROS`` grade is subtracted in the batting score.

    Args:
        consistency_range: Indexable of ranges; positions 0-9 are read.

    Returns:
        ``(batting, bowling)`` dicts keyed by the ``COL_PLAYER`` value of
        each row. Both are empty when ``self.total_consistency`` has no
        rows.
    """
    table = self.total_consistency

    def graded(slot, column, row):
        # Grade one cell of the table against the range stored at ``slot``.
        return DataUtils.check_range(consistency_range[slot],
                                     table[column][row])

    batting = {}
    bowling = {}
    for row in table.index:
        player = table[self.COL_PLAYER][row]
        # Terms are combined left to right in the original order so the
        # floating-point result is unchanged.
        batting[player] = (
            0.4262 * graded(1, self.COL_BAT_AVG, row)
            + 0.2566 * graded(0, self.COL_NO_OF_INN_BAT, row)
            + 0.1510 * graded(2, self.COL_BAT_SR, row)
            + 0.0787 * graded(3, self.COL_CENTURIES, row)
            + 0.0556 * graded(4, self.COL_FIFTIES, row)
            - 0.0328 * graded(5, self.COL_ZEROS, row)
        )
        bowling[player] = (
            0.4174 * graded(6, self.COL_OVERS, row)
            + 0.2634 * graded(0, self.COL_NO_OF_INN_BOWL, row)
            + 0.1602 * graded(8, self.COL_BOWL_SR, row)
            + 0.0975 * graded(7, self.COL_BOWL_AVG, row)
            + 0.0615 * graded(9, self.COL_WICKET_HAUL, row)
        )
    return batting, bowling