Пример #1
0
    def modify_cf_df(self, bat, bowl):
        """Clean the ``bat`` and ``bowl`` frames and merge them per player.

        Both frames are passed through ``__replace_zero``, have remaining NaN
        values filled with 0, get their derived columns refreshed, lose the
        configured drop-columns and are finally merged on ``self.TEXT_PLAYER``.

        Args:
            bat: DataFrame holding the ``self.TEXT_HS`` and player columns.
            bowl: DataFrame handed to ``__wicket_haul_col``; must also hold
                the player column.

        Returns:
            The merged DataFrame with a fresh 0..n-1 index.

        Raises:
            RuntimeError: If either frame is ``None`` or empty.
        """
        inputs_usable = (bat is not None and bowl is not None
                         and not bat.empty and not bowl.empty)
        if not inputs_usable:
            raise RuntimeError(self.EX_MODIFY_CF.format(bat, bowl))

        bat = self.__replace_zero(bat).fillna(0)
        bowl = self.__replace_zero(bowl).fillna(0)

        bat[self.TEXT_HS] = DataUtils.remove_asterisk_all(bat[self.TEXT_HS])
        bowl[self.TEXT_WICKET_HAUL] = self.__wicket_haul_col(bowl)

        # The presence of the span column selects which drop lists apply.
        if self.TEXT_SPAN in bat.columns:
            bat_drop = self.CONSISTENCY_BAT_DROP_COLS
            bowl_drop = self.CONSISTENCY_BOWL_DROP_COLS
        else:
            bat_drop = self.FORM_BAT_DROP_COLS
            bowl_drop = self.FORM_BOWL_DROP_COLS

        bat = DataUtils.drop_cols(bat, bat_drop)
        bowl = DataUtils.drop_cols(bowl, bowl_drop)

        merged = pd.merge(bat, bowl, on=self.TEXT_PLAYER)
        return merged.reset_index(drop=True)
Пример #2
0
 def __form(self, soup, playing_eleven):
     """Build the playing-eleven frame from the first player table in ``soup``.

     Args:
         soup: Parsed page passed to ``DataUtils.get_players``.
         playing_eleven: Forwarded to ``DataUtils.playing_eleven_df``.

     Returns:
         The result of ``DataUtils.playing_eleven_df``, or ``None`` when
         ``DataUtils.get_players`` finds nothing.
     """
     players = DataUtils.get_players(soup)
     if len(players) == 0:
         return None

     rows = players[0].find_all(self.TEXT_TR)
     return DataUtils.playing_eleven_df(DataUtils.create_df(rows),
                                        playing_eleven)
Пример #3
0
    def __consistency(self, alter_url, team, playing_eleven):
        """Fetch a team's page and build the playing-eleven frame from it.

        Args:
            alter_url: Object whose ``team_url(team_id)`` yields the page URL.
            team: Team passed to ``TeamsDAO.get_team_id``.
            playing_eleven: Forwarded to ``DataUtils.playing_eleven_df``.

        Returns:
            The result of ``DataUtils.playing_eleven_df``, or ``None`` when the
            fetched page contains no player table.
        """
        team_id = TeamsDAO.get_team_id(team)
        url = alter_url.team_url(team_id)

        soup = BSoup(url).get_soup()
        players = DataUtils.get_players(soup)
        if len(players) == 0:
            # Nothing was scraped: report "no data" instead of failing with an
            # IndexError on players[0].
            return None

        table_rows = players[0].find_all(self.TEXT_TR)
        df = DataUtils.create_df(table_rows)
        return DataUtils.playing_eleven_df(df, playing_eleven)
Пример #4
0
    def create_total_consistency(self, playing_eleven):
        """Assemble one frame of total-consistency rows for the given players.

        For every id returned by ``PlayersDAO.get_player_id`` the player's
        table is fetched via ``__process_tc``; its rows are transposed and
        stacked onto the running frame. Players without an id or without a
        table get the rows produced by ``__default_values_total_consistency``.

        Args:
            playing_eleven: List of player names. NOTE: it is sorted in place,
                and the sorted list becomes the ``'Player'`` column.

        Returns:
            The frame processed by ``__modify_tc`` with the ``'Player'`` and
            ``'InnBowls'`` columns added and a fresh 0..n-1 index.
        """
        playing_eleven.sort()
        total_consistency_df = pd.DataFrame()

        for player_id in PlayersDAO.get_player_id(playing_eleven):
            # An unknown player is treated exactly like a player with no table.
            consistency = (self.__process_tc(player_id)
                           if player_id is not None else ())

            if len(consistency) > 0:
                table_rows = consistency[0].find_all("tr")
                df = DataUtils.create_df(table_rows)
                # DataFrame.append was removed in pandas 2.0; pd.concat is the
                # equivalent row-wise stacking.
                total_consistency_df = pd.concat(
                    [total_consistency_df, df.transpose()])
            else:
                total_consistency_df = self.__default_values_total_consistency(
                    total_consistency_df)

        total_consistency_df = self.__modify_tc(
            total_consistency_df.reset_index(drop=True))

        total_consistency_df['Player'] = playing_eleven
        total_consistency_df['InnBowls'] = self.__bowl_inn_tc(
            total_consistency_df)

        return total_consistency_df.reset_index(drop=True)
Пример #5
0
    def __modify_tc(self, df):
        """Normalise the raw total-consistency frame.

        Blank cells and NaN become 0. The frame is split at integer column
        label 16 into a left part (labels up to 15) and a right part (16
        onwards). For each row whose column 16 reads ``'Fielding'`` or
        ``'Wicket Keeping'``, the row labelled one higher is zeroed in the
        right part. The parts are then re-joined, renamed by
        ``__set_columns``, stripped of ``self.TOTAL_CON_DROP_COLS`` and
        cleared of rows whose ``'Innings:'`` cell contains ``"Innings:"``.

        Args:
            df: Frame with integer column labels; the ``row + 1`` arithmetic
                assumes integer row labels as well.

        Returns:
            The cleaned DataFrame (row labels are not renumbered).
        """
        df = df.replace(r'^\s*$', 0, regex=True)
        df = df.fillna(0)

        df_bat = df.loc[:, :15]
        # Explicit copy: rows are assigned below, and writing into a .loc slice
        # of `df` is ambiguous (view vs. copy) and triggers
        # SettingWithCopyWarning.
        df_bowl = df.loc[:, 16:].copy()

        for marker in ('Fielding', 'Wicket Keeping'):
            for row in df.loc[df[16] == marker].index.tolist():
                # The row following a marker row is zeroed in the right part.
                df_bowl.loc[row + 1] = 0

        df = pd.concat([df_bat, df_bowl], axis=1, ignore_index=True)
        df = self.__set_columns(df)

        df = DataUtils.drop_cols(df, self.TOTAL_CON_DROP_COLS)

        # str.contains yields NaN for non-string cells (e.g. the 0 fill value),
        # so only an exact True marks a row for removal.
        duplicates = df['Innings:'].str.contains("Innings:")
        header_rows = [
            index for index, value in duplicates.items() if value is True
        ]
        return df.drop(header_rows)
Пример #6
0
    def modify_total_consistency(self, df):
        """Clean the total-consistency frame and derive the wicket-haul column.

        Blank cells and NaN become 0, the frame is passed through
        ``__replace_zero`` and ``__replace_na``, asterisks are stripped from
        the high-score column, and the wicket-haul column is set to the
        numeric sum of the four- and five-wicket columns before
        ``self.TOTAL_CONSISTENCY_DROP_COLS`` are dropped.

        Args:
            df: DataFrame to clean.

        Returns:
            The cleaned DataFrame with a fresh 0..n-1 index.

        Raises:
            RuntimeError: If ``df`` is ``None`` or empty.
        """
        # None must be tested first: evaluating ``df.empty`` on None raises
        # AttributeError before the intended RuntimeError can be raised.
        if df is None or df.empty:
            raise RuntimeError(self.EX_MODIFY_TC.format(df))

        df = df.replace(r'^\s*$', 0, regex=True)
        df = df.fillna(0)

        df = self.__replace_zero(df)
        df = self.__replace_na(df)

        df[self.TEXT_HIGH_SCORE] = DataUtils.remove_asterisk_all(
            df[self.TEXT_HIGH_SCORE])
        df[self.TEXT_WICKET_HAUL] = pd.to_numeric(
            df[self.TEXT_FOUR_WICKETS]) + pd.to_numeric(
                df[self.TEXT_FIVE_WICKETS])

        df = DataUtils.drop_cols(df, self.TOTAL_CONSISTENCY_DROP_COLS)

        return df.reset_index(drop=True)
Пример #7
0
    def __prepare_data(self, url, playing_eleven):
        """Fetch ``url`` and build the two recent-form frames from it.

        Args:
            url: Page address handed to ``BSoup``.
            playing_eleven: Forwarded to ``__process_rf``.

        Returns:
            A tuple of the frames built from player tables 0 and 1, or
            ``None`` when the page yields no player tables.
        """
        players = DataUtils.get_players(BSoup(url).get_soup())
        if len(players) == 0:
            return None

        # The two calls are independent and could run concurrently.
        first_frame = self.__process_rf(players, 0, playing_eleven)
        second_frame = self.__process_rf(players, 1, playing_eleven)
        return first_frame, second_frame
Пример #8
0
    def __bat_sr_venue(self, player_id_list, stadium_venue):
        """Compute one strike-rate figure per player for a given ground.

        For each player the table found via ``__find_table`` is read; rows
        whose ``'Ground'`` contains ``stadium_venue`` and whose ``'Runs'`` does
        not contain ``'DNB'`` contribute their runs and balls faced. The
        figure is ``runs / balls * 100`` rounded to two decimals.

        Args:
            player_id_list: Iterable of player ids; ``None`` entries score 0.
            stadium_venue: Substring looked for in the ``'Ground'`` column.

        Returns:
            A list with one entry per id: the rounded strike rate, or 0 when
            the id is ``None``, no table is found, or either total is 0.
        """
        column_names = [
            '0', 'Date', 'Versus', 'Ground', 'D/N', 'How Dismissed',
            'Runs', 'B/F', 'S/R', '9', 'Aggr', 'Avg', 'S/R Pro'
        ]
        strike_rates = []

        for player_id in player_id_list:
            if player_id is None:
                strike_rates.append(0)
                continue

            table = self.__find_table(player_id,
                                      ConstantUrl.CAREER_INN_BAT_URL)
            if len(table) == 0:
                strike_rates.append(0)
                continue

            innings = DataUtils.create_df(table[0].find_all("tr")).dropna()
            innings.columns = column_names
            innings = innings.drop(['0', '9'], axis=1).reset_index(drop=True)

            runs_total = 0
            balls_total = 0
            for ground, runs, balls in zip(innings['Ground'], innings['Runs'],
                                           innings['B/F']):
                if stadium_venue not in ground:
                    continue
                if 'DNB' in runs:
                    continue
                runs_total += pd.to_numeric(DataUtils.remove_asterisk(runs))
                balls_total += pd.to_numeric(DataUtils.remove_asterisk(balls))

            if runs_total == 0 or balls_total == 0:
                strike_rates.append(0)
            else:
                strike_rates.append(round(runs_total / balls_total * 100, 2))

        return strike_rates
Пример #9
0
    def modify_ov_df(self, bat, bowl):
        """Clean the ``bat`` and ``bowl`` frames and merge them per player.

        Blank cells and NaN become 0 in both frames and asterisks are stripped
        from the ``self.TEXT_HS`` column of ``bat``. Columns are then dropped
        according to which marker column both frames share: the versus column
        selects the opposition lists, otherwise the ground column selects the
        venue lists; if neither is shared nothing is dropped.

        Args:
            bat: DataFrame holding the ``self.TEXT_HS`` and player columns.
            bowl: DataFrame holding the player column.

        Returns:
            The frames merged on ``self.TEXT_PLAYER`` with a fresh index.

        Raises:
            RuntimeError: If either frame is ``None`` or empty.
        """
        inputs_usable = (bat is not None and bowl is not None
                         and not bat.empty and not bowl.empty)
        if not inputs_usable:
            raise RuntimeError(self.EX_MODIFY_OV.format(bat, bowl))

        bat = bat.replace(r'^\s*$', 0, regex=True).fillna(0)
        bowl = bowl.replace(r'^\s*$', 0, regex=True).fillna(0)

        bat[self.TEXT_HS] = DataUtils.remove_asterisk_all(bat[self.TEXT_HS])

        def shared_by_both(column):
            return column in bat.columns and column in bowl.columns

        if shared_by_both(self.TEXT_VERSUS):
            bat = DataUtils.drop_cols(bat, self.OPPOSITION_BAT_DROP_COLS)
            bowl = DataUtils.drop_cols(bowl, self.OPPOSITION_BOWL_DROP_COLS)
        elif shared_by_both(self.TEXT_GROUND):
            bat = DataUtils.drop_cols(bat, self.VENUE_BAT_DROP_COLS)
            bowl = DataUtils.drop_cols(bowl, self.VENUE_BOWL_DROP_COLS)

        merged = pd.merge(bat, bowl, on=self.TEXT_PLAYER)
        return merged.reset_index(drop=True)
Пример #10
0
    def __get_zeros_df(self, player_id):
        """Return the player's table as a frame headed by its row 2.

        Args:
            player_id: Id passed to ``__find_table``; ``None`` short-circuits.

        Returns:
            A DataFrame whose columns are taken from the row labelled 2, with
            the row at position 2 and all NaN-containing rows removed; or
            ``None`` when ``player_id`` is ``None`` or no table is found.
        """
        if player_id is None:
            return None

        table = self.__find_table(player_id, ConstantUrl.CAREER_INN_BAT_URL)
        if len(table) == 0:
            return None

        frame = DataUtils.create_df(table[0].find_all("tr"))
        frame.columns = frame.loc[2]
        return frame.drop(frame.index[2]).dropna()
Пример #11
0
 def __process_rf(self, players, index, playing_eleven):
     """Build the playing-eleven frame from ``players[index]``.

     Args:
         players: Indexable of table elements exposing ``find_all``.
         index: Position of the table to read.
         playing_eleven: Forwarded to ``DataUtils.playing_eleven_df``.

     Returns:
         The result of ``DataUtils.playing_eleven_df``.
     """
     rows = players[index].find_all(self.TEXT_TR)
     return DataUtils.playing_eleven_df(DataUtils.create_df(rows),
                                        playing_eleven)
Пример #12
0
 def __get_df(table_rows):
     """Turn ``table_rows`` into a frame whose header is its row 0.

     Args:
         table_rows: Rows handed to ``DataUtils.create_df``.

     Returns:
         The frame with columns taken from the row labelled 0 and the first
         row removed.
     """
     frame = DataUtils.create_df(table_rows)
     frame.columns = frame.loc[0]
     return frame.drop(frame.index[0])
Пример #13
0
    def venue_range(self, venue_range):
        """Compute weighted bat and bowl scores for each row of ``self.venue``.

        Every statistic is passed, together with its entry of ``venue_range``,
        to ``DataUtils.check_range``; the results are combined with fixed
        weights.

        Args:
            venue_range: Indexable of range definitions; positions 0-9 are
                read (position 0 is used for both innings-count columns).

        Returns:
            A ``(bat, bowl)`` tuple of dicts keyed by the player column value.
        """
        stats = self.venue
        bat_scores = {}
        bowl_scores = {}

        for i in stats.index:
            rate = DataUtils.check_range
            player = stats[self.COL_PLAYER][i]

            bat_scores[player] = (
                0.4262 * rate(venue_range[1], stats[self.COL_BAT_AVG][i])
                + 0.2566 * rate(venue_range[0],
                                stats[self.COL_NO_OF_INN_BAT][i])
                + 0.1510 * rate(venue_range[2], stats[self.COL_BAT_SR][i])
                + 0.0787 * rate(venue_range[3], stats[self.COL_CENTURIES][i])
                + 0.0556 * rate(venue_range[4], stats[self.COL_FIFTIES][i])
                + 0.0328 * rate(venue_range[5], stats[self.COL_HS][i])
            )

            bowl_scores[player] = (
                0.3018 * rate(venue_range[6], stats[self.COL_OVERS][i])
                + 0.2783 * rate(venue_range[0],
                                stats[self.COL_NO_OF_INN_BOWL][i])
                + 0.1836 * rate(venue_range[8], stats[self.COL_BOWL_SR][i])
                + 0.1391 * rate(venue_range[7], stats[self.COL_BOWL_AVG][i])
                + 0.0972 * rate(venue_range[9],
                                stats[self.COL_WICKET_HAUL][i])
            )

        return bat_scores, bowl_scores
Пример #14
0
    def opposition_range(self, opposition_range):
        """Compute weighted bat and bowl scores for ``self.opposition`` rows.

        Every statistic is passed, together with its entry of
        ``opposition_range``, to ``DataUtils.check_range``; the results are
        combined with fixed weights (the zeros term is subtracted).

        Args:
            opposition_range: Indexable of range definitions; positions 0-9
                are read (position 0 is used for both innings-count columns).

        Returns:
            A ``(bat, bowl)`` tuple of dicts keyed by the player column value.
        """
        stats = self.opposition
        bat_scores = {}
        bowl_scores = {}

        for i in stats.index:
            rate = DataUtils.check_range
            player = stats[self.COL_PLAYER][i]

            bat_scores[player] = (
                0.4262 * rate(opposition_range[1],
                              stats[self.COL_BAT_AVG][i])
                + 0.2566 * rate(opposition_range[0],
                                stats[self.COL_NO_OF_INN_BAT][i])
                + 0.1510 * rate(opposition_range[2],
                                stats[self.COL_BAT_SR][i])
                + 0.0787 * rate(opposition_range[3],
                                stats[self.COL_CENTURIES][i])
                + 0.0556 * rate(opposition_range[4],
                                stats[self.COL_FIFTIES][i])
                - 0.0328 * rate(opposition_range[5],
                                stats[self.COL_ZEROS][i])
            )

            # NOTE(review): these five weights add up to 1.0695 and 0.3177
            # appears twice -- possibly a typo; confirm the intended values.
            bowl_scores[player] = (
                0.3177 * rate(opposition_range[6],
                              stats[self.COL_OVERS][i])
                + 0.3177 * rate(opposition_range[0],
                                stats[self.COL_NO_OF_INN_BOWL][i])
                + 0.1933 * rate(opposition_range[8],
                                stats[self.COL_BOWL_SR][i])
                + 0.1465 * rate(opposition_range[7],
                                stats[self.COL_BOWL_AVG][i])
                + 0.0943 * rate(opposition_range[9],
                                stats[self.COL_WICKET_HAUL][i])
            )

        return bat_scores, bowl_scores
Пример #15
0
    def recent_form_range(self, form_range):
        """Compute weighted bat and bowl scores for ``self.recent_form`` rows.

        Every statistic is passed, together with its entry of ``form_range``,
        to ``DataUtils.check_range``; the results are combined with fixed
        weights (the zeros term is subtracted).

        Args:
            form_range: Indexable of range definitions; positions 0-9 are
                read (position 0 is used for both innings-count columns).

        Returns:
            A ``(bat, bowl)`` tuple of dicts keyed by the player column value.
        """
        stats = self.recent_form
        bat_scores = {}
        bowl_scores = {}

        for i in stats.index:
            rate = DataUtils.check_range
            player = stats[self.COL_PLAYER][i]

            bat_scores[player] = (
                0.4262 * rate(form_range[1], stats[self.COL_BAT_AVG][i])
                + 0.2566 * rate(form_range[0],
                                stats[self.COL_NO_OF_INN_BAT][i])
                + 0.1510 * rate(form_range[2], stats[self.COL_BAT_SR][i])
                + 0.0787 * rate(form_range[3], stats[self.COL_CENTURIES][i])
                + 0.0556 * rate(form_range[4], stats[self.COL_FIFTIES][i])
                - 0.0328 * rate(form_range[5], stats[self.COL_ZEROS][i])
            )

            bowl_scores[player] = (
                0.3269 * rate(form_range[6], stats[self.COL_OVERS][i])
                + 0.2846 * rate(form_range[0],
                                stats[self.COL_NO_OF_INN_BOWL][i])
                + 0.1877 * rate(form_range[8], stats[self.COL_BOWL_SR][i])
                + 0.1210 * rate(form_range[7], stats[self.COL_BOWL_AVG][i])
                + 0.0798 * rate(form_range[9],
                                stats[self.COL_WICKET_HAUL][i])
            )

        return bat_scores, bowl_scores
Пример #16
0
    def total_consistency_range(self, consistency_range):
        """Compute weighted bat and bowl scores for ``self.total_consistency``.

        Every statistic is passed, together with its entry of
        ``consistency_range``, to ``DataUtils.check_range``; the results are
        combined with fixed weights (the zeros term is subtracted).

        Args:
            consistency_range: Indexable of range definitions; positions 0-9
                are read (position 0 is used for both innings-count columns).

        Returns:
            A ``(bat, bowl)`` tuple of dicts keyed by the player column value.
        """
        stats = self.total_consistency
        bat_scores = {}
        bowl_scores = {}

        for i in stats.index:
            rate = DataUtils.check_range
            player = stats[self.COL_PLAYER][i]

            bat_scores[player] = (
                0.4262 * rate(consistency_range[1],
                              stats[self.COL_BAT_AVG][i])
                + 0.2566 * rate(consistency_range[0],
                                stats[self.COL_NO_OF_INN_BAT][i])
                + 0.1510 * rate(consistency_range[2],
                                stats[self.COL_BAT_SR][i])
                + 0.0787 * rate(consistency_range[3],
                                stats[self.COL_CENTURIES][i])
                + 0.0556 * rate(consistency_range[4],
                                stats[self.COL_FIFTIES][i])
                - 0.0328 * rate(consistency_range[5],
                                stats[self.COL_ZEROS][i])
            )

            bowl_scores[player] = (
                0.4174 * rate(consistency_range[6],
                              stats[self.COL_OVERS][i])
                + 0.2634 * rate(consistency_range[0],
                                stats[self.COL_NO_OF_INN_BOWL][i])
                + 0.1602 * rate(consistency_range[8],
                                stats[self.COL_BOWL_SR][i])
                + 0.0975 * rate(consistency_range[7],
                                stats[self.COL_BOWL_AVG][i])
                + 0.0615 * rate(consistency_range[9],
                                stats[self.COL_WICKET_HAUL][i])
            )

        return bat_scores, bowl_scores