def most_common_lineup_position(retro):
    """Return the lineup slot each batter occupied most often.

    Rows are grouped by ``Batter_ID`` and ``Lineup_Order``; the number of
    non-null ``Inning`` entries in each group is used as the plate
    appearance (PA) count, and the slot with the highest count is kept for
    every batter.

    Args:
        retro: Play-by-play data as a ``_Table`` or a pandas DataFrame with
            ``Batter_ID``, ``Lineup_Order`` and ``Inning`` columns.

    Returns:
        If ``retro`` was a ``_Table``: a ``_Table`` built from the result
        with ``Batter_ID`` restored as a regular column. Otherwise a
        DataFrame indexed by ``Batter_ID`` (sorted) whose only column is
        ``Lineup_Order``.
    """
    came_as_table = isinstance(retro, _Table)
    frame = retro.to_df() if came_as_table else retro

    # PA count per (batter, lineup slot) pair.
    pa_counts = frame.groupby(['Batter_ID', 'Lineup_Order'])['Inning'].count()

    # Move Lineup_Order out of the index so only Batter_ID indexes the rows,
    # then label the count column as PA.
    per_batter = pa_counts.reset_index(level='Lineup_Order')
    per_batter = per_batter.rename(columns={'Inning': 'PA'})

    # Highest PA first: the first row seen for a batter is the most used slot.
    ranked = per_batter.sort_values('PA', ascending=False)

    # Later occurrences of the same Batter_ID are the less common slots.
    is_top_slot = ~ranked.index.duplicated(keep='first')
    result = ranked.loc[is_top_slot, ['Lineup_Order']].sort_index()

    if not came_as_table:
        return result
    return _Table.from_df(result.reset_index())
def fast_run_expectancy(retro, re):
    """Attach current and next-state run expectancy to every play.

    Two columns are added to the play-by-play data:

    * ``Run_Expectancy`` - value of the (``Outs``, ``Start_Bases``) state.
    * ``Run_Expectancy_Next`` - value of the
      (``Outs`` + ``Event_Outs``, ``End_Bases``) state, or 0 when that state
      is not present in the matrix (e.g. three outs, inning over).

    Args:
        retro: Play-by-play data as a ``_Table`` or pandas DataFrame with
            ``Outs``, ``Start_Bases``, ``Event_Outs`` and ``End_Bases``
            columns. A DataFrame argument is modified in place (the two
            columns are added to it).
        re: Run expectancy matrix as a ``_Table`` or pandas DataFrame with
            ``Outs`` and ``Start_Bases`` columns plus the value column.
            Each (``Outs``, ``Start_Bases``) pair must appear only once.

    Returns:
        A ``_Table`` if ``retro`` was a ``_Table``, otherwise the DataFrame.

    Raises:
        KeyError: If a current (``Outs``, ``Start_Bases``) state is missing
            from the matrix (pandas versions that reject missing labels).
    """
    came_as_table = isinstance(retro, _Table)
    if came_as_table:
        retro = retro.to_df()
    # Convert the matrix on its own merits so that a Table/DataFrame mix of
    # arguments works instead of failing on a missing method.
    if isinstance(re, _Table):
        re = re.to_df()

    re = re.set_index(['Outs', 'Start_Bases'])

    # Current out-runner states; every one of them is expected to exist.
    start_states = list(zip(retro['Outs'], retro['Start_Bases']))
    retro['Run_Expectancy'] = re.loc[start_states].values

    # Next out-runner states. Inning-ending plays reach 3 outs, which is not
    # in the matrix. `.loc` raises KeyError for missing labels on current
    # pandas, so use `reindex`, which yields NaN for them instead.
    next_outs = retro['Outs'] + retro['Event_Outs']
    end_states = list(zip(next_outs, retro['End_Bases']))
    retro['Run_Expectancy_Next'] = re.reindex(end_states).values

    # Assign the filled column back rather than calling fillna(inplace=True)
    # on a column selection, which may operate on a temporary copy.
    retro['Run_Expectancy_Next'] = retro['Run_Expectancy_Next'].fillna(0)

    return _Table.from_df(retro) if came_as_table else retro
def fill_null(table, fill_column=None, fill_value=None, fill_method=None):
    """Fill missing values in a table, optionally restricted to some columns.

    Args:
        table: A ``_Table`` or pandas DataFrame.
        fill_column: Column label or list of labels to fill. When given, only
            the selected column(s) are returned; ``None`` fills (and
            returns) the whole table.
        fill_value: Replacement value forwarded to pandas ``fillna``.
        fill_method: Fill method forwarded to pandas ``fillna`` (only passed
            on when it is not ``None``).

    Returns:
        The filled data. A ``_Table`` if ``table`` was a ``_Table``;
        otherwise the pandas object produced by ``fillna`` (a Series when
        ``fill_column`` is a single label, else a DataFrame).
    """
    came_as_table = isinstance(table, _Table)
    frame = table.to_df() if came_as_table else table

    data = frame[fill_column] if fill_column is not None else frame

    # The `method` keyword of fillna is deprecated (pandas 2.1+), so only
    # mention it when the caller actually asked for a fill method.
    if fill_method is None:
        data = data.fillna(value=fill_value)
    else:
        data = data.fillna(value=fill_value, method=fill_method)

    if not came_as_table:
        return data

    # A single column label selects a Series, which has no `.columns` for
    # Table.from_df to iterate; promote it to a one-column DataFrame first.
    if data.ndim == 1:
        data = data.to_frame()
    return _Table.from_df(data)
def get_first_from_group(table, groupby):
    """Return the first row of every group, ordered by the group key(s).

    Args:
        table: A ``_Table`` or pandas DataFrame.
        groupby: Column label or list of labels that define the groups.

    Returns:
        One row per distinct ``groupby`` value: the row that appears first
        in the input for that group. A ``_Table`` if ``table`` was a
        ``_Table``, otherwise a DataFrame.
    """
    came_as_table = isinstance(table, _Table)
    frame = table.to_df() if came_as_table else table

    # The default single-key sort (quicksort) is not stable, so rows sharing
    # a key could be reordered and "first" would be arbitrary. A stable sort
    # keeps the input order inside every group.
    ordered = frame.sort_values(groupby, kind='mergesort')
    out = ordered.drop_duplicates(subset=groupby, keep='first')

    return _Table.from_df(out) if came_as_table else out
def merge(t1, t2, on, how='outer', fillna=True):
    """Join two tables on a shared key.

    Args:
        t1: Left table, a ``_Table`` or pandas DataFrame.
        t2: Right table, a ``_Table`` or pandas DataFrame.
        on: Column label(s) used as the join key on both sides.
        how: Join type forwarded to ``pandas.merge`` (default ``'outer'``).
        fillna: When true, missing values in the joined result are replaced
            with 0.

    Returns:
        A ``_Table`` if ``t1`` was a ``_Table``; otherwise a DataFrame. The
        type of ``t2`` does not influence the return type.
    """
    # Only the left operand decides the return type.
    left_is_table = isinstance(t1, _Table)
    left = t1.to_df() if left_is_table else t1
    right = t2.to_df() if isinstance(t2, _Table) else t2

    merged = _pd.merge(left, right, how=how, left_on=on, right_on=on)
    if fillna:
        merged.fillna(0, inplace=True)

    return _Table.from_df(merged) if left_is_table else merged
def merge(t1, t2, on, how='outer', fillna=True):
    """Join two tables on a shared key (imports its dependencies locally).

    Args:
        t1: Left table, a ``datascience.Table`` or pandas DataFrame.
        t2: Right table, a ``datascience.Table`` or pandas DataFrame.
        on: Column label(s) used as the join key on both sides.
        how: Join type forwarded to ``pandas.merge`` (default ``'outer'``).
        fillna: When true, missing values in the joined result are replaced
            with 0.

    Returns:
        A ``Table`` if ``t1`` was a ``Table``; otherwise a DataFrame.
    """
    import pandas as pd
    from datascience import Table

    # The result mirrors the type of the left operand only.
    wants_table = isinstance(t1, Table)
    if wants_table:
        t1 = t1.to_df()
    if isinstance(t2, Table):
        t2 = t2.to_df()

    joined = pd.merge(t1, t2, how=how, left_on=on, right_on=on)
    if fillna:
        joined.fillna(0, inplace=True)

    if not wants_table:
        return joined
    return Table.from_df(joined)
def multi_sort(table, by, descending=True, na_position='first'):
    """Sort a table by one or more columns.

    Args:
        table: Object exposing ``to_df()`` that returns a pandas DataFrame.
        by: Column label or list of labels to sort on.
        descending: Sort from largest to smallest when true (the default).
        na_position: Where missing values go, forwarded to pandas
            ``sort_values`` (default ``'first'``).

    Returns:
        A new ``_Table`` holding the sorted rows.
    """
    frame = table.to_df()
    ordered = frame.sort_values(
        by,
        ascending=not descending,
        na_position=na_position,
    )
    return _Table.from_df(ordered)
def concat(table_list):
    """Stack several tables into a single table.

    Args:
        table_list: Iterable of objects exposing ``to_df()``.

    Returns:
        A ``_Table`` built from the pandas concatenation of every table's
        DataFrame, in the order given.
    """
    frames = []
    for tbl in table_list:
        frames.append(tbl.to_df())
    stacked = _pd.concat(frames)
    return _Table.from_df(stacked)
def fill_null(table, value=None, method=None):
    """Fill the missing values of a table.

    Args:
        table: Object exposing ``to_df()`` that returns a pandas DataFrame.
        value: Replacement value forwarded to pandas ``fillna``.
        method: Fill method forwarded to pandas ``fillna`` (only passed on
            when it is not ``None``).

    Returns:
        A new ``_Table`` built from the filled DataFrame.
    """
    frame = table.to_df()

    # The `method` keyword of fillna is deprecated (pandas 2.1+), so only
    # mention it when the caller actually asked for a fill method; a plain
    # value fill then never touches the deprecated keyword.
    if method is None:
        filled = frame.fillna(value=value)
    else:
        filled = frame.fillna(value=value, method=method)

    return _Table.from_df(filled)
#Hence is of little use for prediction of shelf life. hence droping those columns also from further analysis print( "Also Residual Oxygen, Moisture (%) and Hexanal (ppm) are charecteristics of aged samples. Hence is of little use for prediction of shelf life. hence droping those columns also from further analysis" ) ProductTable = ProductTable.drop('Moisture (%)') ProductTable = ProductTable.drop('Residual Oxygen (%)') ProductTable = ProductTable.drop('Hexanal (ppm)') #Also drop colomn Study Number/Sample ID as this is not affecting shelf life in anyway '''ProductTable=ProductTable.drop('Study Number') ProductTable=ProductTable.drop('Sample ID')''' #Drop Duplicate Entries iF there are any pandasDF = ProductTable.to_df() pandasDF.drop_duplicates(keep='first', inplace=True) #inplace=True modify original record ProductTable = Table.from_df(pandasDF) '''ProductTable=ProductTable.move_column('Difference From Fresh',0) ProductTable=ProductTable.move_column('Sample Age (Weeks)',1) ProductTable=ProductTable.move_column('Processing Agent Stability Index',2) ProductTable=ProductTable.move_column('Process Type',3)''' #-------------------------------------------------------------------------------------------------------------------- #ONE HOT ENCODING(Custom) #-------------------------------------------------------------------------------------------------------------------- ProductTable.append_column('ProcessTypeC', False) ProductTable.append_column('ProcessTypeB', False) ProductTable.append_column('ProcessTypeA', False) ProductTable['ProcessTypeA'] = ProductTable.apply( lambda x: True if x == 'A' else False, 'Process Type') ProductTable['ProcessTypeC'] = ProductTable.apply( lambda x: True if x == 'C' else False, 'Process Type')
def from_df(self, df):
    """Build a wrapped table from a pandas DataFrame.

    The DataFrame is converted with ``Table.from_df`` and handed, together
    with a name obtained from ``find_name()``, to
    ``self.create_with_table_wrap``.

    Args:
        df: A pandas DataFrame.

    Returns:
        Whatever ``self.create_with_table_wrap(table, df_name)`` returns.
    """
    # a pandas df
    table = Table.from_df(df)
    # find_name() receives no arguments, so the name it yields does not come
    # from anything passed here.
    # NOTE(review): presumably it derives the caller's variable name for
    # `df` from the calling context -- confirm against find_name's definition.
    df_name = find_name()
    return self.create_with_table_wrap(table, df_name)