def eval_separate(self, node, args):
    """Evaluate a ``separate`` node: split one column of an R table.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table (it is spliced
            verbatim into the generated R code) and ``args[1]`` is the 1-based
            index of the column to split.

    Returns:
        The fresh R variable name the separated table was assigned to.

    Raises:
        GeneralError: If the selected column does not have object dtype, or if
            R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]
    self.assertArg(node, args, index=1, cond=lambda x: x <= n_cols, capture_indices=[0])

    tbl = robjects.r(args[0])
    col = tbl.columns[int(args[1]) - 1]

    ret_df_name = get_fresh_name()
    # Default: split into two fresh columns using separate()'s default separator.
    _script = '{ret_df} <- separate({table}, {col1}, c("{TMP1}", "{TMP2}"))'.format(
        ret_df=ret_df_name, table=args[0], col1=str(args[1]),
        TMP1=get_fresh_col(), TMP2=get_fresh_col())

    # NOTE(review): the source reached review with its indentation collapsed;
    # the ``else: raise`` is read here as belonging to the dtype check (only
    # object-dtype columns can be separated). Confirm against the original.
    #
    # ``np.object`` was removed in NumPy 1.24; it was a plain alias of the
    # builtin ``object``, so comparing against ``object`` is equivalent.
    if tbl[col].dtype != object:
        raise GeneralError()

    # Use positional access for the first cell so this does not depend on the
    # frame's index labels.
    cell = tbl[col].iloc[0]
    if cell.count('_') > 1:
        # More than one underscore in the first cell: split into three parts.
        _script = '{ret_df} <- separate({table}, {col1}, c("{TMP1}", "{TMP2}", "{TMP3}"), sep="_")'.format(
            ret_df=ret_df_name, table=args[0], col1=str(args[1]),
            TMP1=get_fresh_col(), TMP2=get_fresh_col(), TMP3=get_fresh_col())

    try:
        robjects.r(_script)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer converted into GeneralError.
        logger.error('Error in interpreting separate...')
        raise GeneralError()
    return ret_df_name
def eval_mutate(self, node, args):
    """Evaluate a ``mutate`` node: add a column combining two existing ones.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table, ``args[1]``
            the binary operator text spliced between the operands, and
            ``args[2]`` / ``args[3]`` the 1-based indices of the operand
            columns.

    Returns:
        The fresh R variable name the mutated table was assigned to. The new
        column is always named ``mutate_a``.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]

    # Both operand indices must be within the table ...
    for operand_idx in (2, 3):
        self.assertArg(node, args, index=operand_idx,
                       cond=lambda x: x <= n_cols, capture_indices=[0])
    # ... and both must be reported as 'numeric' by get_type.
    for operand_idx in (2, 3):
        self.assertArg(node, args, index=operand_idx,
                       cond=lambda x: get_type(args[0], str(x)) == 'numeric',
                       capture_indices=[0])

    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- {table} %>% mutate({TMP}=.[[{col1}]] {op} .[[{col2}]])'.format(
        ret_df=ret_df_name, table=args[0], TMP='mutate_a', op=args[1],
        col1=str(args[2]), col2=str(args[3]))

    try:
        robjects.r(_script)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer converted into GeneralError.
        logger.error('Error in interpreting mutate...')
        raise GeneralError()
    return ret_df_name
def eval_summarise(self, node, args):
    """Evaluate a ``summarise`` node: aggregate a table into one value.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table, ``args[1]``
            the aggregate function name, and ``args[2]`` the 1-based index of
            the column to aggregate (not used in the script when the function
            is ``n``).

    Returns:
        The fresh R variable name the summarised table was assigned to.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    frame = robjects.r(args[0])
    column_names = frame.columns.values
    column_count = len(column_names)
    aggregate = str(args[1])
    counts_rows = aggregate == 'n'

    # The index bound is checked even for ``n``, which takes no column.
    self.assertArg(node, args, index=2,
                   cond=lambda idx: idx <= column_count, capture_indices=[0])
    if not counts_rows:
        # Any other aggregate needs a column get_type reports as a number.
        self.assertArg(node, args, index=2,
                       cond=lambda idx: get_type(args[0], str(idx)) in ('integer', 'numeric'),
                       capture_indices=[0])

    ret_df_name = get_fresh_name()
    if counts_rows:
        template = '{ret_df} <- {table} %>% summarise({TMP} = {aggr} ())'
        fields = dict(ret_df=ret_df_name, table=args[0],
                      TMP=get_fresh_col(), aggr=aggregate)
    else:
        target_col = column_names[args[2] - 1]
        template = '{ret_df} <- {table} %>% summarise({TMP} = {aggr} (`{col}`))'
        fields = dict(ret_df=ret_df_name, table=args[0],
                      TMP=get_fresh_col(), aggr=aggregate, col=target_col)
    _script = template.format(**fields)

    try:
        robjects.r(_script)
    except Exception:
        logger.error('Error in interpreting summarise...')
        raise GeneralError()
    return ret_df_name
def eval_spread(self, node, args):
    """Evaluate a ``spread`` node: pivot a key/value column pair to wide form.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table, ``args[1]``
            and ``args[2]`` are the 1-based indices passed to ``spread`` as
            its key and value columns. ``args[2]`` must be greater than
            ``args[1]``.

    Returns:
        The fresh R variable name the spread table was assigned to.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]
    first_idx = int(args[1])
    self.assertArg(node, args, index=1,
                   cond=lambda x: x <= n_cols, capture_indices=[0])
    self.assertArg(node, args, index=2,
                   cond=lambda x: x <= n_cols and x > first_idx,
                   capture_indices=[0, 1])

    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- spread({table}, {col1}, {col2})'.format(
        ret_df=ret_df_name, table=args[0], col1=str(args[1]), col2=str(args[2]))

    try:
        robjects.r(_script)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer converted into GeneralError.
        logger.error('Error in interpreting spread...')
        raise GeneralError()
    return ret_df_name
def eval_inner_join(self, node, args):
    """Evaluate an ``inner_join`` node on two R tables.

    Args:
        node: The node being interpreted (unused here).
        args: ``args[0]`` and ``args[1]`` are the R-side names of the two
            tables; no join columns are given in the generated call.

    Returns:
        The fresh R variable name the joined table was assigned to.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- inner_join({t1}, {t2})'.format(
        ret_df=ret_df_name, t1=args[0], t2=args[1])

    try:
        robjects.r(_script)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer converted into GeneralError.
        logger.error('Error in interpreting innerjoin...')
        raise GeneralError()
    return ret_df_name
def eval_mutateCustom(self, node, args):
    """Evaluate a ``mutateCustom`` node: compare a column with a constant.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table, ``args[1]``
            the operator text, ``args[2]`` the 1-based column index, and
            ``args[3]`` the constant, which is emitted as a quoted R string.

    Returns:
        The fresh R variable name the mutated table was assigned to.

    Raises:
        GeneralError: If the selected column has dtype float64 or int64, or
            if R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]
    col_idx = args[2] - 1
    self.assertArg(node, args, index=2,
                   cond=lambda x: x <= n_cols, capture_indices=[0])

    input_tbl = robjects.r(args[0])
    # ``dtypes`` is labelled by column name, so look the column up by
    # position explicitly instead of relying on integer-key fallback.
    col_type = input_tbl.dtypes.iloc[col_idx]
    if col_type == np.float64 or col_type == np.int64:
        # The constant is always emitted as a string; numeric columns are rejected.
        raise GeneralError()

    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- {table} %>% mutate({TMP}=(.[[{col1}]] {op} "{col2}"))'.format(
        ret_df=ret_df_name, table=args[0], TMP=get_fresh_col(), op=args[1],
        col1=str(args[2]), col2=str(args[3]))

    try:
        robjects.r(_script)
    except Exception:
        # The message needs a placeholder for the extra argument; without one
        # logging reports a formatting error instead of the message.
        logger.error('Error in interpreting mutateCustom... %s', _script)
        raise GeneralError()
    return ret_df_name
def eval_cumsum(self, node, args):
    """Evaluate a ``cumsum`` node: append the running sum of one column.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table and
            ``args[1]`` the 1-based index of the column to accumulate.

    Returns:
        The fresh R variable name the resulting table was assigned to. The
        new column is always named ``cumsum``.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]
    self.assertArg(node, args, index=1,
                   cond=lambda x: x <= n_cols, capture_indices=[0])

    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- {table} %>% mutate({TMP}=cumsum(.[[{col1}]]))'.format(
        ret_df=ret_df_name, table=args[0], TMP='cumsum', col1=str(args[1]))

    try:
        robjects.r(_script)
    except Exception:
        # The message needs a placeholder for the extra argument; without one
        # logging reports a formatting error instead of the message.
        logger.error('Error in interpreting cumsum... %s', _script)
        raise GeneralError()
    return ret_df_name
def eval_gather(self, node, args):
    """Evaluate a ``gather`` node: collapse columns into KEY/VALUE pairs.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table and
            ``args[1]`` an iterable of column indices (each convertible with
            ``int``) that is rendered for R by ``get_collist``.

    Returns:
        The fresh R variable name the gathered table was assigned to.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]
    # The largest requested index must not exceed the column count.
    self.assertArg(node, args, index=1,
                   cond=lambda x: max(map(int, x)) <= n_cols,
                   capture_indices=[0])

    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- gather({table}, KEY, VALUE, {cols})'.format(
        ret_df=ret_df_name, table=args[0], cols=get_collist(args[1]))

    try:
        robjects.r(_script)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer converted into GeneralError.
        logger.error('Error in interpreting gather...')
        raise GeneralError()
    return ret_df_name
def eval_filter(self, node, args):
    """Evaluate a ``filter`` node: keep rows where a column matches a constant.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table, ``args[1]``
            the comparison operator text, ``args[2]`` the 1-based column
            index, and ``args[3]`` the constant, which is emitted as a quoted
            R string.

    Returns:
        The fresh R variable name the filtered table was assigned to.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]
    self.assertArg(node, args, index=2,
                   cond=lambda x: x <= n_cols, capture_indices=[0])
    # Columns that get_type reports as 'factor' are not filtered on.
    self.assertArg(node, args, index=2,
                   cond=lambda x: get_type(args[0], str(x)) != 'factor',
                   capture_indices=[0])

    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- {table} %>% filter(.[[{col}]] {op} "{const}")'.format(
        ret_df=ret_df_name, table=args[0], op=args[1],
        col=str(args[2]), const=str(args[3]))

    try:
        robjects.r(_script)
    except Exception as e:
        # The message needs a placeholder for the exception; without one
        # logging reports a formatting error instead of the message.
        logger.error('Error in interpreting filter... %s', e)
        raise GeneralError()
    return ret_df_name
def eval_unite(self, node, args):
    """Evaluate a ``unite`` node: paste two columns into one fresh column.

    Args:
        node: The node being interpreted; only forwarded to ``assertArg``.
        args: ``args[0]`` is the R-side name of the input table; ``args[1]``
            and ``args[2]`` are the 1-based indices of the two columns to
            unite, which must differ.

    Returns:
        The fresh R variable name the united table was assigned to.

    Raises:
        GeneralError: If R fails to evaluate the generated script.
    """
    n_cols = robjects.r('ncol(' + args[0] + ')')[0]
    first_idx = int(args[1])
    self.assertArg(node, args, index=1,
                   cond=lambda x: x <= n_cols, capture_indices=[0])
    self.assertArg(node, args, index=2,
                   cond=lambda x: x <= n_cols and x != first_idx,
                   capture_indices=[0, 1])

    ret_df_name = get_fresh_name()
    _script = '{ret_df} <- unite({table}, {TMP}, {col1}, {col2})'.format(
        ret_df=ret_df_name, table=args[0], TMP=get_fresh_col(),
        col1=str(args[1]), col2=str(args[2]))

    try:
        robjects.r(_script)
    except Exception:
        # Narrowed from a bare ``except`` so KeyboardInterrupt / SystemExit
        # are no longer converted into GeneralError.
        logger.error('Error in interpreting unite...')
        raise GeneralError()
    return ret_df_name