示例#1
0
    def eval_separate(self, node, args):
        """Run an R ``separate`` on the table named by ``args[0]``.

        ``args[0]`` is spliced into R source text as the table expression and
        ``args[1]`` is the 1-based position of the column to split.  The
        column is split into two fresh columns by default, or into three on
        ``"_"`` when the first cell of the column contains more than one
        underscore.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if the selected column does not have object dtype,
                or if evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        self.assertArg(node, args,
                index=1,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])

        tbl = robjects.r(args[0])
        col = tbl.columns[int(args[1]) - 1]
        ret_df_name = get_fresh_name()
        # Default form: split into two fresh columns using separate()'s own
        # default separator.
        _script = '{ret_df} <- separate({table}, {col1}, c("{TMP1}", "{TMP2}"))'.format(
                  ret_df=ret_df_name, table=args[0], col1=str(args[1]), TMP1=get_fresh_col(), TMP2=get_fresh_col())
        column = tbl[col]
        # ``np.object`` was only an alias of the builtin ``object`` and has
        # been removed from NumPy; compare against the builtin directly.
        if column.dtype == object:
            # Positional access: do not depend on the labels of the index.
            cell = column.iloc[0]
            if cell.count('_') > 1:
                _script = '{ret_df} <- separate({table}, {col1}, c("{TMP1}", "{TMP2}", "{TMP3}"), sep="_")'.format(
                  ret_df=ret_df_name, table=args[0], col1=str(args[1]), TMP1=get_fresh_col(), TMP2=get_fresh_col(), TMP3=get_fresh_col())
        else:
            raise GeneralError()

        try:
            robjects.r(_script)
        except Exception:
            # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
            # are not converted into GeneralError.
            logger.error('Error in interpreting separate...')
            raise GeneralError()
        return ret_df_name
示例#2
0
    def eval_mutate(self, node, args):
        """Run an R ``mutate`` combining two columns with a binary operator.

        ``args[0]`` is spliced into R source text as the table expression,
        ``args[1]`` is the operator text placed between the two operands, and
        ``args[2]`` / ``args[3]`` are the column positions used inside
        ``.[[ ]]``.  Both positions are checked against the column count and
        both columns must be reported as ``'numeric'`` by ``get_type``.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        # Range checks first, then type checks, for each operand column.
        self.assertArg(node, args,
                index=2,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])
        self.assertArg(node, args,
                index=3,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])
        self.assertArg(node, args,
                index=2,
                cond=lambda x: get_type(args[0], str(x)) == 'numeric',
                capture_indices=[0])
        self.assertArg(node, args,
                index=3,
                cond=lambda x: get_type(args[0], str(x)) == 'numeric',
                capture_indices=[0])

        ret_df_name = get_fresh_name()
        # NOTE: the new column is always named 'mutate_a' here rather than
        # being drawn from get_fresh_col().
        _script = '{ret_df} <- {table} %>% mutate({TMP}=.[[{col1}]] {op} .[[{col2}]])'.format(
                ret_df=ret_df_name, table=args[0], TMP='mutate_a', op=args[1], col1=str(args[2]), col2=str(args[3]))
        try:
            robjects.r(_script)
        except Exception:
            # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
            # are not converted into GeneralError.
            logger.error('Error in interpreting mutate...')
            raise GeneralError()
        return ret_df_name
示例#3
0
    def eval_summarise(self, node, args):
        """Run an R ``summarise`` with a single aggregate over one column.

        ``args[0]`` is spliced into R source text as the table expression,
        ``args[1]`` names the aggregate function and ``args[2]`` is the
        1-based position of the column to aggregate.  For the aggregate
        ``'n'`` the function is called with no argument and the column type is
        not checked; otherwise ``get_type`` must report the column as
        ``'integer'`` or ``'numeric'``.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if evaluating the generated R script fails.
        """
        source_tbl = robjects.r(args[0])
        col_names = source_tbl.columns.values
        n_cols = len(col_names)

        aggr_fun = str(args[1])
        is_count = aggr_fun == 'n'

        def within_range(x):
            return x <= n_cols

        def has_number_type(x):
            return get_type(args[0], str(x)) == 'integer' or get_type(args[0], str(x)) == 'numeric'

        self.assertArg(node, args,
                index=2,
                cond=within_range,
                capture_indices=[0])
        if not is_count:
            self.assertArg(node, args,
                    index=2,
                    cond=has_number_type,
                    capture_indices=[0])

        ret_df_name = get_fresh_name()
        if is_count:
            template = '{ret_df} <- {table} %>% summarise({TMP} = {aggr} ())'
            fields = dict(ret_df=ret_df_name, table=args[0],
                          TMP=get_fresh_col(), aggr=aggr_fun)
        else:
            # Resolve the column name before drawing a fresh column name.
            target_col = col_names[args[2] - 1]
            template = '{ret_df} <- {table} %>% summarise({TMP} = {aggr} (`{col}`))'
            fields = dict(ret_df=ret_df_name, table=args[0],
                          TMP=get_fresh_col(), aggr=aggr_fun, col=target_col)
        _script = template.format(**fields)

        try:
            robjects.r(_script)
        except Exception:
            logger.error('Error in interpreting summarise...')
            raise GeneralError()
        return ret_df_name
示例#4
0
    def eval_spread(self, node, args):
        """Run an R ``spread`` on the table named by ``args[0]``.

        ``args[0]`` is spliced into R source text as the table expression;
        ``args[1]`` and ``args[2]`` are passed to ``spread`` as its second and
        third arguments.  Both must not exceed the column count, and
        ``args[2]`` must be strictly greater than ``args[1]``.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        first_idx = int(args[1])
        self.assertArg(node, args,
                index=1,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])
        self.assertArg(node, args,
                index=2,
                cond=lambda x: x <= n_cols and x > first_idx,
                capture_indices=[0, 1])

        ret_df_name = get_fresh_name()
        _script = '{ret_df} <- spread({table}, {col1}, {col2})'.format(
                  ret_df=ret_df_name, table=args[0], col1=str(args[1]), col2=str(args[2]))

        try:
            robjects.r(_script)
        except Exception:
            # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
            # are not converted into GeneralError.
            logger.error('Error in interpreting spread...')
            raise GeneralError()
        return ret_df_name
示例#5
0
 def eval_inner_join(self, node, args):
     """Run an R ``inner_join`` of the tables named by ``args[0]`` and ``args[1]``.

     Both arguments are spliced into R source text as table expressions; no
     join columns are passed explicitly.

     Returns:
         The fresh R variable name the result was assigned to.

     Raises:
         GeneralError: if evaluating the generated R script fails.
     """
     ret_df_name = get_fresh_name()
     _script = '{ret_df} <- inner_join({t1}, {t2})'.format(
               ret_df=ret_df_name, t1=args[0], t2=args[1])
     try:
         robjects.r(_script)
     except Exception:
         # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
         # are not converted into GeneralError.
         logger.error('Error in interpreting innerjoin...')
         raise GeneralError()
     return ret_df_name
示例#6
0
    def eval_mutateCustom(self, node, args):
        """Run an R ``mutate`` comparing one column against a string constant.

        ``args[0]`` is spliced into R source text as the table expression,
        ``args[1]`` is the operator text, ``args[2]`` is the 1-based column
        position and ``args[3]`` is embedded as a double-quoted R string.
        Columns whose dtype is ``float64`` or ``int64`` are rejected.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if the column dtype is ``float64``/``int64``, or if
                evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        col_idx = args[2] - 1
        self.assertArg(node, args,
                index=2,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])

        input_tbl = robjects.r(args[0])
        # Positional lookup: ``dtypes`` is labelled by column name, so do not
        # rely on integer keys falling back to positions.
        col_type = input_tbl.dtypes.iloc[col_idx]
        if col_type == np.float64 or col_type == np.int64:
            raise GeneralError()

        ret_df_name = get_fresh_name()
        _script = '{ret_df} <- {table} %>% mutate({TMP}=(.[[{col1}]] {op} "{col2}"))'.format(
                  ret_df=ret_df_name, table=args[0], TMP=get_fresh_col(), op=args[1], col1=str(args[2]), col2=str(args[3]))
        try:
            robjects.r(_script)
        except Exception:
            # The message needs a placeholder for the extra argument;
            # without one the logging call itself fails to format.
            logger.error('Error in interpreting mutateCustom... %s', _script)
            raise GeneralError()
        return ret_df_name
示例#7
0
    def eval_cumsum(self, node, args):
        """Run an R ``mutate`` that adds a cumulative-sum column.

        ``args[0]`` is spliced into R source text as the table expression and
        ``args[1]`` is the column position used inside ``.[[ ]]``.  The new
        column is always named ``cumsum``.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        self.assertArg(node, args,
                index=1,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])

        ret_df_name = get_fresh_name()
        _script = '{ret_df} <- {table} %>% mutate({TMP}=cumsum(.[[{col1}]]))'.format(
                  ret_df=ret_df_name, table=args[0], TMP='cumsum', col1=str(args[1]))
        try:
            robjects.r(_script)
        except Exception:
            # The message needs a placeholder for the extra argument;
            # without one the logging call itself fails to format.
            logger.error('Error in interpreting cumsum... %s', _script)
            raise GeneralError()
        return ret_df_name
示例#8
0
    def eval_gather(self, node, args):
        """Run an R ``gather`` on the table named by ``args[0]``.

        ``args[0]`` is spliced into R source text as the table expression and
        ``args[1]`` is an iterable of column positions; the largest one must
        not exceed the column count.  The columns are rendered through
        ``get_collist`` and gathered into ``KEY`` / ``VALUE`` columns.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        self.assertArg(node, args,
                index=1,
                cond=lambda x: max(map(int, x)) <= n_cols,
                capture_indices=[0])

        ret_df_name = get_fresh_name()
        _script = '{ret_df} <- gather({table}, KEY, VALUE, {cols})'.format(
                   ret_df=ret_df_name, table=args[0], cols=get_collist(args[1]))
        try:
            robjects.r(_script)
        except Exception:
            # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
            # are not converted into GeneralError.
            logger.error('Error in interpreting gather...')
            raise GeneralError()
        return ret_df_name
示例#9
0
    def eval_filter(self, node, args):
        """Run an R ``filter`` comparing one column against a constant.

        ``args[0]`` is spliced into R source text as the table expression,
        ``args[1]`` is the comparison operator text, ``args[2]`` is the column
        position used inside ``.[[ ]]`` and ``args[3]`` is embedded as a
        double-quoted R string.  The column must not be reported as
        ``'factor'`` by ``get_type``.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        self.assertArg(node, args,
                index=2,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])
        self.assertArg(node, args,
                index=2,
                cond=lambda x: get_type(args[0], str(x)) != 'factor',
                capture_indices=[0])

        ret_df_name = get_fresh_name()
        _script = '{ret_df} <- {table} %>% filter(.[[{col}]] {op} "{const}")'.format(
                  ret_df=ret_df_name, table=args[0], op=args[1], col=str(args[2]), const=str(args[3]))
        try:
            robjects.r(_script)
        except Exception as e:
            # The message needs a placeholder for the extra argument;
            # without one the logging call itself fails to format.
            logger.error('Error in interpreting filter... %s', e)
            raise GeneralError()
        return ret_df_name
示例#10
0
    def eval_unite(self, node, args):
        """Run an R ``unite`` merging two columns into a fresh column.

        ``args[0]`` is spliced into R source text as the table expression;
        ``args[1]`` and ``args[2]`` are the two columns passed to ``unite``.
        Both must not exceed the column count, and ``args[2]`` must differ
        from ``args[1]``.

        Returns:
            The fresh R variable name the result was assigned to.

        Raises:
            GeneralError: if evaluating the generated R script fails.
        """
        n_cols = robjects.r('ncol(' + args[0] + ')')[0]
        first_idx = int(args[1])
        self.assertArg(node, args,
                index=1,
                cond=lambda x: x <= n_cols,
                capture_indices=[0])
        self.assertArg(node, args,
                index=2,
                cond=lambda x: x <= n_cols and x != first_idx,
                capture_indices=[0, 1])

        ret_df_name = get_fresh_name()
        _script = '{ret_df} <- unite({table}, {TMP}, {col1}, {col2})'.format(
                  ret_df=ret_df_name, table=args[0], TMP=get_fresh_col(), col1=str(args[1]), col2=str(args[2]))
        try:
            robjects.r(_script)
        except Exception:
            # Narrower than a bare ``except`` so KeyboardInterrupt/SystemExit
            # are not converted into GeneralError.
            logger.error('Error in interpreting unite...')
            raise GeneralError()
        return ret_df_name