def read_data(self, file_name, col_from, col_to):
    """
    Load an Excel workbook and cache its pieces on the instance.

    The workbook is read from '../Data/<file_name>.xlsx' (relative to the
    current working directory) and wrapped in a ``DataFrame``.

    :param file_name: workbook name without directory or '.xlsx' extension.
    :param col_from: index of the first numeric column, as passed to ``rx``.
    :param col_to: index of the last numeric column (inclusive).

    Attributes set on ``self``:
      * ``file_name``       - the name that was passed in.
      * ``raw_data``        - the full data frame, with row names reset to
                              1..N where N is the length of the metadata.
      * ``numeric_data``    - columns ``col_from``..``col_to`` of the frame.
      * ``metadata``        - ``r_base.factor`` of the metadata values.
      * ``metadata_list``   - the same metadata values as a Python list.
      * ``metabolite_list`` - the frame's column names minus the first one.

    Finishes by calling ``self.make_metabolite_dict()``.
    """
    workbook_path = "../Data/" + file_name + '.xlsx'
    frame = DataFrame(excel.read_excel(workbook_path))

    # Slice the numeric block first; the row names are reassigned afterwards.
    numeric_columns = IntVector(range(col_from, col_to + 1))
    numeric_frame = frame.rx(True, numeric_columns)

    # First element of the selection at the index just before the numeric
    # block (presumably the label column - confirm against the data files).
    labels = frame.rx(True, col_from - 1)[0]

    # Renumber the rows 1..N, N being the number of metadata entries.
    row_numbers = IntVector(range(1, len(labels) + 1))
    frame._set_rownames(row_numbers)

    self.file_name = file_name
    self.raw_data = frame
    self.numeric_data = numeric_frame
    self.metadata = r_base.factor(labels)
    self.metadata_list = list(labels)

    # Every column name except the first one.
    column_names = list(self.raw_data.names)
    self.metabolite_list = column_names[1:]

    self.make_metabolite_dict()
def r_c50(rdf: RDataFrame, target: str, predictors: List[str]) -> RListVector:
    """
    Fit a C5.0 classifier on columns of an R dataframe.

    The predictor columns are selected from `rdf` with `rx` and the target
    column with `rx2`; both are handed straight to `C50.C5_0`, whose result
    is returned unchanged.

    Note: The target column must be a factor vector.

    TODO: Training control and other parameters.
    """
    return C50.C5_0(
        rdf.rx(r_c(*predictors)),  # predictor columns
        rdf.rx2(r_c(target)),      # target column
    )
def r_dataframe_subset_one_element(rdf: RDataFrame, n: int) -> RDataFrame:
    """
    Build a one-column dataframe from the element of `rdf` at index `n`.

    The index is wrapped in a single-element integer vector, used to select
    from `rdf` via `rx`, and the selection is passed to R's `data.frame`.

    See:
    https://github.com/topepo/caret/issues/672
    https://stackoverflow.com/questions/40505994/how-to-apply-preprocessing-in-carets-train-to-only-some-variables
    https://stackoverflow.com/questions/31497479/how-to-select-columns-from-r-dataframe-in-rpy2-in-python
    """
    make_data_frame = r('data.frame')
    column_index = RIntVector([n])
    selection = rdf.rx(column_index)
    return make_data_frame(selection)
def r_formula(rdf: RDataFrame, target: str, predictors: List[str]) -> RFormula:
    """
    Build an R modelling formula tied to columns of the given dataframe.

    The formula text has the form 'target ~ var1 + var2 + etc...'.
    Every predictor name is additionally defined in the formula's
    environment, bound to `rdf.rx(predictor)`.

    With an empty `predictors` list the text is just 'target ~'; no
    validation is done here before it is handed to `RFormula`.
    """
    tokens = [target, '~']
    if predictors:
        # Yields "var1 + var2 + ..." with no trailing '+'.
        tokens.append(' + '.join(predictors))
    formula = RFormula(' '.join(tokens))

    # Make each predictor resolvable by name from within the formula.
    for name in predictors:
        formula.environment[name] = rdf.rx(name)

    return formula