示例#1
0
 def read_data(self, file_name, col_from, col_to):
     """Load an Excel sheet and cache its pieces on the instance.

     The workbook is read from ``"../Data/<file_name>.xlsx"`` through
     ``excel.read_excel`` and wrapped in ``DataFrame``.

     Args:
         file_name: Workbook name without directory or ``.xlsx`` suffix.
         col_from: First column of the numeric range (inclusive). The
             column at ``col_from - 1`` is taken as the metadata column.
         col_to: Last column of the numeric range (inclusive).

     Sets ``file_name``, ``raw_data``, ``numeric_data``, ``metadata``,
     ``metadata_list`` and ``metabolite_list`` on ``self`` and then calls
     ``self.make_metabolite_dict()``.
     """
     # NOTE(review): column positions are forwarded to ``rx`` untouched,
     # so they presumably follow R's 1-based indexing -- confirm.
     workbook_path = "../Data/" + file_name + '.xlsx'
     sheet = DataFrame(excel.read_excel(workbook_path))

     value_columns = IntVector(range(col_from, col_to + 1))
     measurements = sheet.rx(True, value_columns)
     labels = sheet.rx(True, col_from - 1)[0]

     # Renumber the rows 1..N only after both slices above were taken.
     row_numbers = IntVector(range(1, len(labels) + 1))
     sheet._set_rownames(row_numbers)

     self.file_name = file_name
     self.raw_data = sheet
     self.numeric_data = measurements
     self.metadata = r_base.factor(labels)
     self.metadata_list = list(labels)
     # Every column name except the first one.
     self.metabolite_list = list(self.raw_data.names)[1:]
     self.make_metabolite_dict()
示例#2
0
def r_c50(rdf: RDataFrame, target: str, predictors: List[str]) -> RListVector:
    """
    Fit a C5.0 classifier on selected columns of an R dataframe.

    The predictor columns are sliced out with ``rdf.rx`` and the target
    column is extracted with ``rdf.rx2``; both are handed positionally to
    ``C50.C5_0`` and its result is returned unchanged.

    Note: The target column must be a factor vector.
    TODO: Training control and other parameters.
    """
    return C50.C5_0(
        rdf.rx(r_c(*predictors)),  # predictors: sub-dataframe
        rdf.rx2(r_c(target)),      # target: the extracted column itself
    )
示例#3
0
def r_dataframe_subset_one_element(rdf: RDataFrame, n: int) -> RDataFrame:
    """
    Build a single-column dataframe from column ``n`` of ``rdf``.

    The index is wrapped in a one-element ``RIntVector`` and passed to
    ``rdf.rx``; the resulting slice is then run through R's ``data.frame``.

    See:
        https://github.com/topepo/caret/issues/672
        https://stackoverflow.com/questions/40505994/how-to-apply-preprocessing-in-carets-train-to-only-some-variables
        https://stackoverflow.com/questions/31497479/how-to-select-columns-from-r-dataframe-in-rpy2-in-python
    """
    make_data_frame = r('data.frame')
    column_index = RIntVector([n])
    single_column = rdf.rx(column_index)
    return make_data_frame(single_column)
示例#4
0
def r_formula(rdf: RDataFrame, target: str, predictors: List[str]) -> RFormula:
    """
    Creates an R modelling formula associated with the given dataframe.

    The produced string formula is 'target ~ var1 + var2 + etc...': the
    target is the left-hand side and the predictors are summed on the
    right-hand side. With an empty predictor list the string is 'target ~'.

    Each predictor name is additionally bound in the formula's environment
    to the ``rdf.rx(name)`` slice of the dataframe. The target itself is not
    bound in the environment.
    """
    formula_parts = [target, '~']
    if predictors:
        # str.join puts the separator only between terms, so there is no
        # trailing '+' to strip afterwards.
        formula_parts.append(' + '.join(predictors))

    formula = RFormula(' '.join(formula_parts))

    for predictor in predictors:
        # NOTE(review): ``rx`` is R's single-bracket subsetting, so this
        # binds a one-column dataframe rather than the bare column vector
        # (``rx2``). Confirm that is what consumers of the environment expect.
        formula.environment[predictor] = rdf.rx(predictor)

    return formula