Пример #1
0
def make_dataframe(data, xid, yid, factorid):
    """Flatten a two-level mapping into a three-column R data frame.

    ``data`` maps an outer key to an inner ``{step: runtime}`` mapping; every
    (outer key, step, runtime) triple becomes one row of the result.

    Args:
        data: mapping of outer key -> mapping of step -> runtime.
        xid: column name for the outer keys (built as an R integer vector).
        yid: column name for the runtimes (built as an R integer vector).
        factorid: column name for the steps (built as an R string vector).

    Returns:
        The ``ro.DataFrame`` holding the three columns.
    """
    rows = [(n, step, runtime)
            for n, per_step in data.items()
            for step, runtime in per_step.items()]
    columns = {
        xid: ro.IntVector([row[0] for row in rows]),
        factorid: ro.StrVector([row[1] for row in rows]),
        yid: ro.IntVector([row[2] for row in rows]),
    }
    return ro.DataFrame(columns)
Пример #2
0
    def calculate(self, msg):
        """Render a Pareto chart of the binned predicted times into a PNG.

        ``msg`` is a ``(save_path, all_predicted_time)`` pair. The nested
        ``all_predicted_time`` sequences are flattened and histogrammed into
        five bins; each bin label is then repeated once per observation in the
        bin and handed to ``paretoChart`` from the R package ``qualityTools``.
        The chart is written to ``<save_path>.png``.
        """
        save_path, all_predicted_time = msg
        flattened = list(itertools.chain.from_iterable(all_predicted_time))
        counts, edges, _ = plt.hist(flattened, bins=5)
        qt = importr("qualityTools")
        grdevices = importr('grDevices')

        # One (lower, upper) label per bin, with both edges divided by 100.
        bin_labels = [(lower / 100, upper / 100)
                      for lower, upper in zip(edges[:-1], edges[1:])]

        # Expand the histogram back into one label per observation.
        observations = []
        for label, count in zip(bin_labels, counts):
            observations.extend([str(label)] * int(count))

        grdevices.png(file="%s.png" % save_path, width=600, height=400)
        # NOTE(review): assumes save_path is a path object with a .basename()
        # method (a plain str has none) - confirm against the caller.
        chart_title = "predicted_time--" + save_path.basename()
        qt.paretoChart(robjects.StrVector(observations), main=chart_title)
        grdevices.dev_off()
Пример #3
0
def heatmap(Arguments):
    '''
    Build the heatmap for the loaded data set and write it to a PDF file.

    Loads (and optionally reorders) the data, extracts the feature/variate
    matrix to plot, builds the column and row side-colour matrices when they
    are requested in ``Arguments.Heatmap`` and renders everything into
    ``<Arguments.Filename>.pdf`` through R's ``pdf`` device.

    Arguments: options object. This function reads ``OrderBy``, ``Heatmap``
        (an iterable of option names, matched case-insensitively) and
        ``Filename``, and forwards the object to the project helpers.

    Returns None.
    '''

    Data = load_data(Arguments)
    if Arguments.OrderBy:
        Data = reorder_data(Data, Arguments)

    #This is specifically the part you want plotted on the heatmap, NOT the sidebars
    Features, Variates = get_heatmap_data(Data, Arguments)
    # Keep only the part of each feature name that precedes the first ":".
    Features = [Feature[:Feature.index(":")] for Feature in Features]

    # NOTE(review): the clustered matrix, its z-scores and the colour matrix
    # below are computed but not consumed by the plot further down. They are
    # kept as in the original - confirm whether they are still needed.
    ClusteredMatrix = ro.r["hierarchical_cluster_distance_matrix"](
        float_matrix(Data.Labels, Features, Variates, Arguments))
    Columns = list(ClusteredMatrix.colnames)
    Rows = list(ClusteredMatrix.rownames)
    Matrix = np.array(ClusteredMatrix)

    ZMatrix = z_matrix(Matrix)
    ColorPalette = list(ro.r["colorRampPalette"](ro.StrVector(
        ["blue", "white", "red"]))(100))
    ColorMatrix = [float2color(Vector, ColorPalette) for Vector in ZMatrix]
    # A stray exit() used to sit here; it terminated the interpreter before
    # any plot was produced and made the rest of the function unreachable.

    Heatmap = float_matrix(Data.Labels, Features, Variates, Arguments)

    Legend = False  #overwrite if ColSideColors is called

    RequestedOptions = [Option.lower() for Option in Arguments.Heatmap]

    if "colsidecolors" in RequestedOptions:
        ColorFeatures, ColSideMatrix, Legend = get_colsidematrix(
            Data, Arguments)
        ColSideMatrix = transpose_matrix(
            string_matrix(Data.Labels, ColorFeatures, ColSideMatrix,
                          Arguments))
    else:
        ColSideMatrix = False

    if "rowsidecolors" in RequestedOptions:
        RowSideMatrix = get_rowsidematrix(Features, Arguments)
    else:
        RowSideMatrix = False

    ro.r['pdf'](Arguments.Filename + '.pdf')
    if Legend:
        ro.r['par'](mar=ro.IntVector([1, 1, 1, 1]))
    # "cex.axis" is not a valid Python identifier, hence the dict unpacking.
    ro.r['par'](**{'cex.axis': 0.8})
    get_heatmap(Heatmap,
                ColSideMatrix=ColSideMatrix,
                RowSideMatrix=RowSideMatrix,
                Legend=Legend)
    ro.r['dev.off']()
Пример #4
0
def get_mail_corpus(nlon_cleaning=False):
    """Load the mail corpus and group cleaned message bodies by sender.

    Reads ``data/mailcorpus.json`` and, for every entry whose
    ``type_of_recipient`` is ``'From'``, keeps only the reply part of the
    message body (as extracted by ``EmailReplyParser``).

    Args:
        nlon_cleaning: when true, HTML markup is stripped with BeautifulSoup
            and every line that the NLoN model labels ``'Not'`` is dropped.

    Returns:
        A dict mapping each ``email_address`` to the list of its non-empty
        message texts, in corpus order.
    """
    if nlon_cleaning:
        nlon, nlon_model = training_nlon()

    # Path to the mail corpus
    corpus_file = 'data/mailcorpus.json'
    with open(corpus_file) as data_file:
        corpus = json.load(data_file)

    total = len(corpus)
    print('Reading and cleaning emails corpus. Number of emails: ' + str(total))
    mails_by_address = {}
    n = 0
    # Text cleaning
    for d in corpus:
        if d['type_of_recipient'] != 'From':
            continue

        # The corpus stores line breaks as the two characters "\n".
        res = EmailReplyParser.read(d['message_body'].replace('\\n', '\n'))
        text = res.reply
        n += 1

        if nlon_cleaning:
            try:
                clean_message_body = BS4(text, 'html.parser').text
            except Exception as e:
                # Best effort: fall back to the raw text when parsing fails.
                print('Error with BS4 on text:\n\n%s\n\n' % text, str(e))
                clean_message_body = text.strip()
            # Filter the HTML-stripped text. Previously the raw text was
            # split here, which silently discarded the BS4 result.
            kept_lines = [
                line for line in clean_message_body.splitlines()
                if nlon.NLoNPredict(
                    nlon_model, robjects.StrVector([line]))[0] != 'Not'
            ]
            text = '\n'.join(kept_lines)

        if text != '':
            mails_by_address.setdefault(d['email_address'], []).append(text)

        # Progress report, once per 50 processed mails (it used to be
        # repeated for every skipped non-'From' entry as well).
        if n % 50 == 0:
            print(str(n) + '/' + str(total))

    print('Mails retrieved: ' + str(n))
    print('Email addresses: ' + str(len(mails_by_address)))
    return mails_by_address
Пример #5
0
def make_rvector(col, ct=COLTYPE.FLOAT):
    """Make and return an R vector for data in `col` of COLTYPE ct.

    Args:
      col: sequence holding the column values.
      ct: one of the COLTYPE members handled below.

    Returns:
      robjects.Vector
    Raises:
      TypeError if the type is unknown
      TypeError if it is COLTYPE.DATE but the first value is neither a
        datetime.datetime nor a float
    """
    if ct == COLTYPE.INT:
        return robjects.IntVector(col)
    if ct == COLTYPE.FLOAT:
        return robjects.FloatVector(col)
    if ct == COLTYPE.STR:
        # Use I() from R.base library to avoid conversion
        # into a factor. Usually though a factor is what you want.
        return base.I(robjects.StrVector(col))
    if ct == COLTYPE.BOOL:
        return robjects.BoolVector(col)
    if ct == COLTYPE.FACTOR:
        # conversion will happen automatically
        return robjects.StrVector(col)
    if ct == COLTYPE.DATE:
        if len(col) == 0:
            # Nothing to inspect: an empty date column is an empty vector,
            # as it is for every other column type.
            return robjects.FloatVector([])
        # The first value decides how the whole column is interpreted.
        field = col[0]
        if isinstance(field, datetime.datetime):
            # Materialise the values: map() is lazy on Python 3.
            tcol = [datetime_to_sec(value) for value in col]
        elif isinstance(field, float):
            tcol = col
        else:
            # The old message referenced a column index and name that this
            # function never receives, so it could not be formatted.
            raise TypeError("Bad date type '%s'. "
                            "Expected datetime.datetime or float." % (
                                type(field),))
        return robjects.FloatVector(tcol)
    raise TypeError("Unknown type '%s'." % (ct,))
Пример #6
0
    def get_dps(self, entries):
        """ Wrapper to retrieve dotproducts from DPS database (neuronlistfh)
        as neuronslist.

        Parameters
        ----------
        entries :   int | str | list thereof
                    Neurons to extract from DPS database. Can be:

                    1. int: e.g. ``entries=5`` for top 5 hits
                    2. list of ints: e.g. ``entries=[2,5]`` for hits 2 and 5
                    3. string: e.g. ``entries='THMARCM-198F_seg1'`` for this
                       neuron
                    4. list of strings:
                       e.g. ``['THMARCM-198F_seg1', 'npfMARCM-F000003_seg002']``
                       for multiple neurons by their gene name

                    Numpy integers are accepted wherever an int is. For a
                    list or array, the first element decides how the whole
                    sequence is interpreted.

        Returns
        -------
        neuronlist of dotproduct neurons, or ``None`` (after logging an
        error) when ``entries`` is empty or of an unsupported type.
        """

        if isinstance(entries, str):
            return self.db.rx(entries)

        if isinstance(entries, (int, np.integer)):
            # NOTE(review): DataFrame.ix was removed in pandas 1.0. The right
            # replacement (.loc vs .iloc) depends on the index of
            # self.results, which is not visible here - confirm and migrate.
            top_hits = self.results.ix[:entries - 1].gene_name.tolist()
            return self.db.rx(robjects.StrVector(top_hits))

        if isinstance(entries, (list, np.ndarray)) and len(entries) > 0:
            first = entries[0]
            # Elements of an integer ndarray are numpy integers, not int.
            if isinstance(first, (int, np.integer)):
                selected = self.results.ix[entries].gene_name.tolist()
                return self.db.rx(robjects.StrVector(selected))
            if isinstance(first, str):
                return self.db.rx(robjects.StrVector(entries))

        # Reached for unsupported types, empty sequences and sequences of
        # unsupported elements. The latter used to return None silently.
        logger.error('Unable to intepret entries provided. See '
                     'help(NBLASTresults.plot3d) for details.')
        return None
Пример #7
0
 def creatruleset(self, rules):
     """Build the R object ``ruleset`` in R's global environment from ``rules``.

     ``rules`` maps a rule string of the form ``"<lhs>=><rhs>"`` to a mapping
     with the keys ``sup``, ``conf`` and ``lift``. For every rule the runs of
     lowercase letters on each side are taken as item names and published to
     R as ``l`` and ``r``, and the quality measures as the data frame ``q``.
     The first rule creates ``ruleset``; each later rule is appended with
     ``c()``. Nothing is returned.
     """
     find_items = re.compile("[a-z]+").findall
     ruleset_started = False
     for rule, quality in rules.items():
         sides = rule.split('=>')
         robjects.globalenv["l"] = robjects.StrVector(find_items(sides[0]))
         robjects.globalenv["r"] = robjects.StrVector(find_items(sides[1]))
         measures = {
             'support': int(quality['sup']),
             'confidence': float(quality['conf']),
             'lift': float(quality['lift']),
         }
         robjects.globalenv["q"] = robjects.DataFrame(measures)
         # Build one-row lhs/rhs item matrices over the union of both sides.
         robjects.r('''
             lm<-matrix(1,ncol=length(l))
             dimnames(lm)<-list(NULL,l)
             x<-matrix(0,ncol=length(r))
             dimnames(x)<-list(NULL,r)
             lhs<-as(cbind(lm,x),"itemMatrix")
             rm<-matrix(1,ncol=length(r))
             dimnames(rm)<-list(NULL,r)
             x<-matrix(0,ncol=length(l))
             dimnames(x)<-list(NULL,l)
             rhs<-as(cbind(x,rm),"itemMatrix")
         ''')
         if not ruleset_started:
             robjects.r('ruleset<-new("rules",lhs=lhs,rhs=rhs,quality=q)')
             ruleset_started = True
         else:
             robjects.r('''
             rule<-new("rules",lhs=lhs,rhs=rhs,quality=q)
             ruleset<-c(ruleset,rule)
             ''')
Пример #8
0
def mplotHis(moptions):
    """Plot the rank-group fractions per case size into a PDF through R.

    Reads ``FileID``, ``outFolder`` and ``CaseSizes`` from ``moptions``; the
    figure is written to ``<outFolder>/hist2_<FileID>.pdf`` by the R function
    ``Hist_sim_plot`` loaded from ``Rscript/Hist_sim_plot.R``.

    Side effect: ``moptions['CaseSizes']`` is sorted in place.
    """
    (perclist, rankgrouplist, rankperclist,
     split_points, myRankStr) = group_rank(moptions)

    figname = moptions["FileID"]
    mresfolder = moptions['outFolder']

    # Loaded for their effect on the R session; the handles are not needed.
    importr('ggplot2')
    importr('gridExtra')

    split_vector = robjects.IntVector(split_points)
    rank_levels = robjects.StrVector(myRankStr)

    case_sizes = moptions['CaseSizes']
    case_sizes.sort()
    size_levels = robjects.IntVector(case_sizes)

    mixed_perc = robjects.FactorVector(robjects.IntVector(perclist),
                                       levels=size_levels,
                                       labels=size_levels)
    percentile = robjects.FactorVector(robjects.StrVector(rankgrouplist),
                                       levels=rank_levels,
                                       labels=rank_levels)
    mdfperc = robjects.DataFrame({
        "MixedPerc": mixed_perc,
        "Percentile": percentile,
        "Fraction": robjects.FloatVector(rankperclist),
    })

    robjects.r(resource_string(__name__, 'Rscript/Hist_sim_plot.R'))

    # The page width scales with the number of case sizes.
    pdf_path = mresfolder + '/hist2_' + figname + '.pdf'
    pdf_width = "%.0f" % (len(case_sizes) * 0.8)
    robjects.r('pdf("' + pdf_path + '", width=' + pdf_width +
               ', height=4, onefile = TRUE)')

    robjects.globalenv['Hist_sim_plot'](mdfperc, split_vector, rank_levels)

    robjects.r('dev.off()')
Пример #9
0
    def plot_module_eigengene(self, module):
        '''
        barchart illustrating module eigengene

        Bars are red for positive and blue for non-positive values, labelled
        with the sample names, on a y-axis that is symmetric around zero.
        '''
        eigengene = self.eigengenes.get_module_eigengene(module)

        params = {}
        params['height'] = base().as_numeric(eigengene)

        # Symmetric limits wide enough for the largest absolute value.
        limit = max(abs(base().max(eigengene)[0]), abs(base().min(eigengene)[0]))
        ylim = [-1 * limit, limit]
        # FloatVector, not IntVector: the limits are taken from the eigengene
        # values themselves and must not be coerced to integers.
        params['ylim'] = ro.FloatVector(ylim)

        colors = ["red" if e[0] > 0 else "blue" for e in eigengene]
        params['col'] = ro.StrVector(colors)

        params['border'] = ro.NA_Logical
        params['las'] = 2
        params['names.arg'] = ro.StrVector(self.eigengenes.samples())
        params['cex.names'] = 0.6
        params['main'] = "Eigengene: " + module
        manager = RManager(eigengene, params)
        manager.barchart()
Пример #10
0
    def module_eigengenes(self, membership):
        '''
        Call ``moduleEigengenes`` through the ``wgcna()`` accessor.

        The expression data is the transposed data converted to an R data
        frame, ``membership`` is passed as the ``colors`` string vector and
        the soft power is ``self.params['power']`` when set, otherwise 6.
        Returns whatever ``moduleEigengenes`` returns.
        '''
        if 'power' in self.params:
            soft_power = self.params['power']
        else:
            soft_power = 6

        params = {
            'softPower': soft_power,
            'expr': base().as_data_frame(self.transpose_data()),
            'colors': ro.StrVector(membership),
        }
        return wgcna().moduleEigengenes(**params)
Пример #11
0
    def _filter_and_values_to_RList(d):
        """Split the filter dictionary `d` into two parallel R objects.

        Returns a ``(StrVector, ListVector)`` pair: the filter names, and the
        values tagged with those same names. A TaggedList is used instead of
        passing the dict straight to ListVector so that the positional order
        of filters and values is guaranteed to match.
        """
        f = robjects.StrVector(list(d))
        tagged_values = rpy2.rlike.container.TaggedList(d.values(),
                                                        tags=list(d))
        v = robjects.ListVector(tagged_values)
        return f, v
Пример #12
0
def read_target_group_of_interest(targetspath):
    """
    Publish the first column of a tab-separated file to R as ``targetset``.

    Every line contributes its first tab-delimited field, stripped of
    surrounding whitespace (assumed to be a transcript/gene identifier). The
    resulting string vector is stored in R's global environment.
    """
    with open(targetspath) as fh:
        genes = [line.partition("\t")[0].strip() for line in fh]

    robjects.globalenv["targetset"] = robjects.StrVector(genes)
Пример #13
0
    def _run(self, cdata):
        """Call the R function ``run`` with ``cdata`` and return its string.

        ``cdata`` is serialised to JSON and parsed back on the R side with
        ``rjson::fromJSON``; the result is passed as the argument list to
        ``do.call('run', ...)`` after ``self.r_file`` has been handed to
        ``source_r_file``. The printed R result must match ``R_STRING_RE``;
        its ``str`` group is returned.

        Raises:
            ValueError: when the printed result does not match.
        """
        payload = robjects.StrVector([json.dumps(cdata, indent=4)])
        r_arguments = importr("rjson").fromJSON(payload)
        source_r_file(self.r_file)

        printed = str(r['do.call']('run', r_arguments))
        # Turn the escape sequences of R's printed representation back into
        # the characters they stand for.
        result = printed.encode("utf-8").decode("unicode_escape")

        match = R_STRING_RE.match(result)
        if match:
            return match.groupdict()["str"]
        err_msg = "Expected a string as return value from R script. Got: {}"
        raise ValueError(err_msg.format(result))
Пример #14
0
    def catch_dataset_ids(self, data_frame, id_label):
        """Separate the id column of an R data frame from its other columns.

        Returns a two-element list: the data frame restricted to every column
        whose name differs from ``id_label``, and the values of the id column
        with all double quotes removed.
        """
        # presumably the position of id_label among the column names;
        # verify against get_item_pos
        id_pos = get_item_pos(r['names'](data_frame), id_label)

        ids_idx = []
        for item in r['as.vector'](data_frame[id_pos]):
            ids_idx.append(item.replace('"', ''))

        other_columns = []
        for name in r['names'](data_frame):
            if name != id_label:
                other_columns.append(name)

        data_frame_x = data_frame.rx(True, robjects.StrVector(other_columns))
        return [data_frame_x, ids_idx]
Пример #15
0
    def genResampleData(infile, outfile):
        '''
        Resample the data n-times with replacement - generates
        n flat files which are then propagated at later stages.
        Files are generally small though.

        Time points and replicates are parsed from the "-"-separated track
        names of TIME and REPLICATE; the condition and reference GTF are
        parsed from the "-"-separated ``infile`` name. ``outfile`` is touched
        once the command has run.
        '''

        time_agg = list(TIME.__dict__['track2groups'].keys())
        # De-duplicate first and sort afterwards: sorting before set() was a
        # no-op because a set does not keep the order.
        time_points = sorted(set(int(str(x).split("-")[1]) for x in time_agg))
        rep_agg = list(REPLICATE.__dict__['track2groups'].keys())
        replicates = [str(x).split("-")[2] for x in rep_agg]
        ref_gtf = str(infile).split("-")[1]

        # Drop the directory prefix. str.strip("deseq.dir/") removes any of
        # those *characters* from both ends and so mangled condition names
        # that start or end with one of them.
        condition = str(infile).split("-")[0]
        dir_prefix = "deseq.dir/"
        if condition.startswith(dir_prefix):
            condition = condition[len(dir_prefix):]

        time_points = ",".join([str(i) for i in time_points])
        replicates = ",".join(replicates)

        # NOTE(review): P.run() takes no arguments here, so it presumably
        # picks up `statement` and the interpolated names from this frame -
        # do not rename these locals without confirming.
        statement = '''
        cgat data2resamples
        --log=%(outfile)s.log
        --time=%(time_points)s
        --replicates=%(replicates)s
        --condition=%(condition)s
        --resamples=%(resampling_resample)s
        --input-gtf=%(ref_gtf)s
        --output-file-directory=clustering.dir
        --seed=%(resampling_seed)s
        %(infile)s
        '''
        P.run()

        P.touch(outfile)
Пример #16
0
    def computeGOSimilarity(self, funcData):
        """Compute pairwise GO similarity between drugs and return it as XML.

        ``funcData`` is an XML document whose ``row`` elements each carry a
        ``drug`` and a ``GO`` child. The GO terms are grouped per drug and
        every ordered pair of drugs (including a drug with itself) is scored
        with the R function ``mgoSim`` (``ont='MF'``, ``organism="human"``,
        ``measure="Wang"``).

        Returns:
            An XML string with one ``<row>`` per pair holding ``<drug1>``,
            ``<drug2>`` and ``<sim>``. Drug names are XML-escaped.
        """
        from xml.sax.saxutils import escape

        print(funcData)
        root = etree.fromstring(funcData)

        # Group the GO terms by drug, keeping first-seen drug order.
        drugsToGo = {}
        for row in root.xpath('//row'):
            d = row.xpath('./drug/text()')[0].strip()
            s = row.xpath('./GO/text()')[0].strip()
            drugsToGo.setdefault(d, []).append(s)

        compute = robjects.r['mgoSim']
        # Build each R vector once instead of once per pair.
        goVectors = {d: robjects.StrVector(terms)
                     for d, terms in drugsToGo.items()}

        parts = ['<?xml version="1.0"?>', '<data>']
        for d1 in drugsToGo:
            for d2 in drugsToGo:
                # Debug output retained from the original implementation.
                print(drugsToGo[d1])
                print(drugsToGo[d2])
                x = compute(goVectors[d1], goVectors[d2],
                            ont='MF', organism="human", measure="Wang")
                # The names come from parsed (entity-decoded) XML text, so
                # they must be escaped again to keep the output well-formed.
                parts.append("<row>")
                parts.append("<drug1>" + escape(d1) + "</drug1>")
                parts.append("<drug2>" + escape(d2) + "</drug2>")
                parts.append("<sim>" + str(x[0]) + "</sim>")
                parts.append("</row>")

        parts.append("</data>")
        return "".join(parts)
Пример #17
0
    def calculate_degree_modularity(self, targetModule):
        '''
        Store the in degree (kIn), out degree (kOut) and density
        of the target module in ``self.modules[targetModule]``.

        The density is kIn divided by size * (size - 1) / 2.
        '''
        members = self.__get_module_members(targetModule)
        degree = rsnippets.degree(self.adjacency, ro.StrVector(members),
                                  self.args.edgeWeight)

        module = self.modules[targetModule]
        module['kIn'] = int(degree.rx2('kIn')[0])
        module['kOut'] = int(degree.rx2('kOut')[0])

        size = float(module['size'])
        pair_count = size * (size - 1.0) / 2.0
        module['density'] = float(module['kIn']) / pair_count
Пример #18
0
def build_dataframe(monitor: Monitor) -> dplyr.DataFrame:
    """Stack the monitor's three series into one long-format data frame.

    For each of ``susceptible``, ``incubating`` and ``sick`` (in that order)
    the result holds one row per entry of ``monitor.day``: the series name
    in ``what``, the day in ``day`` and the series value in ``count``.
    """
    series_names = (
        'susceptible',
        'incubating',
        'sick',
    )
    labels = []
    days = []
    counts = []
    for name in series_names:
        for day in monitor.day:
            labels.append(name)
            days.append(day)
        counts.extend(getattr(monitor, name))

    return dplyr.DataFrame({
        'what': ro.StrVector(labels),
        'day': ro.IntVector(days),
        'count': ro.IntVector(counts),
    })
Пример #19
0
def Morris(repet, factors, binf, bsup):
    """Simplified import of R's morris function.

    Args:
        repet: passed to ``morris`` as ``r``.
        factors: factor names, passed as an R string vector.
        binf: passed to ``morris`` as ``binf`` after conversion to an array.
        bsup: passed to ``morris`` as ``bsup`` after conversion to an array.

    Returns:
        ``(m, pdict)``: the R object returned by ``morris`` and a dict
        mapping each column name of its ``X`` element to a list of values.
    """
    factors = robj.StrVector(factors)
    binf = numpy.array(binf)
    bsup = numpy.array(bsup)
    # Design list: type 'oat', 5 levels, grid jump 3.
    d = r.list('oat', 5, 3)
    d = r['names<-'](d, ['type', 'levels', 'grid.jump'])
    m = r.morris(factors=factors, r=repet, design=d, binf=binf, bsup=bsup)
    param = r['data.frame'](m.r["X"][0])
    pdict = dict(
        (k, numpy.array(param.r[k][0]).tolist()) for k in r.colnames(param))

    # The length of any one column is the number of model runs. Written so
    # that it works on Python 2 and 3 alike (no print statement, no indexing
    # into dict.keys()).
    n_runs = len(next(iter(pdict.values()))) if pdict else 0
    print('Computational cost Morris SA is %d' % n_runs)
    return m, pdict
Пример #20
0
    def run(self):
        """Source the gnn_vegan R script and call its ``write_vegan``.

        The configured variables are passed as an R string vector; all other
        arguments are forwarded from the instance attributes unchanged.
        """
        from rpy2 import robjects

        vegan_script = 'L:/resources/code/models/pre_process/gnn_vegan.r'
        robjects.r.source(vegan_script)

        write_vegan_args = (
            self.method,
            self.spp_file,
            self.env_file,
            robjects.StrVector(self.variables),
            self.id_field,
            self.species_transform,
            self.species_downweighting,
            self.ord_file,
        )
        robjects.r.write_vegan(*write_vegan_args)
Пример #21
0
def barPlot(dict_,
            keysInOrder=None,
            printCounts=True,
            ylim=None,
            *args,
            **kwdargs):
    """Draw a bar plot with R's ``barplot``.

    Args:
        dict_: mapping of name -> value; each value is the height of a bar.
            Pass a collections.OrderedDict() to convey the order of the groups.
        keysInOrder: optional explicit ordering of the keys of ``dict_``;
            when empty or None the mapping's own key order is used.
        printCounts: when true, write each height (two significant digits)
            above its bar.
        ylim: optional y-axis limits. By default they run from the smallest
            to the largest height, with 10% added to the top when the counts
            are printed.

    Additional positional and keyword arguments are passed to r.barplot().

    Returns:
        The value returned by ``r.barplot``.
    """
    keysInOrder = keysInOrder or dict_.keys()

    heights = ro.FloatVector([dict_[key] for key in keysInOrder])
    kwdargs["names.arg"] = ro.StrVector(keysInOrder)

    if ylim is None:
        upper = max(heights) * 1.1 if printCounts else max(heights)
        ylim = [min(heights), upper]

    x = r.barplot(heights, ylim=ro.FloatVector(ylim), *args, **kwdargs)
    if not printCounts:
        return x

    labels = ["{:.2g}".format(height) for height in heights]
    # pos=3 places each label above its coordinate.
    r.text(x, ro.FloatVector(heights), ro.StrVector(labels), pos=3)
    return x
Пример #22
0
def r_ttest(py_data1, py_data2, mu=0, alt="two.sided"):
    """
    Run R's paired ``t.test`` on two datasets and return the p-value.

    Defined for use within apply_ttest function. Both datasets are first
    passed through ``equal_data`` so that their dimensions match.
    """
    py_data1, py_data2 = equal_data(py_data1, py_data2)

    t_test = R.r['t.test']
    data1 = R.FloatVector(py_data1)
    data2 = R.FloatVector(py_data2)

    # Argument names containing a dot cannot be written as Python keywords.
    r_kwargs = {
        'mu': mu,
        'paired': True,
        'na.action': R.StrVector(("na.exclude",)),
        'alternative': R.StrVector((alt,)),
    }
    test = t_test(data1, data2, **r_kwargs)

    # Look the p-value up by name in the returned R list.
    p_value_slot = test.names.index('p.value')
    return test[p_value_slot][0]
Пример #23
0
def deseq_results(dds, condition1, condition2, out_dir):
    """Write the DESeq2 results for one contrast to a tab-separated file.

    The contrast is ``Group``: ``condition1`` against ``condition2``. The
    table is saved as ``<condition1>-<condition2>-results.txt`` in
    ``out_dir``, indexed by the row names of the R result.
    """
    as_r_data_frame = robjects.r('function(x) data.frame(x)')
    contrast = robjects.StrVector(['Group', condition1, condition2])
    r_results = as_r_data_frame(deseq.results(dds, contrast=contrast))

    # Capture the row names before converting, then restore them as index.
    gene_ids = r_results.rownames
    results = pandas2ri.ri2py_dataframe(r_results)
    results.index = gene_ids

    out_path = os.path.join(out_dir,
                            f'{condition1}-{condition2}-results.txt')
    results.to_csv(out_path, sep='\t')
Пример #24
0
def Morris(repeat, factors, binf, bsup):
    """Simplified import of R's morris function.

    Returns ``(m, pdict)``: the R object returned by ``morris`` and a dict
    mapping each column name of its ``X`` element to a list of values.
    """
    factor_names = robj.StrVector(factors)
    lower = numpy.array(binf)
    upper = numpy.array(bsup)

    # Design list: type 'oat', 5 levels, grid jump 3.
    design = r['names<-'](r.list('oat', 5, 3),
                          ['type', 'levels', 'grid.jump'])
    m = r.morris(factors=factor_names, r=repeat, design=design,
                 binf=lower, bsup=upper)

    # Elements are extracted with R's `$` operator.
    param = r['data.frame'](r['$'](m, 'X'))
    pdict = {}
    for k in param.names:
        pdict[k] = numpy.array(r['$'](param, k)).tolist()
    return m, pdict
Пример #25
0
def train_elastic_net_wrapper(features_data_,
                              features_,
                              d_,
                              data_annotation_,
                              x_w=None,
                              prune=True,
                              nested_folds=10):
    """Convert the inputs to R objects and call ``train_elastic_net``.

    The feature values become an R matrix with one column per feature id
    and the individuals as row names; the response is the column of ``d_``
    named by ``data_annotation_.gene_id``. ``x_w``, when given, is passed as
    ``penalty_factor``. ``prune`` is accepted but not used here.

    Returns:
        The first two elements of the R result, each converted with
        ``pandas2ri.ri2py``.
    """
    feature_ids = features_.id.values
    values = numpy.array([features_data_[v] for v in feature_ids])

    row_names = robjects.StrVector(d_["individual"])
    col_names = robjects.StrVector(feature_ids)
    dimnames = robjects.ListVector([(1, row_names), (2, col_names)])

    x = robjects.r["matrix"](robjects.FloatVector(values.flatten()),
                             ncol=features_.shape[0],
                             dimnames=dimnames)
    y = robjects.FloatVector(d_[data_annotation_.gene_id])

    # py2ri chokes on None, so the weights are only passed when present.
    fit_kwargs = {'n_train_test_folds': robjects.FloatVector([nested_folds])}
    if x_w is not None:
        fit_kwargs['penalty_factor'] = x_w
    res = train_elastic_net(y, x, **fit_kwargs)

    return pandas2ri.ri2py(res[0]), pandas2ri.ri2py(res[1])
Пример #26
0
    def selectRiverBarPlots(self, filename):
        """Draw one river bar plot per selected parameter.

        For every entry of ``self.selectParm`` the per-river counts from
        ``countAllRiverParm`` are turned into a two-column R data frame
        (``River``, ``Count``) and passed to ``self.barPlot`` with the file
        name ``<filename>_<parameter>_bar_river``. Does nothing when
        ``self.selectParm`` is None.
        """
        if self.selectParm is None:
            return

        for i in self.selectParm:
            riverDict = self.countAllRiverParm(i)

            # A fresh dict per plot, and real lists rather than dict views
            # for the rpy2 vector constructors.
            vectorDict = {
                'River': robjects.StrVector(list(riverDict.keys())),
                'Count': robjects.IntVector(list(riverDict.values())),
            }

            newFilename = filename + "_" + i + "_bar_river"
            self.barPlot(robjects.DataFrame(vectorDict), newFilename,
                         "River", "Count")
Пример #27
0
 def _generate_table(self, declarations):
     """Build an R data frame with one row per declaration.

     Each declaration is mapped to a field dict by ``_map_fields``; the
     fields are gathered column by column because R data frames are
     column-major.
     """
     columns = {}
     for decl in declarations:
         for field, value in self._map_fields(decl).items():
             columns.setdefault(field, []).append(value)

     # Every column needs an explicitly typed vector, otherwise R will treat
     # the data in a bad way.
     typed_columns = {}
     for field, values in columns.items():
         if field in STR_COLUMNS:
             typed_columns[field] = robjects.StrVector(values)
         else:
             typed_columns[field] = robjects.FloatVector(values)
     return robjects.DataFrame(typed_columns)
Пример #28
0
    def convert_to_r_dataframe(self, df, strings_as_factors=False):
        """
        Convert a pandas DataFrame to a R data.frame.

        Parameters
        ----------
        df: The DataFrame being converted
        strings_as_factors: Whether to turn strings into R factors (default: False)

        Returns
        -------
        A R data.frame whose row names are the string form of ``df.index``
        and whose column names are ``df.columns``.

        """

        import rpy2.rlike.container as rlc

        # FIXME: This doesn't handle MultiIndex
        columns = rlc.OrdDict()

        for column in df:
            series = df[column]
            dtype_type = series.dtype.type

            if dtype_type == np.datetime64:
                columns[column] = com.convert_to_r_posixct(series)
                continue

            # Replace missing values with the NA marker of this dtype.
            cleaned = []
            for item in series:
                if pd.notnull(item):
                    cleaned.append(item)
                else:
                    cleaned.append(com.NA_TYPES[dtype_type])

            r_vector = com.VECTOR_TYPES[dtype_type](cleaned)
            if not strings_as_factors:
                # Wrap in R's I() so the column is taken as-is.
                r_vector = ro.baseenv.get("I")(r_vector)
            columns[column] = r_vector

        r_dataframe = ro.DataFrame(columns)

        r_dataframe.rownames = ro.StrVector(list(df.index))
        r_dataframe.colnames = list(df.columns)

        return r_dataframe
Пример #29
0
 def asVector(val: Iterable,
              names: Optional[Iterable] = None) -> robj.Vector:
     """Convert ``val`` to the R vector type that matches its numpy dtype.

     Signed and unsigned integers become an IntVector, floats a FloatVector,
     booleans a BoolVector, byte and unicode strings a StrVector. When
     ``names`` is available it is attached as the vector's names.

     Raises:
         TypeError: for any other dtype kind.
     """
     val = np.asarray(ll(val))
     kind = val.dtype.kind

     constructors = {}
     for kinds, constructor in (('iu', robj.IntVector),
                                ('f', robj.FloatVector),
                                ('b', robj.BoolVector),
                                ('SU', robj.StrVector)):
         for code in kinds:
             constructors[code] = constructor

     build = constructors.get(kind)
     vect = build(val) if build is not None else None
     if missing(vect):
         raise TypeError(f'unknown vector type [{kind}]')

     if available(names):
         vect.names = robj.StrVector(np.asarray(ll(names), dtype=str))
     return vect
Пример #30
0
def r_matrix(np_matrix, col_names=None):
    """Convert a two-dimensional numpy array to an R matrix.

    Args:
        np_matrix: the array to convert; must have exactly two dimensions.
        col_names: optional column names. Defaults to
            ``['x_1', 'x_2', ...]``, one per column.

    Returns:
        The R matrix with its column names set.

    Raises:
        ValueError: when ``np_matrix`` is not two-dimensional.
    """
    dims = np_matrix.ndim
    if dims != 2:
        raise ValueError('Input input dimension is %s and MUST be 2' % dims)

    n_row, n_col = np_matrix.shape
    if col_names is None:
        col_names = ['x_%s' % position for position in range(1, n_col + 1)]

    r_mat = robjects.r.matrix(np_matrix, nrow=n_row, ncol=n_col)
    r_mat.colnames = robjects.StrVector(col_names)
    return r_mat