def createGraphSeries(cohort):
    """Write the cohort statistics graphs to ``<cohort>.stats.pdf``.

    Reads the tab-separated file ``<cohort>.seq.out`` for the scatter plots
    and one ``<cohort>.seq.<hmm>.stats`` file per HMM name for the
    accuracy, ROC and association-coefficient plots.

    Args:
        cohort: Cohort name. Used as the file-name prefix and, upper-cased,
            in every graph title.

    Returns:
        None.
    """
    hm = ['bothR5', 'bothDX', 'esR5', 'esDX', 'trofileR5', 'trofileDX', 'mixedR5DX']
    # (x column, y column) pairs, in the order the scatter plots are drawn.
    scatter_pairs = [
        ('bothR5', 'bothDX'),
        ('esR5', 'esDX'),
        ('trofileR5', 'trofileDX'),
        ('bothR5', 'esR5'),
        ('trofileR5', 'esR5'),
        ('bothDX', 'esDX'),
        ('trofileDX', 'esDX'),
        ('bothDX', 'mixedR5DX'),
    ]
    stat_data = DataFrame.from_csvfile(cohort + '.seq.out', sep="\t")

    # Begin writing graphs to pdf
    grdevices.pdf(file=cohort + ".stats.pdf", width=7, height=7)
    try:
        # graph 1 scatter plots
        for x_col, y_col in scatter_pairs:
            scatterPlot(stat_data, x_col, y_col,
                        cohort.upper() + ' cohort\n ' + x_col + '/' + y_col + ' correlation')

        for hmm in hm:
            hmm_scores = DataFrame.from_csvfile(cohort + '.seq.' + hmm + '.stats', sep="\t")
            # graph 3
            plot(hmm_scores.rx2("Cutoff"), hmm_scores.rx2("Accuracy"), type='o',
                 main='Accuracy vs. Cutoff ' + cohort.upper() + ' Cohort\n' + hmm + '.hmm',
                 xlab='Cutoff', ylab='Accuracy')
            # graph 4
            # NOTE(review): the y axis plots the "TPP" column but is labelled
            # '% False Neg' -- confirm the label is intended.
            plot(hmm_scores.rx2("FPP"), hmm_scores.rx2("TPP"), type='o',
                 xlim=base.c(0, 1), ylim=base.c(0, 1),
                 main=cohort.upper() + ' Cohort ROC\n' + hmm + '.hmm',
                 xlab='% False Pos', ylab='% False Neg')
            # graph 5
            plot(hmm_scores.rx2("Cutoff"), hmm_scores.rx2("Phi"), type='o',
                 xlim=base.c(50, 100), ylim=base.c(0, 0.75),
                 main=cohort.upper() + ' Cohort Association Coeff\n' + hmm + '.hmm',
                 xlab='Cutoff', ylab='Association Coefficient')
    finally:
        # close pdf file even when reading or plotting fails, so the
        # graphics device is not left open
        grdevices.dev_off()
    return
def r_c50(rdf: RDataFrame, target: str, predictors: List[str]) -> RListVector:
    """
    Fit a C5.0 classifier on selected columns of an R dataframe.

    The predictor columns are selected with ``rx`` and the target column is
    extracted with ``rx2``; both are handed to ``C50.C5_0``.

    Note:
        The target column must be a factor vector.

    TODO:
        Training control and other parameters.
    """
    features = rdf.rx(r_c(*predictors))
    labels = rdf.rx2(r_c(target))
    model = C50.C5_0(features, labels)
    return model
def _extract_mapping(self, cimpl_obj, cis_sites):
    """Build a mapping from insertion id to the set of CIS ids it belongs to.

    Args:
        cimpl_obj: Object passed through to ``getCISMatrix`` of the wrapped
            cimpl package.
        cis_sites: CIS sites, converted to a frame with ``CisSite.to_frame``.

    Returns:
        dict mapping each insertion id to a ``set`` of CIS ids.
    """
    # Convert CIS sites to frame format.
    cis_frame = CisSite.to_frame(cis_sites)

    # Convert to R representation for cimpl.
    chr_with_prefix = add_prefix(cis_frame['chromosome'], prefix='chr')

    r_base = importr('base')
    cis_frame_r = RDataFrame({
        'id': r_base.I(StrVector(cis_frame['id'])),
        'chromosome': r_base.I(StrVector(chr_with_prefix)),
        'scale': StrVector(cis_frame['scale']),
        'start': IntVector(cis_frame['start']),
        'end': IntVector(cis_frame['end'])
    })
    cis_frame_r.rownames = StrVector(cis_frame['id'])

    # Retrieve cis matrix from cimpl.
    cis_matrix_r = self._cimpl.getCISMatrix(cimpl_obj, cis_frame_r)
    cis_matrix = dataframe_to_pandas(cis_matrix_r)

    # Extract scale information from cis matrix.
    scale_cols = [c for c in cis_matrix.columns if c.startswith('X')]
    cis_matrix_scales = cis_matrix[['id'] + scale_cols]

    # Melt matrix into long format.
    mapping = pd.melt(cis_matrix_scales, id_vars=['id'])
    mapping = mapping[['id', 'value']]
    mapping = mapping.rename(columns={
        'id': 'insertion_id',
        'value': 'cis_id'
    })

    # Drop empty rows first (these are empty cells in the matrix), then
    # split the cis_id column into individual entries for cells holding
    # multiple ids. `.loc` replaces the `.ix` indexer removed from pandas.
    mapping = mapping.loc[mapping['cis_id'] != '']
    mapping = expand_column(mapping, col='cis_id', delimiter='|')

    mapping_dict = {
        ins_id: set(grp['cis_id'])
        for ins_id, grp in mapping.groupby('insertion_id')
    }

    return mapping_dict
def clusterPop(admix, indexes):
    """Reorder the given line positions of an admixture file by clustering.

    The lines of ``admix`` whose zero-based position is in ``indexes`` are
    copied to a scratch file, loaded as an R data frame and clustered with
    R's ``hclust(dist(...))``. The positions are returned in the order
    reported by the clustering.

    Args:
        admix: Path of the space-separated admixture file.
        indexes: Zero-based line positions to cluster.

    Returns:
        list of the selected line positions in clustering order. A single
        index is returned as-is without reading the file.
    """
    if len(indexes) == 1:
        return [indexes[0]]

    wanted = set(indexes)  # O(1) membership test per input line
    subf = "adm.%d" % (os.getpid(),)
    oldOrder = []
    try:
        with open(admix) as f, open(subf, "w") as w:
            for fPos, l in enumerate(f):
                if fPos in wanted:
                    w.write(l)
                    oldOrder.append(fPos)
        df = DataFrame.from_csvfile(subf, sep=" ", header=False)
        d = robjects.r.hclust(robjects.r.dist(df))
    finally:
        # The scratch file is only needed until R has loaded it; remove it
        # even when reading or clustering fails.
        if os.path.exists(subf):
            os.remove(subf)

    order = next(value for name, value in d.items() if name == "order")
    # R's order is 1-based; map it back to the original line positions.
    return [oldOrder[pos - 1] for pos in order]
def clusterAll(admix, myPop):
    """Order population names by clustering their mean admixture values.

    Each line of ``admix`` holds space-separated float values; line ``i``
    belongs to population ``myPop[i]``. The per-population mean of every
    column is written to a scratch file, loaded as an R data frame and
    clustered with ``hclust(dist(df, method="max"), method="complete")``.

    Args:
        admix: Path of the space-separated admixture file.
        myPop: Sequence giving the population name of every line.

    Returns:
        list of population names in clustering order.
    """
    with open(admix) as f:
        ls = f.readlines()
    numK = len(ls[0].split(" "))

    # population name -> (line count, per-column running sums)
    pops = {}
    for i, line in enumerate(ls):
        vals = [float(x) for x in line.rstrip().split(" ")]
        cnt, accu = pops.get(myPop[i], (0, [0.0] * numK))
        cnt += 1
        for i2 in range(numK):
            accu[i2] += vals[i2]
        pops[myPop[i]] = cnt, accu

    popNames = sorted(pops)
    subf = "accu.%d" % (os.getpid(),)
    try:
        with open(subf, "w") as w:
            for popName in popNames:
                cnt, sums = pops[popName]
                means = [x / cnt for x in sums]
                w.write(" ".join([str(x) for x in means]))
                w.write("\n")
        df = DataFrame.from_csvfile(subf, sep=" ", header=False)
        d = robjects.r.hclust(robjects.r.dist(df, method="max"),
                              method="complete")
    finally:
        # Remove the scratch file even when loading or clustering fails.
        if os.path.exists(subf):
            os.remove(subf)

    order = next(value for name, value in d.items() if name == "order")
    # R's order is 1-based; translate it into population names.
    return [popNames[pos - 1] for pos in order]
def createModel(data):
    """Forecast 60 steps per row with R's auto.arima and save them to CSV.

    Every row of ``data`` is converted to an R time series (start=2,
    frequency=15), fitted with ``auto.arima`` and forecast 60 steps ahead.
    The forecasts are collected in a pandas frame with columns
    ``Ret_121_pred`` .. ``Ret_180_pred`` and written to ``tmp1.csv``.

    Args:
        data: R data frame; iterated row by row with ``DataFrame.iter_row``.
    """
    print('Create_model')
    importr('forecast')
    robj.r('''
        arima_data <- function(data){
            best_arima = auto.arima(data,trace=F,stepwise=T)
            forecast = forecast.Arima(best_arima,h=60,level=c(99.5))
            output = forecast$mean
            return (output)
        }
        ''')
    features_names = ["Ret_%d_pred" % (i) for i in range(121, 181)]
    predict = pd.DataFrame(columns=features_names)

    # Look the R functions up once instead of on every row.
    as_numeric = robj.r('as.numeric')
    make_ts = robj.r('ts')
    arima_data = robj.r('arima_data')

    rows = []
    for i, tmp in enumerate(DataFrame.iter_row(data), 1):
        if i % 100 == 0:
            print(i)  # progress indicator
        tmp = as_numeric(tmp)
        #tmp = robj.r('ts')(tmp,start=2)
        tmp = make_ts(tmp, start=2, frequency=15)
        forecast = arima_data(tmp)
        forecast = np.array(as_numeric(forecast))
        predict2 = pd.DataFrame(forecast).T
        predict2.columns = features_names
        rows.append(predict2)

    # Concatenate once; concatenating inside the loop copies the growing
    # frame on every iteration.
    predict = pd.concat([predict] + rows, axis=0)
    print(predict)
    predict.to_csv("tmp1.csv")
def loadfiles(self):
    """
    Load files into R environment

    Every path in ``self.filelist`` is read with the matching separator
    from ``self.seplist``. Each resulting data frame is appended to
    ``self.mylist`` and the file's base name (without extension) to
    ``self.listname``. ``self.param`` is updated with the effective
    read parameters. An ``IOError`` raised while reading is added to
    ``self.error`` and the remaining files are still processed.
    """
    # Set the default parameter for reading from csv
    param = {'sep': '\t', 'header': True, 'as_is': True, 'row.names': ri.NULL}

    # User-supplied values in self.param override the defaults, unless
    # they are None.
    for p in param:
        if p in self.param:
            if self.param[p] is not None:
                param[p] = self.param[p]
    self.param.update(param)

    # Read all the files in the R-environment
    for f, s in zip(self.filelist, self.seplist):
        try:
            tmpdata = DataFrame.from_csvfile(f, sep=str(s),
                                             header=param['header'],
                                             as_is=param['as_is'],
                                             row_names=param['row.names'])
        except IOError as e:
            self.error += e
        else:
            self.mylist.append(tmpdata)
            fdir, fname = os.path.split(os.path.splitext(f)[0])
            self.listname.append(fname)
def hae_ennakkoilmoitukset(tiedosto):
    """Load a comma-separated file as an R data frame.

    The path is tried as given first; when it does not exist, it is looked
    up relative to ``DATADIR``.

    Args:
        tiedosto: File path, absolute or relative to ``DATADIR``.

    Returns:
        The data frame read with ``header=True`` and ``as_is=True``.

    Raises:
        IOError: If the file exists in neither location.
    """
    polku = tiedosto
    if not os.path.exists(polku):
        # Fall back to the data directory.
        polku = os.path.join(DATADIR, tiedosto)
        if not os.path.exists(polku):
            raise IOError("Annettua tiedostoa %s ei löydy" % polku)
    return DataFrame.from_csvfile(polku, header=True, sep=',', as_is=True)
def __init__(self, formula_str, df, factors=None, resid_formula_str=None, **lmer_opts):
    """Prepare the R data frame and formulas for a model fit.

    Args:
        formula_str: Model formula; the text left of ``~`` names the
            predicted variable.
        df: Record array, or a pandas DataFrame (converted with
            ``to_records``).
        factors: Optional dict mapping column name to factor levels. A
            value of ``None`` means "let R choose the levels". The dict
            passed in is copied, not modified.
        resid_formula_str: Optional formula for the residual model.
        **lmer_opts: Extra options stored for the later fit.
    """
    # get the pred_var
    pred_var = formula_str.split('~')[0].strip()

    # convert df to a recarray if it's a dataframe
    if isinstance(df, pd.DataFrame):
        df = df.to_records()

    # add column if necessary
    if pred_var not in df.dtype.names:
        # must add it
        df = append_fields(df, pred_var, [0.0] * len(df), usemask=False)

    # make factor list if necessary; work on a copy so the caller's dict
    # is not mutated by the defaults filled in below
    factors = {} if factors is None else dict(factors)

    # add in missingarg for any potential factor not provided
    for k in df.dtype.names:
        if isinstance(df[k][0], str) and k not in factors:
            factors[k] = MissingArg

    for f in factors:
        if factors[f] is None:
            factors[f] = MissingArg
        # checking for both types of R Vectors for rpy2 variations
        elif (not isinstance(factors[f], Vector) and
              not factors[f] == MissingArg):
            factors[f] = Vector(factors[f])

    # convert the recarray to a DataFrame (releveling if desired)
    self._rdf = DataFrame({
        k: (FactorVector(df[k], levels=factors[k])
            if (k in factors) or isinstance(df[k][0], str) else df[k])
        for k in df.dtype.names
    })

    # get the column index
    self._col_ind = list(self._rdf.colnames).index(pred_var)

    # make a formula obj
    self._rformula = Formula(formula_str)

    # make one for resid if necessary
    if resid_formula_str:
        self._rformula_resid = Formula(resid_formula_str)
    else:
        self._rformula_resid = None

    # save the args
    self._lmer_opts = lmer_opts

    # model is null to start
    self._ms = None
def analyzeR():
    """Load the input table and select the columns from 'ic50_val' onwards.

    Reads the tab-separated file named by the module-level ``inputfile``.
    """
    mypatchData = DataFrame.from_csvfile(inputfile,header=True, sep = "\t")
    # Zero-based position of the 'ic50_val' column among the column names.
    ici = list(mypatchData.colnames).index('ic50_val')
    # NOTE(review): starti is computed but never used below.
    starti = ici + 1
    # NOTE(review): this is the row count, yet it is used as a column bound
    # on the next line -- confirm whether ncol was intended.
    lasti = mypatchData.nrow
    # NOTE(review): the statement below is R-style indexing and is not valid
    # Python (SyntaxError). It presumably needs an rpy2 `rx(True, ...)`
    # column selection; confirm the intended index range before rewriting.
    myData = mypatchData[,ici:lasti]
def py2ri_pandasdataframe(obj):
    """Convert a pandas DataFrame into an R data frame.

    Object-dtype columns become ``StrVector``; every other column goes
    through ``conversion.py2ri``. Column order is preserved.

    Args:
        obj: pandas DataFrame to convert.

    Returns:
        The resulting R ``DataFrame``.
    """
    od = OrderedDict()
    # `items()` replaces `iteritems()`, which was removed in pandas 2.0.
    for name, values in obj.items():
        if values.dtype.kind == 'O':
            od[name] = StrVector(values)
        else:
            od[name] = conversion.py2ri(values)
    return DataFrame(od)
def py2rpy_pandasdataframe(obj):
    """Convert a pandas DataFrame into an R data frame.

    Every column goes through ``conversion.py2rpy``. When that fails, a
    warning is issued and the column is converted to a ``StrVector``
    instead. Column order is preserved.

    Args:
        obj: pandas DataFrame to convert.

    Returns:
        The resulting R ``DataFrame``.
    """
    od = OrderedDict()
    # `items()` replaces `iteritems()`, which was removed in pandas 2.0.
    for name, values in obj.items():
        try:
            od[name] = conversion.py2rpy(values)
        except Exception as e:
            # Deliberate best-effort: fall back to strings, but tell the user.
            warnings.warn('Error while trying to convert '
                          'the column "%s". Fall back to string conversion. '
                          'The error is: %s' % (name, str(e)))
            od[name] = StrVector(values)
    return DataFrame(od)
def r_dataframe_subset_one_element(rdf: RDataFrame, n: int) -> RDataFrame:
    """
    Build a one-column dataframe from column index ``n`` of ``rdf``.

    The column is selected with ``rx`` and the selection is passed to R's
    ``data.frame``.

    See:
        https://github.com/topepo/caret/issues/672
        https://stackoverflow.com/questions/40505994/how-to-apply-preprocessing-in-carets-train-to-only-some-variables
        https://stackoverflow.com/questions/31497479/how-to-select-columns-from-r-dataframe-in-rpy2-in-python
    """
    to_data_frame = r('data.frame')
    column_index = RIntVector([n])
    selection = rdf.rx(column_index)
    return to_data_frame(selection)
def createGraphSeries(cohort,t,sfiles,ofile):
    """Write the selected cohort statistics graphs to ``<cohort>.stats.pdf``.

    Args:
        cohort: Cohort name. Used as the PDF file-name prefix and,
            upper-cased, in every graph title.
        t: Which graphs to draw: ``'out'`` for the scatter plots,
            ``'stats'`` for the per-HMM plots, ``'both'`` for all of them.
        sfiles: Mapping of HMM name to the path of its tab-separated stats
            file. HMM names missing from the mapping are skipped.
        ofile: Path of the tab-separated file used for the scatter plots.

    Returns:
        None.
    """
    hm = ['bothR5', 'bothDX', 'esR5', 'esDX', 'trofileR5', 'trofileDX', 'mixedR5DX']
    # (x column, y column) pairs, in the order the scatter plots are drawn.
    scatter_pairs = [
        ('bothR5', 'bothDX'),
        ('esR5', 'esDX'),
        ('trofileR5', 'trofileDX'),
        ('bothR5', 'esR5'),
        ('trofileR5', 'esR5'),
        ('bothDX', 'esDX'),
        ('trofileDX', 'esDX'),
        ('bothDX', 'mixedR5DX'),
    ]

    # Begin writing graphs to pdf
    grdevices.pdf(file=cohort + ".stats.pdf", width=7, height=7)
    try:
        if t == 'both' or t == 'out':
            stat_data = DataFrame.from_csvfile(ofile, sep="\t")
            # graph 1 scatter plots
            for x_col, y_col in scatter_pairs:
                scatterPlot(stat_data, x_col, y_col,
                            cohort.upper() + ' cohort\n ' + x_col + '/' + y_col + ' correlation')

        if t == 'both' or t == 'stats':
            for hmm in hm:
                # `in` replaces dict.has_key, which exists only on Python 2.
                if hmm in sfiles:
                    f = sfiles[hmm]
                    hmm_scores = DataFrame.from_csvfile(f, sep="\t")
                    # graph 3
                    plot(hmm_scores.rx2("Cutoff"), hmm_scores.rx2("Accuracy"), type='o',
                         main='Accuracy vs. Cutoff ' + cohort.upper() + ' Cohort\n' + hmm + '.hmm',
                         xlab='Cutoff', ylab='Accuracy')
                    # graph 4
                    # NOTE(review): the y axis plots the "TPP" column but is
                    # labelled '% False Neg' -- confirm the label is intended.
                    plot(hmm_scores.rx2("FPP"), hmm_scores.rx2("TPP"), type='o',
                         xlim=base.c(0, 1), ylim=base.c(0, 1),
                         main=cohort.upper() + ' Cohort ROC\n' + hmm + '.hmm',
                         xlab='% False Pos', ylab='% False Neg')
                    # graph 5
                    plot(hmm_scores.rx2("Cutoff"), hmm_scores.rx2("Phi"), type='o',
                         xlim=base.c(50, 100), ylim=base.c(0, 0.75),
                         main=cohort.upper() + ' Cohort Association Coeff\n' + hmm + '.hmm',
                         xlab='Cutoff', ylab='Association Coefficient')
                    # graph 6
                    plot(hmm_scores.rx2("Cutoff"), hmm_scores.rx2("Specificity"), type='o',
                         main='Specificity vs. Cutoff ' + cohort.upper() + ' Cohort\n' + hmm + '.hmm',
                         xlab='Cutoff', ylab='Specificity')
                    # graph 7
                    plot(hmm_scores.rx2("Cutoff"), hmm_scores.rx2("Sensitivity"), type='o',
                         main='Sensitivity vs. Cutoff ' + cohort.upper() + ' Cohort\n' + hmm + '.hmm',
                         xlab='Cutoff', ylab='Sensitivity')
                    # graph 8
                    plot(hmm_scores.rx2("Sensitivity"), hmm_scores.rx2("Specificity"), type='o',
                         main='Sensitivity vs. Specificity ' + cohort.upper() + ' Cohort\n' + hmm + '.hmm',
                         xlab='Sensitivity', ylab='Specificity')
    finally:
        # close pdf file even when reading or plotting fails, so the
        # graphics device is not left open
        grdevices.dev_off()
    return
def r_dataframe_column_to_factor_by_name(rdf: RDataFrame, name: str) -> RDataFrame:
    """
    Replace the column called ``name`` with a factor vector.

    Only the first column whose name matches is converted.

    Note:
        This modifies the passed dataframe and returns that same object.

    Raises:
        ValueError: If no column is called ``name``.
    """
    column_names = r_dataframe_column_names(rdf)
    # Position of the first matching column, or None when there is none.
    position = next(
        (idx for idx, column in enumerate(column_names) if column == name),
        None,
    )
    if position is None:
        raise ValueError('Given name is not in R dataframe')
    rdf[position] = RFactorVector(RFactorVector(rdf.rx2(name)))
    return rdf
def as_dataframe(table):
    '''returns a DataFrame instance. Requires counts to be
    [[col1, col2, col3, ..]]

    Each column becomes a StrVector when its first value is a string and
    an IntVector otherwise. Columns are named after ``table.header``.
    '''
    data = {}
    # Transpose the row-oriented table into columns and pair with headers.
    for name, values in zip(table.header, zip(*table.tolist())):
        # isinstance also recognises str subclasses, which the previous
        # exact `type(...) in (str, str)` comparison did not.
        klass = StrVector if isinstance(values[0], str) else IntVector
        data[name] = klass(values)
    return DataFrame(data)
def createModel(data, param):
    """Forecast with auto.arima, derive return predictions and score them.

    Every row of ``data`` is turned into an R time series (start=2,
    frequency taken from ``param``), fitted with ``auto.arima`` and
    forecast 60 steps ahead. The forecasts are joined with the
    module-level ``raw_data``, the two "plus" returns are predicted, and
    the loss is computed from the ``error`` column after ``WMAE_model``
    has run. Several intermediate CSV files are written to the working
    directory.

    Args:
        data: R data frame; iterated row by row with ``DataFrame.iter_row``.
        param: dict with a ``'frequency'`` entry for the time series.

    Returns:
        dict with the keys ``'loss'`` and ``'status'``.
    """
    print('Create_model')
    importr('forecast')
    # NOTE(review): the auto.arima call below contains an empty argument
    # (", ,") -- confirm it is harmless in R.
    robj.r('''
        arima_data <- function(data){
            best_arima = auto.arima(data,trace=F,stepwise=T,max.P=8,max.Q=8,max.p=10,max.q=10,max.order=10,
                                ,start.p=1,start.q=0,start.P=1,start.Q=0,seasonal=T,ic=('bic'))
            forecast = forecast.Arima(best_arima,h=60,level=c(99.5),stationary=T)
            output = forecast$mean
            return (output)
        }
        ''')
    print('the frequency is %d' % (param['frequency']))
    features_names = ["Ret_%d_pred" % (i) for i in range(121, 181)]
    predict = pd.DataFrame(columns=features_names)

    # Look the R functions up once instead of on every row.
    as_numeric = robj.r('as.numeric')
    make_ts = robj.r('ts')
    arima_data = robj.r('arima_data')

    rows = []
    for i, tmp in enumerate(DataFrame.iter_row(data), 1):
        if i % 100 == 0:
            print(i)  # progress indicator
        tmp = as_numeric(tmp)
        #tmp = robj.r('ts')(tmp,start=2)
        tmp = make_ts(tmp, start=2, frequency=param['frequency'])
        forecast = arima_data(tmp)
        forecast = np.array(as_numeric(forecast))
        predict2 = pd.DataFrame(forecast).T
        predict2.columns = features_names
        rows.append(predict2)

    # Concatenate once; concatenating inside the loop copies the growing
    # frame on every iteration.
    # NOTE(review): every forecast row keeps index 0, so the index-based
    # join below pairs all of them with the same raw_data row -- confirm
    # whether ignore_index=True was intended.
    predict = pd.concat([predict] + rows, axis=0)

    #this way I will get forecast_data , train_data
    raw_data.to_csv("raw.csv")
    predict.to_csv("predict.csv")
    data = predict.join(raw_data, rsuffix='_2')
    data.to_csv("data.raw.csv")
    data['Ret_120_price'] = price_train['Ret_120_price']
    transform_format(data)
    Ret_1 = data['Ret_MinusTwo']
    Ret_2 = 1 - (1.0 / ((1.0 / (1 - data['Ret_MinusOne'])) * data['Ret_120_price'] * data['Ret_180_price']))
    data['Ret_PlusOne_pred'] = 0.5 * Ret_1 + 0.5 * Ret_2
    data['Ret_PlusTwo_pred'] = 0.5 * Ret_2 + 0.5 * data['Ret_PlusOne_pred']
    data.to_csv("data.csv")
    WMAE_model(data)
    mase = np.sum(data['error']) / (40000 * 62)
    print('loss:%f' % (mase))
    return {'loss': mase, 'status': STATUS_OK}
def loadfiles(self):
    """
    Load files into R environment

    Every path in ``self.filelist`` is read with the matching separator
    from ``self.seplist`` and converted to an R matrix. Diagonal entries
    of at least 1e-8 are reset to 0; when that happens the file path is
    added to ``self.e``. The matrix is appended to ``self.mylist`` and the
    file's base name (without extension) to ``self.listname``. An
    ``IOError`` or ``RRuntimeError`` raised while processing a file is
    added to ``self.error`` and the remaining files are still processed.
    """
    asmatrix = robjects.r['as.matrix']

    ## Set the default parameter for reading from csv
    param = {'header': True, 'as_is': True, 'row.names': ri.RNULLArg}

    ## User-supplied values in self.param override the defaults, unless
    ## they are None.
    for p in param:
        if p in self.param:
            if self.param[p] is not None:
                param[p] = self.param[p]

    for f, s in zip(self.filelist, self.seplist):
        try:
            dataf = DataFrame.from_csvfile(f, sep=str(s),
                                           header=param['header'],
                                           as_is=param['as_is'],
                                           row_names=param['row.names'])
            dataf = asmatrix(dataf)

            # Should be the diagonal set to 0?
            # Do it for all the inputs, just to be sure
            # NOTE(review): only positive diagonal entries are reset here;
            # confirm that negative ones should be left untouched.
            zcount = 0
            for i in range(dataf.ncol):
                # R indices are 1-based.
                if (dataf.rx(i+1, i+1)[0] - 0.0 >= 1e-8):
                    zcount += 1
                    dataf.rx[i+1, i+1] = 0
            if zcount:
                self.e += f

            self.mylist.append(dataf)
            fdir, fname = os.path.split(os.path.splitext(f)[0])
            self.listname.append(fname)
        except IOError as e:
            self.error += e
        except RRuntimeError as e:
            self.error += e
def stepwise_regression(data, d_v, i_vs):
    """Run a backward stepwise linear regression in R on CSV data.

    The CSV content is written to a temporary file, loaded as an R data
    frame, fitted with ``lm`` using the formula ``d_v ~ i_vs[0]+i_vs[1]+...``
    and reduced with ``step(direction='backward')``.

    Args:
        data: Iterable of byte strings forming the CSV content (the file
            is opened in binary mode).
        d_v: Name of the dependent variable.
        i_vs: Names of the independent variables.

    Returns:
        str: The string form of the first element of the ``step`` result.
    """
    import os
    import tempfile

    stats = importr('stats')
    pat = '%s~%s' % (d_v, '+'.join(i_vs))
    print(pat)

    # A real temporary file replaces the scratch path that was hard-coded
    # to one user's home directory.
    with tempfile.NamedTemporaryFile('wb', suffix='.csv', delete=False) as tmp_file:
        tmp_file.writelines(data)
    try:
        data_from_input = DataFrame.from_csvfile(tmp_file.name)
        reg = stats.lm(pat, data_from_input)
        st = stats.step(reg, direction='backward')
    finally:
        os.remove(tmp_file.name)
    return str(st[0])
def read_data(self, file_name, col_from, col_to):
    """Read ``../Data/<file_name>.xlsx`` and split it into data and metadata.

    Columns ``col_from`` .. ``col_to`` (inclusive) are stored as the
    numeric data; the column just before ``col_from`` supplies the
    metadata. The results are stored on the instance and
    ``make_metabolite_dict`` is called at the end.

    Args:
        file_name: Workbook name without directory or extension.
        col_from: Index of the first numeric column.
        col_to: Index of the last numeric column.
    """
    workbook_path = "../Data/" + file_name + '.xlsx'
    raw = DataFrame(excel.read_excel(workbook_path))

    numeric_columns = IntVector(range(col_from, col_to + 1))
    numeric = raw.rx(True, numeric_columns)
    meta = raw.rx(True, col_from - 1)[0]

    # Number the rows 1..n.
    row_numbers = IntVector(range(1, len(meta) + 1))
    raw._set_rownames(row_numbers)

    self.file_name = file_name
    self.raw_data = raw
    #print(self.raw_data)
    self.numeric_data = numeric
    self.metadata = r_base.factor(meta)
    self.metadata_list = list(meta)
    # Every column name except the first one.
    self.metabolite_list = list(self.raw_data.names)[1:]
    self.make_metabolite_dict()
def pandas2ri(obj):
    """Convert a pandas object into its R counterpart.

    * DataFrame: converted column by column into an R ``DataFrame``.
    * Index: ``StrVector`` for object dtype, otherwise the numpy converter.
    * Series: ``POSIXct`` for ``'<M8[ns]'`` data, otherwise the numpy
      converter applied to the values; the index is attached as names.
    * Anything else is delegated to ``original_py2ri``.
    """
    if isinstance(obj, PandasDataFrame):
        od = OrderedDict()
        # `items()` replaces `iteritems()`, which was removed in pandas 2.0.
        for name, values in obj.items():
            if values.dtype.kind == 'O':
                od[name] = StrVector(values)
            else:
                od[name] = pandas2ri(values)
        return DataFrame(od)
    elif isinstance(obj, PandasIndex):
        if obj.dtype.kind == 'O':
            return StrVector(obj)
        else:
            # only other alternative to 'O' is integer, I think,
            # which goes straight to the numpy converter.
            return numpy2ri.numpy2ri(obj)
    elif isinstance(obj, PandasSeries):
        if obj.dtype == '<M8[ns]':
            # time series
            d = [
                IntVector([x.year for x in obj]),
                IntVector([x.month for x in obj]),
                IntVector([x.day for x in obj]),
                IntVector([x.hour for x in obj]),
                IntVector([x.minute for x in obj]),
                IntVector([x.second for x in obj])
            ]
            res = ISOdatetime(*d)
            #FIXME: can the POSIXct be created from the POSIXct constructor ?
            # (is '<M8[ns]' mapping to Python datetime.datetime ?)
            res = POSIXct(res)
        else:
            # converted as a numpy array
            res = numpy2ri.numpy2ri(obj.values)
        # "index" is equivalent to "names" in R
        if obj.ndim == 1:
            res.do_slot_assign('names', ListVector({'x': pandas2ri(obj.index)}))
        else:
            res.do_slot_assign('dimnames', ListVector(pandas2ri(obj.index)))
        return res
    else:
        return original_py2ri(obj)
def aov(matrix, factor_names, measure_name, robj, interactions = '+'):
    '''
    Computes a repeated measures anova in R via the 'aov' command.

    This function uses R's aov function. It does not compute
    Greenhouse-Geisser and Huynh-Feldt corrections. Use lm_anova for this.

    Note: the R global environment is cleared before the computation, and
    the summary is printed rather than returned.

    Input:
        matrix : ndarray
            Each dimension of the matrix corresponds to one factor. The
            first dimension must be (!) the number of subjects. The
            values in the matrix are taken as the dependent variable.
        factor_names : list
            List with names of each factor. The ordering must correspond
            to the dimensions given by matrix.shape.
        measure_name : str
            Name of the dependent variable.
        robj : rpy2.robjects instance
        interactions : str
            '+' for no interactions
            '*' for all interactions
    '''
    robj.r('rm(list = ls(all = TRUE)) ')
    df = make_data_frame(matrix, factor_names, measure = measure_name)
    robj.globalenv['df'] = DataFrame(df)
    robj.r('attach(df)')
    try:
        for factor in factor_names:
            robj.r('%s<-factor(df$%s)'%(factor,factor))
        # Prefix every factor with the separator, then drop the leading
        # character.
        formula = ''.join(interactions + factor for factor in factor_names)[1:]
        error = '*'.join(factor_names)
        # `data=df` sits inside Error(...) on purpose: it keeps the
        # variables resolving to the factor() copies created above.
        formula = 'aov.out <- aov(%s ~ %s + Error(subject/(%s), data=df))'%(measure_name, formula, error)
        robj.r(formula)
        print(robj.r('summary(aov.out)'))
    finally:
        # Always detach, so a failed run does not leave `df` on R's
        # search path.
        robj.r('detach(df)')
def csv2graph(csvfiles, seplist=[], param={},filepath='.', graph_format='gml'):
    """
    Utility to convert from csv file to igraph format file

    Each CSV file is read as a weighted adjacency matrix, turned into an
    undirected igraph graph and written to ``<filepath>/<csvfile>.<graph_format>``.

    Args:
        csvfiles: Paths of the CSV files to convert.
        seplist: Field separators, one per CSV file.
        param: Optional read parameters; ``'header'`` (default True) and
            ``'row.names'`` / ``'row_names'`` (default False) are used.
        filepath: Output directory.
        graph_format: igraph output format name.

    Returns:
        True once all files have been written.

    Raises:
        IOError: If the number of separators differs from the number of
            files.
    """
    # Validate before touching R so a bad call fails fast.
    if len(seplist) != len(csvfiles):
        raise IOError('Not enought separators')

    igraph = importr('igraph')
    base = importr('base')
    gadj = igraph.graph_adjacency
    wgraph = igraph.write_graph

    header = param.get('header', True)
    # Accept either spelling of the key; the previous code tested one
    # spelling and read the other.
    row_names = param.get('row.names', param.get('row_names', False))

    for f, sep in zip(csvfiles, seplist):
        myfname = f + ".%s" % graph_format
        tmpdata = DataFrame.from_csvfile(f, sep=sep,
                                         header=header,
                                         as_is=True,
                                         row_names=row_names)
        # NOTE(review): the original passed an undefined name (`reslist`);
        # the freshly read table, as a matrix, is the assumed intent.
        g = gadj(base.as_matrix(tmpdata), mode='undirected', weighted=True)
        wgraph(g, file=os.path.join(filepath, myfname), format=graph_format)
    return True
def r_formula(rdf: RDataFrame, target: str, predictors: List[str]) -> RFormula:
    """
    Build an R modelling formula bound to columns of the given dataframe.

    The formula text has the form 'target ~ var1 + var2 + etc...'. Each
    predictor name is bound, in the formula's environment, to the
    matching column selection of ``rdf``.
    """
    pieces = [target, '~']
    if predictors:
        pieces.append(' + '.join(predictors))
    formula = RFormula(' '.join(pieces))

    env = formula.environment
    for column in predictors:
        env[column] = rdf.rx(column)
    return formula
# R helper: turns 1 - cor(x) into a distance object (as.dist).
distEisen = robjects.r('''
    distEisen <- function(x, use = "pairwise.complete.obs") {
        co.x <- cor(x, use = use)
        dist.co.x <- 1 - co.x
        return(as.dist(dist.co.x))
    }
''')

# R helper: pads every element of a list with `fill` up to the length of the
# longest element, then converts the list to a data frame.
listToDF = robjects.r('''
    listToDF <- function(inputList, fill = NA){
        # Use fill = NULL for regular recycling behavior
        maxLen = max(sapply(inputList, length))
        for(i in seq_along(inputList))
            inputList[[i]] <- c(inputList[[i]], rep(fill, maxLen - length(inputList[[i]])))
        return(as.data.frame(inputList))
    }
''')

# Load the tab-separated annotation table; the first column supplies the
# row names.
annotations = DataFrame.from_csvfile(annotation_classes_input_file, header=True, sep='\t', quote='"', row_names=1)

# Shorthand for the embedded R instance; attach the R packages used below.
R = robjects.r
R["library"]("utils")
R["library"]("tools")
def __init__(self, fe_formula, re_formula, re_group, dep_data, ind_data,
             factors=None, row_mask=None, dep_mask=None,
             use_ranks=False, use_norm=True,
             memmap=False, memmap_dir=None,
             resid_formula=None,
             svd_terms=None, feat_thresh=0.05,
             feat_nboot=1000, do_tfce=False,
             connectivity=None, shape=None,
             dt=.01, E=2 / 3., H=2.0,
             n_jobs=1, verbose=10,
             lmer_opts=None):
    """
    Set up the per-group matrices and evaluate the model on the actual data.

    dep_data can be an array or a dict of arrays (possibly memmapped),
    one for each group.

    ind_data can be a rec_array for each group or one large rec_array
    with a grouping variable.

    factors, when given, is the collection of column names to treat as
    factors; None means no explicit factors.

    row_mask can be None (keep all rows), a dict of masks keyed by group,
    or one mask indexed the same way as ind_data.
    """
    if verbose > 0:
        sys.stdout.write('Initializing...')
        sys.stdout.flush()
    start_time = time.time()

    # save the formula
    self._formula_str = fe_formula + ' + ' + re_formula

    # see if there's a resid formula
    if resid_formula:
        # the random effects are the same
        self._resid_formula_str = resid_formula + ' + ' + re_formula
    else:
        self._resid_formula_str = None

    # save whether using ranks
    self._use_ranks = use_ranks

    # see the thresh for keeping a feature
    self._feat_thresh = feat_thresh
    self._feat_nboot = feat_nboot
    self._do_tfce = do_tfce
    self._connectivity = connectivity
    self._dt = dt
    self._E = E
    self._H = H

    # see if memmapping
    self._memmap = memmap

    # save job info
    self._n_jobs = n_jobs
    self._verbose = verbose

    # eventually fill the feature shape
    self._feat_shape = None

    # handle the dep_mask
    self._dep_mask = dep_mask

    # membership tests below must also work with the default factors=None
    factor_keys = factors if factors is not None else ()

    # fill A,M,O,D
    self._A = {}
    self._M = {}
    self._O = {}
    self._D = {}
    O = []

    # loop over unique grouping var
    self._re_group = re_group
    if isinstance(ind_data, dict):
        # groups are the keys (materialised: a dict view does not turn
        # into a 1-d array on Python 3)
        self._groups = np.array(list(ind_data.keys()))
    else:
        # groups need to be extracted from the recarray
        self._groups = np.unique(ind_data[re_group])

    for g in self._groups:
        # get that subj inds
        if isinstance(ind_data, dict):
            # the index is just the group into that dict
            ind_ind = g
        else:
            # select the rows based on the group
            ind_ind = ind_data[re_group] == g

        # process the row mask
        if row_mask is None:
            # no mask, so all good (np.bool was removed from NumPy; the
            # builtin bool is the same dtype)
            row_ind = np.ones(len(ind_data[ind_ind]), dtype=bool)
        elif isinstance(row_mask, dict):
            # pull the row_mask from the dict
            row_ind = row_mask[g]
        else:
            # index into it with ind_ind
            row_ind = row_mask[ind_ind]

        # extract that group's A,M,O
        # first save the observations (rows of A)
        self._O[g] = ind_data[ind_ind][row_ind]
        if use_ranks:
            # loop over non-factors and rank them
            for n in self._O[g].dtype.names:
                if (n in factor_keys) or isinstance(self._O[g][n][0], str):
                    continue
                self._O[g][n] = rankdata(self._O[g][n])
        O.append(self._O[g])

        # eventually allow for dict of data files for dep_data
        if isinstance(dep_data, dict):
            # the index is just the group into that dict
            dep_ind = g
        else:
            # select the rows based on the group
            dep_ind = ind_ind

        # save feature shape if necessary
        if self._feat_shape is None:
            self._feat_shape = dep_data[dep_ind].shape[1:]

        # handle the mask
        if self._dep_mask is None:
            self._dep_mask = np.ones(self._feat_shape, dtype=bool)

        # create the connectivity (will mask later)
        if self._do_tfce and self._connectivity is None and \
           (len(self._dep_mask.flatten()) > self._dep_mask.sum()):
            # create the connectivity
            self._connectivity = cluster.sparse_dim_connectivity(
                [cluster.simple_neighbors_1d(n) for n in self._feat_shape])

        # Save D index into data (apply row and feature masks
        # This will also reshape it
        self._D[g] = dep_data[dep_ind][row_ind][:, self._dep_mask].copy()

        # reshape it
        #self._D[g] = self._D[g].reshape((self._D[g].shape[0], -1))
        if use_ranks:
            if verbose > 0:
                sys.stdout.write('Ranking %s...' % (str(g)))
                sys.stdout.flush()

            for i in range(self._D[g].shape[1]):
                # rank it
                self._D[g][:, i] = rankdata(self._D[g][:, i])

                # normalize it
                self._D[g][:, i] = ((self._D[g][:, i] - 1) /
                                    (len(self._D[g][:, i]) - 1))

        # save M from D so we can have a normalized version
        self._M[g] = self._D[g].copy()

        # remove any NaN's in dep_data
        self._D[g][np.isnan(self._D[g])] = 0.0

        # normalize M
        if use_norm:
            self._M[g] -= self._M[g].mean(0)
            self._M[g] /= np.sqrt((self._M[g]**2).sum(0))

        # determine A from the model.matrix
        rdf = DataFrame({
            k: (FactorVector(self._O[g][k])
                if k in factor_keys else self._O[g][k])
            for k in self._O[g].dtype.names
        })

        # model spec as data frame
        ms = r['data.frame'](r_model_matrix(Formula(fe_formula), data=rdf))

        cols = list(r['names'](ms))
        if svd_terms is None:
            self._svd_terms = [c for c in cols if 'Intercept' not in c]
        else:
            self._svd_terms = svd_terms

        # self._A[g] = np.vstack([ms[c] #np.array(ms.rx(c))
        self._A[g] = np.concatenate(
            [np.array(ms.rx(c)) for c in self._svd_terms]).T

        if use_ranks:
            for i in range(self._A[g].shape[1]):
                # rank it
                self._A[g][:, i] = rankdata(self._A[g][:, i])

                # normalize it
                self._A[g][:, i] = ((self._A[g][:, i] - 1) /
                                    (len(self._A[g][:, i]) - 1))

        # normalize A
        if True:  # use_norm:
            self._A[g] -= self._A[g].mean(0)
            self._A[g] /= np.sqrt((self._A[g]**2).sum(0))

        # memmap if desired
        if self._memmap:
            self._M[g] = _memmap_array(self._M[g], memmap_dir,
                                       unique_id=str(g))
            self._D[g] = _memmap_array(self._D[g], memmap_dir,
                                       unique_id=str(g))

    # save the new O
    self._O = O
    if lmer_opts is None:
        lmer_opts = {}
    self._lmer_opts = lmer_opts
    self._factors = factors

    # mask the connectivity
    if self._do_tfce and (len(self._dep_mask.flatten()) >
                          self._dep_mask.sum()):
        self._connectivity = self._connectivity.tolil()[
            self._dep_mask.flatten()][:, self._dep_mask.flatten()].tocoo()

    # prepare for the perms and boots and jackknife
    self._perms = []
    self._tp = []
    self._tb = []
    self._tj = []
    self._pfmask = []

    if verbose > 0:
        sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start_time))
        sys.stdout.write('Processing actual data...')
        sys.stdout.flush()
    start_time = time.time()

    # register this instance so _eval_model can find it by id
    global _global_meld
    _global_meld[id(self)] = self

    # run for actual data (returns both perm and boot vals)
    self._R = None
    self._ss = None
    self._mer = None
    tp, tb, R, feat_mask, ss, mer = _eval_model(id(self), None)
    self._R = R
    self._tp.append(tp)
    self._tb.append(tb)
    self._feat_mask = feat_mask
    self._fmask = ~feat_mask[0]
    self._pfmask.append(~feat_mask[0])
    self._ss = ss
    self._mer = mer

    if verbose > 0:
        sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start_time))
        sys.stdout.flush()
def __init__(self, dex_name):
    """Load the CSV file at ``dex_name`` into ``self.dexcom_data``."""
    loaded_frame = DataFrame.from_csvfile(dex_name)
    self.dexcom_data = loaded_frame
def from_csv(cls, data):
    """Build an instance from a CSV file.

    ``data`` is converted to ``str`` and used as the path of the CSV file;
    the loaded data frame is passed to ``cls``.
    """
    csv_path = str(data)
    frame = DataFrame.from_csvfile(csv_path)
    return cls(frame)
def test_image_png():
    """``image_png`` returns a truthy result for a minimal ggplot object."""
    frame = DataFrame({'x': 1, 'Y': 2})
    plot_obj = rpy2.robjects.lib.ggplot2.ggplot(frame)
    rendered = ggplot.image_png(plot_obj)
    assert rendered
from rpy2.robjects.vectors import DataFrame
import math
import datetime

parser = argparse.ArgumentParser()
parser.add_argument("-in_csv", help="")
parser.add_argument("-out", help="")
args=parser.parse_args()

# Honour the -in_csv argument (it was parsed but ignored before); fall
# back to the previous hard-coded path when it is not given.
infile = args.in_csv or "/Users/security/science/bigoutput.csv"
dataf = DataFrame.from_csvfile(infile, sep = ",",header=True)

# Get statistics for investigated seqs
#rmean = robjects.r['mean']
#rmed = robjects.r['median']
#rmax = robjects.r['max']
#rsd = robjects.r['sd']
#rsum = robjects.r['sum']
#
#ma=rmax(dataf.rx('Length'))
#
#as_vec = robjects.r['as.vector']
#as_num = robjects.r['as.numeric']
#as_mat = robjects.r['as.matrix']
#
#test22=as_vec(dataf.rx('Length'))
##    print pat
##    return None
#    __file.close()
#    data_from_input = DataFrame.from_csvfile(__file.name)
#    reg = stats.lm(pat, data_from_input)
#    st = stats.step(reg, direction = 'backward')
##    ret = ''
##    for key, value in st.iteritems():
##        ret += key + ',' + str(value) + '\n'
##    return ret
#    return ''
# stepwise_regression()

# Backward stepwise regression of `inflat` on the listed predictors, using
# the CSV file below as input.
d_v = "inflat"
i_vs = ["money", "output", "initial", "poprate", "inv", "school"]
data_from_input = DataFrame.from_csvfile("/home/foodfan/money.csv")
stats = importr("stats")
# R formula of the form "dependent~pred1+pred2+..."
pat = "%s~%s" % (d_v, "+".join(i_vs))
# print pat
# return None
reg = stats.lm(pat, data_from_input)
st = stats.step(reg, direction="backward")
ret = ""
print("------------------------------------")
print(str(st[0]))
# print st[1]
# for key, value in st.iteritems():
#     ret += key + ',' + str(value) + '\n'
print(ret)
def main():
    """Compute summary statistics of a column expression with R.

    Command line: ``gsummary.py input_file output_file expression``. The
    expression may reference columns as ``c1``, ``c2``, ... and may only
    use the names and operators listed by ``S3_METHODS()``. The referenced
    columns of the tab-separated input are copied to a temporary file
    (rows with non-numeric values are skipped), loaded into R, and the
    sum, mean, standard deviation and quantiles of the expression are
    written to the output file.
    """
    try:
        datafile = sys.argv[1]
        outfile_name = sys.argv[2]
        expression = sys.argv[3]
    except IndexError:
        stop_err( 'Usage: python gsummary.py input_file ouput_file expression' )

    math_allowed = S3_METHODS()[ 'Math' ]
    ops_allowed = S3_METHODS()[ 'Ops' ]

    # Check for invalid expressions
    for word in re.compile( '[a-zA-Z]+' ).findall( expression ):
        if word and word not in math_allowed:
            stop_err( "Invalid expression '%s': term '%s' is not recognized or allowed" %( expression, word ) )
    symbols = set()
    for symbol in re.compile( r'[^a-z0-9\s]+' ).findall( expression ):
        if symbol and symbol not in ops_allowed:
            stop_err( "Invalid expression '%s': operator '%s' is not recognized or allowed" % ( expression, symbol ) )
        else:
            symbols.add( symbol )
    if len( symbols ) == 1 and ',' in symbols:
        # User may have entered a comma-separated list r_data_frame columns
        stop_err( "Invalid columns '%s': this tool requires a single column or expression" % expression )

    # Find all column references in the expression (converted to
    # zero-based indexes)
    column_ref = re.compile( 'c[0-9]+' )
    cols = [ int( col[1:] ) - 1 for col in column_ref.findall( expression ) ]

    # Text mode: the rows written below are str, not bytes.
    tmp_file = tempfile.NamedTemporaryFile( 'w+' )
    # Write the R header row to the temporary file
    hdr_str = "\t".join( "c%s" % str( col+1 ) for col in cols )
    tmp_file.write( "%s\n" % hdr_str )
    skipped_lines = 0
    first_invalid_line = 0
    i = 0
    with open( datafile ) as data_handle:
        for i, line in enumerate( data_handle ):
            line = line.rstrip( '\r\n' )
            if line and not line.startswith( '#' ):
                valid = True
                fields = line.split( '\t' )
                # Write the R data row to the temporary file
                for col in cols:
                    try:
                        float( fields[ col ] )
                    except ( ValueError, IndexError ):
                        # missing or non-numeric value: skip the whole row
                        skipped_lines += 1
                        if not first_invalid_line:
                            first_invalid_line = i + 1
                        valid = False
                        break
                if valid:
                    data_str = "\t".join( fields[ col ] for col in cols )
                    tmp_file.write( "%s\n" % data_str )
    tmp_file.flush()

    if skipped_lines == i + 1:
        stop_err( "Invalid column or column data values invalid for computation.  See tool tips and syntax for data requirements." )
    else:
        # summary function and return labels
        summary_func = r( "function( x ) { c( sum=sum( as.numeric( x ), na.rm=T ), mean=mean( as.numeric( x ), na.rm=T ), stdev=sd( as.numeric( x ), na.rm=T ), quantile( as.numeric( x ), na.rm=TRUE ) ) }" )
        headings = [ 'sum', 'mean', 'stdev', '0%', '25%', '50%', '75%', '100%' ]
        headings_str = "\t".join( headings )

        #r.set_default_mode( NO_CONVERSION )
        #r_data_frame = r.read_table( tmp_file.name, header=True, sep="\t" )
        r_data_frame = DataFrame.from_csvfile( tmp_file.name, header=True, sep="\t" )

        outfile = open( outfile_name, 'w' )

        # Expose every referenced column to R under its cN name.
        for col in column_ref.findall( expression ):
            r.assign( col, r[ "$" ]( r_data_frame, col ) )
        try:
            summary = summary_func( r( expression ) )
        except RException as s:
            outfile.close()
            stop_err( "Computation resulted in the following error: %s" % str( s ) )
        #summary = summary.as_py( BASIC_CONVERSION )
        outfile.write( "#%s\n" % headings_str )
        print( summary )
        print( summary.r_repr() )
        outfile.write( "%s\n" % "\t".join( [ "%g" % ( summary.rx2( k )[0] ) for k in headings ] ) )
        outfile.close()

        if skipped_lines:
            print( "Skipped %d invalid lines beginning with line #%d.  See tool tips for data requirements." % ( skipped_lines, first_invalid_line ) )
def __init__(self,
             fe_formula,
             re_formula,
             re_group,
             dep_data,
             ind_data,
             factors=None,
             row_mask=None,
             use_ranks=False,
             use_norm=True,
             memmap=False,
             memmap_dir=None,
             resid_formula=None,
             null_formula=None,
             num_null_boot=0,
             svd_terms=None,
             use_ssvd=False,
             # nperms=500, nboot=100,
             n_jobs=1,
             verbose=10,
             lmer_opts=None):
    """Prepare per-group matrices and evaluate the model on the actual data.

    Parameters
    ----------
    fe_formula : str
        Fixed-effects formula; also used to build the R model matrix.
    re_formula : str
        Random-effects part, appended with ``' + '`` to ``fe_formula``,
        ``resid_formula`` and ``null_formula``.
    re_group : str
        Field of ``ind_data`` whose unique values define the groups (only
        used for that purpose when ``ind_data`` is not a dict).
    dep_data : array or dict
        Dependent data, either one array indexed like ``ind_data`` or a
        dict keyed by group.  All axes after the first are flattened.
    ind_data : structured array or dict
        Independent data, either one record array or a dict keyed by
        group.
    factors : sequence of str, optional
        Field names converted to R factors.  ``None`` means no factors.
    row_mask : None, dict or array, optional
        Rows to keep: all rows, a per-group dict of masks, or one mask
        indexed with the group selection.
    use_ranks : bool
        Rank-transform the non-factor, non-string independent fields, the
        dependent columns and the design-matrix columns.
    use_norm : bool
        Centre the columns of ``M`` and scale them to unit norm.  ``A`` is
        always normalised, regardless of this flag.
    memmap, memmap_dir
        When ``memmap`` is true, ``M`` and ``D`` are passed through
        ``_memmap_array`` with ``memmap_dir``.
    resid_formula, null_formula : str, optional
        Stored (combined with ``re_formula``) when given.
    num_null_boot, use_ssvd, n_jobs, lmer_opts
        Stored on the instance; ``lmer_opts=None`` becomes ``{}``.
    svd_terms : list of str, optional
        Model-matrix columns used for ``A``; by default every column whose
        name does not contain ``'Intercept'``.
    verbose : int
        Progress is written to stdout when greater than zero.

    Side effects: the instance is registered in the module-level
    ``_global_meld`` under ``id(self)`` and ``_eval_model`` is called once.
    """
    # Always record the start time so the timing messages below never
    # depend on which verbosity branch ran.
    start_time = time.time()
    if verbose > 0:
        sys.stdout.write('Initializing...')
        sys.stdout.flush()

    # ``factors`` defaults to None, but the membership tests below need a
    # container; the caller's value is still what gets stored.
    factor_names = [] if factors is None else factors

    # save the formula
    self._formula_str = fe_formula + ' + ' + re_formula

    # see if there's a resid formula
    if resid_formula:
        # the random effects are the same
        self._resid_formula_str = resid_formula + ' + ' + re_formula
    else:
        self._resid_formula_str = None

    # see if there's a null formula
    if null_formula:
        # the random effects are the same
        self._null_formula_str = null_formula + ' + ' + re_formula
    else:
        self._null_formula_str = None
    self._num_null_boot = num_null_boot

    # save whether using ranks
    self._use_ranks = use_ranks

    # see whether to use sparse svd
    self._use_ssvd = use_ssvd

    # see if memmapping
    self._memmap = memmap

    # save job info
    self._n_jobs = n_jobs
    self._verbose = verbose

    # eventually fill the feature shape
    self._feat_shape = None

    # fill A,M,O,D
    self._A = {}
    self._M = {}
    self._O = {}
    self._D = {}
    O = []

    # loop over unique grouping var
    self._re_group = re_group
    if isinstance(ind_data, dict):
        # groups are the keys
        self._groups = np.array(list(ind_data.keys()))
    else:
        # groups need to be extracted from the recarray
        self._groups = np.unique(ind_data[re_group])
    for g in self._groups:
        # get that subj inds
        if isinstance(ind_data, dict):
            # the index is just the group into that dict
            ind_ind = g
        else:
            # select the rows based on the group
            ind_ind = ind_data[re_group] == g

        # process the row mask
        if row_mask is None:
            # no mask, so all good (plain ``bool``: ``np.bool`` is gone
            # from NumPy 1.24)
            row_ind = np.ones(len(ind_data[ind_ind]), dtype=bool)
        elif isinstance(row_mask, dict):
            # pull the row_mask from the dict
            row_ind = row_mask[g]
        else:
            # index into it with ind_ind
            row_ind = row_mask[ind_ind]

        # extract that group's A,M,O
        # first save the observations (rows of A)
        self._O[g] = ind_data[ind_ind][row_ind]
        if use_ranks:
            # loop over non-factors and rank them
            for n in self._O[g].dtype.names:
                if (n in factor_names) or isinstance(self._O[g][n][0], str):
                    continue
                self._O[g][n] = rankdata(self._O[g][n])
        O.append(self._O[g])

        # eventually allow for dict of data files for dep_data
        if isinstance(dep_data, dict):
            # the index is just the group into that dict
            dep_ind = g
        else:
            # select the rows based on the group
            dep_ind = ind_ind

        # save feature shape if necessary
        if self._feat_shape is None:
            self._feat_shape = dep_data[dep_ind].shape[1:]

        # Save D index into data
        self._D[g] = dep_data[dep_ind][row_ind]

        # reshape it to (rows, features)
        self._D[g] = self._D[g].reshape((self._D[g].shape[0], -1))
        if use_ranks:
            if verbose > 0:
                sys.stdout.write('Ranking %s...' % (str(g)))
                sys.stdout.flush()
            for i in range(self._D[g].shape[1]):
                self._D[g][:, i] = rankdata(self._D[g][:, i])

        # reshape M, so we don't have to do it repeatedly
        self._M[g] = self._D[g].copy()

        # normalize M
        if use_norm:
            self._M[g] -= self._M[g].mean(0)
            self._M[g] /= np.sqrt((self._M[g]**2).sum(0))

        # determine A from the model.matrix
        rdf = DataFrame({
            k: (FactorVector(self._O[g][k])
                if k in factor_names else self._O[g][k])
            for k in self._O[g].dtype.names
        })

        # model spec as data frame
        ms = r['data.frame'](r_model_matrix(Formula(fe_formula), data=rdf))

        cols = list(r['names'](ms))
        if svd_terms is None:
            self._svd_terms = [c for c in cols if 'Intercept' not in c]
        else:
            self._svd_terms = svd_terms
        self._A[g] = np.concatenate(
            [np.array(ms.rx(c)) for c in self._svd_terms]).T

        if use_ranks:
            for i in range(self._A[g].shape[1]):
                self._A[g][:, i] = rankdata(self._A[g][:, i])

        # normalize A (done unconditionally; use_norm only governs M)
        self._A[g] -= self._A[g].mean(0)
        self._A[g] /= np.sqrt((self._A[g]**2).sum(0))

        # memmap if desired
        if self._memmap:
            self._M[g] = _memmap_array(self._M[g], memmap_dir)
            self._D[g] = _memmap_array(self._D[g], memmap_dir)

    # keep the per-group observations as a list, in group order
    self._O = O
    if lmer_opts is None:
        lmer_opts = {}
    self._lmer_opts = lmer_opts
    self._factors = factors

    # prepare for the perms and boots
    self._perms = []
    self._boots = []
    self._tp = []
    self._tb = []

    if verbose > 0:
        sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start_time))
        sys.stdout.write('Processing actual data...')
        sys.stdout.flush()
    start_time = time.time()

    # _eval_model receives only id(self), so the instance has to be
    # reachable through the module-level registry first.
    global _global_meld
    _global_meld[id(self)] = self

    # run for actual data (returns both perm and boot vals)
    self._R = None
    self._ss = None
    self._mer = None
    self._mer_null = None
    tp, tb, R, feat_mask, ss, mer, mer_null = _eval_model(
        id(self), None, None)
    self._R = R
    self._tp.append(tp)
    self._tb.append(tb)
    self._feat_mask = feat_mask
    self._ss = ss
    self._mer = mer
    self._mer_null = mer_null

    if verbose > 0:
        sys.stdout.write('Done (%.2g sec)\n' % (time.time() - start_time))
        sys.stdout.flush()
def main():
    """Compute summary statistics of a column expression using R.

    Command line arguments (read from ``sys.argv``):

    1. path of the tab-separated input file,
    2. path of the output file,
    3. an expression over columns written as ``c1``, ``c2``, ... (1-based).

    Every alphabetic term of the expression must be listed in
    ``S3_METHODS()['Math']`` and every operator in ``S3_METHODS()['Ops']``;
    anything else is rejected through ``stop_err`` before the expression is
    handed to R.  Blank lines and lines starting with ``#`` are ignored.
    Data lines in which a referenced column is missing or not numeric are
    skipped and reported on stdout.

    The output file receives a ``#``-prefixed header line followed by one
    line holding sum, mean, standard deviation and the five values returned
    by R's ``quantile``.
    """
    try:
        datafile = sys.argv[1]
        outfile_name = sys.argv[2]
        expression = sys.argv[3]
    except IndexError:
        stop_err('Usage: python gsummary.py input_file ouput_file expression')

    math_allowed = S3_METHODS()['Math']
    ops_allowed = S3_METHODS()['Ops']

    # Check for invalid expressions.  The expression is later evaluated by
    # R, so this whitelist is the only guard against arbitrary R code.
    for word in re.findall(r'[a-zA-Z]+', expression):
        if word and word not in math_allowed:
            stop_err(
                "Invalid expression '%s': term '%s' is not recognized or allowed"
                % (expression, word))
    symbols = set()
    for symbol in re.findall(r'[^a-z0-9\s]+', expression):
        if symbol and symbol not in ops_allowed:
            stop_err(
                "Invalid expression '%s': operator '%s' is not recognized or allowed"
                % (expression, symbol))
        else:
            symbols.add(symbol)
    if len(symbols) == 1 and ',' in symbols:
        # User may have entered a comma-separated list r_data_frame columns
        stop_err(
            "Invalid columns '%s': this tool requires a single column or expression"
            % expression)

    # Find all column references in the expression (converted to 0-based).
    col_pattern = re.compile(r'c[0-9]+')
    col_names = col_pattern.findall(expression)
    cols = [int(name[1:]) - 1 for name in col_names]

    # Text mode: only str is written, which a binary handle rejects on
    # Python 3.  The context manager removes the file on every exit path.
    with tempfile.NamedTemporaryFile('w+') as tmp_file:
        # Write the R header row to the temporary file
        hdr_str = "\t".join("c%s" % str(col + 1) for col in cols)
        tmp_file.write("%s\n" % hdr_str)

        skipped_lines = 0
        first_invalid_line = 0
        data_lines = 0  # non-blank, non-comment lines seen
        with open(datafile) as in_handle:
            for i, line in enumerate(in_handle):
                line = line.rstrip('\r\n')
                if not line or line.startswith('#'):
                    continue
                data_lines += 1
                fields = line.split('\t')
                try:
                    for col in cols:
                        float(fields[col])
                except (ValueError, IndexError):
                    # Column missing or not numeric: drop the whole line.
                    skipped_lines += 1
                    if not first_invalid_line:
                        first_invalid_line = i + 1
                    continue
                # Write the R data row to the temporary file
                data_str = "\t".join(fields[col] for col in cols)
                tmp_file.write("%s\n" % data_str)
        tmp_file.flush()

        # Compare against the number of data lines rather than the total
        # line count, so comment or blank lines cannot mask a file whose
        # data lines are all unusable.
        if data_lines and skipped_lines == data_lines:
            stop_err(
                "Invalid column or column data values invalid for computation. See tool tips and syntax for data requirements."
            )
        else:
            # summary function and return labels
            summary_func = r(
                "function( x ) { c( sum=sum( as.numeric( x ), na.rm=T ), "
                "mean=mean( as.numeric( x ), na.rm=T ), "
                "stdev=sd( as.numeric( x ), na.rm=T ), "
                "quantile( as.numeric( x ), na.rm=TRUE ) ) }")
            headings = ['sum', 'mean', 'stdev', '0%', '25%', '50%', '75%', '100%']
            headings_str = "\t".join(headings)

            r_data_frame = DataFrame.from_csvfile(
                tmp_file.name, header=True, sep="\t")

            with open(outfile_name, 'w') as outfile:
                # Expose every referenced column to R under its cN name.
                for col in col_names:
                    r.assign(col, r["$"](r_data_frame, col))
                try:
                    summary = summary_func(r(expression))
                except RException as s:
                    stop_err(
                        "Computation resulted in the following error: %s"
                        % str(s))
                outfile.write("#%s\n" % headings_str)
                print(summary)
                print(summary.r_repr())
                outfile.write(
                    "%s\n"
                    % "\t".join(["%g" % (summary.rx2(k)[0]) for k in headings]))

            if skipped_lines:
                print("Skipped %d invalid lines beginning with line #%d. See tool tips for data requirements." % (
                    skipped_lines, first_invalid_line))
def show4():
    """Draw the two-project time comparison as a dodged bar chart.

    Calls ``open4()``, sources the ``end.R`` script, loads ``project2.csv``
    as an R data frame and plots ``time`` against ``day`` with bars filled
    by ``factor(project)``.  X-axis tick labels are rotated by 45 degrees.
    """
    open4()
    r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/end.R',
             encoding="utf-8")
    frame = DataFrame.from_csvfile(
        'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/project2.csv')

    # Layers are added one at a time, in the same order as a single chained
    # ``+`` expression would evaluate them.
    chart = ggplot2.ggplot(frame)
    chart = chart + ggplot2.aes_string(x='day', y='time', fill='factor(project)')
    chart = chart + ggplot2.geom_bar(stat='identity', position='dodge')
    chart = chart + ggplot2.ggtitle("两项目时间对比图")
    chart = chart + ggplot2.labs(x='日期', y='时间 (min)')
    chart = chart + ggplot2.theme(
        **{'axis.text.x': ggplot2.element_text(angle=45)})
    chart.plot()
def rpy2py_listvector(obj):
    """Convert an R list vector to its Python counterpart.

    Objects whose ``rclass`` contains ``'data.frame'`` are wrapped in
    ``DataFrame`` and sent through ``rpy2py``; everything else is handed to
    ``numpy2ri.rpy2py``.
    """
    if 'data.frame' not in obj.rclass:
        return numpy2ri.rpy2py(obj)
    return rpy2py(DataFrame(obj))
p.join() times_r.append(res) from rpy2.robjects.vectors import DataFrame, FloatVector, StrVector, IntVector d = {} d['code'] = StrVector([x[0] for x in combos]) + StrVector([x[0] for x in combos_r]) d['sequence'] = StrVector([x[-2] for x in combos]) + StrVector( [x[0] for x in combos_r]) d['time'] = FloatVector([x for x in times]) + FloatVector( [x[0] for x in combos_r]) d['n_loop'] = IntVector([x[-1] for x in combos]) + IntVector( [x[1] for x in combos_r]) d['group'] = StrVector( [d['code'][x] + ':' + d['sequence'][x] for x in xrange(len(d['n_loop']))]) dataf = DataFrame(d) from rpy2.robjects.lib import ggplot2 p = ggplot2.ggplot(dataf) + \ ggplot2.geom_line(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \ ggplot2.geom_point(ggplot2.aes_string(x="n_loop", y="time", colour="code")) + \ ggplot2.facet_wrap(Formula('~sequence')) + \ ggplot2.scale_y_continuous('running time') + \ ggplot2.scale_x_continuous('repeated n times', ) + \ ggplot2.xlim(0, max(n_loops)) + \ ggplot2.opts(title = "Benchmark (running time)")
def hae_ehdokkaat():
    """Load ``e2011ehd.csv`` from ``DATADIR`` as an R data frame.

    The file is read as tab-separated with a header row, and ``as_is=True``
    is passed through to ``DataFrame.from_csvfile``.
    """
    read_options = dict(header=True, sep='\t', as_is=True)
    return DataFrame.from_csvfile(
        os.path.join(DATADIR, 'e2011ehd.csv'), **read_options)
def _to_pandas_factor(obj): codes = [x - 1 if x > 0 else -1 for x in numpy.array(obj)] res = pandas.Categorical.from_codes(codes, categories=list(obj.do_slot('levels')), ordered='ordered' in obj.rclass) return res converter._rpy2py_nc_map.update({ rinterface.IntSexpVector: conversion.NameClassMap(numpy2ri.rpy2py, {'factor': _to_pandas_factor}), rinterface.ListSexpVector: conversion.NameClassMap(numpy2ri.rpy2py, {'data.frame': lambda obj: rpy2py(DataFrame(obj))}) }) def activate(): warnings.warn( 'The global conversion available with activate() ' 'is deprecated and will be removed in the next ' 'major release. Use a local converter.', category=DeprecationWarning) global original_converter # If module is already activated, there is nothing to do. if original_converter is not None: return original_converter = conversion.Converter(
def lmer_feature(formula_str, dat, perms=None, val=None, factors=None,
                 **kwargs):
    """
    Run LMER on a number of permutations of the predicted data.

    Parameters
    ----------
    formula_str : str
        Model formula; the text left of ``~`` names the predicted field.
    dat : structured array
        Input data.  NOTE: when ``val`` is given, the predicted field of
        ``dat`` is overwritten in place.
    perms : sequence, optional
        Permutation index arrays applied to the predicted column before
        each fit (``perm + 1`` is used as the R index, so presumably these
        are zero-based integer arrays -- confirm with callers).  ``None``
        fits the data once, unpermuted.
    val : array-like, optional
        Replacement values for the predicted field.
    factors : sequence of str, optional
        Fields converted to R factors; string-valued fields are always
        converted.
    **kwargs
        Forwarded to ``lme4.lmer``.

    Returns
    -------
    Record array with one row per permutation and one field per row of the
    coefficient table, holding the ``t.value`` column.  Rows of failed fits
    stay NaN.  ``None`` is returned when every fit failed.
    """
    # get the perm_var
    perm_var = formula_str.split('~')[0].strip()

    # set the val if necessary
    if val is not None:
        dat[perm_var] = val

    # make factor list if necessary
    if factors is None:
        factors = []

    # convert the recarray to a DataFrame
    rdf = DataFrame({
        k: (FactorVector(dat[k]) if
            (k in factors) or isinstance(dat[k][0], str) else dat[k])
        for k in dat.dtype.names
    })

    # get the column index
    col_ind = list(rdf.colnames).index(perm_var)

    # make a formula obj
    rformula = Formula(formula_str)

    # just apply to actual data if no perms
    if perms is None:
        perms = [None]

    # run on each permutation
    tvals = None
    for i, perm in enumerate(perms):
        if perm is not None:
            # set the perm; this reorders the column already stored in rdf,
            # so successive permutations compose
            rdf[col_ind] = rdf[col_ind].rx(perm + 1)

        # inside try block to catch convergence errors; Exception (rather
        # than a bare except) lets KeyboardInterrupt/SystemExit through
        try:
            ms = lme4.lmer(rformula, data=rdf, **kwargs)
        except Exception:
            continue

        # extract the result
        df = r['data.frame'](r_coef(r['summary'](ms)))
        if tvals is None:
            # init the data, all NaN, one field per coefficient row
            rows = list(r['row.names'](df))
            tvals = np.rec.fromarrays(
                [np.ones(len(perms)) * np.nan for ro in range(len(rows))],
                names=','.join(rows))
        tvals[i] = tuple(df.rx2('t.value'))

    return tvals
def _convert_to_dataframe(x):
    """
    Wrap a Python list of integers in an R data frame with one column, ``y``.
    """
    return DataFrame({'y': IntVector(x)})
# Script fragment: parse the command line, load the input CSV into an R data
# frame and bind a handful of R functions for later use.
# NOTE(review): ``argparse`` and ``robjects`` are used below but not imported
# in this fragment -- presumably imported earlier in the file; confirm.
import rpy2.robjects.pandas2ri
from rpy2.robjects.vectors import DataFrame
import math
import datetime

# Command-line options: -in_csv and -out (neither has help text).
parser = argparse.ArgumentParser()
parser.add_argument("-in_csv", help="")
parser.add_argument("-out", help="")
args=parser.parse_args()
# Read the comma-separated input; the first row holds the column names.
dataf = DataFrame.from_csvfile(args.in_csv, sep = ",",header=True)

# Get statistics for investigated seqs
# Handles to R functions, looked up by name.
rmean = robjects.r['mean']
rmed = robjects.r['median']
rmax = robjects.r['max']
rsd = robjects.r['sd']
rsum = robjects.r['sum']
# Maximum of the 'hitlen' column, computed by R's max().
ma=rmax(dataf.rx('hitlen'))
# R conversion helpers.
as_vec = robjects.r['as.vector']
as_num = robjects.r['as.numeric']
as_mat = robjects.r['as.matrix']
#test22=as_vec(dataf.rx('Length'))
def show1():
    """Draw today's per-project time distribution as a bar chart.

    Calls ``open1()``, sources the ``head1.r`` script, loads ``day1.csv`` as
    an R data frame and plots ``time`` against ``project`` with bars filled
    by ``project``.  X-axis tick labels are rotated by 45 degrees.
    """
    open1()
    r.source('D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/R/head1.r',
             encoding="utf-8")
    frame = DataFrame.from_csvfile(
        'D:/Postgraduate/Course/2-semester/R-language/TimeAnalyze/Programe/temp/day1.csv')

    # Layers are added one at a time, in the same order as a single chained
    # ``+`` expression would evaluate them.
    chart = ggplot2.ggplot(frame)
    chart = chart + ggplot2.aes_string(x='project', y='time', fill='project')
    chart = chart + ggplot2.geom_bar(stat='identity')
    chart = chart + ggplot2.ggtitle("今日项目时间分布图")
    chart = chart + ggplot2.labs(x='项目', y='时间 (min)')
    chart = chart + ggplot2.theme(
        **{'axis.text.x': ggplot2.element_text(angle=45)})
    chart.plot()