def xcorr(ts1, ts2=None, maxlag=72, freq=12):
    """Replicate the matlab function xcorr using rpy2.

    rpy2 may need to be installed; the correlations themselves are computed
    by the R functions ``acf`` and ``ccf``.

    Input:
        ts1, ts2: timeseries, 1D arrays. If only ``ts1`` is given, the
            auto-correlation is calculated.
        maxlag: the maximum lag value.
        freq: frequency handed to R's ``ts``; use ``freq=12`` for monthly
            data (presumably ``freq=1`` for annual data).

    Output:
        cor_out: the correlation values at each lag (numpy array).
        lags: an array with the values of the lags, from ``-maxlag`` to
            ``maxlag``.

    To match the matlab output and make it easier to plot cross- and
    auto-correlations together, the auto-correlations are 'mirrored'
    for lags < 0.
    """
    # R functions used below
    rts = robjects.r['ts']    # creates an R timeseries
    rccf = robjects.r['ccf']  # cross-correlation
    racf = robjects.r['acf']  # auto-correlation

    # Convert the python array to an R FloatVector, then to an R timeseries
    ts1_r = rts(robjects.FloatVector(ts1), frequency=freq)

    # Identity test on purpose: ``ts2 == None`` on a numpy array compares
    # elementwise and cannot be used as a truth value.
    if ts2 is None:
        acf_ts1 = racf(ts1_r, lag_max=maxlag, plot=False)
        # Convert the R array back to a numpy array
        ac_ts1 = rpyn.ri2numpy(acf_ts1[0])[:, 0, 0]
        # Mirror the positive-lag values onto the negative lags
        cor_out = np.concatenate([ac_ts1[::-1], ac_ts1[1:maxlag + 1]])
    else:
        ts2_r = rts(robjects.FloatVector(ts2), frequency=freq)
        ccf_ts12 = rccf(ts1_r, ts2_r, lag_max=maxlag, plot=False)
        cor_out = rpyn.ri2numpy(ccf_ts12[0])[:, 0, 0]

    # -maxlag ... maxlag in steps of one, as floats
    lags = np.arange(-maxlag, maxlag + 1, dtype=float)
    return cor_out, lags
def ri2pandas(o):
    """Convert an R object to a pandas data frame when it is an R data frame.

    Anything that is not a ``DataFrame`` instance is handed to
    ``ro.default_ri2ro`` and that result is returned instead.
    """
    if not isinstance(o, DataFrame):
        return ro.default_ri2ro(o)

    # Go through the numpy converter, then build the frame from its records
    as_records = numpy2ri.ri2numpy(o)
    return PandasDataFrame.from_records(as_records)
def getRasterValues(lon, lat, layer, buffervalue):
    """Extract raster values around the given coordinates via R's 'raster'.

    ``layer`` is passed to R's ``raster``; ``lon``/``lat`` are column-bound
    with ``cbind`` and handed to ``extract`` together with ``buffervalue``
    (as ``buffer``) and ``small=True``. The first element of the extraction
    result is returned as a numpy array. The elapsed wall-clock time in
    seconds is printed before returning.
    """
    started = time.time()

    rinterface.initr()
    rlang = robjects.r
    rlang.require('raster')

    raster_layer = rlang.raster(layer)
    coordinates = rlang.cbind(lon, lat)
    extracted = rlang.extract(raster_layer, coordinates,
                              buffer=buffervalue, small=True)
    values = npri.ri2numpy(extracted[0])

    elapsed = time.time() - started
    print(str(elapsed))
    return values
def custom_pathways(gene_vals, kegg_file, pval):
    """Run a hypergeometric pathway test in R and write the result table.

    Parameters:
        gene_vals: mapping from gene key to a value convertible to float;
            keys whose value is below ``pval`` are treated as significant.
        kegg_file: path handed to R's ``getGmt`` to load the gene sets.
        pval: significance threshold (converted with ``float``).

    The genes are first mapped through ``annotate_ensembl``. Results are
    written tab-separated to ``Hypergeo_pathways.txt`` in the current
    working directory; nothing is returned.
    """
    importr("KEGGREST")
    importr("org.Mm.eg.db")
    importr("GSEABase")

    result = annotate_ensembl(gene_vals)
    threshold = float(pval)
    sigs = []  # annotated ids of the significant genes
    univ = []  # annotated ids of every gene (the test universe)
    for key in result:
        if float(gene_vals[key]) < threshold:
            sigs.append(result[key])
        univ.append(result[key])

    ro.globalenv["sigs"] = sigs
    ro.globalenv["univ"] = univ
    sets = ro.r.getGmt(kegg_file)
    ro.globalenv["sets"] = sets
    ro.r('genes_pathway <- lapply(sets, geneIds)')
    ro.r('names(genes_pathway) <- names(sets)')
    ro.r('hyperg <- Category:::.doHyperGInternal')
    ro.r('''hyperg_test <- function(pathway_genes, significant_genes, all_genes, over=TRUE) {
        white_balls_drawn <- length(intersect(significant_genes, pathway_genes))
        white_balls_in_urn <- length(pathway_genes)
        total_balls_in_urn <- length(all_genes)
        black_balls_in_urn <- total_balls_in_urn - white_balls_in_urn
        balls_pulled <- length(significant_genes)
        hyperg(white_balls_in_urn, black_balls_in_urn, balls_pulled, white_balls_drawn, over)
    }
    ''')
    ro.r('pVals_pathway <- t(sapply(genes_pathway, hyperg_test, sigs, univ))')
    ro.r('pVals_pathway <- cbind(rownames(pVals_pathway), pVals_pathway)')
    pvals = ro.r('pVals_pathway')

    # Columns 1-4 of the R matrix: pathway name, p-value, odds ratio, expected
    vector1 = rpyn.ri2numpy(pvals.rx(True, 1))
    vector2 = rpyn.ri2numpy(pvals.rx(True, 2))
    vector3 = rpyn.ri2numpy(pvals.rx(True, 3))
    vector4 = rpyn.ri2numpy(pvals.rx(True, 4))

    # The context manager closes the file even if a write fails part-way
    with open("Hypergeo_pathways.txt", "w") as output:
        output.write("Pathway\tP-value\tOddsRatio\tExpected\n")
        for i, pathway in enumerate(vector1[0]):
            output.write("{}\t{}\t{}\t{}\n".format(
                pathway, vector2[0][i], vector3[0][i], vector4[0][i]))
def manager(request):
    """Render the heatmap page (GET) or return heatmap data as JSON (POST).

    For a POST, the form fields ``query``, ``dataset``, ``method`` and
    ``category`` are required; ``count`` defaults to 20. The matching
    Analysis rows are loaded into an R data frame, processed by the R
    function ``heatmapDataCreator`` (sourced from
    ``SCRIPTPATH + "r/heatmapDataCreator.R"``), and the resulting matrix is
    returned as JSON with keys ``rows``, ``cols``, ``maxVal``, ``minVal``
    and ``data``. Problems with the request are reported as a JSON-encoded
    message string.
    """
    params = dict()
    params["queries"] = internal.ListQueries(request, {"projectID": [request.session["projectID"]]})

    if request.method != "POST":
        return render(request, "heatmap.html", params)

    resp = dict()
    queryname = request.POST.get("query", None)
    dataset = request.POST.get("dataset", None)
    method = request.POST.get("method", None)
    category = request.POST.get("category", None)
    count = request.POST.get("count", 20)

    if not queryname or not dataset or not method or not category:
        return HttpResponse(
            json.dumps("Please check that the input form is complete."),
            content_type="application/json"
        )

    try:
        query = Query.objects.get(project=request.session["projectID"], name=queryname)
    except Query.DoesNotExist:
        # An unknown query name is a user error, not a server error
        return HttpResponse(
            json.dumps("The selected query could not be found."),
            content_type="application/json"
        )

    from rpy2 import robjects
    import rpy2.robjects.numpy2ri as rpyn

    # fetch Analysis data of interest
    heatmapScript = SCRIPTPATH + "r/heatmapDataCreator.R"
    robjects.r.source(heatmapScript)
    heatmapCommand = robjects.r["heatmapDataCreator"]

    profiles1 = Analysis.objects.filter(
        project=request.session["projectID"],
        dataset=dataset,
        method=method,
        category=category,
        sample__in=query.expandsamples,
    ).values_list("sample", "entity", "profile")

    # format analysis data and load into R
    # list(...) so the columns can be indexed on Python 3, where zip is lazy
    profiles = list(zip(*profiles1))
    if not profiles:
        # No rows matched, so there are no columns to index below
        return HttpResponse(
            json.dumps("No analysis data found for the selected query."),
            content_type="application/json"
        )

    profileRdata = robjects.DataFrame(
        {
            "samples": robjects.StrVector(profiles[0]),
            "entity": robjects.StrVector(profiles[1]),
            "profile": robjects.FloatVector(profiles[2]),
        }
    )

    processedMatrix = heatmapCommand(profileRdata, count)
    vector = rpyn.ri2numpy(processedMatrix)

    resp["rows"] = list(processedMatrix.rownames)
    resp["cols"] = list(processedMatrix.colnames)
    resp["maxVal"] = numpy.amax(vector)
    resp["minVal"] = numpy.amin(vector)
    resp["data"] = vector.tolist()

    return HttpResponse(json.dumps(resp), content_type="application/json")
def from_dtw2dict(alignment):
    """Auxiliary function which transforms the useful information of the dtw
    function applied in R using rpy2 into python formats.

    ``alignment`` is the R result object; its ``names`` decide which
    optional entries are copied. The always-present entries are read with
    ``alignment.rx(...)``. The optional groups (traceback indices, local
    cost matrix, reference/query) are each copied whenever they are
    present, independently of one another.

    Returns a dict keyed by the R field names.
    """
    dtw_keys = list(alignment.names)
    bool_traceback = 'index1' in dtw_keys and 'index2' in dtw_keys
    bool_traceback = bool_traceback and 'stepsTaken' in dtw_keys

    ## Creating a dict to save all the information in python format
    dtw_dict = {}
    # Transformation into a dict
    dtw_dict['stepPattern'] = ri2numpy(alignment.rx('stepPattern'))
    dtw_dict['N'] = alignment.rx('N')[0]
    dtw_dict['M'] = alignment.rx('M')[0]
    dtw_dict['call'] = alignment.rx('call')
    dtw_dict['openEnd'] = alignment.rx('openEnd')[0]
    dtw_dict['openBegin'] = alignment.rx('openBegin')[0]
    dtw_dict['windowFunction'] = alignment.rx('windowFunction')
    dtw_dict['jmin'] = alignment.rx('jmin')[0]
    dtw_dict['distance'] = alignment.rx('distance')[0]
    dtw_dict['normalizedDistance'] = alignment.rx('normalizedDistance')[0]

    # Independent checks: an ``elif`` chain here would discard the cost
    # matrix and reference/query whenever traceback fields are present.
    if bool_traceback:
        dtw_dict['index1'] = np.array(ri2numpy(alignment.rx('index1')).astype(int))
        dtw_dict['index2'] = np.array(ri2numpy(alignment.rx('index2')).astype(int))
        dtw_dict['stepsTaken'] = ri2numpy(alignment.rx('stepsTaken'))
    if 'localCostMatrix' in dtw_keys:
        dtw_dict['localCostMatrix'] = np.array(ri2numpy(alignment.rx('localCostMatrix')))
    if 'reference' in dtw_keys and 'query' in dtw_keys:
        dtw_dict['reference'] = alignment.rx('reference')
        dtw_dict['query'] = alignment.rx('query')
    return dtw_dict
def matrix_to_normcount(matrix, samples):
    """Read normalized counts from ``matrix`` and update the samples' clones.

    ``matrix`` has one row per clone id and one column per sample name
    (row=clone; col=sample). For every clone of every sample, the
    normalized counts of the clone's ``vjseq_ids`` are summed and stored
    with ``clone.set_normcount``. Samples without a column in the matrix
    are skipped with a warning on stderr.

    Returns the same ``samples`` object that was passed in.

    Raises AssertionError if a clone references an id that is not a row of
    the matrix.
    """
    # Build the lookup sets once instead of scanning the R vectors each time
    samplenames = set(matrix.colnames)
    cloneids = set(matrix.rownames)

    for sample in samples:
        if sample.name not in samplenames:
            sys.stderr.write("Warning: sample %s does not have normalized "
                             "count.\n" % sample.name)
            continue
        for clone in sample.clones:
            normcount = 0.0
            for seq_id in clone.vjseq_ids:
                # Explicit raise (not ``assert``) so the check survives -O
                if seq_id not in cloneids:
                    raise AssertionError(
                        "clone id %r is not a row of the matrix" % (seq_id,))
                nc = matrix.rx[seq_id, sample.name]
                normcount = normcount + rpyn.ri2numpy(nc)[0]
            clone.set_normcount(normcount)
    return samples
# Three parallel weight lists (15 entries each) passed to calculate();
# at every position the three weights add up to 1.
w0 = [1, 0.75, 0.75, 0.5, 0.5, 0.5, 0.25, 0.25, 0.25, 0.25, 0, 0, 0, 0, 0]
w1 = [0, 0.25, 0, 0.5, 0.25, 0, 0.75, 0.5, 0.25, 0, 1, 0.75, 0.5, 0.25, 0]
w2 = [0, 0, 0.25, 0, 0.25, 0.5, 0, 0.25, 0.5, 0.75, 0, 0.25, 0.5, 0.75, 1]

path = './matrix_dist_norm/'
files = getfiles(path)

subset0 = []
subset1 = []
subset2 = []

print("Subset 0")
# Bound before the loop so it is defined even when the slice is empty
sub = 0
for rds_file in files[:3]:
    # Load the serialized R object and convert it to a numpy array
    m = r("readRDS('" + str(rds_file) + "')")
    mat = rpyn.ri2numpy(m)
    subset0.append(mat)
print("Calculating...")
calculate(subset0, w0, w1, w2, sub)
print("end\n")

print("Subset 1")
sub = 1
for rds_file in files[3:6]:
    m = r("readRDS('" + str(rds_file) + "')")
    mat = rpyn.ri2numpy(m)
    subset1.append(mat)
print("Calculating...")
calculate(subset1, w0, w1, w2, sub)
# some simple R things
print(R.r.median(R.IntVector([1, 2, 3, 4]))[0])

# create two R vectors and do correlation coefficient in R
a = R.IntVector([1, 2, 3, 4])
b = R.IntVector([1, 2, 3, 4])

# need the subscript to get authentic python type?
print(R.r.cor(a, b, method="pearson")[0])

my_vec = R.IntVector([1, 2, 3, 4])
my_chr_vec = R.StrVector(['aaa', 'bbb'])
my_float_vec = R.FloatVector([0.001, 0.0002, 0.003, 0.4])

print("\nconvert to numpy array?")
vector = rpyn.ri2numpy(my_float_vec)
print(vector)
python_list = list(my_float_vec)
print(python_list)

bigger_vec = R.IntVector(a + b)  # using multiple lists
print(list(bigger_vec))

# linear regression
observed = R.FloatVector([1.1, 1.2, 1.3])  # native python list will not do!!
theoretical = R.FloatVector([1.15, 1.25, 1.35])
# The R global environment is exposed as ``globalenv`` (lower case)
R.globalenv['observed'] = observed
R.globalenv['theoretical'] = theoretical
m = R.r.lm('theoretical ~ observed')
def rx(data, var):
    """Return element ``var`` of the R object ``data`` as a numpy array.

    The element is fetched with ``data.rx2(var)`` and converted with
    ``rpyn.ri2numpy``.
    """
    element = data.rx2(var)
    return rpyn.ri2numpy(element)
import numpy as np
import rpy2.robjects as ro
import rpy2.robjects.numpy2ri as n2r

n2r.activate()
r = ro.r
r.library('glmnet')

# input files (for this example) need to have header and NO index column
X = np.loadtxt('./x.csv', dtype=float, delimiter=',', skiprows=1)
y = np.loadtxt('./y.csv', dtype=int, delimiter=',', skiprows=1)
y = ro.FactorVector(list(y.transpose()))  # use factors

# Cross-validated binomial glmnet fit (3 folds)
trained_model = r['cv.glmnet'](X, y, nfolds=3, family="binomial")

lambda_ = np.asanyarray(trained_model.rx2('lambda'))
cvm_ = np.asanyarray(trained_model.rx2('cvm'))
cvsd_ = np.asanyarray(trained_model.rx2('cvsd'))

lambda_min = np.asanyarray(trained_model.rx2('lambda.min'))[0]
# 'cvm' entry at the position where 'lambda' equals 'lambda.min'
min_cvm = cvm_[np.argwhere(lambda_ == lambda_min)[0][0]]

# Positions whose 'cvm' is below min_cvm + 0.1 * cvsd; only the first one
# (idx[0]) is used below
idx = np.argwhere(cvm_ < min_cvm + 0.1 * cvsd_)

fit = trained_model.rx2('glmnet.fit')
beta = n2r.ri2numpy(r['as.matrix'](fit.rx2('beta')))

# Variables whose coefficient at that position exceeds 1e-5.
# NOTE(review): negative coefficients are not selected by this test;
# confirm whether abs(beta) was intended.
relvars = np.argwhere(beta[:, idx[0]].transpose()[0] > 1e-5)
print(relvars.transpose()[0])
def testAtomicVectorToNumpy(self):
    """An R integer vector converts to a numpy array with the same values."""
    v = robjects.vectors.IntVector((1, 2, 3))
    a = rpyn.ri2numpy(v)
    self.assertTrue(isinstance(a, numpy.ndarray))
    # The source vector is still readable after the conversion...
    self.assertEqual(1, v[0])
    # ...and the converted array carries the same first value.
    self.assertEqual(1, a[0])
def testDataFrameToNumpy(self):
    """An R data frame converts to a numpy recarray with one field per column."""
    columns = {'a': 1, 'b': 2}
    frame = robjects.vectors.DataFrame(columns)
    converted = rpyn.ri2numpy(frame)
    is_recarray = isinstance(converted, numpy.recarray)
    self.assertTrue(is_recarray)
    # Each column is reachable as an attribute of the record array
    self.assertEqual(1, converted.a[0])
    self.assertEqual(2, converted.b[0])
def fa(source=False, use_filter="default", data_file="latest", participant_subset="", drop_metadata=True, drop=None, clean=7, factors=5, facecolor="#ffffff"):
    """Run a factor analysis in R on questionnaire data and plot the loadings.

    Parameters:
        source: key into the 'Addresses' config section; when falsy, the
            value of 'Source'/'source' from the config file is used.
        use_filter: key into the 'Filters' config section naming the
            filter .csv file.
        data_file: base name (without ".csv") of the data file.
        participant_subset: "odd", "even", "male" or "female" to restrict
            the rows; any other value keeps all rows.
        drop_metadata: when True, the columns listed under the "metadata"
            filter are dropped.
        drop: iterable of filter names whose listed columns are dropped;
            ``None`` (default) drops nothing extra.
        clean: rows with at most this many distinct values are removed.
        factors: number of factors passed to R's ``factanal``.
        facecolor: figure face colour.

    The function draws the loadings matrix with matplotlib and returns
    nothing.
    """
    # Avoid a mutable default argument; None means "no additional filters"
    if drop is None:
        drop = []

    #gets config file:
    config = get_config_file(localpath=path.dirname(path.realpath(__file__))+'/')

    #IMPORT VARIABLES
    if not source:
        source = config.get('Source', 'source')
    data_path = config.get('Addresses', source)
    filter_dir = config.get('Paths', "filter_dir")
    filter_name = config.get("Filters", use_filter)
    #END IMPORT VARIABLES

    filter_path = path.dirname(path.realpath(__file__)) + '/' + filter_dir + filter_name + '.csv'

    filters = DataFrame.from_csv(filter_path, header=None).transpose() # transpose filters because of .csv file formatting
    all_data = DataFrame.from_csv(data_path + data_file + ".csv")
    all_data = all_data.reset_index(level=0)

    #clean data of respondents who only check extreme answers:
    # A real list is used as the boolean mask (a lazy ``map`` object is not
    # a valid indexer on Python 3).
    keep_mask = [len(set(row)) > clean for row in np.array(all_data)]
    all_data = all_data[keep_mask]

    if drop_metadata == True:
        # drops metadata
        all_data = all_data.drop(filters["metadata"][Series.notnull(filters["metadata"])], axis=1)

    drop_list = []
    for drop_item in drop:
        # compile list of column names to be dropped:
        drop_list += list(filters[drop_item][Series.notnull(filters[drop_item])])
    #get unique column names (the list may contain duplicates if overlaying multiple filters):
    drop_list = list(set(drop_list))

    all_data = all_data.drop(drop_list, axis=1)

    if participant_subset == "odd":
        # selects only odd indexes (keep the other dataset half for validation)
        keep_rows = all_data.index.values[1::2]
        filtered_data = all_data.ix[keep_rows]
    elif participant_subset == "even":
        # selects only even indexes (keep the other dataset half for validation)
        keep_rows = all_data.index.values[0::2]
        filtered_data = all_data.ix[keep_rows]
    elif participant_subset == "male":
        # selects only male participants
        filtered_data = all_data[all_data['My legal gender:'] == 'Male']
    elif participant_subset == "female":
        # selects only female participants
        filtered_data = all_data[all_data['My legal gender:'] == 'Female']
    else:
        filtered_data = all_data

    #convert to correct type for analysis:
    filtered_data_array = np.array(filtered_data, dtype='float64')
    filtered_data_array = filtered_data_array / 100

    fit = r.factanal(filtered_data_array, factors, rotation='promax')
    load = r.loadings(fit)
    load = numpy2ri.ri2numpy(load)
    load = r.t(load)

    # Colour map arguments are derived from the extreme loading values
    remapped_cmap = remappedColorMap(cm.PiYG, start=(np.max(load)-abs(np.min(load)))/(2*np.max(load)), midpoint=abs(np.min(load))/(np.max(load)+abs(np.min(load))), name='shrunk')

    fig, ax = plt.subplots(nrows=1, ncols=1, figsize=(17.5, 5), facecolor=facecolor)
    graphic = ax.imshow(load, cmap = remapped_cmap, interpolation='none')
    ax.xaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    ax.yaxis.set_major_locator(matplotlib.ticker.MultipleLocator(base=1.0))
    ax.set_xticklabels([0]+filtered_data.columns.tolist(),fontsize=8,rotation=90)
    ax.set_yticklabels(np.arange(factors+1))
    ax.set_ylabel('Factors')
    ax.set_title("Question Loadings on Factors")

    #Recolor plot spines:
    for spine_side in ["bottom", "top", "left", "right"]:
        ax.spines[spine_side].set_color("#777777")

    #Remove ticks:
    plt.tick_params(axis='both', which='both', left="off", right="off", bottom='off', top='off')

    divider = make_axes_locatable(ax)
    #calculate width for cbar so that it is equal to the question column width:
    # NOTE(review): with integer operands this division truncates on
    # Python 2 but not on Python 3 -- confirm which width is intended.
    cbar_width = str(100/np.shape(load)[1])+ "%"
    cax = divider.append_axes("right", size=cbar_width, pad=0.05)
    cbar = colorbar(graphic, cax=cax, drawedges=True)

    #Limit the number of ticks:
    tick_locator = ticker.MaxNLocator(nbins=6)
    cbar.locator = tick_locator
    cbar.update_ticks()

    #Align ticklabels so that negative values are not misaligned (meaning right align):
    for t in cbar.ax.get_yticklabels():
        t.set_horizontalalignment('right')
        t.set_x(0.045*(np.shape(load)[1]+6))

    #Tweak color bar borders
    cbar.outline.set_color("#666666")
    cbar.dividers.set_linewidth(0)
def testAtomicVectorToNumpy(self):
    """An R integer vector converts to a numpy array with the same values."""
    v = robjects.vectors.IntVector((1, 2, 3))
    a = rpyn.ri2numpy(v)
    self.assertTrue(isinstance(a, numpy.ndarray))
    # The source vector is still readable after the conversion...
    self.assertEqual(1, v[0])
    # ...and the converted array carries the same first value.
    self.assertEqual(1, a[0])
def doMigration(self, ipSrc, movUsers, ipDst, dstUsers):
    """Select the interests to migrate to the destination and send them.

    Parameters:
        ipSrc: sequence of sources; each entry is passed to
            ``getMergedInterests`` and ``ipSrc[0][0]`` to ``getCacheSize``.
        movUsers: per-source user counts, indexed like ``ipSrc``.
        ipDst: destination; passed to ``getMergedInterests`` and
            ``sendMigrationData`` (``ipDst[0]`` to ``getCacheSize``).
        dstUsers: user count weighting the destination popularity.

    Each merged-interest value is indexed as ``v[0]`` (used as file size)
    and ``v[1]`` (used as popularity). The collected interests are ranked
    by ``MADM`` and then added, in ranked order, while they fit in the
    destination cache.

    Returns 200 when ``MADM`` reports -1 or -2; otherwise the code
    returned by ``sendMigrationData``.
    """
    cacheSize = self.getCacheSize(ipDst[0], ipSrc[0][0])

    dIntSrc = [self.getMergedInterests(src, "1 months") for src in ipSrc]
    dIntDst = self.getMergedInterests(ipDst, "1 months")

    #Aux Lists
    auxB = []  # flat [ID, popularity, ID, popularity, ...]
    auxC = []  # flat [ID, file size, ID, file size, ...]
    auxL = []  # interest keys, position = ID - 1
    auxS = []  # file sizes, position = ID - 1
    seen = set()  # same keys as auxL, for constant-time membership tests

    numCells = len(dIntSrc) + 1  # need mapping (-1)

    def addInterest(key, size, popularity):
        # IDs are 1-based and follow insertion order
        cnt = len(auxL) + 1
        ### ID | Popularity
        auxB.extend([cnt, popularity])
        ### ID | File Size
        auxC.extend([cnt, size])
        auxL.append(key)
        auxS.append(size)
        seen.add(key)

    def popularityAt(interests, key):
        return interests[key][1] if key in interests else 0

    print("Processing 1st Source Interests")
    # Adding prefixes requested at 1st source
    for k, v in dIntSrc[0].items():
        popDst = popularityAt(dIntDst, k)
        popSrcs = 0
        for i in range(numCells - 1):
            popSrcs += popularityAt(dIntSrc[i], k) * movUsers[i]
        popInt = (popSrcs + popDst * dstUsers) / numCells
        addInterest(k, v[0], popInt)

    print("Processing other Sources Interests")
    for j in range(1, numCells - 1):
        for k, v in dIntSrc[j].items():
            if k in seen:
                continue
            # Destination popularity is looked up for THIS key; reusing the
            # value left over from the first loop would rank it wrongly.
            popDst = popularityAt(dIntDst, k)
            popSrcs = 0
            # Sources before j cannot hold k, or it would already be seen
            for i in range(j, numCells - 1):
                popSrcs += popularityAt(dIntSrc[i], k) * movUsers[i]
            popInt = (popSrcs + popDst * dstUsers) / numCells
            addInterest(k, v[0], popInt)

    print("Processing Destination Interests")
    # Adding interests requested at destination still missing
    for k, v in dIntDst.items():
        if k not in seen:
            popInt = (v[1] * dstUsers) / numCells
            addInterest(k, v[0], popInt)

    print("Going to run MADM")
    ## Create matrix Ben and matrix Cost
    mBen = r.matrix(auxB, ncol=2, byrow=True)
    mCost = r.matrix(auxC, ncol=2, byrow=True)
    vBen = ro.FloatVector([1.0])
    vCost = ro.FloatVector([1.0])

    output = MADM(mBen, mCost, vBen, vCost, 1)
    if output == -1:
        return 200

    outAux = rpyn.ri2numpy(output)
    # NOTE(review): the layout of MADM's result is not visible here; the
    # checks below are kept exactly as they were.
    if outAux[0] == -2:
        return 200
    elif outAux[0] == -1:
        code = self.sendMigrationData(auxL, ipDst)
        return code

    cacheList = []
    usedCache = 0
    for out in outAux:
        pos = int(out[0]) - 1
        # ``<=`` lets an item fill the cache exactly; with a strict ``<``
        # the early exit below could never trigger.
        if auxS[pos] + usedCache <= cacheSize:
            cacheList.append(auxL[pos])
            usedCache += auxS[pos]
        if usedCache == cacheSize:
            break

    code = self.sendMigrationData(cacheList, ipDst)
    return code
# some simple R things
print(R.r.median(R.IntVector([1, 2, 3, 4]))[0])

# create two R vectors and do correlation coefficient in R
a = R.IntVector([1, 2, 3, 4])
b = R.IntVector([1, 2, 3, 4])

# need the subscript to get authentic python type?
print(R.r.cor(a, b, method="pearson")[0])

my_vec = R.IntVector([1, 2, 3, 4])
my_chr_vec = R.StrVector(['aaa', 'bbb'])
my_float_vec = R.FloatVector([0.001, 0.0002, 0.003, 0.4])

print("\nconvert to numpy array?")
vector = rpyn.ri2numpy(my_float_vec)
print(vector)
python_list = list(my_float_vec)
print(python_list)

bigger_vec = R.IntVector(a + b)  # using multiple lists
print(list(bigger_vec))

# linear regression
observed = R.FloatVector([1.1, 1.2, 1.3])  # native python list will not do!!
theoretical = R.FloatVector([1.15, 1.25, 1.35])
R.globalenv['observed'] = observed
R.globalenv['theoretical'] = theoretical
m = R.r.lm('theoretical ~ observed')
# R.abline(lm,col='color') # add regression line to EXISTING plot
# Transform URL string into normal string in python (%20 to space etc) rpy2: Convert FloatVector or Matrix back to a Python array or list? import rpy2.robjects.numpy2ri as rpyn vector=rpyn.ri2numpy(vector_R)
# Wrap the values as an R circular object (degrees, 'geographics' template)
parfOrig_r = IntVector(parfOrig)
parfOrig_rc = r.circular(parfOrig_r, units='degrees', template='geographics')

# optimal bandwith determined based on a VonMises distribution
# using circular.bw_nrd_circular()
_bandwith_by_region = {'andes': 2.0, 'qf': 3.5, 'amazon': 1.0}
if reg not in _bandwith_by_region:
    # Fail with a clear message instead of leaving 'bandwith' unbound
    # (or silently reusing a value from an earlier run).
    raise ValueError('no bandwith defined for region: ' + str(reg))
bandwith = _bandwith_by_region[reg]

# calculate circular density
print('calculating density for ' + paramOrig + ', please wait...')
dens = r.density(parfOrig_rc, bw=bandwith, kernel='vonmises')
print('density for ' + paramOrig + ' OK')

# get density values back to python
densX = rpyn.ri2numpy(dens[1])
densY = rpyn.ri2numpy(dens[2])
gc.collect()

# densityOrig = ss.kde.gaussian_kde(parfOrig)
# x = np.arange(0., np.max(parfOrig), .1)
# yOrig = densityOrig(x)

# plot
# cartesian plot
fig = reg + '_aspect_compass_original_density_circular.svg'
print('fig will be: ' + fig)
plt.ylabel('density')
plt.xlabel('aspect_compass')
plt.plot(densX, densY, label=paramOrig)
plt.xticks((-270, -180, -90, 0, 90), ('E', 'S', 'W', 'N', 'E'))
print('cartesian plot for ' + paramOrig + ' OK')
gc.collect()