def _initialize(self, data1, data2): try: import statsmodels.api as sm lowess = sm.nonparametric.lowess except ImportError: print("===================================") print("Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'") print("===================================") # NOTE: delta parameter is only available from statsmodels > 0.5.0 delta = (max(data1) - min(data1)) * 0.01 frac = 0.1 if len(data1) < 100: frac = 1.0 k = 0 while k <= 10: k += 1 # Input data is y/x -> needs switch result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=frac, it=10) if any( [math.isnan(r[1]) for r in result] ): print ("WARNING: lowess returned NA data points! We are trying to fix it") delta = delta * k result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=frac, it=10) frac = 1.0 else: break return [ r[0] for r in result], [r[1] for r in result]
def correct(sample, gcCount, binSize, maxN=0.1, minRD=0.0001, fVal=0.1, iVal=3):
    """Divide per-bin values in ``sample`` by a lowess curve fitted against ``gcCount``.

    Parameters:
        sample:   mapping chromosome -> sequence of per-bin values.
        gcCount:  mapping chromosome -> per-bin values used as x; must also
                  hold an ``'N' + chromosome`` entry that is used for filtering.
        binSize:  scales both filter thresholds.
        maxN:     a bin is used only if its ``'N'`` value is below ``binSize * maxN``.
        minRD:    a bin is used only if its sample value is above ``binSize * minRD``.
        fVal, iVal: passed to ``biostat.lowess`` as ``f`` and ``iter``.

    Returns a dict chromosome -> list with one entry per bin: the sample value
    divided by the fitted curve for bins that passed the filter, 0 otherwise.
    """
    chroms = list(sample.keys())

    def usable_bins(chrom):
        """Yield the indices of the bins of ``chrom`` that pass both filters."""
        for idx in range(min(len(gcCount[chrom]), len(sample[chrom]))):
            if (gcCount['N' + chrom][idx] < binSize * maxN
                    and sample[chrom][idx] > binSize * minRD):
                yield idx

    allX = []
    allY = []
    for chrom in chroms:
        for idx in usable_bins(chrom):
            allX.append(gcCount[chrom][idx])
            allY.append(sample[chrom][idx])

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    allX = np.array(allX, float)
    allY = np.array(allY, float)

    # Fitted values come back in the order the points were collected, so an
    # iterator hands them out again in O(1) each (``list.pop(0)`` is O(n)).
    lowessCurve = iter(biostat.lowess(allX, allY, f=fVal, iter=iVal).tolist())

    correctedSample = dict()
    for chrom in chroms:
        corrected = [0] * min(len(gcCount[chrom]), len(sample[chrom]))
        for idx in usable_bins(chrom):
            corrected[idx] = sample[chrom][idx] / next(lowessCurve)
        correctedSample[chrom] = corrected
    return correctedSample
def correct(sample, gcCount, binSize, maxN=0.1, minRD=0.0001, fVal=0.1, iVal=3):
    """Divide per-bin values in ``sample`` by a lowess curve fitted against ``gcCount``.

    Parameters:
        sample:   mapping chromosome -> sequence of per-bin values.
        gcCount:  mapping chromosome -> per-bin values used as x; must also
                  hold an ``'N' + chromosome`` entry that is used for filtering.
        binSize:  scales both filter thresholds.
        maxN:     a bin is used only if its ``'N'`` value is below ``binSize * maxN``.
        minRD:    a bin is used only if its sample value is above ``binSize * minRD``.
        fVal, iVal: passed to ``biostat.lowess`` as ``f`` and ``iter``.

    Returns a dict chromosome -> list with one entry per bin: the sample value
    divided by the fitted curve for bins that passed the filter, 0 otherwise.
    """
    chroms = list(sample.keys())

    def usable_bins(chrom):
        """Yield the indices of the bins of ``chrom`` that pass both filters."""
        for idx in range(min(len(gcCount[chrom]), len(sample[chrom]))):
            if (gcCount['N' + chrom][idx] < binSize * maxN
                    and sample[chrom][idx] > binSize * minRD):
                yield idx

    allX = []
    allY = []
    for chrom in chroms:
        for idx in usable_bins(chrom):
            allX.append(gcCount[chrom][idx])
            allY.append(sample[chrom][idx])

    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    allX = np.array(allX, float)
    allY = np.array(allY, float)

    # Fitted values come back in the order the points were collected, so an
    # iterator hands them out again in O(1) each (``list.pop(0)`` is O(n)).
    lowessCurve = iter(biostat.lowess(allX, allY, f=fVal, iter=iVal).tolist())

    correctedSample = dict()
    for chrom in chroms:
        corrected = [0] * min(len(gcCount[chrom]), len(sample[chrom]))
        for idx in usable_bins(chrom):
            corrected[idx] = sample[chrom][idx] / next(lowessCurve)
        correctedSample[chrom] = corrected
    return correctedSample
def _initialize(self, data1, data2): try: from Bio.Statistics.lowess import lowess except ImportError: print "===================================" print "Cannot import the module lowess from Biopython, \nplease install 'biopython' from https://pypi.python.org/pypi/biopython" print "===================================" old_settings = numpy.seterr(all='ignore') result = lowess(numpy.array(data1), numpy.array(data2), f=0.1, iter=3) if all([math.isnan(it) for it in result]): # Try standard paramters result = lowess(numpy.array(data1), numpy.array(data2)) numpy.seterr(**old_settings) return data1, result
def test_Precomputed(self):
    """Compare lowess output for y = x**2 with stored reference values."""
    x = array([0.0, 1.0, 2.0, 3.0, 5.0, 9.0, 11.0])
    y = x**2
    # Reference smooth output, computed ahead of time.
    expected = array([
        -2.96219015,
        1.72680044,
        6.58686813,
        11.62986671,
        28.18598762,
        86.85271581,
        116.83893423,
    ])
    # Smooth output produced by the lowess function under test.
    actual = lowess(x, y, f=2./3., iter=3)
    for pair in zip(expected, actual):
        reference_value, computed_value = pair
        self.assertAlmostEqual(reference_value, computed_value, 4)
def _initialize(self, data1, data2): try: import statsmodels.api as sm lowess = sm.nonparametric.lowess except ImportError: print("===================================") print( "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'" ) print("===================================") # NOTE: delta parameter is only available from statsmodels > 0.5.0 delta = (max(data1) - min(data1)) * 0.01 frac = 0.1 if len(data1) < 100: frac = 1.0 k = 0 while k <= 10: k += 1 # Input data is y/x -> needs switch result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=frac, it=10) if any([math.isnan(r[1]) for r in result]): print( "WARNING: lowess returned NA data points! We are trying to fix it" ) delta = delta * k result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=frac, it=10) frac = 1.0 else: break return [r[0] for r in result], [r[1] for r in result]
def test_Precomputed(self):
    """Compare lowess output for y = x**2 with stored reference values."""
    x = array([0.0, 1.0, 2.0, 3.0, 5.0, 9.0, 11.0])
    y = x**2
    # Reference smooth output, computed ahead of time.
    expected = array([
        -2.96219015,
        1.72680044,
        6.58686813,
        11.62986671,
        28.18598762,
        86.85271581,
        116.83893423,
    ])
    # Smooth output produced by the lowess function under test.
    actual = lowess(x, y, f=2./3., iter=3)
    for pair in zip(expected, actual):
        reference_value, computed_value = pair
        self.assertAlmostEqual(reference_value, computed_value, places=4)
def scatterLinePlot(self,title_I,xlabel_I,ylabel_I,x_data_I,y_data_I,text_labels_I=[],fit_func_I='linear',show_eqn_I=True,show_r2_I=True,filename_I=None,show_plot_I=True):
    '''Create a scatter plot with a fitted line.

    Input:
        title_I, xlabel_I, ylabel_I = plot title and axis labels
        x_data_I, y_data_I = data to scatter and to fit
        text_labels_I = optional per-point annotations (same order as the data)
        fit_func_I = 'linear' (linregress) or 'lowess' (lowess fit forced to be
                     non-decreasing); any other value draws the scatter only
        show_eqn_I, show_r2_I = annotate the fit equation / r2 value; these only
                     apply to the linear fit, which is the only one that has them
        filename_I = if given, the figure is saved to this path
        show_plot_I = if true, the figure is shown with plt.show()
    '''
    is_linear = fit_func_I == 'linear'
    x2 = None
    y2 = None
    # Create the fit:
    if is_linear:
        slope, intercept, r_value, _p_value, _std_err = linregress(x_data_I, y_data_I)
        r2 = r_value**2  # coefficient of determination
        x2 = x_data_I
        y2 = [d*slope + intercept for d in x2]
    elif fit_func_I == 'lowess':
        x2 = numpy.array(x_data_I)
        y2_lowess = lowess.lowess(x2, numpy.array(y_data_I), f=0.1, iter=100)
        # Force the curve to be non-decreasing: a value that drops below its
        # predecessor is replaced by that predecessor.
        y2 = numpy.zeros_like(y2_lowess)
        for i, y2s in enumerate(y2_lowess):
            if i != 0 and y2s < y2[i-1]:
                y2[i] = y2[i-1]
            else:
                y2[i] = y2s
    # Create a Figure object.
    fig = plt.figure()
    # Create an Axes object.
    ax = fig.add_subplot(1,1,1)  # one row, one column, first plot
    # Plot the data.
    ax.scatter(x_data_I, y_data_I, color="blue", marker="o")
    # Only draw a line when a fit was actually computed.
    if x2 is not None:
        ax.plot(x2, y2, color='red', linestyle='-')
    # Add a title.
    ax.set_title(title_I)
    # Add some axis labels.
    ax.set_xlabel(xlabel_I)
    ax.set_ylabel(ylabel_I)
    # Label data points.
    if text_labels_I:
        for i, txt in enumerate(text_labels_I):
            ax.annotate(txt, (x_data_I[i], y_data_I[i]))
    # Show fit equation (slope/intercept exist for the linear fit only;
    # previously the lowess fit crashed here with a NameError).
    if is_linear and show_eqn_I:
        fit_eqn = "y = " + str(slope) + "*x"
        if intercept < 0:
            fit_eqn += " " + str(intercept)
        elif intercept > 0:
            fit_eqn += " +" + str(intercept)
        ax.annotate(fit_eqn, (min(x_data_I), max(y_data_I)))
    # Show r2 value (linear fit only, see above).
    if is_linear and show_r2_I:
        r2_label = "r2 = " + str(r2)
        ax.annotate(r2_label, (min(x_data_I), max(y_data_I)-0.5))
    # Produce an image.
    if filename_I:
        fig.savefig(filename_I)
    # Show the image.
    if show_plot_I:
        plt.show()
def _initialize(self, data1, data2): try: import statsmodels.api as sm lowess = sm.nonparametric.lowess except ImportError: print "===================================" print "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'" print "===================================" result = lowess(numpy.array(data1), numpy.array(data2)) return result
def _initialize(self, data1, data2): try: import cylowess lowess = cylowess.lowess except ImportError: print "===================================" print "Cannot import the module lowess from 'cylowess', \nplease install the cylowess package according to http://slendermeans.org/lowess-speed.html (see also README)" print "===================================" delta = (max(data1) - min(data1)) * 0.01 result = lowess(numpy.array(data1), numpy.array(data2), delta=delta) return result
def _initialize(self, data1, data2): try: import statsmodels.api as sm lowess = sm.nonparametric.lowess except ImportError: print "===================================" print "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'" print "===================================" # Input data is y/x -> needs switch result = lowess(numpy.array(data2), numpy.array(data1)) return result
def _initialize(self, data1, data2): try: import cylowess lowess = cylowess.lowess except ImportError: print "===================================" print "Cannot import the module lowess from 'cylowess', \nplease install the cylowess package according to http://slendermeans.org/lowess-speed.html (see also README)" print "===================================" delta = (max(data1) - min(data1)) * 0.01 # Input data is y/x -> needs switch result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=0.1, it=10) return [ r[0] for r in result], [r[1] for r in result]
def _initialize(self, data1, data2): try: import statsmodels.api as sm lowess = sm.nonparametric.lowess except ImportError: print("===================================") print("Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'") print("===================================") # NOTE: delta parameter is only available from statsmodels > 0.5.0 delta = (max(data1) - min(data1)) * 0.01 # Input data is y/x -> needs switch result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=0.1, it=10) return [ r[0] for r in result], [r[1] for r in result]
def _initialize(self, data1, data2): try: import cylowess lowess = cylowess.lowess except ImportError: print "===================================" print "Cannot import the module lowess from 'cylowess', \nplease install the cylowess package according to http://slendermeans.org/lowess-speed.html (see also README)" print "===================================" delta = (max(data1) - min(data1)) * 0.01 # Input data is y/x -> needs switch result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=0.1, it=10) return [r[0] for r in result], [r[1] for r in result]
def gc_correct_lowess(gc2bin, raw_counts):
    """Return a dict bin -> count, corrected per GC group with a lowess trend.

    For every ``gc_content`` group in ``gc2bin`` the counts of its bins are
    looked up in ``raw_counts``.  Groups with three or fewer bins are copied
    unchanged.  For larger groups the lowess trend over the bin index is
    subtracted and the group's average depth (``average_depth_in_gc``) is
    added back; if lowess raises FloatingPointError the raw counts are kept.
    """
    corrected = {}
    for gc_content, bins in gc2bin.items():
        depths = np.array([raw_counts[name] for name in bins])
        adjusted = depths
        if len(depths) > 3:
            mean_depth = average_depth_in_gc(gc2bin, raw_counts, gc_content)
            # key_list, value_list = zip(*sorted(bin_counts.items()))
            positions = np.array(range(len(bins)))
            try:
                trend = lowess(positions, depths)
                adjusted = depths - (trend - mean_depth)
            except FloatingPointError:
                # Fit failed numerically: fall back to the uncorrected counts.
                adjusted = depths
        corrected.update(zip(bins, adjusted))
    return corrected
def _initialize(self, data1, data2): try: import statsmodels.api as sm lowess = sm.nonparametric.lowess except ImportError: print("===================================") print( "Cannot import the module lowess from 'statsmodels', \nplease install the Python package 'statsmodels'" ) print("===================================") # NOTE: delta parameter is only available from statsmodels > 0.5.0 delta = (max(data1) - min(data1)) * 0.01 # Input data is y/x -> needs switch result = lowess(numpy.array(data2), numpy.array(data1), delta=delta, frac=0.1, it=10) return [r[0] for r in result], [r[1] for r in result]
def smooth_function(zinput,smooth_method = 'lowess',span = .05):
    """Smooth the values of ``zinput`` (a mapping key -> value).

    smooth_method:
        'lowess'   - Biopython lowess over all entries whose value is not the
                     string 'None'; returns a NEW dict int(key) -> smoothed value.
        'triangle' - weighted moving average over keys ``key-span .. key+span``
                     with weight ``sqrt(span + 1 - distance)``; missing or
                     non-numeric neighbours get weight 0.  The result is
                     written back into ``zinput``, which is also returned.
        anything else - ``zinput`` is returned untouched.
    span: lowess fraction for 'lowess'; window half-width (floored to int)
          for 'triangle'.
    """
    if smooth_method not in ['lowess','triangle']:
        return zinput
    xarray = []
    yarray = []
    for key in zinput.keys():
        if zinput[key]!='None':
            xarray.append(float(key))
            yarray.append(float(zinput[key]))
    from numpy import array
    if smooth_method == 'lowess':
        from Bio.Statistics.lowess import lowess
        smoothed = lowess(array(xarray),array(yarray),float(span),3)
        return dict(zip([int(p) for p in xarray],smoothed))

    # smooth_method == 'triangle'
    from numpy import average
    span = int(span) #Takes the floor--so no smoothing on a span < 1.
    windowsize = span*2 + 1
    # Read from a snapshot: the result is written back into ``zinput`` and
    # later windows must see the original values, not already-smoothed ones.
    original = dict(zinput)
    returnval = zinput
    for key in original:
        surrounding = array([0.0]*windowsize)
        # Float weights: an integer array would truncate the square roots.
        weights = array([0.0]*windowsize)
        for i in range(windowsize):
            key_dist = i - span #if span is 2, the zeroeth element is -2, the second element is 0 off, etc.
            workingon = int(key) + key_dist
            try:
                surrounding[i] = float(original[workingon])
                weights[i] = (span + 1 - abs(key_dist))**.5
            except (KeyError, ValueError, TypeError):
                # Neighbour missing or not numeric: leave it out of the average.
                surrounding[i] = 0
                weights[i] = 0
        returnval[key] = round(average(surrounding,weights=weights),3)
    return returnval
def plot_rain_by_year(self):
    """Show a bar chart of yearly rain totals with the average and a lowess trend.

    Data comes from ``self.get_rain_totals()``; element 0 is used as the years
    and element 1 as the rain totals.
    """
    # Get data
    data = self.get_rain_totals()
    # ``np.float`` was only an alias of the builtin ``float`` and has been
    # removed from NumPy; the builtin gives the same dtype.
    years = np.asarray(data[0]).astype(float)
    x_pos = np.arange(len(years))
    rain = np.asarray(data[1])

    # Plot bars
    plt.bar(x_pos, rain, align='center', alpha=0.4)
    plt.xticks(x_pos, years)
    # Real booleans: newer Matplotlib no longer interprets the string 'off'.
    plt.tick_params(axis='x', which='both', bottom=False, top=False)
    # NOTE(review): the x axis holds the years; confirm 'Rain' is the intended label.
    plt.xlabel('Rain')

    # Plot average
    avg = np.average(rain)
    plt.axhline(avg)

    # Plot trend line
    trend = lowess(years, rain, f=0.5)
    plt.plot(trend, linestyle='--')

    plt.title('Total rain in London per year')
    plt.show()
def get_GC_depth_correction_vect(sum_depths,n_bases,GC_width,max_correction_factor,calc_correction_factor=True,correct_range=False):
    """Compute a per-GC-bin depth correction factor from a lowess fit.

    sum_depths, n_bases: arrays with ``2*GC_width + 2`` entries (summed depth
        and number of bases per GC bin).
    GC_width: defines the number of GC bins (see above).
    max_correction_factor: the correction is clipped to
        ``[1/max_correction_factor, max_correction_factor]``; a value <= 0
        disables the correction (all ones).  Ignored when ``correct_range``.
    calc_correction_factor: accepted but not used by this function.
    correct_range: if true, the clip limit is the correction value at the
        first bin with GC fraction > 0.75.

    The average depth is lowess-smoothed for GC fractions in [0.25, 0.75] and
    extended to both sides with straight lines from ``get_line_params`` on the
    outermost smoothed points.

    Returns ``(GCp, ave_depths, GCp2_8, lowess_depth, correction, mu)``.
    """
    #max_correction_factor = (max_correction_factor==-1) and 999999 or max_correction_factor
    assert(sum_depths.shape[0]==2*GC_width+1+1) #have to count 0 as well~ so, 41+1

    ave_depths = sum_depths/n_bases.astype(np.float64)
    GCp = np.arange(0,2*GC_width+1+1)/float((2*GC_width+1))
    # 0/0 bins come out as NaN; treat them as zero depth.
    ave_depths[np.where(np.isnan(ave_depths))] = 0

    #now, chop off the nans, make the array only as wide as the non-nans
    central = np.where(np.logical_and(GCp>=0.25,GCp<=.75))
    ave_depths2_8 = ave_depths[central]
    GCp2_8 = GCp[central]
    GCp0_2 = GCp[np.where(GCp<0.25)]
    GCp8_10 = GCp[np.where(GCp>.75)]

    #now apply lowess on this
    lowess_depth = biostats.lowess(GCp2_8,ave_depths2_8,f=.15)
    #lowess_depth = biostats.lowess(GCp2_8,ave_depths2_8,f=.1) #USING .1... not working... why?
    #lowess_depth = biostats.lowess(GCp2_8,ave_depths2_8,f=.25)

    line_func = lambda p, x: (p[0]*x+p[1])
    k=5
    y1 = lowess_depth[0:k+1]
    x1 = GCp2_8[0:k+1]
    l = lowess_depth.shape[0]
    y2 = lowess_depth[l-k:l+1]
    x2 = GCp2_8[l-k:l+1]

    # Debug output; single-argument print(...) gives the same text on
    # Python 2 and 3.
    print(lowess_depth)
    print("fit lowess on %s" % (ave_depths2_8,))
    print(GCp0_2)
    print(GCp8_10)
    print("%s %s" % (x1, y1))  # was paired with y2 by mistake
    print("%s %s" % (x2, y2))

    p1 = get_line_params(x1,y1)
    p2 = get_line_params(x2,y2)
    left_line = line_func(p1,GCp0_2)
    right_line = line_func(p2,GCp8_10)
    lowess_depth = np.r_[left_line,lowess_depth,right_line]

    mu = sum_depths.astype(np.float64).sum()/n_bases.astype(np.float64).sum()
    # Keep the divisor strictly positive before taking the ratio.
    lowess_depth = np.clip(lowess_depth,1e-10,1e30)
    correction = mu/lowess_depth

    if(correct_range):
        GC_max = 0.75
        iGC_max = (np.where(GCp>GC_max))[0][0]
        max_correction_factor = correction[iGC_max]
        correction=np.clip(correction,1.0/max_correction_factor,max_correction_factor)
    elif(max_correction_factor<=0):
        correction=np.ones(correction.shape[0])
    else:
        correction=np.clip(correction,1.0/max_correction_factor,max_correction_factor)
    return GCp,ave_depths,GCp2_8,lowess_depth,correction,mu
# Tile the average signal ``aveSig`` 12 times into ``aveTrend``.
period = len(aveSig)
for y in range(0, 12):
    for d in range(0, period):
        index = y * period + d
        aveTrend[index] = aveSig[d]

#subtract the average from the signal
ltTrend = numpy.copy(dvals)
for y in range(0, 12):
    for d in range(0, period):
        index = y * period + d
        ltTrend[index] = dvals[index] - aveSig[d]

#loess smoothed trend (rough stuff)
# ``numpy.float`` was only an alias of the builtin ``float`` and has been
# removed from NumPy; the builtin gives the same dtype.
x = numpy.array(range(0, len(dvals)), float)
y = numpy.array(ltTrend, float)
result = lowess.lowess(x, y, f=0.5 / 3., iter=2)

#residuals
resids = ltTrend - result

#plots: raw signal, repeated average, smoothed trend, residuals
for row, series in enumerate((dvals, aveTrend, result, resids), 1):
    pyplot.subplot(4, 1, row)
    pyplot.plot(x, series)
pyplot.show()
def gen_mh_scatter(x,
                   y,
                   color='green',
                   xlabel=None,
                   ylabel=None,
                   one_line=False,
                   fig=None,
                   ax=None,
                   marker='.',
                   connect=False,
                   label=None,
                   trendline=None,
                   alpha=1.0,
                   mask=None,
                   zero_lines=False,
                   edgecolors='none',
                   grid=None,
                   figsize=FIG_SIZE,
                   linestyle='-'):
    """Scatter (or connected-line) plot of y against x with optional decorations.

    Notes:
        one_line: False, True (-> 'r--') or a style such as 'r--', 'r:';
                  draws the diagonal over the range shared by x and y
        connect: T/F, draw a connected line instead of a scatter
        zero_lines: T/F, draw the x=0 and y=0 axes
        grid: None, 'major', 'minor', 'both'
        edgecolors: 'none', 'green' etc
        trendline: False, 1, 2, ... (polynomial degree), 'lowess'
        mask: optional index/mask applied to x and y before the trendline fit
        fig, ax: reuse an existing figure/axes; a new 200-dpi figure is
                 created unless both are given
        linestyle: accepted but not used by this function

    Returns (fig, ax).
    """
    # some defaults if no style is provided, just True:
    if one_line is True:
        one_line = 'r--'
    if fig is None or ax is None:
        fig = plt.figure(figsize=figsize, dpi=200)
        ax = fig.add_subplot(111)
    if connect:
        # ``set_color_cycle`` was removed from Matplotlib; ``set_prop_cycle``
        # is its replacement.
        ax.set_prop_cycle(color=[color])
        ax.plot(x, y, c=color, marker=marker, label=label, alpha=alpha)
    else:
        ax.scatter(x,
                   y,
                   c=color,
                   marker=marker,
                   edgecolors=edgecolors,
                   label=label,
                   alpha=alpha)
    if grid is not None:
        # Positional flag: the keyword was renamed from ``b`` to ``visible``
        # across Matplotlib versions.
        ax.grid(True, which=grid)
    if xlabel:
        ax.set(xlabel=xlabel)
    if ylabel:
        ax.set(ylabel=ylabel)
    if one_line:
        lmin = max(min(x), min(y))
        lmax = min(max(x), max(y))
        ax.plot((lmin, lmax), (lmin, lmax), one_line)
    if zero_lines:
        ax.axvline(0)
        ax.axhline(0)
    if trendline:
        if mask is not None:
            x = x[mask]
            y = y[mask]
        # A fit needs at least two distinct values on each axis.
        if (len(set(x)) > 1) and (len(set(y)) > 1):
            if trendline == 'lowess':
                order = np.argsort(x)
                lx = x[order]
                ly = lowess(x[order], y[order])
            else:
                coefs = np.polyfit(x, y, trendline)
                lx = np.linspace(min(x), max(x), 100)
                ly = [polyfit_apply(coefs, point) for point in lx]
            ax.plot(lx, ly, '-', color=color)
    return fig, ax
def scatterLinePlot(self,
                    title_I,
                    xlabel_I,
                    ylabel_I,
                    x_data_I,
                    y_data_I,
                    text_labels_I=[],
                    fit_func_I='linear',
                    show_eqn_I=True,
                    show_r2_I=True,
                    filename_I=None,
                    show_plot_I=True):
    '''Create a scatter plot with a fitted line.

    Input:
        title_I, xlabel_I, ylabel_I = plot title and axis labels
        x_data_I, y_data_I = data to scatter and to fit
        text_labels_I = optional per-point annotations (same order as the data)
        fit_func_I = 'linear' (linregress) or 'lowess' (lowess fit forced to be
                     non-decreasing); any other value draws the scatter only
        show_eqn_I, show_r2_I = annotate the fit equation / r2 value; these only
                     apply to the linear fit, which is the only one that has them
        filename_I = if given, the figure is saved to this path
        show_plot_I = if true, the figure is shown with plt.show()
    '''
    is_linear = fit_func_I == 'linear'
    x2 = None
    y2 = None
    # Create the fit:
    if is_linear:
        slope, intercept, r_value, _p_value, _std_err = linregress(
            x_data_I, y_data_I)
        r2 = r_value**2  #coefficient of determination
        x2 = x_data_I
        y2 = [d * slope + intercept for d in x2]
    elif fit_func_I == 'lowess':
        x2 = numpy.array(x_data_I)
        y2_lowess = lowess.lowess(x2, numpy.array(y_data_I), f=0.1, iter=100)
        # Force the curve to be non-decreasing: a value that drops below its
        # predecessor is replaced by that predecessor.
        y2 = numpy.zeros_like(y2_lowess)
        for i, y2s in enumerate(y2_lowess):
            if i != 0 and y2s < y2[i - 1]:
                y2[i] = y2[i - 1]
            else:
                y2[i] = y2s
    # Create a Figure object.
    fig = plt.figure()
    # Create an Axes object.
    ax = fig.add_subplot(1, 1, 1)  # one row, one column, first plot
    # Plot the data.
    ax.scatter(x_data_I, y_data_I, color="blue", marker="o")
    # Only draw a line when a fit was actually computed.
    if x2 is not None:
        ax.plot(x2, y2, color='red', linestyle='-')
    # Add a title.
    ax.set_title(title_I)
    # Add some axis labels.
    ax.set_xlabel(xlabel_I)
    ax.set_ylabel(ylabel_I)
    # Label data points.
    if text_labels_I:
        for i, txt in enumerate(text_labels_I):
            ax.annotate(txt, (x_data_I[i], y_data_I[i]))
    # Show fit equation (slope/intercept exist for the linear fit only;
    # previously the lowess fit crashed here with a NameError).
    if is_linear and show_eqn_I:
        fit_eqn = "y = " + str(slope) + "*x"
        if intercept < 0:
            fit_eqn += " " + str(intercept)
        elif intercept > 0:
            fit_eqn += " +" + str(intercept)
        ax.annotate(fit_eqn, (min(x_data_I), max(y_data_I)))
    # Show r2 value (linear fit only, see above).
    if is_linear and show_r2_I:
        r2_label = "r2 = " + str(r2)
        ax.annotate(r2_label, (min(x_data_I), max(y_data_I) - 0.5))
    # Produce an image.
    if filename_I:
        fig.savefig(filename_I)
    # Show the image.
    if show_plot_I:
        plt.show()
def smooth_function(zinput,smooth_method = 'lowess',span = .05):
    """Smooth the values of ``zinput`` (a mapping key -> value), in key order.

    Entries whose value is the string 'None' are left out of the series.

    smooth_method:
        'lowess'    - Biopython lowess with fraction ``span/100``; returns a
                      NEW dict int(key) -> smoothed value.
        'rectangle' - unweighted moving average over ``span`` neighbours on
                      each side (by position in the sorted series).
        'triangle'  - same window, weight ``log(span + 2 - distance)``.
        anything else - ``zinput`` is returned untouched.

    For 'rectangle' and 'triangle' ``span`` is floored to an int, the results
    are written back into ``zinput`` under the key ``float(key)`` and
    ``zinput`` itself is returned.
    """
    if smooth_method not in ['lowess','triangle','rectangle']:
        return zinput
    xarray = []
    yarray = []
    # ``sorted`` works on Python 2 and 3; ``keys().sort()`` fails on Python 3
    # where ``keys()`` is a view.
    years = sorted(zinput.keys())
    for key in years:
        if zinput[key]!='None':
            xarray.append(float(key))
            yarray.append(float(zinput[key]))
    from numpy import array
    if smooth_method == 'lowess':
        from Bio.Statistics.lowess import lowess
        smoothed = lowess(array(xarray),array(yarray),float(span)/100,3)
        return dict(zip([int(p) for p in xarray],smoothed))

    from math import log
    from numpy import average
    span = int(span) #Takes the floor--so no smoothing on a span < 1.

    if smooth_method == 'rectangle':
        def weight_for(key_dist):
            """Every point inside the window counts equally."""
            return 1
    else:
        def weight_for(key_dist):
            """Log-dampened triangular weight for a point ``key_dist`` away."""
            #This isn't actually triangular smoothing: it is dampened by the logs, to keep the peaks from being too big.
            #The minimum is '2', since log(1) == 0, which is a nonsense weight.
            return log(span + 2 - abs(key_dist))

    returnval = zinput
    windowsize = span*2 + 1
    for i in range(len(xarray)):
        surrounding = array([0.0]*windowsize)
        weights = array([0.0]*windowsize)
        for j in range(windowsize):
            key_dist = j - span #if span is 2, the zeroeth element is -2, the second element is 0 off, etc.
            workingon = i + key_dist
            # Positions outside the series keep value 0 and weight 0.
            if workingon >= 0 and workingon < len(xarray):
                surrounding[j] = float(yarray[workingon])
                weights[j] = weight_for(key_dist)
        returnval[xarray[i]] = round(average(surrounding,weights=weights),3)
    return returnval
def null_model(
        matrix,
        positions=None,
        lengths=None,
        model="uniform",
        noisy=False,
        circ=False,
        sparsity=False,
):
    """Attempt to compute a 'null model' of the matrix given a model
    to base itself on.

    Parameters (as used by the code below):
        matrix:    2-D array; ``matrix.shape`` is unpacked as ``(n, m)``.
        positions: per-bin positions; defaults to ``range(n)``.
        lengths:   defaults to ``np.diff(positions)``; only read by the
                   "rippe" model.
        model:     "uniform", "distance" or "rippe"; for any other value the
                   model starts from a plain copy of ``matrix``.
        noisy:     if truthy and callable, it is applied to the model and its
                   result is returned.
        circ:      forwarded to ``rippe_parameters`` and used as a factor in
                   the "rippe" distance term.
        sparsity:  if truthy, the model is rescaled by a coverage score
                   derived from the column sums of ``matrix``.

    NOTE(review): the nesting of this function was reconstructed from a
    flattened listing - confirm the block structure against the real file.
    """
    n, m = matrix.shape
    positions_supplied = True
    if positions is None:
        positions = range(n)
        positions_supplied = False
    if lengths is None:
        lengths = np.diff(positions)

    N = np.copy(matrix)
    contigs = np.array(positions_to_contigs(positions))

    def is_inter(i, j):
        # True when bins i and j are assigned to different contigs.
        return contigs[i] != contigs[j]

    diagonal = np.diag(matrix)

    if model == "uniform":
        if positions_supplied:
            # Average over the entries whose two bins lie on different contigs.
            trans_contacts = np.array([
                matrix[i, j]
                for i, j in itertools.product(range(n), range(m))
                if is_inter(i, j)
            ])
            mean_trans_contacts = np.average(trans_contacts)
        else:
            # NOTE(review): ``diagonal`` is an array, so this yields an array
            # rather than a scalar mean - confirm that is intended.
            mean_trans_contacts = np.average(matrix) - diagonal / len(diagonal)

        # Draw Poisson counts around the mean, then restore the observed diagonal.
        N = np.random.poisson(lam=mean_trans_contacts, size=(n, m))
        np.fill_diagonal(N, diagonal)

    elif model == "distance":
        # Every entry is looked up from the distance law by |i - j|.
        distances = distance_diagonal_law(matrix, positions)
        N = np.array([[distances[min(abs(i - j), n)] for i in range(n)]
                      for j in range(n)])

    elif model == "rippe":
        trans_contacts = np.array([
            matrix[i, j]
            for i, j in itertools.product(range(n), range(m))
            if is_inter(i, j)
        ])
        mean_trans_contacts = np.average(trans_contacts)
        kuhn, lm, slope, d, A = rippe_parameters(matrix, positions, circ=circ)

        def jc(s, frag):
            # Contact value at distance ``s``, never below the mean
            # inter-contig level.
            dist = s - circ * (s**2) / lengths[frag]
            computed_contacts = (0.53 * A * (kuhn**(-3.)) * (dist**slope) *
                                 np.exp((d - 2) / (dist + d)))
            return np.maximum(computed_contacts, mean_trans_contacts)

        for i in range(n):
            for j in range(n):
                if not is_inter(i, j) and i != j:
                    posi, posj = positions[i], positions[j]
                    N[i, j] = jc(np.abs(posi - posj) * lm / kuhn, frag=j)
                else:
                    # Different contigs, or the diagonal: use the mean level.
                    N[i, j] = mean_trans_contacts

    if sparsity:
        contact_sum = matrix.sum(axis=0)
        n = len(contact_sum)
        try:
            # Preferred: lowess trend of the column sums (needs Biopython).
            from Bio.Statistics import lowess
            trend = lowess.lowess(np.array(range(n), dtype=np.float64),
                                  contact_sum,
                                  f=0.03)
        except ImportError:
            # Fallback: forward-looking moving average; window clamped to [20, 100].
            expected_size = int(np.amax(contact_sum) / np.average(contact_sum))
            w = min(max(expected_size, 20), 100)
            trend = np.array(
                [np.average(contact_sum[i:min(i + w, n)]) for i in range(n)])

        # NOTE(review): the argument of sqrt is negative wherever the trend is
        # below its mean, which gives NaN - confirm this is intended.
        cov_score = np.sqrt((trend - np.average(trend)) / np.std(trend))
        N = ((N * cov_score).T) * cov_score

    if noisy:
        if callable(noisy):
            noise_function = noisy
        # NOTE(review): a truthy but non-callable ``noisy`` (e.g. True) leaves
        # ``noise_function`` unbound and raises a NameError here.
        return noise_function(N)
    else:
        return N
#Correct any extreme outliers caused by low read count
# A low-coverage window (< 10 reads) whose bias exceeds ALL of its existing
# neighbours by more than 0.5 is replaced by the mean of those neighbours.
# The first and last window have a single neighbour; previously the last
# window read gc_curve[window + 1] (IndexError) and edge windows could fall
# through to the two-neighbour test.
last_window = len(gc_curve) - 1
outliers = []
for window in range(len(gc_curve)):
    if read_counts[window] >= 10:
        continue
    neighbours = []
    if window > 0:
        neighbours.append(gc_curve[window - 1])
    if window < last_window:
        neighbours.append(gc_curve[window + 1])
    if neighbours and all(gc_curve[window] - 0.5 > value for value in neighbours):
        outliers.append(window)
        gc_curve[window] = sum(neighbours) / float(len(neighbours))

gc_x = np.array([x / float(last_window) for x in range(len(gc_curve))])
smoothed_gc_curve = lowess.lowess(gc_x, gc_curve, f = 0.1, iter = 1)
# The smoothed bias is not allowed to go negative.
smoothed_gc_curve = [max(0.0, x) for x in smoothed_gc_curve]

# Number of decimals used for the GC-range labels.
precision = len(str(last_window)) + 2

def window_label(window):
    """Return the 'low-high' GC range label of ``window``."""
    low = window * 1.0/last_window
    high = min(1., (window + 1) * 1.0/last_window - (1. / 10 ** precision))
    return '%.*f-%.*f' %(precision, low, precision, high)

outfile = open(outname, 'w')
outfile.write('# GC Curve file combined from %s\n' %(', '.join(args.curves)))
outfile.write('# Curve calculated from %i reads at %i locations\n' %(sum(read_counts), sum(loc_counts)))
if len(outliers) > 0:
    outfile.write('# Windows with corrected extreme outlier GC bias: %s\n' %(', '.join([window_label(window) for window in outliers])))
outfile.write('#\n')
outfile.write('#GC_content\tSmoothed_GC_bias\tRaw_GC_Bias\tNo_of_reads\tNo_of_locations\n')
for window, bias in enumerate(gc_curve):
    outfile.write('%s\t%f\t%f\t%i\t%i\n' %(window_label(window), smoothed_gc_curve[window], bias, read_counts[window], loc_counts[window]))
if args.plot:
    plt.clf()
def multiScatterLinePlot(self,
                         title_I,
                         xlabel_I,
                         ylabel_I,
                         x_data_I=[],
                         y_data_I=[],
                         data_labels_I=[],
                         text_labels_I=[],
                         fit_func_I='linear',
                         show_eqn_I=True,
                         show_r2_I=True,
                         filename_I=None,
                         show_plot_I=True,
                         show_legend_I=True):
    '''Create a scatter plot with a fitted line for several data series.

    Input:
        x_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
        y_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
        data_labels_I = [a,b,...] of type string
        text_labels_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type string
        fit_func_I = 'linear' (linregress) or 'lowess' (lowess fit forced to
                     be non-decreasing); any other value draws the scatter only
        show_eqn_I, show_r2_I = annotate equation / r2 (linear fit only)
        filename_I = if given, the figure is saved to this path
        show_plot_I, show_legend_I = show the figure / the legend
    '''
    # Create a Figure object.
    fig = plt.figure()
    # Create an Axes object.
    ax = fig.add_subplot(1, 1, 1)  # one row, one column, first plot
    # Title and axis labels are the same for every series.
    ax.set_title(title_I)
    ax.set_xlabel(xlabel_I)
    ax.set_ylabel(ylabel_I)
    # Generate colors
    colors = iter(cm.rainbow(numpy.linspace(0, 1, len(x_data_I))))
    is_linear = fit_func_I == 'linear'
    for cnt_data, y_series in enumerate(y_data_I):
        x_series = x_data_I[cnt_data]
        x2 = None
        y2 = None
        # Create the fit:
        if is_linear:
            slope, intercept, r_value, _p_value, _std_err = linregress(
                x_series, y_series)
            r2 = r_value**2  #coefficient of determination
            # Evaluate the line on THIS series' x values (the whole
            # list of series was used here before).
            x2 = x_series
            y2 = [d * slope + intercept for d in x2]
        elif fit_func_I == 'lowess':
            x2 = numpy.array(x_series)
            y2_lowess = lowess.lowess(x2,
                                      numpy.array(y_series),
                                      f=0.1,
                                      iter=100)
            # Force the curve to be non-decreasing.
            y2 = numpy.zeros_like(y2_lowess)
            for i, y2s in enumerate(y2_lowess):
                if i != 0 and y2s < y2[i - 1]:
                    y2[i] = y2[i - 1]
                else:
                    y2[i] = y2s
        # Plot the data.
        c = next(colors)
        ax.scatter(x_series,
                   y_series,
                   color=c,
                   marker="o",
                   label=data_labels_I[cnt_data])
        # Only draw a line when a fit was actually computed for this series.
        if x2 is not None:
            ax.plot(x2,
                    y2,
                    linestyle='-',
                    color=c,
                    label=data_labels_I[cnt_data] + '_fitted')
        # Label data points.
        if text_labels_I:
            for i, txt in enumerate(text_labels_I[cnt_data]):
                ax.annotate(txt, (x_series[i], y_series[i]))
        # Show fit equation
        if is_linear and show_eqn_I:
            fit_eqn = "y = " + str(slope) + "*x"
            if intercept < 0:
                fit_eqn += " " + str(intercept)
            elif intercept > 0:
                fit_eqn += " +" + str(intercept)
            ax.annotate(fit_eqn, (min(x_series), max(y_series)))
        # Show r2 value
        if is_linear and show_r2_I:
            r2_label = "r2 = " + str(r2)
            ax.annotate(r2_label, (min(x_series), max(y_series) - 0.5))
    # Show legend
    if show_legend_I:
        plt.legend(loc='best')
    # Produce an image.
    if filename_I:
        fig.savefig(filename_I)
    # Show the image.
    if show_plot_I:
        plt.show()
# Tile the average signal ``aveSig`` 12 times into ``aveTrend``.
period = len(aveSig)
for y in range(0,12):
    for d in range(0, period):
        index = y*period+d
        aveTrend[index] = aveSig[d]

#subtract the average from the signal
ltTrend = numpy.copy(dvals)
for y in range(0,12):
    for d in range(0, period):
        index = y*period+d
        ltTrend[index] = dvals[index] - aveSig[d]

#loess smoothed trend (rough stuff)
# ``numpy.float`` was only an alias of the builtin ``float`` and has been
# removed from NumPy; the builtin gives the same dtype.
x = numpy.array(range(0,len(dvals)), float)
y = numpy.array(ltTrend, float)
result = lowess.lowess(x,y, f=0.5/3.,iter=2)

#residuals
resids = ltTrend-result

#plots: raw signal, repeated average, smoothed trend, residuals
for row, series in enumerate((dvals, aveTrend, result, resids), 1):
    pyplot.subplot(4,1,row)
    pyplot.plot(x, series)
pyplot.show()
def get_GC_depth_correction_vect(sum_depths,
                                 n_bases,
                                 GC_width,
                                 max_correction_factor,
                                 calc_correction_factor=True,
                                 correct_range=False):
    """Compute a per-GC-bin depth correction factor from a lowess fit.

    sum_depths, n_bases: arrays with ``2*GC_width + 2`` entries (summed depth
        and number of bases per GC bin).
    GC_width: defines the number of GC bins (see above).
    max_correction_factor: the correction is clipped to
        ``[1/max_correction_factor, max_correction_factor]``; a value <= 0
        disables the correction (all ones).  Ignored when ``correct_range``.
    calc_correction_factor: accepted but not used by this function.
    correct_range: if true, the clip limit is the correction value at the
        first bin with GC fraction > 0.75.

    The average depth is lowess-smoothed for GC fractions in [0.25, 0.75] and
    extended to both sides with straight lines from ``get_line_params`` on the
    outermost smoothed points.

    Returns ``(GCp, ave_depths, GCp2_8, lowess_depth, correction, mu)``.
    """
    #max_correction_factor = (max_correction_factor==-1) and 999999 or max_correction_factor
    assert (sum_depths.shape[0] == 2 * GC_width + 1 + 1
            )  #have to count 0 as well~ so, 41+1

    ave_depths = sum_depths / n_bases.astype(np.float64)
    GCp = np.arange(0, 2 * GC_width + 1 + 1) / float((2 * GC_width + 1))
    # 0/0 bins come out as NaN; treat them as zero depth.
    ave_depths[np.where(np.isnan(ave_depths))] = 0

    #now, chop off the nans, make the array only as wide as the non-nans
    central = np.where(np.logical_and(GCp >= 0.25, GCp <= .75))
    ave_depths2_8 = ave_depths[central]
    GCp2_8 = GCp[central]
    GCp0_2 = GCp[np.where(GCp < 0.25)]
    GCp8_10 = GCp[np.where(GCp > .75)]

    #now apply lowess on this
    lowess_depth = biostats.lowess(GCp2_8, ave_depths2_8, f=.15)
    #lowess_depth = biostats.lowess(GCp2_8,ave_depths2_8,f=.1) #USING .1... not working... why?
    #lowess_depth = biostats.lowess(GCp2_8,ave_depths2_8,f=.25)

    line_func = lambda p, x: (p[0] * x + p[1])
    k = 5
    y1 = lowess_depth[0:k + 1]
    x1 = GCp2_8[0:k + 1]
    l = lowess_depth.shape[0]
    y2 = lowess_depth[l - k:l + 1]
    x2 = GCp2_8[l - k:l + 1]

    # Debug output.
    print(lowess_depth)
    print("fit lowess on", ave_depths2_8)
    print(GCp0_2)
    print(GCp8_10)
    print(x1, y1)  # was paired with y2 by mistake
    print(x2, y2)

    p1 = get_line_params(x1, y1)
    p2 = get_line_params(x2, y2)
    left_line = line_func(p1, GCp0_2)
    right_line = line_func(p2, GCp8_10)
    lowess_depth = np.r_[left_line, lowess_depth, right_line]

    mu = sum_depths.astype(np.float64).sum() / n_bases.astype(
        np.float64).sum()
    # Keep the divisor strictly positive before taking the ratio.
    lowess_depth = np.clip(lowess_depth, 1e-10, 1e30)
    correction = mu / lowess_depth

    if (correct_range):
        GC_max = 0.75
        iGC_max = (np.where(GCp > GC_max))[0][0]
        max_correction_factor = correction[iGC_max]
        correction = np.clip(correction, 1.0 / max_correction_factor,
                             max_correction_factor)
    elif (max_correction_factor <= 0):
        correction = np.ones(correction.shape[0])
    else:
        correction = np.clip(correction, 1.0 / max_correction_factor,
                             max_correction_factor)
    return GCp, ave_depths, GCp2_8, lowess_depth, correction, mu
def multiScatterLinePlot(self,title_I,xlabel_I,ylabel_I,x_data_I=[],y_data_I=[],data_labels_I=[],text_labels_I=[],fit_func_I='linear',show_eqn_I=True,show_r2_I=True,filename_I=None,show_plot_I=True,show_legend_I=True):
    '''Create a scatter plot with a fitted line for several data series.

    Input:
        x_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
        y_data_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type float
        data_labels_I = [a,b,...] of type string
        text_labels_I = [[a1,a2,a3...],[b1,b2,b3,...],...] of type string
        fit_func_I = 'linear' (linregress) or 'lowess' (lowess fit forced to
                     be non-decreasing); any other value draws the scatter only
        show_eqn_I, show_r2_I = annotate equation / r2 (linear fit only)
        filename_I = if given, the figure is saved to this path
        show_plot_I, show_legend_I = show the figure / the legend
    '''
    # Create a Figure object.
    fig = plt.figure()
    # Create an Axes object.
    ax = fig.add_subplot(1,1,1)  # one row, one column, first plot
    # Title and axis labels are the same for every series.
    ax.set_title(title_I)
    ax.set_xlabel(xlabel_I)
    ax.set_ylabel(ylabel_I)
    # Generate colors
    colors = iter(cm.rainbow(numpy.linspace(0,1,len(x_data_I))))
    is_linear = fit_func_I == 'linear'
    for cnt_data, y_series in enumerate(y_data_I):
        x_series = x_data_I[cnt_data]
        x2 = None
        y2 = None
        # Create the fit:
        if is_linear:
            slope, intercept, r_value, _p_value, _std_err = linregress(x_series, y_series)
            r2 = r_value**2  # coefficient of determination
            # Evaluate the line on THIS series' x values (the whole
            # list of series was used here before).
            x2 = x_series
            y2 = [d*slope + intercept for d in x2]
        elif fit_func_I == 'lowess':
            x2 = numpy.array(x_series)
            y2_lowess = lowess.lowess(x2, numpy.array(y_series), f=0.1, iter=100)
            # Force the curve to be non-decreasing.
            y2 = numpy.zeros_like(y2_lowess)
            for i, y2s in enumerate(y2_lowess):
                if i != 0 and y2s < y2[i-1]:
                    y2[i] = y2[i-1]
                else:
                    y2[i] = y2s
        # Plot the data.
        c = next(colors)
        ax.scatter(x_series, y_series, color=c, marker="o", label=data_labels_I[cnt_data])
        # Only draw a line when a fit was actually computed for this series.
        if x2 is not None:
            ax.plot(x2, y2, linestyle='-', color=c, label=data_labels_I[cnt_data]+'_fitted')
        # Label data points.
        if text_labels_I:
            for i, txt in enumerate(text_labels_I[cnt_data]):
                ax.annotate(txt, (x_series[i], y_series[i]))
        # Show fit equation
        if is_linear and show_eqn_I:
            fit_eqn = "y = " + str(slope) + "*x"
            if intercept < 0:
                fit_eqn += " " + str(intercept)
            elif intercept > 0:
                fit_eqn += " +" + str(intercept)
            ax.annotate(fit_eqn, (min(x_series), max(y_series)))
        # Show r2 value
        if is_linear and show_r2_I:
            r2_label = "r2 = " + str(r2)
            ax.annotate(r2_label, (min(x_series), max(y_series)-0.5))
    # Show legend
    if show_legend_I:
        plt.legend(loc='best')
    # Produce an image.
    if filename_I:
        fig.savefig(filename_I)
    # Show the image.
    if show_plot_I:
        plt.show()
def fit_trajectories(self,x_I,y_I,fit_func_I='lowess',plot_textLabels_I=None,plot_fit_I=False):
    '''fit trajectory growth rate data to a smoothing function

    Input:
        x_I = ale_time
        y_I = growth_rate
        fit_func_I = 'spline', 'movingWindow', 'legendre' or 'lowess'
                     ('spline' and 'lowess' results are forced to be non-decreasing)
        plot_textLabels_I = accepted but not used by this function
        plot_fit_I = if true, show a QC plot of the data and the fit
    Output:
        x_O = ale_time_fitted
        y_O = growth_rate_fitted
    Raises:
        ValueError if fit_func_I is not one of the supported names
    '''

    def running_max(values):
        '''Return a copy of values where an entry below its predecessor is replaced by that predecessor'''
        clamped = numpy.zeros_like(values)
        for i, value in enumerate(values):
            if i != 0 and value < clamped[i-1]:
                clamped[i] = clamped[i-1]
            else:
                clamped[i] = value
        return clamped

    x = x_I
    y = y_I
    if fit_func_I == 'spline':
        #spline
        tck = splrep(x,y,k=3,s=.025)
        x2 = linspace(min(x),max(x),500)
        y2 = running_max(splev(x2,tck))
    elif fit_func_I == 'movingWindow':
        #moving window filter
        x2 = numpy.array(x)
        y2 = smooth(numpy.array(y),window_len=10, window='hanning')
    elif fit_func_I == 'legendre':
        #legendre smoothing optimization
        # Must not be called ``smooth``: that would make ``smooth`` a local
        # name for the whole function and break the 'movingWindow' branch.
        smoother = legendre_smooth(len(x),1,1e-4,25)
        x2 = numpy.array(x)
        y2 = smoother.fit(numpy.array(y))
    elif fit_func_I == 'lowess':
        #lowess
        x2 = numpy.array(x)
        y2 = running_max(lowess.lowess(x2,numpy.array(y),f=0.1,iter=100))
    else:
        print("fit function not recongnized")
        # There is no fit to plot or return; fail with a clear error instead
        # of an UnboundLocalError further down.
        raise ValueError("unrecognized fit function: %r" % (fit_func_I,))

    if plot_fit_I:
        ##QC plot using MatPlotLib
        # Create a Figure object.
        fig = pp.figure()
        # Create an Axes object.
        ax = fig.add_subplot(1,1,1)  # one row, one column, first plot
        # Set the axis
        pp.axis([0,max(x),0,max(y)+0.1])
        # Add axis labels.
        ax.set_xlabel('Time [days]')
        ax.set_ylabel('GR [hr-1]')
        # Create the plot: raw data as blue dots, fit as green line
        pp.plot(x,y,'b.',x2,y2,'g')
        #display the plot
        pp.show()

    #record
    x_O = x2
    y_O = y2
    return x_O, y_O