def rolling_OLS(sym1="C", sym2="BAC",
                Frame=percentage(aggregate(sym1='C', sym2='BAC')),  # note: evaluated once, at definition time
                avg=10):
    a = Frame
    print(len(a))
    a['roll_Beta'] = 0
    a['rolling_r2'] = 0
    for i in range(len(a) - avg + 1):
        x = a.iloc[i:i + avg - 1][sym1]  # extract a rolling sub-window
        y = a.iloc[i:i + avg - 1][sym2]
        fit = stats.linregress(x, y)  # fit once and reuse, instead of fitting twice
        a.iloc[i + avg - 1, 2] = fit[0]       # slope -> roll_Beta
        a.iloc[i + avg - 1, 3] = fit[2] ** 2  # square the r-value so the column matches its name
    a = a[avg:]
    a['Beta_hedged ' + sym1] = a[sym1] - a['roll_Beta'] * a[sym2]
    a['ZScore'] = (a['Beta_hedged ' + sym1]
                   - np.mean(a['Beta_hedged ' + sym1])) / np.std(a['Beta_hedged ' + sym1])
    print(np.mean(a['Beta_hedged ' + sym1]))
    print(np.std(a['Beta_hedged ' + sym1]))
    return a
def TL_from_sample(dat_sample, analysis='partition', out_folder='./out_files/'):
    """Obtain the empirical and simulated TL relationship given the output file from sample_var().

    Here only the summary statistics are recorded for each study, instead of
    results from each individual sample, because the analysis can be quickly
    re-done given the input file, without going through the time-limiting step
    of generating samples from partitions.

    The input dat_sample is in the same format as defined by get_var_sample_file().
    The output file has the following columns:
    study, empirical b, empirical intercept, empirical R-squared, empirical p-value,
    mean b, intercept, R-squared from samples, percentage of significant TL in
    samples (at alpha = 0.05), z-score between empirical and sample b,
    2.5 and 97.5 percentile of sample b, z-score between empirical and sample
    intercept, 2.5 and 97.5 percentile of sample intercept.
    """
    study_list = sorted(np.unique(dat_sample['study']))
    for study in study_list:
        dat_study = dat_sample[dat_sample['study'] == study]
        emp_b, emp_inter, emp_r, emp_p, emp_std_err = stats.linregress(
            np.log(dat_study['mean']), np.log(dat_study['var']))
        b_list = []
        inter_list = []
        psig = 0
        R2_list = []
        for i_sim in dat_sample.dtype.names[5:]:
            var_sim = dat_study[i_sim][dat_study[i_sim] > 0]  # Omit samples of zero variance
            mean_list = dat_study['mean'][dat_study[i_sim] > 0]
            sim_b, sim_inter, sim_r, sim_p, sim_std_error = stats.linregress(
                np.log(mean_list), np.log(var_sim))
            b_list.append(sim_b)
            inter_list.append(sim_inter)
            R2_list.append(sim_r ** 2)
            if sim_p < 0.05:
                psig += 1
        psig /= len(dat_sample.dtype.names[5:])  # true division: fraction of significant fits
        out_file = open(out_folder + 'TL_form_' + analysis + '.txt', 'a')
        # Python 3 replacement for the Python 2 "print >> out_file, ..." statement
        print(study, emp_b, emp_inter, emp_r ** 2, emp_p, np.mean(b_list),
              np.mean(inter_list), np.mean(R2_list), psig,
              get_z_score(emp_b, b_list), np.percentile(b_list, 2.5),
              np.percentile(b_list, 97.5), get_z_score(emp_inter, inter_list),
              np.percentile(inter_list, 2.5), np.percentile(inter_list, 97.5),
              file=out_file)
        out_file.close()
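# TL_from_sample() calls a helper get_z_score() that is not defined in this
# section. A minimal sketch under the usual definition (the empirical value
# standardized against the simulated distribution); the project's actual
# helper may differ.
import numpy as np

def get_z_score(emp_value, sample_list):
    """z-score of emp_value against the distribution of simulated values."""
    return (emp_value - np.mean(sample_list)) / np.std(sample_list)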
def plot_error_norms_with_h(h_vals, nrds, ind=0, att="velo_mag", xlims=None, base=10):
    """
    plot error calculated with 3 different norms as a function of h
    for a set of model runs

    h_vals is a numpy.ndarray of grid scale values,
    nrds is a list of numa_plotting_tools.NumaRunData objects
    the FINEST GRID is the LAST nrd in nrds
    """
    fig = plt.figure(figsize=(12, 9))
    l1, l2, linf = calc_error_norms(nrds, ind, att)
    x = 1 / h_vals[:-1]
    xlog = np.log10(x)
    m1, b1 = linregress(xlog, np.log10(l1))[:2]
    m2, b2 = linregress(xlog, np.log10(l2))[:2]
    mi, bi = linregress(xlog, np.log10(linf))[:2]
    x_ = np.linspace(xlog.min(), xlog.max(), 25)
    x_10 = 10 ** x_
    lstr = "m = {:.02f}"
    plt.loglog(x, l1, "ks", label=r"$L_1$", basex=base, basey=base)
    plt.plot(x_10, 10 ** (m1 * x_ + b1), "k--", label=lstr.format(m1))
    plt.loglog(x, l2, "ro", label=r"$L_2$", basex=base, basey=base)
    plt.plot(x_10, 10 ** (m2 * x_ + b2), "r--", label=lstr.format(m2))
    plt.loglog(x, linf, "b<", label=r"$L_{\infty}$", basex=base, basey=base)
    plt.plot(x_10, 10 ** (mi * x_ + bi), "b--", label=lstr.format(mi))
    if xlims is not None:
        plt.xlim(xlims)
    plt.legend(numpoints=1)
    plt.xlabel("1/h")
    plt.ylabel("Error norm")
    return fig
def _plot_pres_wind(pressures, winds):
    fig = plt.figure()
    ax = plt.subplot(121)
    plt.plot(pressures[:, 0], pressures[:, 1], 'b+')
    rp = stats.linregress(pressures)
    label = 'grad.: {0:.2f}\nintercept: {1:.1f}\n r$^2$: {2:.2f}'.format(
        rp[0], rp[1], rp[2] ** 2)
    plt.plot((880, 1040), (880 * rp[0] + rp[1], 1040 * rp[0] + rp[1]),
             'r--', label=label)
    plt.ylabel('derived track pressure (hPa)')
    plt.xlabel('best track\npressure (hPa)')
    plt.legend(bbox_to_anchor=(0.9, 1.23), numpoints=1, prop={'size': 10})
    ax.set_xticks((880, 920, 960, 1000, 1040))

    ax = plt.subplot(122)
    plt.plot(winds[:, 0], winds[:, 1], 'b+')
    rw = stats.linregress(winds)
    label = 'grad.: {0:.2f}\nintercept: {1:.1f}\n r$^2$: {2:.2f}'.format(
        rw[0], rw[1], rw[2] ** 2)
    plt.plot((0, 160), (0 * rw[0] + rw[1], 160 * rw[0] + rw[1]),
             'r--', label=label)
    plt.ylabel('derived track max. wind speed (ms$^{-1}$)')
    plt.xlabel('best track\nmax. wind speed (ms$^{-1}$)')
    plt.legend(bbox_to_anchor=(0.9, 1.23), numpoints=1, prop={'size': 10})
    ax.set_xticks((0, 40, 80, 120, 160))
    ax.yaxis.tick_right()
    ax.yaxis.set_label_position("right")
    fig.set_size_inches(6.3, 3)
    _save_figure('press_max_ws_corr_2005.png')
def main():
    timestamps = []
    bottom_norms = []
    top_norms = []
    # expects norms.dat in same directory. Can be changed to be a command-line arg
    with open('norms.dat', 'r') as f:  # context manager also closes the file
        for line in f:
            words = line.split(' ')
            timestamps.append(words[0][4:20])
            bottom_norms.append(words[1])
            top_norms.append(words[2])
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        np.asarray(timestamps, float), np.asarray(bottom_norms, float))
    bottom_result = {
        'slope': slope,
        'intercept': intercept,
        'start_time': timestamps[0]
    }
    slope, intercept, r_value, p_value, std_err = stats.linregress(
        np.asarray(timestamps, float), np.asarray(top_norms, float))
    top_result = {
        'slope': slope,
        'intercept': intercept,
        'start_time': timestamps[0]
    }
    result = {
        "bottom": bottom_result,
        "top": top_result
    }
    return result
def WCFitCoeff(wcut, opt):
    if np.sum(wcut) < 0.01:
        slope = 0
        intercept = 0
    else:
        water_break_index = np.argwhere(wcut > 0)[0, 0]  # first position where WOR is > 0
        new_wc, new_opt = np.array(wcut[water_break_index:]), np.array(opt[water_break_index:]) * 0.0062898
        slope, intercept, r_value, p_value, slope_std_error = stats.linregress(new_opt, new_wc)
        if r_value < 0.99:
            ratio = float(len(new_wc)) / float(len(wcut))
            if ratio > 0.1:
                if ratio / 2 > 0.1:
                    ration2_index = water_break_index + int(len(new_wc) / 2)
                    new_wc, new_opt = np.array(wcut[ration2_index:]), np.array(opt[ration2_index:]) * 0.0062898
                    slope, intercept, r_value, p_value, slope_std_error = stats.linregress(new_opt, new_wc)
            if r_value < 0.99:
                r90_index = int(0.9 * len(wcut))
                new_wc, new_opt = np.array(wcut[r90_index:]), np.array(opt[r90_index:]) * 0.0062898
                slope, intercept, r_value, p_value, slope_std_error = stats.linregress(new_opt, new_wc)
    return slope, intercept
def model_mean_disp_by_lm(self, allgenedict):
    '''
    Modeling the mean and dispersion by linear regression
    '''
    list_k = []
    list_dispersion = []
    for (gid, gsk) in allgenedict.items():  # Python 3: iteritems() -> items()
        nsg = len(gsk.nb_count[0])
        nsample = len(gsk.nb_count)
        if len(gsk.sgrna_kvalue) > 0:
            if gsk.MAP_sgrna_dispersion_estimate is not None:
                sg_k = [x[0] for x in gsk.sgrna_kvalue.tolist()]
                sg_dispersion = gsk.MAP_sgrna_dispersion_estimate
                if len(sg_k) >= nsg * nsample:
                    list_k += sg_k[:(nsg * nsample)]
                    list_dispersion += sg_dispersion[:(nsg * nsample)]
    k_log = np.log(list_k)
    dispersion_log = np.log(list_dispersion)
    # remove those with too low variance
    k_log2 = np.array([k_log[i] for i in range(len(dispersion_log))
                       if dispersion_log[i] > (-1)])
    dispersion_log2 = np.array([dispersion_log[i] for i in range(len(dispersion_log))
                                if dispersion_log[i] > (-1)])
    if len(k_log2) > 20:
        (slope, intercept, r_value, p_value, std_err) = linregress(k_log2, dispersion_log2)
    else:
        (slope, intercept, r_value, p_value, std_err) = linregress(k_log, dispersion_log)
    self.lm_intercept = intercept
    self.lm_coeff = slope
    logging.info('Linear regression: y=' + str(slope) + 'x+' + str(intercept))
    if np.isnan(slope) or np.isnan(intercept):
        logging.error('Nan values for linear regression')
def _extrapolate(x, y):
    """This is a very simple extrapolation.

    Takes: two series of the same pandas DataFrame. x is most likely the
    index of the DataFrame.

    Assumptions:
    - x holds the sampling points, while y is the dataset which is incomplete
      and needs to be extrapolated
    - the relationship is very close to linear

    Procedure:
    - take the first two valid points of y and perform a linear fit; this fit
      is then used to calculate y at the very first x value
    - similarly at the end, just with the last two valid points

    Returns: nothing. Everything happens in place.
    """
    xAtYnotNan = x.values[~np.isnan(y.values)][:2]
    YnotNan = y.values[~np.isnan(y.values)][:2]
    slope, intercept, r_value, p_value, slope_std_error = stats.linregress(xAtYnotNan, YnotNan)
    fkt = lambda x: intercept + (slope * x)
    y.values[0] = fkt(x.values[0])

    xAtYnotNan = x.values[~np.isnan(y.values)][-2:]
    YnotNan = y.values[~np.isnan(y.values)][-2:]
    slope, intercept, r_value, p_value, slope_std_error = stats.linregress(xAtYnotNan, YnotNan)
    fkt = lambda x: intercept + (slope * x)
    y.values[-1] = fkt(x.values[-1])
    return
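# A hypothetical usage sketch (the frame and values are made up):
# _extrapolate() fills the NaN endpoints of y in place from linear fits
# through the two nearest valid points. Writing through y.values assumes the
# Series exposes its underlying buffer, which newer pandas copy-on-write
# modes may not allow.
import numpy as np
import pandas as pd

df = pd.DataFrame({"x": [0.0, 1.0, 2.0, 3.0, 4.0],
                   "y": [np.nan, 2.0, 4.1, 5.9, np.nan]})
_extrapolate(df["x"], df["y"])
print(df["y"].values)  # endpoints filled: approx [-0.1, 2.0, 4.1, 5.9, 7.7]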
def angleDetection(cx, cy, lx, ly, ux, uy, overallminx, overallmaxx, DEBUG=False):
    lineAngleList = []
    lineInterceptList = []
    lineSlopeList = []
    lineUpperInterceptList = []
    lineUpperSlopeList = []
    lineLowerInterceptList = []
    lineLowerSlopeList = []
    # Fit a line to each point set when enough points are available for it
    for it in range(len(cx)):
        if len(cx[it]) > 0:
            lineslope, intercept, r_value, p_value, std_err = stats.linregress(cx[it], cy[it])
            plotLinearRegression(plt, overallminx, overallmaxx, lineslope, intercept, "0.5")
            lineAngle = math.atan(lineslope)
            lineAngleList.append(lineAngle)
            lineSlopeList.append(lineslope)
            lineInterceptList.append(intercept)
            if DEBUG:
                print("Slope: %.2f, Intercept: %.2f, Angle: %.2f" % (lineslope, intercept, lineAngle))
                print("Centerx, centery: ", cx, cy)
            lineslope, intercept, r_value, p_value, std_err = stats.linregress(lx[it], ly[it])
            plotLinearRegression(plt, overallminx, overallmaxx, lineslope, intercept, "0.3")
            lineLowerSlopeList.append(lineslope)
            lineLowerInterceptList.append(intercept)
            lineslope, intercept, r_value, p_value, std_err = stats.linregress(ux[it], uy[it])
            plotLinearRegression(plt, overallminx, overallmaxx, lineslope, intercept, "0.3")
            lineUpperSlopeList.append(lineslope)
            lineUpperInterceptList.append(intercept)
    return (lineAngleList, lineSlopeList, lineInterceptList,
            lineLowerSlopeList, lineLowerInterceptList,
            lineUpperSlopeList, lineUpperInterceptList)
def movementType(mom, length, verbose):
    '''
    Here we compute the movement type as described in Sbalzarini 2005a.

    If the correlation coefficient for the diffusion-coefficient regression is
    low, we put 0 instead. If one of the regressions leading to the movement
    type has a correlation coefficient under 0.7, then we add 1 to indicateur.
    This is because we want to know which trajectories have this issue, and
    whether it affects only one regression or all of them.

    Returns: diffusion coefficient, movement type and adequateness of diffusion
    '''
    indicateur = 0
    x = np.log(range(1, int(length / 3) + 1))
    y = np.log(mom)
    gamma = np.zeros(shape=(len(moments),))
    for nu in moments:
        r = linregress(x, y[nu - 1])
        if verbose > 5:
            print("correlation coefficient", r[2])
            print('p-value', r[3])
        gamma[nu - 1] = r[0]
        if nu == 2:
            if r[2] >= 0.70:
                D = np.exp(r[1]) / 4
            else:
                D = 0
            corr = r[2]
        if r[2] < 0.70:
            indicateur += 1
    r2 = linregress(moments, gamma)
    if verbose > 5:
        print("slope ", r2[0], "diffusion coefficient", D)
    return indicateur, r2[0], D, corr
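# movementType() expects mom[nu-1] to hold the moments of displacement for
# each order nu in the module-level moments list (per Sbalzarini 2005a). A
# minimal sketch of how such moments might be computed from an (n, 2)
# trajectory array; the moments list and the length/3 lag convention are
# assumptions inferred from the function above, not the original code.
import numpy as np

moments = range(1, 7)  # assumed module-level moment orders used by movementType()

def displacement_moments(xy, length):
    """mom[nu-1][dt-1] = mean(|xy[t+dt] - xy[t]|**nu) for dt = 1 .. length//3."""
    max_dt = int(length / 3)
    mom = np.zeros((len(moments), max_dt))
    for nu in moments:
        for dt in range(1, max_dt + 1):
            disp = np.linalg.norm(xy[dt:] - xy[:-dt], axis=1)
            mom[nu - 1, dt - 1] = np.mean(disp ** nu)
    return mom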
def pinkNoiseCharacterize(pspectrum, normalize=True, show_plot=False):
    '''Compute main power spectrum characteristics'''
    if normalize:
        pspectrum = pspectrum / np.sum(pspectrum)
    S = entropy(pspectrum, 1)
    x = np.arange(1, len(pspectrum) + 1)
    lx = np.log10(x)
    ly = np.log10(pspectrum)
    c1 = (x > 0) * (x < 80)
    c2 = x >= 80
    fit1 = stats.linregress(lx[c1], ly[c1])
    fit2 = stats.linregress(lx[c2], ly[c2])
    # print(fit1)
    # print(fit2)
    # the flag was originally named "plot", which shadowed the plot() function
    # it then tried to call; renamed to show_plot
    if show_plot:
        plot(lx, ly)
        plot(lx[c1], lx[c1] * fit1[0] + fit1[1], 'r-')
        plot(lx[c2], lx[c2] * fit2[0] + fit2[1], 'r-')
    return {'S': S, 'slope1': fit1[0], 'slope2': fit2[0]}
def plot(filename):
    data = seq.open(filename).map(parse_line)
    bfs = data.filter(_.algorithm == 'bfs')
    dfs = data.filter(_.algorithm == 'dfs')
    x = np.array(bfs.map(lambda x: x.vertexes * x.edges * x.edges).list())
    y = np.array(bfs.map(_.runtime).list())
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    print(slope, intercept, r_value, p_value, std_err)
    plt.title('Numerical Performance of Edmonds-Karp')
    plt.xlabel('Input Size in VE^2')
    plt.ylabel('Running Time in Seconds')
    plt.scatter(x, y)
    plt.show()
    plt.clf()
    ff_data = dfs.map(lambda x: (x.flow, x.flow * x.edges, x.runtime)).group_by(_[0]).cache()
    plt.title('Numerical Performance of Ford-Fulkerson')
    plt.xlabel('Input Size in Ef')
    plt.ylabel('Running Time in Seconds')
    max_flow = ff_data.max_by(lambda kv: kv[0])[0]
    all_x = list()
    all_y = list()
    for k, v in ff_data:
        x = list(map(_[1], v))
        all_x.extend(x)
        y = list(map(_[2], v))
        all_y.extend(y)
        ratio = 1 - k / max_flow
        if ratio > .8:
            ratio = .8
        plt.scatter(x, y, color=str(ratio))
    x = np.array(all_x)
    y = np.array(all_y)
    slope, intercept, r_value, p_value, std_err = stats.linregress(x, y)
    print(slope, intercept, r_value, p_value, std_err)
    plt.show()
def do_pca_analysis(profiles, lens, name='', plot=False):
    L = np.array(0.446 * (lens - np.mean(lens)), dtype='float64')
    pr = []
    for i, p in enumerate(profiles):
        mask = np.isnan(p)
        p[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), p[~mask])
        av, va = moving_average(np.log(p + 0.001), 46, 100)
        pr.append(av)
    y = np.array(pr)
    pca = PCA(n_components=2)
    pca.fit(y)
    print(pca.explained_variance_ratio_)
    yp = pca.transform(y)
    m, b, r, p, _ = stats.linregress(L, yp[:, 0])
    p1 = [p]
    r1 = [r]
    for _ in range(300):
        sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
        # index directly with the bootstrap sample; the original's "~sample"
        # indexed from the array end, which is equivalent but obscure
        m, b, r, p, _ = stats.linregress(L[sample], yp[sample, 0])
        p1.append(p)
        r1.append(r)
    m, b, r, p, _ = stats.linregress(L, yp[:, 1])
    p2 = [p]
    r2 = [r]
    for _ in range(300):
        sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
        m, b, r, p, _ = stats.linregress(L[sample], yp[sample, 1])
        p2.append(p)
        r2.append(r)
    if plot:
        plot_pca(y, pca, yp, L, name)
    return r1, p1, r2, p2, L.shape[0], name, np.std(L)
def doAnalysis(canvas):
    xi = arange(0, 26)
    A = array([xi, ones(26)])  # linearly generated sequence (unused)
    y = canvas.data.times
    # compute the regression once instead of twice
    result = stats.linregress(xi, y)
    return result
def calculate_breaking_points(quant_list):
    MAX_ERROR = 5
    RANGE = 5
    CENTER = 50
    BREAK_POINTS = dict()
    # right to left window
    for x_iter in range(100, CENTER, -1):
        x_proj = range(x_iter - RANGE, x_iter)
        # pylab.plot(x_proj, quant[x-RANGE:x], 'k', alpha=0.5)
        y_subset = quant_list[x_iter - RANGE:x_iter]
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_proj, y_subset)
        g, l = calculate_error(slope, intercept, x_proj, y_subset)
        # print x-RANGE, x, slope, intercept, y[0], f(x, slope, intercept), l, r
        if l > MAX_ERROR:
            BREAK_POINTS[x_iter - RANGE / 2] = {"error": l, "slope": slope, "offset": intercept}
    # left to right window
    for x_iter in range(0, CENTER, 1):
        x_proj = range(x_iter, x_iter + RANGE)
        # pylab.plot(x_proj, quant_list[x_iter:x_iter + RANGE], 'k', alpha=0.3)
        y_subset = quant_list[x_iter:x_iter + RANGE]
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_proj, y_subset)
        g, l = calculate_error(slope, intercept, x_proj, y_subset)
        # print x, x+RANGE, slope, intercept, y[0], f(x, slope, intercept), l, r
        if l > MAX_ERROR:
            # dedup against the window's midpoint key (x_iter + RANGE / 2);
            # the original checked x_iter - RANGE / 2, which never matched
            if (x_iter + RANGE / 2) not in BREAK_POINTS:
                BREAK_POINTS[x_iter + (RANGE / 2)] = {"error": l, "slope": slope, "offset": intercept}
                # pylab.plot([x_iter, x_iter + RANGE], [f(x_iter, slope, intercept), f(x_iter + RANGE, slope, intercept)], "b", alpha=0.3)
    return BREAK_POINTS
def do_stats(df):
    """do linregress and add to df"""
    try:
        from scipy.stats import linregress
    except ImportError:
        thetime = strftime("%H:%M:%S", localtime())
        print('%s: sort type not available in this version of corpkit.' % thetime)
        return False
    indices = list(df.index)
    first_year = list(df.index)[0]
    try:
        x = [int(y) - int(first_year) for y in indices]
    except ValueError:
        x = list(range(len(indices)))
    statfields = ['slope', 'intercept', 'r', 'p', 'stderr']
    stats = []
    if isinstance(df, Series):
        y = list(df.values)
        sl = Series(list(linregress(x, y)), index=statfields)
    else:
        for entry in list(df.columns):
            y = list(df[entry])
            stats.append(list(linregress(x, y)))
        sl = DataFrame(list(zip(*stats)), index=statfields, columns=list(df.columns))
    df = df.append(sl)
    # drop infinites and nans
    df = df.replace([np.inf, -np.inf], np.nan)
    df = df.fillna(0.0)
    return df
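# A hypothetical usage sketch (made-up yearly counts; requires a pandas
# version that still provides DataFrame.append): do_stats() appends
# slope/intercept/r/p/stderr rows computed against year offsets.
import pandas as pd

counts = pd.DataFrame({"word_a": [10, 12, 15, 14],
                       "word_b": [9, 7, 6, 4]},
                      index=["2000", "2001", "2002", "2003"])
print(do_stats(counts).tail(5))  # the five appended regression rows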
def analyse(sample_raw_data, analysis, id_list):
    """This function uses all seven time points."""
    x = [0, 60, 120, 180, 240, 300, 360]
    for name, data in sample_raw_data.items():
        item = [id_list[name], name,
                0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
        fifty = data['50uM']
        twenty = data['20uM']
        ten = data['10uM']
        if 'OVRFLW' in fifty:
            continue
        ref1 = data['ref1']
        ref2 = data['ref2']
        ref3 = data['ref3']
        ref4 = data['ref4']
        ref5 = data['ref5']
        ref6 = data['ref6']
        ref = list()
        for i in range(7):
            to_mean = [ref1[i], ref2[i], ref3[i], ref4[i], ref5[i], ref6[i]]
            ref.append(sum(to_mean) / 6)
        slope, intercept, r_value, _, _ = linregress(x, fifty)
        item[5] = slope
        item[9] = intercept
        item[13] = r_value ** 2
        slope, intercept, r_value, _, _ = linregress(x, twenty)
        item[6] = slope
        item[10] = intercept
        item[14] = r_value ** 2
        slope, intercept, r_value, _, _ = linregress(x, ten)
        item[7] = slope
        item[11] = intercept
        item[15] = r_value ** 2
        slope, intercept, r_value, _, _ = linregress(x, ref)
        item[8] = slope
        item[12] = intercept
        item[16] = r_value ** 2
        item[2] = item[5] / item[8]
        item[3] = item[6] / item[8]
        item[4] = item[7] / item[8]
        item.extend(fifty)
        item.extend(twenty)
        item.extend(ten)
        item.extend(ref)
        item.extend(ref1)
        item.extend(ref2)
        item.extend(ref3)
        item.extend(ref4)
        item.extend(ref5)
        item.extend(ref6)
        analysis.append(item)
def resid_plotter(cax, x, y, DL, logFlag, figtitle, figname, subf):
    print("...plotting...")
    # if logFlag:
    #     x = np.log10(x)
    #     y = np.log10(y)
    xlabel = "Modeled Hg"
    ylabel = "Observed Hg"
    minx = np.min(x)
    miny = np.min(y)
    maxx = np.max(x)
    maxy = np.max(y)
    # override to specify axis limits
    minx = 0.001
    miny = minx
    maxx = 10.0
    maxy = maxx
    # limits
    minxy = np.min([minx, miny])
    maxxy = np.max([maxx, maxy])
    DLinds = np.nonzero(DL == 1)
    Detectinds = np.nonzero(DL == 0)
    # plt.hold(True) removed; "hold" is on by default in modern matplotlib
    mksize = 4.5
    # plot one-to-one line
    plt.plot([minxy, maxxy], [minxy, maxxy], "black")
    # plot the detects solid
    plotx = x[Detectinds]
    ploty = y[Detectinds]
    plt.plot(plotx, ploty, "bo", markerfacecolor="blue", markersize=mksize,
             markeredgecolor="black")
    # plot the nondetects white
    plotx = x[DLinds]
    ploty = y[DLinds]
    plt.plot(plotx, ploty, "bo", markerfacecolor="white", markersize=mksize,
             markeredgecolor="black")
    plt.xlim([minxy, maxxy])
    plt.ylim([minxy, maxxy])
    if logFlag:
        plt.yscale("log")
        plt.xscale("log")
    plt.xlabel(xlabel)
    plt.ylabel(ylabel)
    ticknames = ["0.001", "0.01", "0.1", "1.0", "10.0"]
    plt.setp(cax, xticklabels=ticknames)  # was "ax1", which is undefined in this scope
    plt.setp(cax, yticklabels=ticknames)
    plt.text(0.0015, 5, "{0}".format(figtitle))
    plt.savefig("{0}/{1}.pdf".format(subf, figname))
    plt.close("all")
    if logFlag:
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            np.log10(plotx), np.log10(ploty))
    else:
        slope, intercept, r_value, p_value, std_err = stats.linregress(plotx, ploty)
    return r_value ** 2
def tagstrendToTaxoLineChart(dataframe, title, dates, split, colourDict, taxonomies, emptyOther):
    style = createTagsPlotStyle(dataframe, colourDict)
    line_chart = pygal.Line(x_label_rotation=20, style=style)
    line_chart.title = title
    line_chart.x_labels = dates
    xi = numpy.arange(split)
    for taxonomy in taxonomies:
        taxoStyle = createTagsPlotStyle(dataframe, colourDict, taxonomy)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': ' + taxonomy
        taxo_line_chart.x_labels = dates
        for it in dataframe.iterrows():
            if it[0].startswith(taxonomy):
                slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])
                line = slope * xi + intercept
                taxo_line_chart.add(re.sub(taxonomy + ':', '', it[0]), line, show_dots=False)
                dataframe = dataframe.drop([it[0]])
        taxo_line_chart.render_to_file('plot/' + taxonomy + '_trend.svg')
    if not emptyOther:
        taxoStyle = createTagsPlotStyle(dataframe, colourDict)
        taxo_line_chart = pygal.Line(x_label_rotation=20, style=taxoStyle)
        taxo_line_chart.title = title + ': other'
        taxo_line_chart.x_labels = dates
        for it in dataframe.iterrows():
            slope, intercept, r_value, p_value, std_err = stats.linregress(xi, it[1])
            line = slope * xi + intercept
            taxo_line_chart.add(it[0], line, show_dots=False)
        taxo_line_chart.render_to_file('plot/other_trend.svg')
def calc_dp():
    folders = [fname for fname in next(os.walk('.'))[1] if fname[0] == 'd']
    HOMO = []  # restored from the censored "H**O"; cf. the HOMO column header below
    LUMO = []
    ab = []
    d = [float(f[1:]) for f in folders]
    home = os.getcwd()
    print(home)
    print("------------------------DFT Results------------------------")
    for folder in folders:
        os.chdir(home + '/' + folder)
        this_homo, this_lumo, this_ab = getHLA()
        HOMO.append(this_homo)
        LUMO.append(this_lumo)
        ab.append(this_ab)
    HOMO_p = (np.array(HOMO) + np.array(ab)).tolist()
    LUMO_p = (np.array(LUMO) + np.array(ab)).tolist()
    print("Deform\tHOMO\tLUMO\tab\tHOMO+ab\tLUMO+ab")
    for row in zip(d, HOMO, LUMO, ab, HOMO_p, LUMO_p):
        print("%7.4f\t%7.4f\t%7.4f\t%7.4f\t%7.4f\t%7.4f" % row)
    print("--------------------------Fitting--------------------------")
    Ev, intercept, r_value, p_value, std_err_v = linregress(d, HOMO_p)
    Ec, intercept, r_value, p_value, std_err_c = linregress(d, LUMO_p)
    print("Ev = %5.3f (error = %5.3f)" % (Ev, std_err_v))
    print("Ec = %5.3f (error = %5.3f)" % (Ec, std_err_c))
    print("--------------------------End------------------------------\n")
    os.chdir(home)
    str_name = home.split(os.sep)[-1]
    struct_name = home.split(os.sep)[-2]
    return struct_name, str_name, Ev, Ec, std_err_v, std_err_c
def forecast_from_trend_line(xs, yrs, forecast_yrs, forecast_periods, trend_function):
    """
    Forecast data by using the specified trend function. Trend functions are
    the same functions offered in Excel for adding trend lines to a plot.
    """
    if trend_function == 1:
        # Linear trend (y = a*x + b)
        slope, intercept, _, _, _ = stats.linregress(yrs, xs)
        y = slope * forecast_yrs + intercept
    elif trend_function == 2:
        # 2nd degree polynomial trend (p(x) = p[0]*x**2 + p[1]*x + p[2])
        z = np.polyfit(yrs, xs, 2)
        y = np.polyval(z, forecast_yrs)
    elif trend_function == 3:
        # 3rd degree polynomial trend (p(x) = p[0]*x**3 + p[1]*x**2 + p[2]*x + p[3])
        z = np.polyfit(yrs, xs, 3)
        y = np.polyval(z, forecast_yrs)
    elif trend_function == 4:
        # Logarithmic trend (y = A + B log x)
        slope, intercept, _, _, _ = stats.linregress(np.log(yrs), xs)
        y = intercept + slope * np.log(forecast_yrs)
    elif trend_function == 5:
        # Exponential trend (y = A e^(Bx))
        slope, intercept, _, _, _ = stats.linregress(yrs, np.log(xs))
        y = np.exp(intercept) * np.exp(slope * forecast_yrs)
    elif trend_function == 6:
        # Power function trend (y = A x^B)
        slope, intercept, _, _, _ = stats.linregress(np.log(yrs), np.log(xs))
        y = np.exp(intercept) * np.power(forecast_yrs, slope)
    elif trend_function == 7:
        # Exponential smoothing with a dampened trend
        xs_fit_opt = exp_smooth.calc_variable_arrays(.98, xs, forecast_periods)
        y = exp_smooth.exp_smooth_forecast(xs_fit_opt, True)[-forecast_periods:]
    else:
        # Consumption forecasting with elasticity and income (placeholder value)
        y = 8
    # Mask any negative, zero, infinity, or n/a values before returning
    y = np.ma.masked_less_equal(y, 0)
    y = np.ma.fix_invalid(y)
    return y
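# A hypothetical usage sketch (synthetic series): trend_function=1 fits
# y = slope*x + intercept over the historical years and evaluates it at the
# forecast years; the result comes back as a masked array.
import numpy as np

yrs = np.arange(2000, 2010)
xs = 3.0 * (yrs - 2000) + 50.0  # made-up linear history
forecast_yrs = np.arange(2010, 2015)
y = forecast_from_trend_line(xs, yrs, forecast_yrs, len(forecast_yrs), 1)
print(y)  # continues the 3.0-per-year trend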
def test_linear_regression_full():
    # Requires scipy, which isn't always available.
    # Not good for coverage, but scipy is hard to install correctly on travis-ci
    try:
        from scipy.stats import linregress
        timesteps = numpy.arange(0, 100, dtype=numpy.uint8)
        k = numpy.random.rand(100)
        b = numpy.random.rand(k.shape[0])
        # shape is (timesteps.shape, k.shape)
        values = numpy.outer(timesteps, k) + b + numpy.random.normal(
            size=(timesteps.shape[0], k.shape[0]), scale=0.1)
        values = values.reshape((100, 10, 10))  # reshape the flat second dimension into a 10x10 grid
        slopes, intercepts, r2vals, pvals = linear_regression(timesteps, values, full=True)
        s, i, r, p = linregress(timesteps, values[:, 0, 0])[:4]
        assert numpy.allclose((s, i, r ** 2, p),
                              (slopes[0, 0], intercepts[0, 0], r2vals[0, 0], pvals[0, 0]))
        s, i, r, p = linregress(timesteps, values[:, 2, 0])[:4]
        assert numpy.allclose((s, i, r ** 2, p),
                              (slopes[2, 0], intercepts[2, 0], r2vals[2, 0], pvals[2, 0]))
    except ImportError:
        print('WARNING: scipy not available for testing')
def OffsetPlot():
    import pylab as P
    import scipy.stats as S
    offsp = S.spearmanr(data[:, dict['medianOffset']], telfocusCorrected)
    offreg = S.linregress(data[:, dict['medianOffset']], telfocusCorrected)
    offreg2 = S.linregress(data[:, dict['medianOffset']], telfocusOld)
    xmin = -50.  # renamed from "min"/"max" to avoid shadowing the builtins
    xmax = 50.
    print('\nOffset Spearman rank-order:', offsp)
    print('Offset fit:', offreg)
    print('and for uncorrected data:', offreg2)
    P.plot(data[:, dict['medianOffset']], telfocusCorrected, 'bo', label='Data')
    P.plot([xmin, xmax],
           [xmin * offreg[0] + offreg[1], xmax * offreg[0] + offreg[1]],
           'r-', label='Linear Fit (Corrected)', lw=2.0)
    P.plot([xmin, xmax],
           [xmin * offreg2[0] + offreg2[1], xmax * offreg2[0] + offreg2[1]],
           'g--', label='Linear Fit (UnCorrected)', lw=1.5)
    P.axhline(medianNew, color='b')
    P.xlim(xmin, xmax)
    P.xlabel('Median Offset (telescope units)')
    P.ylabel('Temperature Corrected Telescope Focus + Median Offset')
    P.legend(shadow=True)
    P.savefig('offsetCorrelation.png')
    P.close()
def findscparam(self):
    if not self.setparam:
        return
    if self.ivdata[:, 0][0] > self.ivdata[:, 0][1]:
        volt = np.flipud(self.ivdata[:, 0])
        curr = np.flipud(self.ivdata[:, 1])
    else:
        volt = self.ivdata[:, 0]
        curr = self.ivdata[:, 1]
    # finding last data position before zero crossing
    zero_crossing = np.where(np.diff(np.sign(curr)))[0][0]
    # creating function for data interpolation
    data_interpld = interpolate.interp1d(volt, curr, kind='cubic')
    # approximate Voc value by linear interpolation
    slope = (curr[zero_crossing + 1] - curr[zero_crossing]) / (volt[zero_crossing + 1] - volt[zero_crossing])
    intercept = curr[zero_crossing] - slope * volt[zero_crossing]
    # slope, intercept, r_value, p_value, std_err = stats.linregress(volt[zero_crossing:zero_crossing+1], curr[zero_crossing:zero_crossing+1])
    voc = -intercept / slope
    isc = data_interpld(0)
    # finding max power point
    voltnew = np.arange(0, volt[zero_crossing + 1], 0.001)
    maxscpower = max(np.abs(np.multiply(voltnew, data_interpld(voltnew))))
    maxscpower_voltposition = np.argmax(np.abs(np.multiply(voltnew, data_interpld(voltnew))))
    fillfactor = np.abs(maxscpower / (voc * isc))
    effic = maxscpower * 1000 / (self.sampleparameters[2] * self.sampleparameters[1])
    # finding r_s and r_shunt graphically --- approximate method
    rsh_slope, intercept, r_value, p_value, std_err = stats.linregress(
        voltnew[0:int(maxscpower_voltposition * 0.8)],
        data_interpld(voltnew[0:int(maxscpower_voltposition * 0.8)]))
    rshunt = np.abs(1 / rsh_slope)
    rs_slope, intercept, r_value, p_value, std_err = stats.linregress(
        voltnew[-50:-1], data_interpld(voltnew[-50:-1]))
    rseries = np.abs(1 / rs_slope)
    return [isc, voc, fillfactor, maxscpower, effic, rshunt, rseries]
def measure_okr(h, t, falls, minPPoints=30, minSPoints=3, minP=0.1, figNum=None):
    pursuitVel = []
    saccadeVel = []
    i = 1
    if figNum is not None:
        figure(figNum)
    while i < len(falls):
        pStart = falls[i - 1]['start'] + falls[i - 1]['length'] + 2
        pEnd = falls[i]['start']
        sStart = falls[i]['start']
        sEnd = falls[i]['start'] + falls[i]['length']
        if pEnd - pStart < minPPoints or sEnd - sStart < minSPoints:
            i += 1
            continue
        pr = linregress(t[pStart:pEnd], h[pStart:pEnd])
        sr = linregress(t[sStart:sEnd], h[sStart:sEnd])
        if pr[3] <= minP and sr[3] <= minP:
            saccadeVel += [sr[0], ]
            pursuitVel += [pr[0], ]
            if figNum is not None:
                ts = array([t[sStart], t[sEnd]])
                ys = ts * sr[0] + sr[1]
                plot(ts, ys, c='g', linewidth=2)
                ts = array([t[pStart], t[pEnd]])
                ys = ts * pr[0] + pr[1]
                plot(ts, ys, c='r', linewidth=2)
        i += 1
    if figNum is not None:
        plot(t, h, c='k')
    return pursuitVel, saccadeVel
def do_pca_analysis(profiles, lens, name=""):
    L = np.array(0.446 * (lens - np.mean(lens)), dtype="float64")
    profiles_smooth_l = []
    for i, p in enumerate(profiles):
        mask = np.isnan(p)
        p[mask] = np.interp(np.flatnonzero(mask), np.flatnonzero(~mask), p[~mask])
        average, va = scalingBicoidFinalReally.moving_average(np.log(p + 0.001), 46, 100)
        profiles_smooth_l.append(average)
    profiles_a = np.array(profiles_smooth_l)
    pca = PCA(n_components=2)
    pca.fit(profiles_a)
    print(pca.explained_variance_ratio_)
    profiles_transformed_a = pca.transform(profiles_a)
    m, b, r, p, _ = stats.linregress(L, profiles_transformed_a[:, 0])
    p1 = [p]
    r1 = [r]
    for _ in range(1000):
        sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
        # index directly with the bootstrap sample (see note in do_pca_analysis above)
        m, b, r, p, _ = stats.linregress(L[sample], profiles_transformed_a[sample, 0])
        p1.append(p)
        r1.append(r)
    m, b, r, p, _ = stats.linregress(L, profiles_transformed_a[:, 1])
    p2 = [p]
    r2 = [r]
    for _ in range(1000):
        sample = np.random.choice(L.shape[0], L.shape[0], replace=True)
        m, b, r, p, _ = stats.linregress(L[sample], profiles_transformed_a[sample, 1])
        p2.append(p)
        r2.append(r)
    plot_pca(profiles_a, pca, profiles_transformed_a, L, name)
    more_stats_d = {"norm_sigma_l": np.std(lens) / np.mean(lens)}
    return pca, (r1, p1, r2, p2, L.shape[0], name, np.std(L), more_stats_d)
def get_Slope(self, start_at, finish_at, sample, aflow):  # "self" was missing; the body uses self.ax1
    length = len(aflow[start_at:finish_at])
    length20 = int(length * 0.2) + start_at
    length80 = int(length * 0.8) + start_at
    x2 = sample[start_at:length20]
    y2 = aflow[start_at:length20]
    # call linear regression for that data set
    slope, intercept, r_value, p_value, std_err = stats.linregress(x2, y2)
    angle_1 = math.degrees(math.atan(slope))
    self.ax1.plot(x2, y2, '.-r')
    x2 = sample[length20:length80]
    y2 = aflow[length20:length80]
    self.ax1.plot(x2, y2, '.-g')
    # call linear regression for that data set
    slope, intercept, r_value, p_value, std_err = stats.linregress(x2, y2)
    angle_2 = math.degrees(math.atan(slope))
    x2 = sample[length80:finish_at]
    y2 = aflow[length80:finish_at]
    self.ax1.plot(x2, y2, '.-b')
    # call linear regression for that data set
    slope, intercept, r_value, p_value, std_err = stats.linregress(x2, y2)
    angle_3 = math.degrees(math.atan(slope))
    return angle_1, angle_2, angle_3
def _fit_align(xcoefs, ycoefs, misll, fitll, relx, rely):
    """fitting stage of procedure align_correlate"""
    acoefx, bcoefx = None, None
    acoefy, bcoefy = None, None
    xfit, yfit = None, None
    # inter-/extra-polation
    if misll:
        # find linear fit
        from scipy.stats import linregress
        if xcoefs is None:
            acoefx, bcoefx = linregress(fitll, relx[fitll])[:2]
        else:
            acoefx, bcoefx = xcoefs
        if ycoefs is None:
            acoefy, bcoefy = linregress(fitll, rely[fitll])[:2]
        else:
            acoefy, bcoefy = ycoefs
        # calculate offsets for layers in misll
        for i in misll:
            relx[i] = acoefx * i + bcoefx
            rely[i] = acoefy * i + bcoefy
    return relx, rely, acoefx, bcoefx, acoefy, bcoefy
def linear_regress(data, log=True, clip=None, r2=0.8, **kwargs):
    """Fit a 1st order polynomial to each column, optionally in log-log space."""
    ys = pd.DataFrame(data)
    values = pd.DataFrame(index=['slope', 'intercept', 'good'])
    fits = {}
    for col in ys:
        good = False  # reset per column; a single shared flag would stay True once set
        if clip:
            y = ys[col].dropna()
            limit = np.arange(1, np.min(((1 + clip), len(y.index))))
            y = ys.loc[limit, [col]][col]
            x = pd.Series(y.index.values, index=y.index, dtype=np.float64)
        else:
            y = ys[col].dropna()
            x = pd.Series(y.index.values, index=y.index, dtype=np.float64)
        if log:
            slope, intercept, r, p, stderr = \
                stats.linregress(np.log(x), np.log(y))
            if r**2 > r2:
                good = True
            values[col] = [slope, np.exp(intercept), good]
            fits[col] = x.apply(lambda x: np.exp(intercept) * x**slope)
        else:
            slope, intercept, r, p, stderr = \
                stats.linregress(x, y)
            if r**2 > r2:
                good = True
            values[col] = [slope, intercept, good]
            fits[col] = x.apply(lambda x: slope * x + intercept)  # linear fit, not the power-law form
    values = values.T
    fits = pd.concat(fits, axis=1)
    return (values, fits)
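# A hypothetical usage sketch (synthetic data): a column following a power law
# y = a * x**b is recovered from the log-log fit as slope b and intercept a,
# and flagged good when r**2 exceeds the threshold.
import numpy as np
import pandas as pd

x = np.arange(1, 50, dtype=float)
df = pd.DataFrame({"powerlaw": 2.5 * x ** 1.8}, index=x)
values, fits = linear_regress(df, log=True, r2=0.8)
print(values)  # slope ~1.8, intercept ~2.5, good == True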
def plot_planar_pos_error_sensitivity():
    error, avg_err, max_err = load_error_data("errorFinal.dat")  # renamed "max" to avoid shadowing the builtin
    error_frac = 1.0 / error
    fig = plt.figure()
    fig.set_size_inches(fig_size, (6.0 / 8) * fig_size)
    plt.grid(True)
    # plt.title("Farfield error due to planar uncertainty")
    plt.xlabel("Planar position error [$\lambda$]")
    plt.ylabel("Farfield error")
    plt.xlim(np.min(error_frac), np.max(error_frac))
    plt.plot(error_frac, avg_err)
    plt.plot(error_frac, max_err)
    plt.legend(["avg", "max"], loc='upper left')

    # Calculate error sensitivity equations
    slope, intercept, r_value, p_value, std_error = stats.linregress(error_frac, avg_err)
    print("Avg")
    print(slope)
    print(intercept)

    slope, intercept, r_value, p_value, std_error = stats.linregress(error_frac, max_err)
    print("Max")
    print(slope)
    print(intercept)

    x = np.linspace(0, 0.25, 50)
def derive_EBM_II(T, N, xCO2):
    n_years = T.size
    # --------------------------------------------------------------------
    # 0. set param to the EBM-1 values
    # --------------------------------------------------------------------
    EBM_0 = derive_EBM_I(T, N, xCO2)
    forcage = FORCING(typ='abrupt', xCO2_infty=xCO2)
    datas_EBM = analytical_EBM(EBM_0, forcage, n_years)
    T0_EBM = datas_EBM['T0']
    T_EBM = datas_EBM['T']
    H_EBM = datas_EBM['H']
    n_iters = 10
    for iter in range(n_iters):
        X = np.c_[T, H_EBM[1:]]
        regr = linear_model.LinearRegression()
        regr.fit(X, N)
        forc = regr.intercept_
        lbda = -regr.coef_[0]
        epsi = 1 - regr.coef_[1]
        # print('=====>')
        # print(forc)
        # print(lbda)
        # print(epsi)
        # print('=====>')
        T_eq = forc / lbda
        t_i = 80
        x_ = np.arange(t_i, n_years)
        y_ = np.log(1 - T[t_i - 1:n_years - 1] / T_eq)
        slope, intercept, r_value, p_value, std_err = stats.linregress(x_, y_)
        tau_s = -1.0 / slope
        a_s = np.exp(intercept)
        a_f = 1 - a_s
        t_i = 6
        t_ = np.arange(1, t_i)
        tau = t_ / (np.log(a_f) - np.log(1 - T[0:t_i - 1] / T_eq - a_s * np.exp(-t_ / tau_s)))
        tau_f = np.mean(tau)
        c = lbda / (a_f / tau_f + a_s / tau_s)
        c_0 = lbda * (a_f * tau_f + a_s * tau_s) - c
        gam = c_0 / (a_s * tau_f + a_f * tau_s)
        # print(c)
        # print(c_0)
        c_0 = c_0 / epsi
        gam = gam / epsi
        myEBM = EBM(F=forc, lbda=lbda, c=c, c_0=c_0, gam=gam, epsi=epsi, xCO2=xCO2)
        datas_EBM = analytical_EBM(myEBM, forcage, n_years)
        T0_EBM = datas_EBM['T0']
        T_EBM = datas_EBM['T']
        H_EBM = datas_EBM['H']
    output = EBM(F=forc, lbda=lbda, c=c, c_0=c_0, gam=gam, epsi=epsi, xCO2=xCO2)
    return output
<Condition 4> Check the regression model's detailed results: use the summary() function
'''

from scipy import stats
import pandas as pd
import statsmodels.formula.api as sm
import matplotlib.pyplot as plt
from pylab import plot, legend, show

score_iq = pd.read_csv("C:/ITWILL/4_Python-II/data/score_iq.csv")
score_iq.info()

# 1
y = score_iq.score
x = score_iq.academy

# 2
model = stats.linregress(x, y)
model
'''
LinregressResult(slope=4.847829398324446        <slope>,
                 intercept=68.23926884996192    <intercept>,
                 rvalue=0.8962646792534938      <explanatory power>,
                 pvalue=4.036716755167992e-54   <p-value>,
                 stderr=0.1971936807753301      <standard error>)
'''

y_pred = x * model.slope + model.intercept

# 3
plt.plot(x, y, 'bo', label='x,y scatter')
plt.plot(x, y_pred, 'r.-', label='y pred')
legend(loc='best')
plt.show()
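# stats.linregress has no summary() method; the statsmodels.formula.api
# module already imported above (as sm) provides one. A sketch satisfying
# <Condition 4>, assuming the same score_iq columns:
model2 = sm.ols(formula='score ~ academy', data=score_iq).fit()
print(model2.summary())  # coefficients, R-squared, p-values, confidence intervals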
X = X[np.logical_not(np.isnan(y)).ravel(), :]
DX = DX[np.logical_not(np.isnan(y))]
y = y[np.logical_not(np.isnan(y))]
assert X.shape == (80, 261212)

X = X[DX != 2]
y = y[DX != 2]
DX = DX[DX != 2]
assert X.shape == (62, 261212)

lr = linear_model.Ridge(alpha=0.5)

# cross_val_predict returns an array of the same size as `y` where each entry
# is a prediction obtained by cross validation:
pred = sklearn.cross_validation.cross_val_predict(lr, X, y, cv=n_folds)
slope, intercept, r_value, p_value, std_err = stats.linregress(y, pred)

plt.plot(y, pred, 'o', label='original data')
plt.plot(y, intercept + slope * y, 'r', label='fitted line')
plt.xlabel("MAASC score")
plt.ylabel("Predicted score using MRI-based features")
plt.legend()
plt.show()

plt.figure()
plt.grid()
plt.title("R2 = %.02f and p = %.01e" % (r_value ** 2, p_value), fontsize=12)  # square r for R2
plt.plot(y[DX == 1], pred[DX == 1], 'o', label="ASD")
plt.plot(y[DX == 3], pred[DX == 3], 'o', label="SCZ")
plt.plot(y, intercept + slope * y, color="black")  # dropped the conflicting 'r' format spec
plt.xlabel("MAASC score")
def plotter(fdict):
    """ Go """
    import matplotlib
    matplotlib.use('agg')
    import matplotlib.pyplot as plt
    pgconn = get_dbconn('coop')
    cursor = pgconn.cursor(cursor_factory=psycopg2.extras.DictCursor)
    ctx = get_autoplot_context(fdict, get_description())
    which = ctx['which']
    station = ctx['station']
    network = "%sCLIMATE" % (station[:2], )
    nt = NetworkTable(network)
    table = "alldata_%s" % (station[:2], )
    cursor.execute("""
        select year, extract(doy from day) as d from
          (select day, year,
                  rank() OVER (PARTITION by year ORDER by avg DESC)
           from
             (select day, year,
                     avg((high+low)/2.) OVER (ORDER by day ASC rows 91 preceding)
              from """ + table + """
              where station = %s and day > '1893-01-01') as foo) as foo2
        where rank = 1 ORDER by year ASC
    """, (station, ))
    years = []
    maxsday = []
    today = datetime.date.today()
    delta = 0 if which == 'end_summer' else 91
    for row in cursor:
        if row['year'] == today.year and row['d'] < 270:
            continue
        maxsday.append(row['d'] - delta)
        years.append(row['year'])
    df = pd.DataFrame(dict(year=pd.Series(years), doy=pd.Series(maxsday)))
    maxsday = np.array(maxsday)

    (fig, ax) = plt.subplots(1, 1)
    ax.scatter(years, maxsday)
    ax.grid(True)
    ax.set_ylabel("%s Date" % ('End' if delta == 0 else 'Start', ))
    ax.set_title(("%s [%s] %s\n"
                  "%s Date of Warmest (Avg Temp) 91 Day Period"
                  ) % (nt.sts[station]['name'], station, PDICT.get(which),
                       'End' if delta == 0 else 'Start'))
    yticks = []
    yticklabels = []
    for i in np.arange(min(maxsday) - 5, max(maxsday) + 5, 1):
        ts = datetime.datetime(2000, 1, 1) + datetime.timedelta(days=i)
        if ts.day in [1, 8, 15, 22, 29]:
            yticks.append(i)
            yticklabels.append(ts.strftime("%-d %b"))
    ax.set_yticks(yticks)
    ax.set_yticklabels(yticklabels)
    h_slope, intercept, r_value, _, _ = stats.linregress(years, maxsday)
    ax.plot(years, h_slope * np.array(years) + intercept, lw=2, color='r')
    avgd = datetime.datetime(2000, 1, 1) + datetime.timedelta(
        days=int(np.average(maxsday)))
    ax.text(0.1, 0.03,
            "Avg Date: %s, slope: %.2f days/century, R$^2$=%.2f" % (
                avgd.strftime("%-d %b"), h_slope * 100., r_value ** 2),
            transform=ax.transAxes, va='bottom')
    ax.set_xlim(min(years) - 1, max(years) + 1)
    ax.set_ylim(min(maxsday) - 5, max(maxsday) + 5)
    return fig, df
def order_slope(single_powers: np.ndarray) -> float:
    ordered_powers = np.sort(single_powers[single_powers > 0])
    return linregress(np.arange(len(ordered_powers)), np.log(ordered_powers))[0]
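# A hypothetical usage sketch: zeros are dropped, the remaining powers are
# sorted, and the slope of log(power) against rank is returned. When the
# sorted powers double at each step, that slope is exactly log(2).
import numpy as np
from scipy.stats import linregress

powers = np.array([0.0, 8.0, 2.0, 4.0, 1.0])
print(order_slope(powers))  # ~0.693 == log(2): sorted nonzero powers double each step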
significanceRange = round(len(yValueArr) / 8)
significantTrendCount = 0
significantTrendArray = []
for n in range(trendLen):
    currentVal = trendArray[n - 1]
    nextVal = trendArray[n]
    if (currentVal != nextVal or (currentVal == nextVal and n == (trendLen - 1))):
        if (n == (trendLen - 1)):
            endIndex = n + 1
        else:
            endIndex = n
        trendLength = endIndex - startIndex + 1
        if trendLength > significanceRange:
            xRange = pd.Series(numericXValueArr).loc[startIndex:endIndex]
            yRange = pd.Series(yValueArr).loc[startIndex:endIndex]
            result = linregress(xRange, yRange)
            intercept = round(result[1], 2)
            slope = round(result[0], 2)
            trendRange = {"Length": (endIndex - startIndex + 1),
                          "direction": currentVal,
                          "start": startIndex,
                          "end": endIndex,
                          "slope": slope,
                          "intercept": intercept}
            significantTrendArray.append(trendRange)
            significantTrendCount += 1
        startIndex = n

if (significantTrendCount > 1):
    # normalize trend slopes to get magnitudes for multi-trend charts
    slopes = np.array([trend['slope'] for trend in significantTrendArray]).reshape(-1, 1)
    scaler = preprocessing.MinMaxScaler()
    scaler.fit(slopes)
    scaledSlopes = scaler.transform(slopes)
    print(significantTrendArray)
        # std = 0
        avgAccum = np.append(avgAccum, avg)
        peakAccum = np.append(peakAccum, peak)
        marker = '^'
    else:
        avgAccum = np.append(avgAccum, avg)
        peakAccum = np.append(peakAccum, peak)
        marker = 'o'
    ax.errorbar(peak, avg, xerr=error, yerr=std, marker=marker, color=color, capsize=3)
    # ax.scatter(peak, avg)
    ax.annotate(mol, (peak + 0.02, avg + 3))
    # index += 1

slope, intercept, r_value, p_value, std_error = linregress(peakAccum, avgAccum)
x = np.linspace(np.min(peakAccum), np.max(peakAccum), 100)
y = slope * x + intercept
ax.plot(x, y, label="r = %.3f" % r_value, color='k')
ax.legend(loc=1)
fig.savefig('figures/peak_v_cnc_scount.png', format='png')
def do_linear_regression(X, Y):
    # Fill in the body here.
    slope, intercept, r_value, p_value, std_err = linregress(X, Y)
    return slope, intercept
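# A usage sketch: points that lie exactly on y = 2x + 1 recover the slope
# and intercept without error.
X = [0, 1, 2, 3, 4]
Y = [1, 3, 5, 7, 9]
print(do_linear_regression(X, Y))  # (2.0, 1.0)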
def test(self, input, target, iprint=1, filename=None):
    """
    Calculates output and parameters of regression.

    :Parameters:
        input : 2-D array
            Array of input patterns
        target : 2-D array
            Array of network targets
        iprint : {0, 1, 2}, optional
            Verbosity level: 0 -- print nothing, 1 -- print regression
            parameters for each output node (default), 2 -- print additionally
            general network info and all targets vs. outputs
        filename : str
            Path to the file where printed messages are redirected.
            Default is None

    :Returns:
        out : tuple
            *(output, regress)* tuple where: *output* is an array of network
            answers on input patterns and *regress* contains regression
            parameters for each output node. These parameters are: *slope,
            intercept, r-value, p-value, stderr-of-slope, stderr-of-estimate*.

    :Examples:
        >>> from ffnet import mlgraph, ffnet
        >>> from numpy.random import rand
        >>> conec = mlgraph((3,3,2))
        >>> net = ffnet(conec)
        >>> input = rand(50,3); target = rand(50,2)
        >>> output, regress = net.test(input, target)
        Testing results for 50 testing cases:
        OUTPUT 1 (node nr 8):
        Regression line parameters:
        slope         = -0.000649
        intercept     =  0.741282
        r-value       = -0.021853
        p-value       =  0.880267
        slope stderr  =  0.004287
        estim. stderr =  0.009146
        .
        OUTPUT 2 (node nr 7):
        Regression line parameters:
        slope         =  0.005536
        intercept     =  0.198818
        r-value       =  0.285037
        p-value       =  0.044816
        slope stderr  =  0.002687
        estim. stderr =  0.005853

    Exemplary plot:

    .. plot::
        :include-source:

        from ffnet import mlgraph, ffnet
        from numpy.random import rand
        from numpy import linspace
        import pylab

        # Create and train net on random data
        conec = mlgraph((3,10,2))
        net = ffnet(conec)
        input = rand(50,3); target = rand(50,2)
        net.train_tnc(input, target, maxfun = 400)
        output, regress = net.test(input, target, iprint = 0)

        # Plot results for first output
        pylab.plot(target.T[0], output.T[0], 'o', label='targets vs. outputs')
        slope = regress[0][0]; intercept = regress[0][1]
        x = linspace(0,1)
        y = slope * x + intercept
        pylab.plot(x, y, linewidth = 2, label = 'regression line')
        pylab.legend()
        pylab.show()
    """
    # Check if we dump stdout to the file
    if filename:
        import sys
        file = open(filename, 'w')
        saveout = sys.stdout
        sys.stdout = file
    # Print network info
    if iprint == 2:
        print(self)
        print('')
    # Test data and get output
    input, target = self._testdata(input, target)
    nump = len(input)
    output = self(input)  # array([self(inp) for inp in input])
    # Calculate regression info
    from scipy.stats import linregress
    numo = len(self.outno)
    target = target.transpose()
    output = output.transpose()
    regress = []
    if iprint:
        print("Testing results for %i testing cases:" % nump)
    for o in range(numo):
        if iprint:
            print("OUTPUT %i (node nr %i):" % (o + 1, self.outno[o]))
        if iprint == 2:
            print("Targets vs. outputs:")
            for p in range(nump):
                print("%4i % 13.6f % 13.6f" % (p + 1, target[o, p], output[o, p]))
        x = target[o]
        y = output[o]
        r = linregress(x, y)
        # linregress calculates stderr of the slope instead of the estimate,
        # even though the docs say something else. We calculate the estimate
        # stderr here manually.
        sstd = r[-1]
        estd = sstd * sqrt(((x - x.mean()) ** 2).sum())
        r += (estd, )
        if iprint:
            print("Regression line parameters:")
            print("slope         = % f" % r[0])
            print("intercept     = % f" % r[1])
            print("r-value       = % f" % r[2])
            print("p-value       = % f" % r[3])
            print("slope stderr  = % f" % r[4])
            print("estim. stderr = % f" % r[5])
        regress.append(r)
        if iprint:
            print('')
    # Close file and restore stdout
    if filename:
        file.close()
        sys.stdout = saveout
    return output.transpose(), regress
def plot_abundance_correlation_all(data_dir):
    files = glob.glob('{}/images_table_mix_*_results_abundance.csv'.format(data_dir))
    fig_abundance = plt.figure(0)
    fig_fp = plt.figure(1)
    fig_abundance.set_size_inches(cm_to_inches(4.25), cm_to_inches(4.25))
    fig_fp.set_size_inches(cm_to_inches(4), cm_to_inches(3))
    plt.figure(0)
    color_list = ['darkviolet', 'navy', 'fuchsia', 'red', 'limegreen',
                  'gold', 'darkorange', 'dodgerblue']
    for i in range(len(files)):
        filename = files[i]
        sum_tab = pd.read_csv(filename)
        mix_id = int(re.sub('mix_', '', re.search('mix_[0-9]*', filename).group(0)))
        input_tab_filename = '{}/hiprfish_1023_mix_{}.csv'.format(data_dir, str(mix_id))
        input_tab = pd.read_csv(input_tab_filename)
        abundance = sum_tab.drop(columns=['Barcodes'])
        mean_absolute_abundance = abundance.sum(axis=1)
        ul_absolute_abundance = np.percentile(abundance.values, 75, axis=1)
        ll_absolute_abundance = np.percentile(abundance.values, 25, axis=1)
        sum_tab['MeasuredAbundance'] = mean_absolute_abundance / np.sum(mean_absolute_abundance)
        sum_tab['ULAbundance'] = ul_absolute_abundance / np.sum(mean_absolute_abundance)
        sum_tab['LLAbundance'] = ll_absolute_abundance / np.sum(mean_absolute_abundance)
        sum_tab = sum_tab.merge(input_tab, how='left', on='Barcodes').fillna(0)
        sum_tab_fp = sum_tab.loc[sum_tab.Concentration.values == 0]
        plt.figure(0)
        plt.plot(sum_tab.Concentration.values * 1000,
                 sum_tab.MeasuredAbundance.values * 1000,
                 '.', markersize=4, alpha=0.5, color=color_list[i], markeredgewidth=0)
        sum_tab_trim = sum_tab[sum_tab.Concentration != 0]
        slope, intercept, r_value, p_value, std_err = linregress(
            sum_tab_trim.Concentration.values, sum_tab_trim.MeasuredAbundance.values)
        gross_error_rate = sum_tab.loc[sum_tab.Concentration.values == 0].MeasuredAbundance.sum()
        plt.figure(1)
        plt.hist(sum_tab_fp.MeasuredAbundance.values * 1000, bins=100, alpha=0.2)
    plt.figure(0)
    plt.xlabel(r'Input$\times 10^{3}$', fontsize=8, color='black')
    plt.ylabel(r'Measured$\times 10^{3}$', fontsize=8, color='black', labelpad=1)
    plt.tick_params(direction='in', width=0.5, length=2, labelsize=8,
                    labelcolor='black', color='black')
    lim_max = np.maximum(np.max(sum_tab.Concentration), np.max(sum_tab.MeasuredAbundance)) * 1.05
    abundance_correlation_filename = '{}/abundance_correlation_all.pdf'.format(data_dir)
    abundance_fp_filename = '{}/abundance_false_positive_histogram.pdf'.format(data_dir)
    plt.plot([0, 17.5], [0, 17.5], '--', color='black', alpha=0.8, linewidth=0.5)
    plt.xlim(-0.5, 17.5)
    plt.ylim(-0.5, 17.5)
    plt.subplots_adjust(left=0.22, bottom=0.2, right=0.99, top=0.99)
    plt.axes().set_aspect('equal')
    plt.savefig(abundance_correlation_filename, dpi=300, transparent=True)
    plt.figure(1)
    plt.yscale('log')
    plt.xlabel(r'Measured Abundance$\times 10^{3}$', fontsize=8)
    plt.ylabel('Frequency', fontsize=8)
    plt.tick_params(direction='in', width=0.5, length=2, labelsize=8)
    plt.subplots_adjust(left=0.22, right=0.95, top=0.9, bottom=0.2)
    plt.savefig(abundance_fp_filename, dpi=300, transparent=True)
    plt.close()
fourteen_days_index = datetime_list.index(closest_date_with_data)  # index of our "14 days ago" date
last_14_days = daily_confirmed[(fourteen_days_index - len(daily_confirmed)):]
last_14_days_datetimes = datetime_list[(fourteen_days_index - len(daily_confirmed)):]

trend = [0]
for i in range(len(last_14_days)):
    if i > 0:
        if last_14_days[i] == 0:
            pass
        else:
            trend.append(last_14_days[i] - last_14_days[i - 1])

x_vals = [i for i in range(len(last_14_days))]
slope, intercept, r_value, p_value, std_err = linregress(x_vals, last_14_days)
if slope > 0:
    trend_color = '#faafaf'
elif slope <= 0:
    trend_color = '#affaaf'

# calculate the simple moving average with a window size of 5
# DAILY CASES
daily_confirmed = np.array(daily_confirmed)
confirmed_moving = list(moving_average(daily_confirmed))
leading_zeroes = [0, 0, 0, 0]
confirmed_moving = leading_zeroes + confirmed_moving
san_diego_data["Confirmed_Moving"] = confirmed_moving

# CUMULATIVE CASES
def do_plot(self, inputDir, plotOutDir, plotOutFileName, simDataFile, validationDataFile, metadata):
    if not os.path.isdir(inputDir):
        raise Exception("variantDir does not currently exist as a directory")
    if not os.path.exists(plotOutDir):
        os.mkdir(plotOutDir)
    ap = AnalysisPaths(inputDir, variant_plot=True)
    if ap.n_generation == 1:
        print("Need more data to create addedMass")
        return

    allScatter = plt.figure()
    allScatter.set_figwidth(11)
    allScatter.set_figheight(6)
    plt.style.use('seaborn-deep')
    color_cycle = plt.rcParams['axes.prop_cycle'].by_key()['color']
    title_list = [r"Glucose minimal, $\tau = $44 min",
                  r"Glucose minimal anaerobic, $\tau = $100 min",
                  r"Glucose minimal + 20 amino acids, $\tau = $25 min"]
    for varIdx in ap.get_variants():
        if varIdx == 0:
            plotIdx = 1
            gen = [2, 3]
        elif varIdx == 1:
            plotIdx = 0
            gen = [2, 3]
        elif varIdx == 2:
            plotIdx = 2
            gen = [2, 3]
        else:
            continue
        initial_masses = np.zeros(0)
        final_masses = np.zeros(0)
        all_cells = ap.get_cells(generation=gen, variant=[varIdx])
        if len(all_cells) == 0:
            continue
        fail = 0
        for simDir in all_cells:
            try:
                simOutDir = os.path.join(simDir, "simOut")
                mass = TableReader(os.path.join(simOutDir, "Mass"))
                cellMass = mass.readColumn("dryMass")
                initial_masses = np.hstack((initial_masses, cellMass[0]))
                final_masses = np.hstack((final_masses, cellMass[-1]))
            except Exception as e:
                print(e)
                fail += 1
        added_masses = final_masses - initial_masses
        scaled_initial_masses = initial_masses / initial_masses.mean()
        scaled_added_masses = added_masses / added_masses.mean()
        nbins = 5
        n, xbin = np.histogram(scaled_initial_masses, bins=nbins)
        sy, xbin = np.histogram(scaled_initial_masses, bins=nbins,
                                weights=scaled_added_masses)
        sy2, xbin = np.histogram(scaled_initial_masses, bins=nbins,
                                 weights=scaled_added_masses * scaled_added_masses)
        mean = sy / n
        std = np.sqrt(sy2 / (n - 1) - n * mean * mean / (n - 1))
        slope, intercept, r_value, p_value, std_err = linregress(
            scaled_initial_masses, scaled_added_masses)

        # plot all scatter plots
        plt.figure(allScatter.number)
        ax = plt.subplot2grid((1, 3), (0, plotIdx))
        ax.plot(scaled_initial_masses, scaled_added_masses, '.',
                color="black", alpha=0.2, zorder=1, markeredgewidth=0.0)
        ax.errorbar(((xbin[1:] + xbin[:-1]) / 2), mean, yerr=std,
                    color="black", linewidth=1, zorder=2)
        ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept,
                color="blue")
        ax.set_title(
            title_list[varIdx] + ", n=%d" % ((len(all_cells) - fail), ) + "\n" +
            r"$m_{add}$=%.3f$\times$$m_{init}$ + %.3f" % (slope, intercept) + "\n" +
            "r-value=%0.2g" % r_value + "\n" +
            "p-value=%0.2g" % p_value,
            fontsize=FONT_SIZE)
        ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
        ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)
        if varIdx == 1:
            ax.set_ylabel("Normed added mass", fontsize=FONT_SIZE)
        ax.set_xlabel("Normed initial mass", fontsize=FONT_SIZE)
        plt.subplots_adjust(bottom=0.2)
        whitePadSparklineAxis(ax)
        for tick in ax.yaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)
        for tick in ax.xaxis.get_major_ticks():
            tick.label.set_fontsize(FONT_SIZE)

        # plot stripped figure
        fig = plt.figure()
        fig.set_figwidth(3)
        fig.set_figheight(2)
        ax = plt.subplot2grid((1, 1), (0, 0))
        ax.plot(scaled_initial_masses, scaled_added_masses, '.',
                color=color_cycle[0], alpha=0.25, ms=6, zorder=1,
                markeredgewidth=0.0, clip_on=False)
        ax.plot(scaled_initial_masses, slope * scaled_initial_masses + intercept,
                color='k')
        ax.set_xlim([INIT_MASS_LOWER_LIM, INIT_MASS_UPPER_LIM])
        ax.set_ylim([ADDED_MASS_LOWER_LIM, ADDED_MASS_UPPER_LIM])
        ax.get_yaxis().get_major_formatter().set_useOffset(False)
        ax.get_xaxis().get_major_formatter().set_useOffset(False)
        whitePadSparklineAxis(ax)
        ax.tick_params(which='both', bottom=True, left=True, top=False,
                       right=False, labelbottom=True, labelleft=True,
                       labelsize=FONT_SIZE)
        ax.set_xlabel("")
        ax.set_ylabel("")
        plt.tight_layout()
        exportFigure(plt, plotOutDir, plotOutFileName + str(varIdx) + "_stripped",
                     metadata, transparent=True)

    plt.figure(allScatter.number)
    exportFigure(plt, plotOutDir, plotOutFileName, metadata)
    plt.close("all")
import numpy as np
import matplotlib.pyplot as plt
from scipy import stats

M = 10000
N = 52 * 5

betaHat = np.empty(M)
Rsqrd = np.empty(M)

for i in range(M):
    y = np.cumsum(np.random.normal(size=N))
    x = np.cumsum(np.random.normal(size=N))
    reg = stats.linregress(x, y)
    betaHat[i] = reg.slope
    Rsqrd[i] = reg.rvalue**2

plt.hist(betaHat, bins=100)
plt.title('Sampling Distribution of Beta')
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.savefig("images/Spurious-Beta-Histogram.png")
plt.clf()

plt.hist(Rsqrd, bins=100)
plt.title('Sampling Distribution of R-Squared')
plt.xlabel("Value")
plt.ylabel("Frequency")
plt.savefig("images/Spurious-Rsqrd-Histogram.png")
def __call__(
    self,
    screen_object: Union['Screen', Any],
    mode: Literal["mean", "pointmutant"] = 'pointmutant',
    min_score: Optional[float] = None,
    max_score: Optional[float] = None,
    replicate: int = -1,
    replicate_second_object: int = -1,
    output_file: Union[None, str, Path] = None,
    **kwargs: Any,
) -> None:
    """
    Generate a scatter plot between object and a second object of the same class.

    Parameters
    ----------
    screen_object : object from class *Screen* to do the scatter with

    mode : str, default 'pointmutant'
        Alternatively set to "mean" for the mean of each position.

    min_score : float, default None
        Change values below a minimum score to be that score.
        i.e., setting min_score = -1 will change any value smaller
        than -1 to -1.

    max_score : float, default None
        Change values above a maximum score to be that score.
        i.e., setting max_score = 1 will change any value greater
        than 1 to 1.

    replicate : int, default -1
        Set the replicate to plot. By default, the mean is plotted.
        First replicate starts with index 0.
        If there is only one replicate, then leave this parameter untouched.

    replicate_second_object : int, default -1
        Set the replicate to plot. By default, the mean is plotted.
        First replicate starts with index 0.
        If there is only one replicate, then leave this parameter untouched.

    output_file : str, default None
        If you want to export the generated graph, add the path and name
        of the file. Example: 'path/filename.png' or 'path/filename.svg'.

    **kwargs : other keyword arguments
    """
    temp_kwargs = self._update_kwargs(kwargs)
    self.graph_parameters()

    # Choose mode:
    if mode.lower() == 'pointmutant':
        df_output: DataFrame = process_by_pointmutant(
            self.dataframes.df_notstopcodons_limit_score(
                min_score, max_score)[replicate],
            screen_object.dataframes.df_notstopcodons_limit_score(
                min_score, max_score)[replicate_second_object])
    else:
        df_output = process_mean_residue(
            self.dataframes.df_notstopcodons_limit_score(
                min_score, max_score)[replicate],
            screen_object.dataframes.df_notstopcodons_limit_score(
                min_score, max_score)[replicate_second_object])

    # create figure
    self.fig, self.ax_object = plt.subplots(figsize=temp_kwargs['figsize'])

    # Scatter data points
    plt.scatter(df_output['dataset_1'], df_output['dataset_2'], c='k', s=8,
                alpha=0.5, rasterized=True, label='_nolegend_')

    # correlation
    _, _, r_value, _, _ = linregress(df_output['dataset_1'], df_output['dataset_2'])

    # fit and graph line
    fit = np.polyfit(df_output['dataset_1'], df_output['dataset_2'], 1)
    plt.plot(np.unique(df_output['dataset_1']),
             np.poly1d(fit)(np.unique(df_output['dataset_1'])),
             color='r', linewidth=1,
             label="$R^2$ = {}".format(str(round(r_value**2, 2))))

    self._tune_plot(temp_kwargs)
    self._save_work(output_file, temp_kwargs)

    if temp_kwargs['show']:
        plt.show()
def determine_zone_slope(self, dbz_3d, clutter, angles, azim_zone, gate_zone):
    """
    We will only use CONVOL scans for determining slopes.
    The following property can be used to exclude clutter from slope:
    self.convol_clutter
    """
    azim_region = self.config['precip']['azim_region']
    gate_region = self.config['precip']['gate_region']
    min_azim = azim_zone * azim_region
    max_azim = (azim_zone + 1) * azim_region
    min_gate = gate_zone * gate_region
    max_gate = (gate_zone + 1) * gate_region
    angle_list = []
    dbz_list = []
    height_list = []
    # Above this threshold, we consider it rain.
    max_dbz_per_km = -5.0
    for x in range(0, len(angles)):
        angle = angles[x]
        zone_data = dbz_3d[x, min_azim:max_azim, min_gate:max_gate]
        # Should be more OOP
        zone_clutter = clutter[x, min_azim:max_azim, min_gate:max_gate]
        zone_clutter_bool = zone_clutter.astype(bool)
        if zone_data.shape != zone_clutter.shape:
            raise ValueError("Incompatible zone shapes.")
        zone_flat = list(zone_data.flatten())
        zone_clutter_flat = list(zone_clutter_bool.flatten())
        num_cells = len(zone_flat)
        num_clutter = len(zone_clutter_flat)
        if num_cells != num_clutter:
            raise ValueError("Incompatible list lengths.")
        for y in range(0, num_cells):
            if not zone_clutter_flat[y]:
                dbz = zone_flat[y]
                if not isinstance(dbz, np.ma.core.MaskedConstant):
                    angle_list.append(angle)
                    dbz_list.append(dbz)
                    # Making trig approximation h = x*tan(theta)
                    # Note, distance must be in km, and theta must
                    # be converted to radians.
                    # TODO: Use builtin pyart methods.
                    rad_angle = math.radians(angle)
                    # Using midpoint approximation
                    midpoint = int((min_gate + max_gate) / 2.0)
                    # Convert to kilometers
                    distance = midpoint * self.grid_info.gate_step * 0.001
                    height = distance * math.tan(rad_angle)
                    height_list.append(height)
    if len(angle_list) != len(dbz_list):
        raise ValueError("Zone slope error")
    # If there is only data from one elevation angle, we cannot
    # compute the slope.
    angle_set = set(angle_list)
    if len(angle_set) < 2:
        return np.nan
    else:
        slope, intercept, r_value, p_value, std_err = stats.linregress(
            height_list, dbz_list)
        return slope
data[barcode][n] = data[barcode][n] / sample_sum for barcode in data: data[barcode] = np.log2(data[barcode]) # x vals (# of generations) for slope calculations time_points = [0.0, 1.74, 3.71, 0, 1.75, 3.37, 0., 2., 4.] # calculate slope for every allele for each experiment barcode_slopes = dict(zip(data.keys(), np.zeros((len(data.keys()), 3)))) for barcode in data: for n in range(0, 3): yvals = data[barcode][(n * 3):(n * 3 + 3)] xvals = time_points[(n * 3):(n * 3 + 3)] line = stats.linregress(xvals, yvals) barcode_slopes[barcode][n] = line[0] #print barcode_slopes def get_fitness(barcode_slopes): # get avg wt slope for each experiment wt_slopes = np.zeros((1, 3)) wt_count = 0 for barcode in barcode_slopes: if allele_map[barcode][1] == 'WT': wt_slopes += barcode_slopes[barcode] wt_count += 1 wt_slopes = wt_slopes / float(wt_count)
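# Each barcode's slope above is a per-generation log2 growth rate, and the
# truncated get_fitness averages the wild-type slopes. The exact convention
# after the truncation point isn't shown; reporting each slope relative to the
# WT mean is one common choice. A sketch under that assumption, with synthetic
# numbers:
import numpy as np
from scipy import stats

generations = [0.0, 1.74, 3.71]              # x values for one experiment
log2_freq = np.log2([100.0, 180.0, 330.0])   # synthetic barcode counts
slope = stats.linregress(generations, log2_freq)[0]

wt_slope = 0.40                              # assumed mean WT slope
print(slope - wt_slope)                      # fitness relative to WT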
def get_trendlines_regression(signal: list, **kwargs) -> dict:
    """Get Trendlines Regression

    A regression-only based method of generating trendlines (without the use
    of local minima and maxima).

    Arguments:
        signal {list} -- signal of which to find a trend (can be anything)

    Optional Args:
        iterations {int} -- number of passes through trendline creation with
                            "divisors" (default: {15})
        threshold {float} -- acceptable ratio a trendline can be off and still
                             counted in current plot (default: {0.1})
        dates {list} -- typically DataFrame.index (default: {None})
        indicator {str} -- for plot name, indicator trend analyzed
                           (default: {''})
        plot_output {bool} -- (default: {True})
        name {str} -- (default: {''})
        views {str} -- (default: {''})

    Returns:
        dict -- trendline content
    """
    # Guard against a missing or incomplete config file; previously DIVISORS
    # was only assigned inside the nested ifs, so a missing file raised a
    # NameError below.
    DIVISORS = []

    config_path = os.path.join("resources", "config.json")
    if os.path.exists(config_path):
        with open(config_path, 'r') as cpf:
            c_data = json.load(cpf)

        ranges = c_data.get('trendlines', {}).get('divisors', {}).get('ranges', [])
        ranged = 0
        for rg in ranges:
            if len(signal) > rg:
                ranged += 1

        divs = c_data.get('trendlines', {}).get('divisors', {}).get('divisors')
        if divs is not None and len(divs) > ranged:
            DIVISORS = divs[ranged]

    iterations = kwargs.get('iterations', len(DIVISORS))
    threshold = kwargs.get('threshold', 0.1)
    dates = kwargs.get('dates')
    indicator = kwargs.get('indicator', '')
    plot_output = kwargs.get('plot_output', True)
    name = kwargs.get('name', '')
    views = kwargs.get('views', '')

    indexes = list(range(len(signal)))

    if iterations > len(DIVISORS):
        iterations = len(DIVISORS)
    divisors = DIVISORS[0:iterations]

    lines = []
    x_s = []
    t_line_content = []
    line_id = 0

    y_max = max(signal) - min(signal)
    x_max = len(signal)
    scale_change = float(x_max) / float(y_max)

    for div in divisors:
        period = int(len(signal) / div)
        for i in range(div):
            for k in range(2):
                data = dict()
                if i == div - 1:
                    data['value'] = signal[period * i:len(signal)].copy()
                    data['x'] = indexes[period * i:len(signal)].copy()
                else:
                    data['value'] = signal[period * i:period * (i + 1)].copy()
                    data['x'] = indexes[period * i:period * (i + 1)].copy()

                data = pd.DataFrame.from_dict(data)

                # Repeatedly fit, then keep only the points above (k == 0) or
                # below (k == 1) the fitted line.
                while len(data['x']) > 4:
                    reg = linregress(data['x'], data['value'])
                    if k == 0:
                        data = data.loc[data['value'] > reg[0] * data['x'] + reg[1]]
                    else:
                        data = data.loc[data['value'] < reg[0] * data['x'] + reg[1]]

                reg = linregress(data['x'], data['value'])
                content = {'slope': reg[0], 'intercept': reg[1]}
                content['angle'] = np.arctan(reg[0] * scale_change) / np.pi * 180.0
                if reg[0] < 0.0:
                    content['angle'] = 180.0 + \
                        (np.arctan(reg[0] * scale_change) / np.pi * 180.0)

                line = []
                for ind in indexes:
                    line.append(reg[0] * ind + reg[1])

                x_line = indexes.copy()
                line_corrected, x_corrected = filter_nearest_to_signal(
                    signal, x_line, line)

                if len(x_corrected) > 0:
                    content['length'] = len(x_corrected)
                    content['id'] = line_id
                    line_id += 1
                    lines.append(line_corrected.copy())
                    x_s.append(x_corrected.copy())
                    t_line_content.append(content)

        for i in range(period, len(signal), 2):
            for k in range(2):
                data = dict()
                data['value'] = signal[i - period:i].copy()
                data['x'] = indexes[i - period:i].copy()
                data = pd.DataFrame.from_dict(data)

                while len(data['x']) > 4:
                    reg = linregress(data['x'], data['value'])
                    if k == 0:
                        data = data.loc[data['value'] > reg[0] * data['x'] + reg[1]]
                    else:
                        data = data.loc[data['value'] < reg[0] * data['x'] + reg[1]]

                reg = linregress(data['x'], data['value'])
                content = {'slope': reg[0], 'intercept': reg[1]}
                content['angle'] = np.arctan(reg[0] * scale_change) / np.pi * 180.0
                if reg[0] < 0.0:
                    content['angle'] = 180.0 + \
                        (np.arctan(reg[0] * scale_change) / np.pi * 180.0)

                line = []
                for ind in indexes:
                    line.append(reg[0] * ind + reg[1])

                x_line = indexes.copy()
                line_corrected, x_corrected = filter_nearest_to_signal(
                    signal, x_line, line, threshold=threshold)

                if len(x_corrected) > 0:
                    content['length'] = len(x_corrected)
                    content['id'] = line_id
                    line_id += 1
                    lines.append(line_corrected.copy())
                    x_s.append(x_corrected.copy())
                    t_line_content.append(content)

    # Handle overload of lines (consolidate).
    # Idea: bucket sort t_line_content by 'slope'; within each bucket,
    # consolidate similar intercepts, both by line extension/combination and
    # by slope averaging. Track the line 'id' list so that the corrections
    # can be made for plots and x_plots.
    t_line_content, lines, x_s = consolidate_lines(
        t_line_content, lines, x_s, signal)
    t_line_content, lines, x_s = consolidate_lines(
        t_line_content, lines, x_s, signal, thresh=0.2)
    t_line_content, lines, x_s = consolidate_lines(
        t_line_content, lines, x_s, signal, thresh=0.3)

    plots = []
    x_plots = []
    plots.append(signal)
    x_plots.append(list(range(len(signal))))
    plots.extend(lines)
    x_plots.extend(x_s)

    if dates is not None:
        new_xs = []
        for xps in x_plots:
            nxs = [dates[i] for i in xps]
            new_xs.append(nxs)
        x_plots = new_xs

    title = f"{indicator.capitalize()} Trendlines"
    if plot_output:
        generic_plotting(plots, x=x_plots, title=title)
    else:
        filename = os.path.join(name, views, f"{indicator}_trendlines_{name}.png")
        generic_plotting(plots, x=x_plots, title=title,
                         filename=filename, saveFig=True)

    # NOTE: as written, this returns an empty dict; the assembled
    # t_line_content is used for consolidation and plotting only.
    trends = dict()
    return trends
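# Both inner loops above share one core move: fit a line, drop every point on
# the wrong side of it, and refit until only the extreme points remain. A
# stripped-down standalone version of that loop (hypothetical helper, guarded
# so linregress never sees fewer than two points):
import numpy as np
import pandas as pd
from scipy.stats import linregress

def one_sided_trendline(y, above=True, min_points=4):
    # above=True converges on a resistance line through the upper extremes;
    # above=False on a support line through the lower extremes.
    df = pd.DataFrame({'x': np.arange(len(y), dtype=float), 'y': y})
    reg = linregress(df['x'], df['y'])
    while len(df) > min_points:
        fitted = reg.slope * df['x'] + reg.intercept
        kept = df[df['y'] > fitted] if above else df[df['y'] < fitted]
        if len(kept) < 2:
            break
        df = kept
        reg = linregress(df['x'], df['y'])
    return reg.slope, reg.intercept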
    err = float(f.readline().strip())
    data = numpy.genfromtxt(file_name, skip_header=1, delimiter=",")
    return data, err


import sys

try:
    name = sys.argv[1]
except IndexError:
    name = "KCl"

plt.style.use("science")  # style.use returns None, so don't assign it to fig
plt.figure(figsize=(2.8, 2.1), facecolor="w")
# plt.axvline(x=0, ls="--", color="#00ffee")
data, err = read(name)
# data[-1, -1] *= 1.5
plt.errorbar(data[:, 0], data[:, 1], yerr=err / 2, fmt="o")

log_x = numpy.log(data[:, 0])
log_y = numpy.log(data[:, 1])
p = linregress(log_x, log_y)
print(p)

xx = numpy.logspace(-4.2, -1.5)
# Guide line with hard-coded coefficients (rather than p.slope / p.intercept
# from the regression above).
yy = 0.0014 * xx**-0.587
plt.plot(xx, yy, "--")

plt.xlabel("KCl concentration (mol/L)")
plt.ylabel("Rectification")
plt.xscale("log")
plt.yscale("log")
plt.savefig("../img/rect_{}_conc.svg".format(name))
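# The plotted guide line is a power law, which is exactly what the log-log
# linregress estimates: the slope is the exponent and exp(intercept) is the
# prefactor. A noiseless check of that identity:
import numpy
from scipy.stats import linregress

x = numpy.logspace(-4, -2, 20)
y = 0.0014 * x ** -0.587           # same form as the guide line above
fit = linregress(numpy.log(x), numpy.log(y))
print(fit.slope)                   # -> -0.587 (the exponent)
print(numpy.exp(fit.intercept))    # -> 0.0014 (the prefactor)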
def _getValues(self, index_returns: Series, portfolio_returns: Series):
    # Note: the previous signature defaulted both arguments to empty
    # Series(), which linregress cannot fit; require them explicitly.
    return stats.linregress(index_returns, portfolio_returns)
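# In this context the regression slope is the portfolio's beta against the
# index. A hypothetical usage with synthetic daily returns:
import numpy as np
from pandas import Series
from scipy import stats

rng = np.random.default_rng(1)
index_returns = Series(rng.normal(0.0005, 0.01, 250))
portfolio_returns = Series(1.2 * index_returns + rng.normal(0, 0.005, 250))

beta = stats.linregress(index_returns, portfolio_returns).slope
print(round(beta, 2))  # ~1.2 by construction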
# In[13]: # Sort by x axis x_axis, y_axis = (list(t) for t in zip( *sorted(zip(trimmed_lengths, trimmed_colonies)))) # In[14]: # Gather Data get_ipython().run_line_magic('matplotlib', 'inline') import matplotlib.pyplot as plt import numpy as np from scipy import stats slope, intercept, r_value, p_value, std_err = stats.linregress(x_axis, y_axis) print("slope: " + str(round(slope, 4))) print("r value: " + str(round(r_value, 4))) print("r squared value: " + str(round(r_value**2, 4))) print("p value: " + str(round(p_value, 8))) print("std err: " + str(round(std_err, 3))) plt.scatter(x_axis, y_axis) plt.plot(np.unique(x_axis), np.poly1d(np.polyfit(x_axis, y_axis, 1))(np.unique(x_axis))) plt.title('Transformation Efficiency in Aquarium from Integrant Length') plt.xlabel('Plasmid Length') plt.ylabel('Colonies Produced') plt.show() # In[ ]:
def _slope(ts): # original (James?) x = np.arange(len(ts)) log_ts = np.log(ts) slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts) annualized_slope = ((1 + slope)**250 ) * 100 return annualized_slope * (r_value ** 2)
def get_lines_from_period(fund: pd.DataFrame, kargs: list, interval: int, **kwargs) -> tuple:
    """Get Lines from Period

    Arguments:
        fund {pd.DataFrame} -- fund dataset
        kargs {list} -- mins and maxes of x and y lists
        interval {int} -- period of time for a lookback of a trend

    Optional Args:
        vq {dict} -- volatility quotient, used to determine if a trendline is
                     still valid at the end of the period by providing a
                     volatility threshold (default: {0.06})

    Returns:
        tuple -- (X, Y) lists of trendline x-indices and y-values for the period
    """
    vq = kwargs.get('vq', 0.06)

    EXTENSION = interval
    BREAK_LOOP = 50
    cycles = int(np.floor(len(fund['Close']) / interval))
    mins_y = kargs[1]
    mins_x = kargs[0]
    maxes_y = kargs[3]
    maxes_x = kargs[2]
    X = []
    Y = []

    for cycle in range(cycles):
        start = cycle * interval
        end = start + interval
        data = fund['Close'][start:end].copy()
        x = list(range(start, end))
        reg = linregress(x=x, y=data)

        # On an uptrend, fit a support line through the local minima;
        # on a downtrend, a resistance line through the local maxima.
        use_min = reg[0] >= 0

        count = 0
        st_count = count
        if use_min:
            while (count < len(mins_x)) and (mins_x[count] < start):
                count += 1
                st_count = count
            end_count = st_count
            while (count < len(mins_x)) and (mins_x[count] < end):
                count += 1
                end_count = count

            datay = mins_y[st_count:end_count].copy()
            datax = mins_x[st_count:end_count].copy()
            dataz = {'x': datax, 'y': datay}
            dataw = pd.DataFrame.from_dict(dataz)
            # (A former dataw.set_index('x') call here was a no-op: its
            # result was discarded, and the loop below needs 'x' to stay
            # a column.)
            datav = dataw.copy()

            stop_loop = 0
            while ((len(dataw['x']) > 0) and (reg[0] > 0.0)) and (stop_loop < BREAK_LOOP):
                reg = linregress(x=dataw['x'], y=dataw['y'])
                datav = dataw.copy()
                dataw = dataw.loc[dataw['y'] < reg[0] * dataw['x'] + reg[1]]
                stop_loop += 1

            if reg[0] < 0.0:
                dataw = datav.copy()
                if len(dataw) >= 2:
                    reg = linregress(x=dataw['x'], y=dataw['y'])
        else:
            while (count < len(maxes_x)) and (maxes_x[count] < start):
                count += 1
                st_count = count
            end_count = st_count
            while (count < len(maxes_x)) and (maxes_x[count] < end):
                count += 1
                end_count = count

            datay = maxes_y[st_count:end_count].copy()
            datax = maxes_x[st_count:end_count].copy()
            dataz = {'x': datax, 'y': datay}
            dataw = pd.DataFrame.from_dict(dataz)
            datav = dataw.copy()

            stop_loop = 0
            while ((len(dataw['x']) > 0) and (reg[0] < 0.0)) and (stop_loop < BREAK_LOOP):
                reg = linregress(x=dataw['x'], y=dataw['y'])
                datav = dataw.copy()
                dataw = dataw.loc[dataw['y'] > reg[0] * dataw['x'] + reg[1]]
                stop_loop += 1

            if reg[0] > 0.0:
                dataw = datav.copy()
                if len(dataw) >= 2:
                    reg = linregress(x=dataw['x'], y=dataw['y'])

        end = line_extender(fund, list(range(start, end)), reg)
        if end != 0:
            max_range = [start, end]
            if max_range[1] > len(fund['Close']):
                max_range[1] = len(fund['Close'])
            if interval > 100:
                max_range[1] = len(fund['Close'])
            if end + EXTENSION > int(0.9 * float(len(fund['Close']))):
                max_range[1] = len(fund['Close'])

            max_range[1] = line_reducer(fund, max_range[1], reg, threshold=vq)

            datax = list(range(max_range[0], max_range[1]))
            datay = [reg[0] * float(x) + reg[1] for x in datax]

            if (len(datay) > 0) and (not math.isnan(datay[0])):
                X.append(datax)
                Y.append(datay)

    return X, Y
def slope(ts): ## new version x = np.arange(len(ts)) log_ts = np.log(ts) slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts) annualized_slope = (np.power(np.exp(slope), 250) - 1) * 100 return annualized_slope * (r_value ** 2)
def compute(self, drift_tube_length=90.33, neutral_mass=28.013): """compute the ccs values based on the multi-field parameters """ # ======================== # given parameters # ======================== # mass: scalar # drift_tube_length (cm): scalar # temperatures, T(C): array --> T(K) = T(C)+273.15 T_K = np.array(self.temperatures) + 273.15 # pressures, P(torr): array --> P(Pa) = P(torr)/760*101325 P_torr = np.array(self.pressures) P_Pa = P_torr / 760 * 101325 # voltage_cell, Vcell: array --> E = Vcell / drift_tube_length Vcell = np.array(self.voltages) E = Vcell / drift_tube_length inv_E = 1.0 / (E * 100.0) # arrival_time (ms): array arrival_sec = np.array(self.arrival_time) / 1000 # neutral_mass = 28.013 (N2 by default) # ======================== # constant parameters # ======================== # 1.60217657E-19 or 1.6021766208E-19 e = 1.6021766208E-19 charge_state = 1 boltzmann_constant = 1.38064852E-23 N0 = 101325 / boltzmann_constant / 273.15 # N0_(m-3) # ======================== # computed parameters by given # ======================== # P/V = P(torr) / Vcell self._p_v = P_torr / Vcell # E/N (Td) = E / P(torr) / 0.3535 E_N = (E / P_torr) / 0.3535 mass_in_kg = self.mass * 1.66054E-27 neutral_mass_in_kg = neutral_mass * 1.66054E-27 reduced_mass_in_kg = (mass_in_kg * neutral_mass_in_kg / (mass_in_kg + neutral_mass_in_kg)) # ======================== slope, intercept, r_value, p_value, std_err = linregress( self._p_v, arrival_sec) # drift_time (sec) = arrival_sec - intercept drift_time = arrival_sec - intercept # compute CCS by Mason-Schamp Equation # ccs = 3 * e / 16 / N0 * np.sqrt(2 * np.pi / reduced_mass_in_kg / boltzmann_constant / T_K) \ # * drift_time * 760 * T_K * Vcell / (drift_tube_length / 100)**2 / P_torr / 273.15 * 1E20 K0 = drift_tube_length * drift_tube_length / slope * 273.15 / 760 / np.mean( T_K) ccs = 3 * e / 16 / N0 / K0 / 0.0001 * np.sqrt( 2 * np.pi / (boltzmann_constant * reduced_mass_in_kg * np.mean(T_K))) * 1e20 properties = { 'slope': slope, 'intercept': intercept, 'r2': r_value**2, 'p_value': p_value, 'k0': K0, 'ccs': ccs } for p in properties: self._metadata[p] = properties[p]
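# The regression against P/V is the heart of the multi-field method above:
# arrival time is linear in the pressure-to-voltage ratio, so the intercept
# estimates the time the ion spends outside the drift region, and subtracting
# it leaves the true drift time. A synthetic sanity check (all numbers
# fabricated, not instrument values):
import numpy as np
from scipy.stats import linregress

p_over_v = np.array([0.10, 0.12, 0.14, 0.16, 0.18])   # P(torr) / Vcell
t0 = 0.0005                                           # s, non-drift time
arrival_sec = t0 + 0.25 * p_over_v                    # fabricated response

fit = linregress(p_over_v, arrival_sec)
print(fit.intercept)                  # -> ~0.0005, recovering t0
print(arrival_sec - fit.intercept)    # drift times, as in the code above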
def momentum_func(self, price_array): r = np.log(price_array) slope, _, rvalue, _, _ = linregress(np.arange(len(r)), r) annualized = (1 + slope)**252 return (annualized * (rvalue**2))
def slope_v(ts): # new (Vladimir) x = np.arange(len(ts)) log_ts = np.log(ts) slope, intercept, r_value, p_value, std_err = stats.linregress(x, log_ts) annualized_slope = ((1 + slope)**250 -1.0) * 100 return annualized_slope * (r_value ** 2)
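# The slope variants above differ in one step. Since the regression is on log
# prices, `slope` is a daily log return; the exact compounding is
# exp(slope)**days - 1, as in the two newer versions. The original (and
# momentum_func above, with 252 days) applies (1 + slope)**days, which treats
# a log return as a simple return and also omits the -1; the forms only agree
# for very small slopes. A quick comparison:
import numpy as np

slope = 0.001   # daily log-return slope from the fit
days = 250

print(((1 + slope) ** days) * 100)        # original form: ~128.4 (no -1)
print(((1 + slope) ** days - 1) * 100)    # with the -1: ~28.4
print((np.exp(slope * days) - 1) * 100)   # exact compounding: ~28.4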
def linear_trend(self):
    self.regression = stats.linregress(self.time, self.magnitude)
    self.features['linear trend'] = self.regression.slope
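# Attribute access as above works because linregress returns a named
# LinregressResult rather than a plain tuple, so attribute access and the
# positional unpacking used in the other snippets are interchangeable:
import numpy as np
from scipy import stats

res = stats.linregress(np.arange(5), [1.0, 2.1, 2.9, 4.2, 5.0])
print(res.slope, res.intercept, res.rvalue, res.pvalue, res.stderr)
slope = res[0]   # positional indexing still works, as in the snippets above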
# make those sets the same size in case one is bigger than the other
normalized_length = min(len(company_data), len(market_data))
company_data = company_data[:normalized_length]
market_data = market_data[:normalized_length]

# extract 'return' rows
company_return = [row[2] for row in company_data]
market_return = [row[2] for row in market_data]

# extract estimation period: all observations earlier than the event window
comp_est_period = company_return[EVENT_WINDOW:]
market_est_period = market_return[EVENT_WINDOW:]

# calculate linear regression over the estimation period
beta, alpha, r_value, p_value, std_err = linregress(
    market_est_period, comp_est_period)

# extrapolate the regression into the event window:
# put market returns through the regression and
# calculate the expected company return
company_expected_return = []
for idx, _ in enumerate(company_return[:EVENT_WINDOW]):
    exp_val = market_return[idx] * beta + alpha
    company_expected_return.append(exp_val)

# put data into numpy format
company_expected_return_event_window = np.array(company_expected_return)
company_return_event_window = np.array(company_return[:EVENT_WINDOW])

# calculate abnormal return
abnormal_return = company_return_event_window - company_expected_return_event_window
elif tag in ADJ: adjs.append(w) wLen.append(len(words)) vLen.append(len(verbs)) nLen.append(len(nouns)) advLen.append(len(advs)) adjLen.append(len(adjs)) plotData0 = [(wLen, vLen), (wLen, nLen), (wLen, adjLen)] yaxisLabels = ['V x 1000', 'N x 1000', 'ADJ x 1000'] plt.figure(figsize=(7.5,7.5)) for (pane, data) in enumerate(plotData0): X, Y = data[0], data[1] slope, intercept = stats.linregress(X, Y)[0:2] rX = slope*array(X) + intercept plt.subplot(2, 2, pane+1) plt.scatter(X, Y) plt.plot(X, rX, 'r', label='slope={},\nintercept={}'.format( round(slope,2), round(intercept,2))) plt.ylim(plt.xlim()) wTicks = [int(tk/1000) for tk in plt.gca().get_xticks()] plt.gca().set_xticklabels(wTicks) plt.gca().set_yticklabels(wTicks) offset = (plt.gca().get_xticks()[1]-plt.gca().get_xticks()[0])/10 for pt in range(len(X)): plt.annotate(str(pt), # scifiCorpus[i] xy=(X[pt], Y[pt]),
print(x11.shape)
print(y11.shape)
x11a = np.array([x11, y11])
print(x11a.shape, 'x11a.shape')

# Drop any column where either series is NaN.
x11b = x11a[:, ~np.isnan(x11a).any(axis=0)]
print(x11b.shape, 'x11b.shape')
print(x11b, 'x11b')
x1 = x11b[0]
y1 = x11b[1]
print(x1, 'x1')
print(y1, 'y1')

x1y1 = np.vstack([x1, y1])
z1 = gaussian_kde(x1y1)(x1y1)

slope1, intercept1, r_value1, p_value1, std_err1 = stats.linregress(x1, y1)
line1 = slope1 * x1 + intercept1
correlate_annual = stats.pearsonr(x1, y1)

# Regression using the bootstrap rma helper
regres_annual = rma(x1, y1, len(x1), 1000)
print('slope annual: ', regres_annual[0])
print('intercept annual: ', regres_annual[1])
print('slope error annual: ', regres_annual[2])
print('intercept error annual: ', regres_annual[3])

# plotting scatter plot
title_list1 = 'GC and IASI NH$_3$ Column (Annual)'
fig1 = plt.figure(facecolor='white', figsize=[11, 11])
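# The custom `rma` helper above appears to be a reduced-major-axis (geometric
# mean) regression with bootstrapped errors. Assuming that, the point
# estimates have a simple closed form, shown here for reference; the
# bootstrap is only needed for the error terms:
import numpy as np
from scipy import stats

def rma_fit(x, y):
    # Reduced major axis: slope = sign(r) * std(y) / std(x),
    # with the line passing through the means.
    r = stats.pearsonr(x, y)[0]
    slope = np.sign(r) * np.std(y, ddof=1) / np.std(x, ddof=1)
    intercept = np.mean(y) - slope * np.mean(x)
    return slope, intercept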