def test_multilinear_model(self): x = np.linspace(0.0, 5.0) y = 10.0 + 5.0 * x data = Data(x, y) odr_obj = ODR(data, multilinear) output = odr_obj.run() assert_array_almost_equal(output.beta, [10.0, 5.0])
def orthoregress(x, y):
    """Fit a straight line to (x, y) by Orthogonal Distance Regression.

    Mirrors the calling convention of scipy.stats.linregress. Adapted from
    https://gist.github.com/robintw/d94eb527c44966fbc8b9#file-orthoregress-py

    An ordinary least-squares fit supplies the starting parameters, which
    are then refined through the scipy.odr interface to the ODRPACK
    Fortran code.

    Arguments:
        x: x data
        y: y data

    Returns:
        [slope, intercept, residual], where residual is the ``res_var``
        attribute of the ODR output.
    """

    def line(params, xs):
        """Straight-line model handed to ODR."""
        return (params[0] * xs) + params[1]

    ols_fit = stats.linregress(x, y)
    start = ols_fit[0:2]  # first two fields of the least-squares result
    result = ODR(Data(x, y), Model(line), beta0=start).run()
    return list(result.beta) + [result.res_var]
def test_quadratic_model(self): x = np.linspace(0.0, 5.0) y = 1.0 * x**2 + 2.0 * x + 3.0 data = Data(x, y) odr_obj = ODR(data, quadratic) output = odr_obj.run() assert_array_almost_equal(output.beta, [1.0, 2.0, 3.0])
def test_exponential_model(self): x = np.linspace(0.0, 5.0) y = -10.0 + np.exp(0.5 * x) data = Data(x, y) odr_obj = ODR(data, exponential) output = odr_obj.run() assert_array_almost_equal(output.beta, [-10.0, 0.5])
def test_unilinear_model(self): x = np.linspace(0.0, 5.0) y = 1.0 * x + 2.0 data = Data(x, y) odr_obj = ODR(data, unilinear) output = odr_obj.run() assert_array_almost_equal(output.beta, [1.0, 2.0])
def test_explicit(self):
    """Fit an explicit model with user-supplied Jacobians and compare
    ``beta``, ``sd_beta`` and ``cov_beta`` against pinned reference values.
    """
    # The model function and both Jacobians are methods of the test class.
    explicit_mod = Model(
        self.explicit_fcn,
        fjacb=self.explicit_fjb,
        fjacd=self.explicit_fjd,
        meta=dict(name='Sample Explicit Model',
                  ref='ODRPACK UG, pg. 39'),
    )
    explicit_dat = Data([0.,0.,5.,7.,7.5,10.,16.,26.,30.,34.,34.5,100.],
                    [1265.,1263.6,1258.,1254.,1253.,1249.8,1237.,1218.,1220.6,
                     1213.8,1215.5,1212.])
    # ifixx carries one flag per observation (twelve here).
    explicit_odr = ODR(explicit_dat, explicit_mod, beta0=[1500.0, -50.0, -0.1],
                   ifixx=[0,0,1,1,1,1,1,1,1,1,1,0])
    explicit_odr.set_job(deriv=2)
    # Silence all ODRPACK report output.
    explicit_odr.set_iprint(init=0, iter=0, final=0)

    out = explicit_odr.run()
    # Fitted parameters.
    assert_array_almost_equal(
        out.beta,
        np.array([1.2646548050648876e+03, -5.4018409956678255e+01,
                  -8.7849712165253724e-02]),
    )
    # Standard deviations of the fitted parameters.
    assert_array_almost_equal(
        out.sd_beta,
        np.array([1.0349270280543437, 1.583997785262061, 0.0063321988657267]),
    )
    # Covariance matrix of the fitted parameters.
    assert_array_almost_equal(
        out.cov_beta,
        np.array([[4.4949592379003039e-01, -3.7421976890364739e-01,
                   -8.0978217468468912e-04],
                  [-3.7421976890364739e-01, 1.0529686462751804e+00,
                   -1.9453521827942002e-03],
                  [-8.0978217468468912e-04, -1.9453521827942002e-03,
                   1.6827336938454476e-05]]),
    )
def test_output_file_overwrite(self): """ Verify fix for gh-1892 """ def func(b, x): return b[0] + b[1] * x p = Model(func) data = Data(np.arange(10), 12 * np.arange(10)) tmp_dir = tempfile.mkdtemp() error_file_path = os.path.join(tmp_dir, "error.dat") report_file_path = os.path.join(tmp_dir, "report.dat") try: ODR(data, p, beta0=[0.1, 13], errfile=error_file_path, rptfile=report_file_path).run() ODR(data, p, beta0=[0.1, 13], errfile=error_file_path, rptfile=report_file_path, overwrite=True).run() finally: # remove output files for clean up shutil.rmtree(tmp_dir)
def test_polynomial_model(self): x = np.linspace(0.0, 5.0) y = 1.0 + 2.0 * x + 3.0 * x**2 + 4.0 * x**3 poly_model = polynomial(3) data = Data(x, y) odr_obj = ODR(data, poly_model) output = odr_obj.run() assert_array_almost_equal(output.beta, [1.0, 2.0, 3.0, 4.0])
def ortho_regress(x, y):
    """Return the ODR-fitted parameters of the module-level model ``f``.

    The first two fields of ``linregress(x, y)`` seed the fit.
    """
    start = linregress(x, y)[0:2]
    fitted = ODR(Data(x, y), Model(f), beta0=start).run()
    return list(fitted.beta)
def test_empty_data(self): beta0 = [0.02, 0.0] linear = Model(self.empty_data_func) empty_dat = Data([], []) assert_warns(OdrWarning, ODR, empty_dat, linear, beta0=beta0) empty_dat = RealData([], []) assert_warns(OdrWarning, ODR, empty_dat, linear, beta0=beta0)
def fit(self, x, y):
    """Fit ``self.model`` to (x, y) with ODR and store the result in ``self.betas``."""
    # Initial estimate of betas: first two fields of the linregress result.
    start = linregress(x, y)[0:2]
    problem = ODR(Data(x, y), Model(self.model), beta0=start)
    self.betas = problem.run().beta
def _run_odr(self):
    """Run an ODR regression"""
    model = Model(self._modelODR)
    # Both data arrays are flattened; the third positional argument of
    # Data is passed as 1.
    points = Data(ravel(self._datax), ravel(self._datay), 1)
    outcome = ODR(points, model, beta0=self._guess, maxit=10000).run()
    # Keep the headline results and the full output object.
    self._result, self._stdev, self._covar = (
        outcome.beta,
        outcome.sd_beta,
        outcome.cov_beta,
    )
    self._odr = outcome
def test_ifixx(self): x1 = [-2.01, -0.99, -0.001, 1.02, 1.98] x2 = [3.98, 1.01, 0.001, 0.998, 4.01] fix = np.vstack((np.zeros_like(x1, dtype=int), np.ones_like(x2, dtype=int))) data = Data(np.vstack((x1, x2)), y=1, fix=fix) model = Model(lambda beta, x: x[1, :] - beta[0] * x[0, :]**2., implicit=True) odr1 = ODR(data, model, beta0=np.array([1.])) sol1 = odr1.run() odr2 = ODR(data, model, beta0=np.array([1.]), ifixx=fix) sol2 = odr2.run() assert_equal(sol1.beta, sol2.beta)
def test_ticket_1253(self): def linear(c, x): return c[0] * x + c[1] c = [2.0, 3.0] x = np.linspace(0, 10) y = linear(c, x) model = Model(linear) data = Data(x, y, wd=1.0, we=1.0) job = ODR(data, model, beta0=[1.0, 1.0]) result = job.run() assert_equal(result.info, 2)
def ODR_fit(self):
    """Fit ``self.ODR_fit_func`` to the stored data with ODR, then evaluate
    the fitted pressure for every entry of ``self.H_M_ratio_dict`` and plot.

    Stores the model, data, ODR problem and output on the instance
    (``odr_model``, ``mydata``, ``myodr``, ``myoutput``), prints the fitted
    parameters, calls ``self.assign_fitting_constants()``, fills
    ``self.Pressures_fit`` and finally calls ``self.plot_fit()``.
    """
    # Fixed nine-element start vector; only the fifth entry is non-zero.
    parameter_initialization = [0, 0, 0, 0, 1, 0, 0, 0, 0]
    # The string literal below is never executed. It holds an earlier
    # version that built the start vector from self.data.
    '''
    parameter_initialization = []
    for i in range(self.data["Enthalpy_order"]):
        parameter_initialization.append(0)
    parameter_initialization.append(1)
    if self.data["Temp_dependence"] == "True":
        parameter_initialization.append(0)
        parameter_initialization.append(0)
    if self.data["A"] == "Free":
        parameter_initialization.append(0)
    if self.data["B"] == "Free":
        parameter_initialization.append(0)
    '''
    self.odr_model = Model(self.ODR_fit_func)
    # Two input variables (H/M ratio and temperature), one response (pressure).
    self.mydata = Data([self.H_M_ratio_data, self.Temperatures_data], self.Pressures_data)
    self.myodr = ODR(self.mydata, self.odr_model, beta0=np.asarray(parameter_initialization), maxit=10000000)
    self.myoutput = self.myodr.run()
    print(self.myoutput.beta)
    # NOTE(review): presumably this sets self.E, self.Alpha, self.Beta,
    # self.Gamma, self.A and self.B used below -- confirm in assign_fitting_constants.
    self.assign_fitting_constants()
    # Index into beta of the first R coefficient.
    order = self.data["Enthalpy_order"]
    for i in range(len(self.H_M_ratio_dict)):
        self.Pressures_fit[i] = []
        T = self.Temperatures[i]
        x = np.asarray(self.H_M_ratio_dict[i])
        # The flag is compared against the string "True", not a boolean.
        if self.data["Temp_dependence"] == "True":
            # R is quadratic in T, using three consecutive beta entries.
            R = self.myoutput.beta[order] + self.myoutput.beta[order+1]*T + self.myoutput.beta[order+2]*(T**2)
        else:
            R = self.myoutput.beta[order]
        # Cubic polynomial in x built from the fitted constants.
        H = self.E + 2*self.Alpha*x + 3*self.Beta*(x**2) + 4*self.Gamma*(x**3)
        self.Pressures_fit[i] = self.P_0**0.5*np.exp(self.A + self.B*T + (1/(self.k_B*T))*H + np.log(x/(R-x)) + R/(R-x))
    self.plot_fit()
def test_implicit(self):
    """Fit an implicit model and compare ``beta``, ``sd_beta`` and
    ``cov_beta`` against pinned reference values.
    """
    # The model function is a method of the test class.
    implicit_mod = Model(
        self.implicit_fcn,
        implicit=1,
        meta=dict(name='Sample Implicit Model',
                  ref='ODRPACK UG, pg. 49'),
    )
    # Two rows of twenty observations; the second Data argument is the scalar 1.
    implicit_dat = Data([
        [0.5,1.2,1.6,1.86,2.12,2.36,2.44,2.36,2.06,1.74,1.34,0.9,-0.28,
         -0.78,-1.36,-1.9,-2.5,-2.88,-3.18,-3.44],
        [-0.12,-0.6,-1.,-1.4,-2.54,-3.36,-4.,-4.75,-5.25,-5.64,-5.97,-6.32,
         -6.44,-6.44,-6.41,-6.25,-5.88,-5.5,-5.24,-4.86]],
        1,
    )
    implicit_odr = ODR(implicit_dat, implicit_mod,
        beta0=[-1.0, -3.0, 0.09, 0.02, 0.08])

    out = implicit_odr.run()
    # Fitted parameters.
    assert_array_almost_equal(
        out.beta,
        np.array([-0.9993809167281279, -2.9310484652026476, 0.0875730502693354,
                  0.0162299708984738, 0.0797537982976416]),
    )
    # Standard deviations of the fitted parameters.
    assert_array_almost_equal(
        out.sd_beta,
        np.array([0.1113840353364371, 0.1097673310686467, 0.0041060738314314,
                  0.0027500347539902, 0.0034962501532468]),
    )
    # Covariance matrix of the fitted parameters.
    assert_array_almost_equal(
        out.cov_beta,
        np.array([[2.1089274602333052e+00, -1.9437686411979040e+00,
                   7.0263550868344446e-02, -4.7175267373474862e-02,
                   5.2515575927380355e-02],
                  [-1.9437686411979040e+00, 2.0481509222414456e+00,
                   -6.1600515853057307e-02, 4.6268827806232933e-02,
                   -5.8822307501391467e-02],
                  [7.0263550868344446e-02, -6.1600515853057307e-02,
                   2.8659542561579308e-03, -1.4628662260014491e-03,
                   1.4528860663055824e-03],
                  [-4.7175267373474862e-02, 4.6268827806232933e-02,
                   -1.4628662260014491e-03, 1.2855592885514335e-03,
                   -1.2692942951415293e-03],
                  [5.2515575927380355e-02, -5.8822307501391467e-02,
                   1.4528860663055824e-03, -1.2692942951415293e-03,
                   2.0778813389755596e-03]]),
    )
def orthoregress(x, y):
    """Fit the module-level model ``f`` to (x, y) by Orthogonal Distance
    Regression, with the same interface as scipy.stats.linregress.

    An ordinary least-squares fit supplies the starting parameters, which
    are then refined through the scipy.odr interface to the ODRPACK
    Fortran code.

    Arguments:
        x: x data
        y: y data

    Returns:
        [m, c, nan, nan, nan]: the fitted parameters followed by three
        NaN placeholders.
    """
    start = linregress(x, y)[0:2]
    fitted = ODR(Data(x, y), Model(f), beta0=start).run()
    placeholders = [np.nan, np.nan, np.nan]
    return list(fitted.beta) + placeholders
def Normal_calc(t, nn):
    """Fit a plane z = a*x + b*y + c to a point set and return its unit normal.

    The points come from ``NN(t, nn)``, whose result must expose ``x``, ``y``
    and ``z`` attributes. The plane is fitted with scipy.odr
    (``fit_type=0``), the points and fitted plane are shown in a 3D
    matplotlib figure, and the normal is the L2-normalised cross product of
    two edge vectors of the plotted plane patch.

    Parameters
    ----------
    t, nn
        Passed straight through to ``NN``.

    Returns
    -------
    numpy.ndarray
        Length-3 unit normal vector of the fitted plane.
    """
    pc_0 = NN(t, nn)
    x = pc_0.x
    y = pc_0.y
    z = pc_0.z

    def func(beta, data):
        """Plane model: data holds (x, y), beta holds (a, b, c)."""
        x, y = data
        a, b, c = beta
        return a * x + b * y + c

    data = Data([x, y], z)
    model = Model(func)
    odr = ODR(data, model, beta0=[0.0, 0.0, 0.0])
    odr.set_job(fit_type=0)
    res = odr.run()

    # TODO: extend plot with plt.Quiver (vectors) later on.

    # Calculate xyz coordinates for corner vertices of the plane
    Y, X = np.mgrid[y.min():y.max():2j, x.min():x.max():2j]
    Z = func(res.beta, [X, Y])

    f = plt.figure()
    pl = f.add_subplot(111, projection='3d')
    pl.scatter3D(x, y, z)
    pl.plot_surface(X, Y, Z, alpha=0.4)
    plt.show()

    # Define 3 points on plane for cross product calculation (from previous calculation)
    P = [X[0][0], Y[0][0], Z[0][0]]
    Q = [X[0][1], Y[0][1], Z[0][1]]
    R = [X[1][0], Y[1][0], Z[1][0]]
    print('PQR:', P, Q, R)

    # Calculate vectors on plane
    PQ = [Q[0] - P[0], Q[1] - P[1], Q[2] - P[2]]
    PR = [R[0] - P[0], R[1] - P[1], R[2] - P[2]]
    print(PQ, PR)

    # Calculate cross product of vectors + normalize to 1 = sqrt(x**2+y**2+z**2)
    N1 = np.cross(PQ, PR)
    print('N1:', N1)
    # np.float was only an alias for the builtin float and was removed in
    # NumPy 1.24; the builtin selects the same float64 dtype.
    N1_array = np.array([[N1[0], N1[1], N1[2]]], dtype=float)
    N1_normalized = preprocessing.normalize(N1_array, norm='l2')
    return N1_normalized[0]
def orthoregress(x, y):
    '''
    Orthogonal regression of the module-level model ``f``, weighted by the
    inverse variance of each variable.

    Parameters
    ----------
    x: np.array
    y: np.array

    Returns
    -------
    list
        Fitted parameters, seeded from the first two fields of
        ``linregress(x, y)``.
    '''
    start = linregress(x, y)[0:2]
    # small value is added to var to prevent zero division error
    weight_x = 1. / (np.var(x) + 1e-8)
    weight_y = 1. / (np.var(y) + 1e-8)
    weighted = Data(x, y, wd=weight_x, we=weight_y)
    fitted = ODR(weighted, Model(f), beta0=start).run()
    return list(fitted.beta)
def orthoregress(x, y):
    """Fit the module-level model ``f`` to (x, y) by Orthogonal Distance
    Regression, with the same interface as scipy.stats.linregress.

    An ordinary least-squares fit supplies the starting parameters, which
    are then refined through the scipy.odr interface to the ODRPACK
    Fortran code. The fit is capped at ten iterations (``maxit=10``).

    Arguments:
        x: x data
        y: y data

    Returns:
        [m, c]

    Source: http://blog.rtwilson.com/orthogonal-distance-regression-in-python/
    """
    start = linregress(x, y)[0:2]
    problem = ODR(data=Data(x, y), model=Model(f), beta0=start, maxit=10)
    return list(problem.run().beta)
def set_odr_peak_model(self):
    """!
    @brief Set ODR model

    Stores the combined background-plus-peak callable in ``self.tot_model``
    and the configured, not yet run, ``ODR`` instance in ``self.odr_model``.
    """
    roi_x, roi_y = self.roi_data[:, 0], self.roi_data[:, 1]
    points = Data(roi_x, roi_y)
    # The first bgn parameters feed the background model, the rest the peak.
    bgn = len(self.bg_model.params)
    self.tot_model = lambda p, X: (
        self.bg_model.eval(p[:bgn], X) + self.peak_model.eval(p[bgn:], X)
    )
    print("Initial Model Params")
    print(self._init_params)
    # ifixb carries one flag per parameter; only the fourth is 0.
    self.odr_model = ODR(
        points,
        Model(self.tot_model),
        beta0=self._init_params,
        ifixb=[1, 1, 1, 0, 1],
        maxit=800,
        taufac=0.8,
    )
def run_odr(self, x, y, x_weights, y_weights):
    """Run a weighted ODR of ``y`` against ``x`` with ``self.line`` as model.

    :parameter x: pairwise distances from gene1
    :parameter y: pairwise distances from gene2
    :parameter x_weights: wODR weights for gene1 distances (passed as ``wd``)
    :parameter y_weights: wODR weights for gene2 distances (passed as ``we``)

    :return: the result of ``ODR.run()``
        (https://docs.scipy.org/doc/scipy/reference/generated/scipy.odr.ODR.html)
    """
    line_model = Model(self.line)
    weighted = Data(x, y, wd=x_weights, we=y_weights)
    # Single start parameter: ratio of the standard deviations of y and x.
    slope_guess = np.std(y) / np.std(x)
    return ODR(weighted, line_model, beta0=[slope_guess]).run()
def monte_carlo_odr(x_data, y_data, x_err, y_err, new_x_data, new_y_data, new_x_err, new_y_err):
    """
    Monte Carlo ODR calibration with outlier rejection, plus a summary plot.

    1) Randomises the data (1000 draws) based on values (x, y) and associated errors (x_err, y_err).
    2) Constructs a standard logged OLS regression (used for ODR beta estimates).
    3) Detects outliers using internally studentised residuals from the OLS. Those > 2 sigma (95%) are rejected,
       for at most 10 rounds per draw.
    4) Constructs an ODR and saves model coefficients (beta, covariance matrix, errors)
    5) Takes the median coefficients for final ODR model construction

    The new data (new_x_data, new_y_data and their errors) are only plotted; they do not enter the fit.
    The figure is saved as 'Pyrenees_Monte_Carlo_ODR.png'.

    Returns
    -------
    Beta, Eps, Covariance
        Element-wise medians over all draws of the ODR parameters, the maximum
        ODR eps value, and the parameter covariance matrix.
    """

    # Generates results files
    betas = []
    covariances = []
    eps = []

    # make function into Model instance (logarithmic)
    model = Model(log_func)

    # Sets seed for reproducible results
    np.random.seed(214)

    # 1000 iterations
    for _draw in range(1000):
        # Randomises the data (mean, sd)
        x = np.random.normal(x_data, x_err)
        y = np.random.normal(y_data, y_err)

        # Logs the data first
        logX = log10(x)

        # Adds constant for stats model intercept
        X = sm.add_constant(logX)

        # 10 iterations (should be much less, but "just in case")
        for _round in range(10):
            # runs a simple OLS model (log)
            linear_model = sm.OLS(y, X)
            results = linear_model.fit()
            # creates instance of influence
            influence = results.get_influence()
            # calculates internally standardized residuals
            St_Res = influence.resid_studentized_internal
            # Finds max residual
            M = np.max(abs(St_Res))
            # If any are larger than 2 standard deviations
            if M > 2:
                # Find their index
                res = [idx for idx, val in enumerate(St_Res) if val > 2 or val < -2]
                # Delete these data points
                x = np.delete(x, res)
                X = np.delete(X, res, axis = 0)
                y = np.delete(y, res)
            # If none are larger than 2 sd, this dataset is final. Stop here:
            # refitting unchanged data would only reproduce the same result.
            else:
                break
        else:
            # Outliers were still being removed in the last round, so refit
            # on the trimmed data. Otherwise the starting values below would
            # be stale (from an earlier draw) or undefined.
            results = sm.OLS(y, X).fit()

        # Slope and intercept used for ODR fit.
        slope = results.params[1]
        intercept = results.params[0]

        # New data and model
        data = Data(x, y)

        # Job = 0, explicit orthogonal
        out = ODR(data, model, beta0=[slope, intercept], job=0).run()

        # Appends model coefficients to results file (for EPS, only the maximum value is recorded)
        betas.append(out.beta)
        eps.append(max(out.eps))
        covariances.append(out.cov_beta)

    # Takes the median of the model estimates
    Beta = np.median(betas, axis = 0)
    Eps = np.median(eps, axis = 0)
    Covariance = np.median(covariances, axis = 0)

    # fit model using new beta, original x scale for consistency
    xn = linspace(min(x_data), max(x_data), 1000)
    yn = log_func(Beta, xn)

    # 1 and 2 sigma prediction intervals
    pl1 = prediction_interval([log10(xn), 1], 54, len(xn), Eps, 68., Beta, Covariance)
    pl2 = prediction_interval([log10(xn), 1], 54, len(xn), Eps, 95., Beta, Covariance)

    # create a figure to draw on and add a subplot
    fig, ax = subplots(1)

    # plot y calculated from px against de-logged x (and 1 and 2 sigma prediction intervals)
    ax.plot(xn, yn, '#EC472F', label='Logarithmic ODR')
    ax.plot(xn, yn + pl1, '#0076D4', dashes=[9, 4.5], label='1σ Prediction limit (~68%)', linewidth=0.8)
    ax.plot(xn, yn - pl1, '#0076D4', dashes=[9, 4.5], linewidth=0.8)
    ax.plot(xn, yn + pl2, '#BFBFBF', dashes=[9, 4.5], label='2σ Prediction limit (~95%)', linewidth=0.5)
    ax.plot(xn, yn - pl2, '#BFBFBF', dashes=[9, 4.5], linewidth=0.5)

    # plot points and error bars
    ax.plot(x_data, y_data, 'k.', markerfacecolor= '#4495F3',
            markeredgewidth=.5, markeredgecolor = 'k',
            label='Calibration data (n = 54)', markersize=5)
    ax.errorbar(x_data, y_data, ecolor='k', xerr=x_err, yerr=y_err, fmt=" ", linewidth=0.5, capsize=0)

    # adds new data and errors bars
    ax.plot(new_x_data, new_y_data,'k.', markerfacecolor= '#FF8130',
            markeredgewidth=.5, markeredgecolor = 'k',
            label = 'New data (n = 15)', markersize = 5)
    ax.errorbar(new_x_data, new_y_data, ecolor='k', xerr=new_x_err, yerr=new_y_err, fmt=" ", linewidth=0.5, capsize=0)

    # labels, extents etc.
    ax.set_ylim(0, 60)
    ax.set_xlabel('Mean R-value')
    ax.set_ylabel('Age (ka)')
    ax.tick_params(direction = 'in')
    ax.tick_params(bottom=True, top=True, left=True, right=True)
    ax.tick_params(labelbottom=True, labeltop=False, labelleft=True, labelright=False)

    # configure legend
    ax.legend(frameon=False, fontsize=7)

    # Sets axis ratio to 1
    ratio = 1
    ax.set_aspect(1.0/ax.get_data_ratio()*ratio)

    # export the figure
    fig.set_size_inches(3.2, 3.2)
    savefig('Pyrenees_Monte_Carlo_ODR.png', dpi = 900, bbox_inches='tight')
    #savefig('Pyrenees_Monte_Carlo_ODR.svg')

    # Return final model coefficients
    return Beta, Eps, Covariance
# 0 gong/clip1/1.jpg 181.0 231.0 # 1 gong/clip1/2.jpg 180.0 231.0 segment_data = segment_data[abs(segment_data['x'].mean() - segment_data['x']) < 2 * segment_data['x'].std()] segment_data = segment_data[abs(segment_data['y'].mean() - segment_data['y']) < 2 * segment_data['y'].std()] x = list(segment_data['x']) y = list(segment_data['y']) if len(x) == 0: continue mydata = Data(x, y) f = linear mod = Model(linear) myodr = ODR(mydata, mod, beta0=[0, 2]) # print(myodr) res = myodr.run() coeff = res.beta obj_and_metric = measure_objects(f, coeff) obj_locations = list(zip(list(objects['x_pos']), list(objects['y_pos']))) for obj in obj_locations: error = abs(obj[1] - f(coeff, obj[0])) obj_and_metric.append((obj, error)) if x[-1] >= x[0]:
def test_multi(self):
    """Fit a two-response model with per-observation weights, fixed inputs
    and initial deltas, and compare ``beta``, ``sd_beta`` and ``cov_beta``
    against pinned reference values.
    """
    # The model function is a method of the test class.
    multi_mod = Model(
        self.multi_fcn,
        meta=dict(name='Sample Multi-Response Model',
                  ref='ODRPACK UG, pg. 56'),
    )

    multi_x = np.array([30.0, 50.0, 70.0, 100.0, 150.0, 200.0, 300.0, 500.0,
        700.0, 1000.0, 1500.0, 2000.0, 3000.0, 5000.0, 7000.0, 10000.0,
        15000.0, 20000.0, 30000.0, 50000.0, 70000.0, 100000.0, 150000.0])
    # Two response rows, one value per entry of multi_x.
    multi_y = np.array([
        [4.22, 4.167, 4.132, 4.038, 4.019, 3.956, 3.884, 3.784, 3.713,
         3.633, 3.54, 3.433, 3.358, 3.258, 3.193, 3.128, 3.059, 2.984,
         2.934, 2.876, 2.838, 2.798, 2.759],
        [0.136, 0.167, 0.188, 0.212, 0.236, 0.257, 0.276, 0.297, 0.309,
         0.311, 0.314, 0.311, 0.305, 0.289, 0.277, 0.255, 0.24, 0.218,
         0.202, 0.182, 0.168, 0.153, 0.139],
    ])
    n = len(multi_x)
    # One 2x2 response-weight matrix per observation.
    multi_we = np.zeros((2, 2, n), dtype=float)
    multi_ifixx = np.ones(n, dtype=int)
    multi_delta = np.zeros(n, dtype=float)

    multi_we[0, 0, :] = 559.6
    multi_we[1, 0, :] = multi_we[0, 1, :] = -1634.0
    multi_we[1, 1, :] = 8397.0

    # Per-observation settings chosen by the magnitude of x.
    for i in range(n):
        if multi_x[i] < 100.0:
            multi_ifixx[i] = 0
        elif multi_x[i] <= 150.0:
            pass  # defaults are fine
        elif multi_x[i] <= 1000.0:
            multi_delta[i] = 25.0
        elif multi_x[i] <= 10000.0:
            multi_delta[i] = 560.0
        elif multi_x[i] <= 100000.0:
            multi_delta[i] = 9500.0
        else:
            multi_delta[i] = 144000.0
        # The observations at exactly 100 and 150 get all-zero response weights.
        if multi_x[i] == 100.0 or multi_x[i] == 150.0:
            multi_we[:, :, i] = 0.0

    multi_dat = Data(multi_x, multi_y, wd=1e-4 / np.power(multi_x, 2),
                     we=multi_we)
    multi_odr = ODR(multi_dat, multi_mod, beta0=[4., 2., 7., .4, .5],
                    delta0=multi_delta, ifixx=multi_ifixx)
    multi_odr.set_job(deriv=1, del_init=1)

    out = multi_odr.run()
    # Fitted parameters.
    assert_array_almost_equal(
        out.beta,
        np.array([4.3799880305938963, 2.4333057577497703, 8.0028845899503978,
                  0.5101147161764654, 0.5173902330489161]),
    )
    # Standard deviations of the fitted parameters.
    assert_array_almost_equal(
        out.sd_beta,
        np.array([0.0130625231081944, 0.0130499785273277, 0.1167085962217757,
                  0.0132642749596149, 0.0288529201353984]),
    )
    # Covariance matrix of the fitted parameters.
    assert_array_almost_equal(
        out.cov_beta,
        np.array([[0.0064918418231375, 0.0036159705923791, 0.0438637051470406,
                   -0.0058700836512467, 0.011281212888768],
                  [0.0036159705923791, 0.0064793789429006, 0.0517610978353126,
                   -0.0051181304940204, 0.0130726943624117],
                  [0.0438637051470406, 0.0517610978353126, 0.5182263323095322,
                   -0.0563083340093696, 0.1269490939468611],
                  [-0.0058700836512467, -0.0051181304940204, -0.0563083340093696,
                   0.0066939246261263, -0.0140184391377962],
                  [0.011281212888768, 0.0130726943624117, 0.1269490939468611,
                   -0.0140184391377962, 0.0316733013820852]]),
    )
# Report the preceding fit and rebuild its piecewise-linear curve on xrng.
out.pprint()
af = out.beta[0]
bf = out.beta[1]
cf = out.beta[2]
# Below the hinge: straight line with slope af and intercept bf.
yrngf = bf + af * xrng
# Value of that line at the hinge position hxfix.
yhinge = bf + af * hxfix
# Above the hinge: slope cf, continuous with the first segment at hxfix.
idx = xrng > hxfix
yrngf[idx] = cf * (xrng[idx]-hxfix) + yhinge
# pdb.set_trace()
##############   polynomial
from scipy.odr import Model, Data, ODR
from scipy.stats import linregress,norm
mod = Model(f)
dat = Data(ml, mw)
# Ordinary degree-2 polynomial fit used only to seed the ODR.
co = np.polynomial.polynomial.polyfit(ml, mw,2)
# polyfit returns coefficients lowest power first; beta is ordered highest
# power first (see yrng_poly below), hence the reversal.
od = ODR(dat, mod, beta0=[co[2],co[1],co[0]])
out = od.run()
print '\npolynomial\n'
out.pprint()
yrng_poly = out.beta[0]*xrng**2+out.beta[1]*xrng+out.beta[2]
# Keep the polynomial result under its own name.
out_poly = out
##############
# get stats from simulated data
##############
a = 0.042
b = 0.481
def FFD_powerlaw(logx, logy, logxerr, logyerr, findXmin=False, slope=False):
    '''
    Given a FFD, provide the best fit parameters for a power law function.
    This is done using orthogonal distance regression, so that both x and y
    errors can be accounted for.

    As a crude way to account for incompleteness, determine a minimum
    energy, below which the fit gets bad. This is done by iteratively
    fitting with different values for xmin, and finding the value of xmin
    that minimizes the KS distance between the model and the data. See
    Clauset 2007 (arXiv:0706.1062), sec 3.3 for an explanation of the
    algorithm.

    Parameters
    ----------
    logx : The log-scaled x values (numpy array)
    logy : The log-scaled y values (numpy array)
    logxerr : The log-scaled x error bars (numpy array)
    logyerr : The log-scaled y error bars (numpy array)
    findXmin : Iteratively determine the best x value below which to trim
        the data. When False, a single fit over all data is made and the
        returned cutoff is -inf.
    slope : When False (default) the slope is a free parameter. Otherwise
        the model slope is held at -1 and only the normalization enters the
        model function.

    Returns
    -------
    b0, b1, b0_err, b1_err, cutoff
        Best fit parameters for the power law slope and normalization,
        their standard deviations, and the optimal xmin
    '''
    # Kept as an equality test (not "not slope") so that values such as
    # None keep selecting the fixed-slope branch, as before.
    if slope == False:  # noqa: E712
        def f(B, x):
            if B[0] > 0:
                return np.inf
            return B[0]*x + B[1]
    else:
        def f(B, x):
            if B[0] > 0:
                return np.inf
            return -1*x + B[1]

    if findXmin:
        cutoff_vals = np.linspace(np.min(logx), np.max(logx))
    else:
        # Must be a one-element array: the code below takes len() of, and
        # iterates over, the cutoff values. A bare scalar raised TypeError.
        cutoff_vals = np.array([-np.inf])

    ks_vals = np.zeros_like(cutoff_vals)
    param_arr = np.empty((len(ks_vals), 4))
    # The model does not depend on the cutoff, so build it once.
    linear = Model(f)

    for idx, e_cut in enumerate(cutoff_vals):
        # Initial guess for powerlaw fit
        b00, b10 = -1.0, 10

        mask = logx > e_cut
        # Make the KS distance large if we threw out all of the data
        if len(logx[mask]) < 1:
            ks_vals[idx] = np.inf
            continue

        mydata = Data(logx[mask], logy[mask], wd=1/logxerr[mask]**2, we=1/logyerr[mask]**2)
        myodr = ODR(mydata, linear, beta0=[b00, b10])
        myoutput = myodr.run()

        b0, b1 = myoutput.beta[0], myoutput.beta[1]
        b0_err, b1_err = myoutput.sd_beta[0], myoutput.sd_beta[1]

        # Record the KS distance and best fit parameters for this particular xmin value
        ks_vals[idx] = stats.ks_2samp(logy[mask], b0*logx[mask] + b1).statistic
        param_arr[idx][0] = b0
        param_arr[idx][1] = b1
        param_arr[idx][2] = b0_err
        param_arr[idx][3] = b1_err

    # Use the parameters that correspond to the minimum KS distance
    best_ks_idx = np.argmin(ks_vals)
    b0 = param_arr[best_ks_idx][0]
    b1 = param_arr[best_ks_idx][1]
    b0_err = param_arr[best_ks_idx][2]
    b1_err = param_arr[best_ks_idx][3]
    cutoff = cutoff_vals[best_ks_idx]

    return b0, b1, b0_err, b1_err, cutoff
def main():
    """Load two 41x3 result tables, fit a line to each, and plot both fits
    with error bars and an annotation box.

    The tables are read from 'number.txt' and 'curvature.txt' in the current
    directory. Exceptions are printed rather than propagated, and the
    process exits with status 0.
    """
    try:
        # NOTE(review): 'number.txt' feeds `cur` and 'curvature.txt' feeds
        # `hough` -- confirm the files are not swapped.
        data_filename = 'number.txt'
        data = np.loadtxt(data_filename, skiprows=0)
        cur = np.reshape(data, (41, 3))

        data_filename = 'curvature.txt'
        data = np.loadtxt(data_filename, skiprows=0)
        hough = np.reshape(data, (41, 3))

        # empirical error
        xerr = np.sqrt(hough[:, 0]) / 3
        yerr_h = hough[:, 2]
        yerr_c = cur[:, 2]

        # np.spacing(1) keeps the denominators non-zero.
        # NOTE(review): scipy.odr.Data documents `we` as the response (y)
        # weights and `wd` as the input (x) weights; here `we` is built from
        # xerr and `wd` from the y errors -- confirm this is intended.
        data_h = Data(hough[:, 0].T, hough[:, 1].T, we=1 / (np.power(xerr.T, 2) + np.spacing(1)),
                      wd=1 / (np.power(yerr_h.T, 2) + np.spacing(1)))
        data_c = Data(cur[:, 0].T, cur[:, 1].T, we=1 / (np.power(xerr.T, 2) + np.spacing(1)),
                      wd=1 / (np.power(yerr_c.T, 2) + np.spacing(1)))

        model = Model(ord_function)
        odr_h = ODR(data_h, model, beta0=[0, 0])
        odr_c = ODR(data_c, model, beta0=[0, 0])

        odr_h.set_job(fit_type=2)
        odr_c.set_job(fit_type=2)

        output_h = odr_h.run()
        output_c = odr_c.run()

        popt_h = output_h.beta
        perr_h = output_h.sd_beta

        popt_c = output_c.beta
        perr_c = output_c.sd_beta

        # The Hough ODR results above are overwritten here by curve_fit;
        # the curvature results stay as produced by ODR.
        popt_h, pcov_h = curve_fit(linear_fit_function, hough[:, 0], hough[:, 1], [1, 0], hough[:, 2])
        perr_h = np.sqrt(np.diag(pcov_h))

        # popt_c, pcov_c = curve_fit(linear_fit_function, cur[:,0], cur[:,1], [1, 0], cur[:, 2])
        # perr_c = np.sqrt(np.diag(pcov_c))

        # Write y = k*x + b as A*x + B*y + C = 0 with A**2 + B**2 = 1, so
        # |A*x + B*y + C| is each point's perpendicular distance to the line.
        A = popt_h[0] / np.sqrt(popt_h[0] * popt_h[0] + 1)
        B = -1 / np.sqrt(popt_h[0] * popt_h[0] + 1)
        C = popt_h[1] / np.sqrt(popt_h[0] * popt_h[0] + 1)
        fitting_error_h = np.mean(np.abs(A * hough[:, 0] + B * hough[:, 1] + C))

        A = popt_c[0] / np.sqrt(popt_c[0] * popt_c[0] + 1)
        B = -1 / np.sqrt(popt_c[0] * popt_c[0] + 1)
        C = popt_c[1] / np.sqrt(popt_c[0] * popt_c[0] + 1)
        fitting_error_c = np.mean(np.abs(A * cur[:, 0] + B * cur[:, 1] + C))

        fig, ax = plt.subplots(ncols=1)
        ax.errorbar(hough[:, 0], hough[:, 1], xerr=xerr, yerr=yerr_h, fmt='o', color='blue')
        ax.errorbar(cur[:, 0], cur[:, 1], xerr=xerr, yerr=yerr_c, fmt='o', color='red')
        ax.plot(hough[:, 0], popt_h[0] * hough[:, 0] + popt_h[1], '-b', linewidth=2)
        ax.plot(cur[:, 0], popt_c[0] * cur[:, 0] + popt_c[1], '-r', linewidth=2)
        bbox_props = dict(boxstyle="square,pad=0.3", fc="white", ec="black", lw=2)

        # Adjacent string literals are joined before "%" is applied, so each
        # "%" formats the whole run of literals to its left.
        annotation_text = "function: y = kx + b \n" \
                          "Hough Transfrom (blue)\n"\
                          "k = %.2f b = %.2f Error = %.2f" % (popt_h[0], popt_h[1], fitting_error_h) + '\n'\
                          "Curvature Method (red)\n"\
                          "k = %.2f b = %.2f Error = %.2f" % (popt_c[0], popt_c[1], fitting_error_c)
        ax.text(10, max(np.amax(hough[:, 1]), np.amax(cur[:, 1])) + 10, annotation_text, ha="left", va="top",
                rotation=0, size=15, bbox=bbox_props)

        ax.set_title('Algorithom Performance')
        ax.set_xlabel('Bubble Number Counted Manually')
        ax.set_ylabel('Bubbble Number Counted by Algorithom')
        plt.grid()
        plt.xlim((np.amin(hough[:, 0]) - 5, np.amax(hough[:, 0]) + 5))
        plt.ylim((0, max(np.amax(hough[:, 1]), np.amax(cur[:, 1])) + 20))
        plt.show()
    except KeyboardInterrupt:
        print "Shutdown requested... exiting"
    except Exception:
        # Print the traceback to stdout instead of propagating the error.
        traceback.print_exc(file=sys.stdout)
    # Exit status is 0 on every path that reaches this line.
    sys.exit(0)
def match_copies(self, matrix1, taxa1, matrix2, taxa2, force_single_copy=False):
    """Select best pairing copies between assessed gene families

    A preliminary unweighted ODR is run on the condensed distance matrices.
    Each candidate gene pair is scored by the sum of the absolute ODR
    residuals of the distances it takes part in, and within each genome the
    lowest-scoring pairs are kept greedily.

    :parameter matrix1: DataFrame with distances from gene1
    :parameter matrix2: DataFrame with distances from gene2
    :parameter taxa1: taxon table from gene1 (pd.DataFrame); read through its
        ``gene``, ``genome`` and ``taxon`` columns, with the gene in column
        position 0 and the genome in position 1
    :parameter taxa2: taxon table from gene2 (pd.DataFrame), same layout
    :parameter force_single_copy: keep only the best pair of every genome
        instead of every mutually compatible pair

    :raises Exception: if the genome columns of both filtered taxon tables
        do not line up

    Return paired copies of input DataFrames as
    ``(matrix1, taxa1, matrix2, taxa2)``"""
    residual_columns = ['matrix1_gene', 'matrix2_gene', 'genome',
                        'to_drop', 'combined_residual']

    #
    # create a single DataFrame matching taxa from both gene families
    all_taxon_pairs = pd.DataFrame()
    all_taxon_pairs['gene1'] = taxa1.gene
    all_taxon_pairs['gene2'] = taxa2.gene
    all_taxon_pairs['genome'] = taxa1.genome.tolist()
    all_taxon_pairs['pairs'] = [
        frozenset(pair)
        for pair in zip(all_taxon_pairs['gene1'], all_taxon_pairs['gene2'])
    ]

    #
    # summarize distances matrices by using only its upper triangle (triu)
    triu_indices = np.triu_indices_from(matrix1, k=1)
    condensed1 = matrix1.values[triu_indices]
    condensed2 = matrix2.values[triu_indices]

    #
    # run ODR with no weights...
    model = Model(self.line)
    data = Data(condensed1, condensed2)
    # Geometric Mean slope estimate
    slope_guess = np.std(condensed2) / np.std(condensed1)
    regression = ODR(data, model, beta0=[slope_guess]).run()

    #
    # create DataFrame with all residuals from the preliminary ODR with all
    # possible combinations of gene within the same genome
    same_genome = (taxa1.iloc[triu_indices[0], 1].values ==
                   taxa1.iloc[triu_indices[1], 1].values)
    combined_residual = abs(regression.delta) + abs(regression.eps)

    def residual_frame(side):
        """Residual rows attributed to one end (row or column taxon) of each distance."""
        return pd.DataFrame(
            columns=residual_columns,
            data=list(zip(taxa1.iloc[side, 0].values,
                          taxa2.iloc[side, 0].values,
                          taxa1.iloc[side, 1].values,
                          same_genome,
                          combined_residual)),
        )

    # DataFrame.append was removed in pandas 2.0; pd.concat with the same
    # sort/ignore_index flags builds the identical table.
    residual_df = pd.concat(
        [residual_frame(triu_indices[0]), residual_frame(triu_indices[1])],
        sort=True,
        ignore_index=True,
    )
    # distances between two taxa of the same genome carry no pairing signal
    residual_df.drop(index=residual_df.index[residual_df.to_drop], inplace=True)

    sum_paired_residuals = residual_df.groupby(
        ['matrix1_gene', 'matrix2_gene']
    ).agg(
        residual_sum=pd.NamedAgg(column="combined_residual", aggfunc="sum"),
        genome=pd.NamedAgg(column='genome', aggfunc=lambda x: x.iloc[0])
    ).reset_index()

    sum_paired_residuals.sort_values('residual_sum', inplace=True)
    sum_paired_residuals.reset_index(inplace=True, drop=True)

    # Greedy choice per genome: take the lowest-residual pair, discard every
    # remaining candidate sharing either gene with it, and repeat.
    chosen = []
    for genome, indices in sum_paired_residuals.groupby('genome').groups.items():
        pairing_possibilities = sum_paired_residuals.loc[indices].copy()

        while pairing_possibilities.shape[0]:
            first_row = pairing_possibilities.iloc[0]
            chosen.append((first_row.matrix1_gene, first_row.matrix2_gene, genome))

            if force_single_copy:
                break

            clashes = (
                (pairing_possibilities.matrix1_gene == first_row.matrix1_gene) |
                (pairing_possibilities.matrix2_gene == first_row.matrix2_gene)
            )
            pairing_possibilities = pairing_possibilities[~clashes]

    best_pairs = pd.DataFrame(chosen, columns=['gene1', 'gene2', 'genome'])
    best_pairs['pairs'] = [
        frozenset(pair) for pair in zip(best_pairs['gene1'], best_pairs['gene2'])
    ]

    # keep only the taxa whose gene pair was selected above
    wanted = set(best_pairs['pairs'])
    keep = np.array([pair in wanted for pair in all_taxon_pairs['pairs']],
                    dtype=bool)
    all_taxon_pairs = all_taxon_pairs.loc[keep].copy()

    # mergesort is stable, so both tables keep the same relative row order
    taxa1 = (taxa1.reindex(index=all_taxon_pairs.index)
             .sort_values('genome', kind='mergesort')
             .reset_index(drop=True))
    taxa2 = (taxa2.reindex(index=all_taxon_pairs.index)
             .sort_values('genome', kind='mergesort')
             .reset_index(drop=True))

    if not all(taxa1.genome == taxa2.genome):
        raise Exception('**Wow, taxa order is wrong! ABORT!!!')

    # reindex returns new objects; the deprecated copy= keyword is not needed
    matrix1 = matrix1.reindex(index=taxa1.taxon, columns=taxa1.taxon)
    matrix2 = matrix2.reindex(index=taxa2.taxon, columns=taxa2.taxon)

    return (matrix1, taxa1, matrix2, taxa2)
# Compute line of best fit Y = future_life_expectancy X = age from scipy.odr import Model, Data, ODR from scipy.stats import linregress def f(p, x): return (p[0] * x) + p[1] linreg = linregress(X, Y) mod = Model(f) dat = Data(X, Y) od = ODR(dat, mod, beta0=[1., 2.]) out = od.run() TLSbeta = out.beta[0] # In[15]: # Plot chart plt.plot(X, Y, '.') plt.plot(X, out.beta[1] + np.multiply(X, out.beta[0]), '.') plt.xlabel('Current Age') plt.ylabel('Future Life')