def handle_image_generation(classifier, feature_set, imagepath, title=''):
    '''
    Train a classifier and return its scores on the train and test split.

    When the classifier is trained on exactly two features, a contour image
    of its predictions (overlaid with the train and test points) is saved
    to ``imagepath``.

    :param classifier: A string or object describing a classifier.
    :param feature_set: A list of column names describing the feature set to
        train the model on.
    :param imagepath: The path to store the contour plot.
    :param title: The title of the plot. It may contain ``{train_score}`` and
        ``{test_score}`` placeholders; a title that cannot be formatted is
        used verbatim.
    :return: The train and test scores for the classifier.
    '''
    train_table, test_table = get_split_table()
    train_labels, test_labels = get_labels(train_table, test_table)
    classifier = fit(classifier, feature_set, train_table)
    train_score = classifier.score(train_table[feature_set], train_labels)
    test_score = classifier.score(test_table[feature_set], test_labels)

    # A contour plot only makes sense for a two-dimensional feature space.
    if len(feature_set) == 2:
        fig = plt.figure()
        ax = visualize_confidence(classifier, train_table, *feature_set)
        plot_with_columns(train_table, *feature_set, ax=ax, marker='+',
                          label='train')
        plot_with_columns(test_table, *feature_set, ax=ax, label='test')
        ax.legend()
        try:
            formatted_title = title.format(train_score=train_score,
                                           test_score=test_score)
        except (ValueError, KeyError, IndexError):
            # str.format raises ValueError for malformed braces, KeyError for
            # unknown named fields and IndexError for positional fields; in
            # every case fall back to the raw title instead of crashing.
            formatted_title = title
        ax.set_title(formatted_title)
        fig.savefig(imagepath)

    return train_score, test_score
def test_oseen(coreR, gamma, dist,xdrift,ydrift,u_conv,v_conv):
    """Fit a noisy synthetic vortex field and report how well it is recovered.

    A velocity field is generated with ``fitting.velocity_model`` on a
    ``dist`` x ``dist`` grid spanning [-1, 1] (shifted by ``xdrift`` /
    ``ydrift``), the convection velocity is added, Gaussian noise with
    standard deviation 0.3 is applied, and ``fitting.fit`` is run on the
    result. The fitted values, their relative errors and the correlation
    with the data are printed, then the fit is plotted.
    """
    print('coreR:',coreR,'Gamma',gamma,'xdrift',xdrift,
          'ydrift',ydrift,'u_conv',u_conv,'v_conv',v_conv)

    # Square evaluation grid on [-1, 1] x [-1, 1].
    axis = np.linspace(-1,1,dist)
    x_index, y_index = np.meshgrid(axis, axis)

    # The synthetic vortex sits at the origin.
    x_real = 0.0
    y_real = 0.0

    u_clean, v_clean = fitting.velocity_model(
        coreR, gamma, x_real, y_real, u_conv, v_conv,
        x_index+xdrift, y_index+ydrift)

    # NOISE (u is drawn before v, keeping the random stream order)
    u_data = np.random.normal(u_clean + u_conv,0.3)
    v_data = np.random.normal(v_clean + v_conv,0.3)

    fitted = fitting.fit(coreR, gamma, x_index, y_index, x_real, y_real,
                         u_data, v_data, u_conv, v_conv,0)

    print('coreR:',fitted[0],'error(%):',(1-(fitted[0])/coreR)*100)
    print('gamma:',fitted[1],'error(%):',(1-(fitted[1])/gamma)*100)
    print('x_real:',fitted[2])
    print('y_real:',fitted[3])

    # Rebuild the field from the fitted parameters on the undrifted grid.
    u_model, v_model = fitting.velocity_model(
        fitted[0], fitted[1], fitted[2], fitted[3], fitted[4], fitted[5],
        x_index, y_index)
    corr = fitting.correlation_coef(u_data,v_data,u_model,v_model)
    print('correlation:',corr)
    print('---')
    plot.plot_fit_test(x_index, y_index, u_data, v_data, u_model, v_model,
                       fitted[2], fitted[3], fitted[0], fitted[1],
                       fitted[4], fitted[5], corr)
def segment(k,m, inciset, trainingset, radiographs, colors, leftout, mode = 0):
    """Build a shape model from the training set and segment one radiograph.

    :param k: passed through to ``model.estimate`` and ``fit.fit``.
    :param m: passed through to ``model.estimate`` and ``fit.fit``.
    :param inciset: incisor selection, passed to the estimate/fit/plot helpers.
    :param trainingset: indices into ``radiographs`` used for training.
    :param radiographs: indexable collection of radiograph images.
    :param colors: colors handed to the plotting helpers.
    :param leftout: index into ``radiographs`` of the image to segment.
    :param mode: 0 plots the alignment and PCA and then fits; 1 only fits;
        2 only plots the initial estimate.
    :return: the mask produced by ``ui.plotfit`` for modes 0 and 1,
        otherwise ``None``.
    """
    # get image training set
    trainimgs = [radiographs[i] for i in trainingset]

    # read landmarks from file (the test landmarks are not needed here)
    lmtrain, _ = landmarks.get(trainingset)

    # align all landmarks, plot depending on mode
    aligns, means = landmarks.align(lmtrain)
    if mode == 0:
        ui.plotalign(colors, means, aligns)

    # do pca, plot depending on mode
    eva, evc = pca.pca(aligns, means)
    if mode == 0:
        ui.plotpca(means,eva,evc)

    # get initial estimate, manual or auto depending on mode
    # draw init also depending on mode
    est, greymodels = model.estimate(k, m, inciset, means, trainimgs, lmtrain,
                                     radiographs[leftout], colors, mode)
    if mode == 2:
        ui.plotinit(est, radiographs[leftout], colors, leftout)

    # fit init estimate and get plot mask; `mask` stays None for any other
    # mode so the return below never hits an unbound local.
    mask = None
    if mode == 0 or mode == 1:
        X = fit.fit(est, inciset, eva, evc, means, greymodels,
                    radiographs[leftout], k, m, 3.0)
        mask = ui.plotfit(radiographs[leftout], list(est), X, len(inciset),
                          colors)
    return mask
def main():
    """Train the selected classifier on two features and render its plot.

    Reads ``dataset``, ``feature1``, ``feature2`` and ``classifier`` from the
    submitted form. When a field is missing, or the dataset has no known
    target column, the selection page is rendered again with an error
    message. Otherwise the classifier is fitted and ``plot.html`` is
    rendered with the mean 5-fold cross-validation score on the train and
    test data and the plot built from the test data.
    """
    # Target (label) column for every dataset this view can handle.
    dataset_targets = {"static/diabetes.csv": "diabetes"}

    try:
        dataset = request.form["dataset"]
        feature1 = request.form["feature1"]
        feature2 = request.form["feature2"]
        classifier = request.form["classifier"]
    except KeyError:
        error = "Warning! Missing selections. Please select one dataset, two features from the dataset, and one classifier!"
        return render_template('select.html', error=error)

    # Previously `target` was left unbound for any other dataset, which
    # crashed the request further down; report the problem to the user.
    target = dataset_targets.get(dataset)
    if target is None:
        error = "Warning! Unsupported dataset. Please select one of the available datasets!"
        return render_template('select.html', error=error)

    df = read(dataset)
    X_train, X_test, y_train, y_test = split_proportional(df, target)
    y_train = pd.get_dummies(y_train)
    y_test = pd.get_dummies(y_test)

    clf = fit(X_train, y_train, classifier, feature1, feature2)
    data_train = select_features(X_train, (feature1, feature2))
    data_test = select_features(X_test, (feature1, feature2))

    accuracy_train = np.mean(cross_val_score(clf, data_train, y_train, cv=5))
    accuracy_test = np.mean(cross_val_score(clf, data_test, y_test, cv=5))
    plot_data = build_plot(data_test, y_test, clf)

    return render_template('plot.html',
                           accuracy_train=accuracy_train,
                           accuracy_test=accuracy_test,
                           plot_url=plot_data)
def main():
    """Interactive console driver for trying out the plotting helpers.

    Prompts for a classifier ("svc" or "knn") and two feature names taken
    from the training data, repeating each prompt until the answer is valid
    (entering 'q' exits). The chosen classifier is then fitted and the
    resulting diabetes plot is shown.
    """
    data_training, data_validation = data.extract_data("diabetes.csv")

    accepted_classifiers = ("svc", "knn")
    classifier = ""
    while classifier not in accepted_classifiers:
        classifier = input(
            "ENTER CLASSIFIER\nSVC or KNN?\n(Enter 'q' to exit)\n>").lower()
        if classifier == "q":
            exit(0)
        if classifier not in accepted_classifiers:
            print("\n\nInput is not a valid classifier, try again..")

    known_features = list(data_training)

    def ask_feature(prompt):
        # Keep asking until the answer names a feature of the training data.
        answer = ""
        while answer == "" or answer not in known_features:
            answer = input(prompt).lower()
            if answer == "q":
                exit(0)
            if answer not in known_features:
                print("\n\nInput is not a valid feature, try again..")
        return answer

    feature_string = "ENTER {} FEATURE\npregnant\nglucose\npressure\ntriceps\ninsulin\nmass\npedigree\nage\n(Enter 'q' to exit)\n>"

    feature_x = ask_feature("\n" + feature_string.format("FIRST"))
    # The first choice is removed from the menu offered for the second one.
    feature_y = ask_feature(
        "\n" + feature_string.replace(feature_x + "\n", "").format("SECOND"))

    fitted = fitting.fit(data_training, data_validation, classifier,
                         feature_x, feature_y)
    plot_diabetes(data_training, data_validation, fitted,
                  feature_x, feature_y).show()
def test_oseen(coreR, gamma, dist,xdrift,ydrift):
    """Fit a noisy synthetic vortex field (no convection) and plot the result.

    The field is generated with ``fitting.velocity_model`` on a ``dist`` x
    ``dist`` grid spanning [-1, 1] shifted by ``xdrift`` / ``ydrift``,
    Gaussian noise with standard deviation 0.3 is added and ``fitting.fit``
    is run on it. Fitted values, relative errors and the correlation with
    the data are printed, then ``plot.plot_corr`` is called.
    """
    print('|*|coreR:',coreR,'Gamma',gamma,'xdrift',xdrift,'ydrift',ydrift,'|*|')

    # Square evaluation grid on [-1, 1] x [-1, 1].
    axis = np.linspace(-1,1,dist)
    X, Y = np.meshgrid(axis,axis)

    # Vortex center and convection velocity of the synthetic field.
    fxCenter = 0.0
    fyCenter = 0.0
    u_conv = 0.0 #flipped with v, fix later
    v_conv = 0.0

    u_clean, v_clean = fitting.velocity_model(coreR, gamma, fxCenter, fyCenter,
                                              u_conv, v_conv,
                                              X+xdrift, Y+ydrift)

    # NOISE (u is drawn before v, keeping the random stream order)
    Uw = np.random.normal(u_clean + u_conv,0.3)
    Vw = np.random.normal(v_clean + v_conv,0.3)

    fitted = fitting.fit(coreR, gamma, X, Y, fxCenter, fyCenter, Uw, Vw,
                         u_conv, v_conv)

    print('coreR:',fitted[0],'error(%):',(1-(fitted[0])/coreR)*100)
    print('gamma:',fitted[1],'error(%):',(1-(fitted[1])/gamma)*100)
    print('fxCenter:',fitted[2])
    print('fyCenter:',fitted[3])

    # The model field reuses the input convection velocity, not a fitted one.
    uMod, vMod = fitting.velocity_model(fitted[0], fitted[1], fitted[2],
                                        fitted[3],u_conv,v_conv, X, Y)
    corr = fitting.correlation_coef(Uw,Vw,uMod,vMod)
    print('correlation:',corr)
    print('---')
    plot.plot_corr(X, Y, Uw, Vw, uMod, vMod, fitted[0], corr)
def scan_and_fit(l, step_size = step_size):
    """Scan a window centred on ``l``, fit ``f`` to the readings and plot both.

    The window is ``range_width`` wide. Initial parameter guesses are taken
    from the measured data (position of the minimum, maximum value, and
    minimum minus maximum) before ``fit`` is called.

    :param l: centre of the scan window.
    :param step_size: step size handed to ``scan_and_read``; defaults to the
        module-level ``step_size`` as it was when this function was defined.
    :return: the values of ``l0()`` and ``gamma()`` after the fit.
    """
    # Named scan_window so the builtin `range` is not shadowed.
    scan_window = r_[l-range_width/2, l+range_width/2]
    y = scan_and_read(scan_window, step_time, step_size)
    x = linspace(scan_window[0], scan_window[1], len(y))

    ## Expected values for parameters
    l0.set(x[argmin(y)])
    y0.set(amax(y))
    a.set(amin(y)-amax(y))

    fit(f, parameters, x, y, 1,)

    figure(1)
    hold(False)
    # Raw string: "\l" is not a valid escape sequence in a normal literal.
    title(r"$\lambda_0 = %.4f$" % l0())
    xt = linspace(scan_window[0], scan_window[1], 100)
    plot(x, y, 'o')
    hold(True)
    plot(xt, f(xt), '-')
    return l0(), gamma()
def plot(error=False):
    """Render the plot page for the default classifier and features.

    Fits and plots the first classifier with the first two features, then
    renders ``show_plot.html`` with the training and validation accuracy.

    :param error: forwarded to the template as its ``error`` flag.
    """
    default_classifier = classifiers[0]
    x_feature = features[0]
    y_feature = features[1]

    fitted = fitting.fit(data_training, data_validation,
                         default_classifier, x_feature, y_feature)
    # Called for the plot it produces; the return value is not used here.
    visualize.plot_diabetes(data_training, data_validation, fitted,
                            x_feature, y_feature)

    acc_train, acc_val = fitting.getAccuracy(
        data_training, data_validation,
        classifiers[0], features[0], features[1])

    return render_template('show_plot.html',
                           error=error,
                           classifiers=classifiers,
                           features=features,
                           p1=acc_train,
                           p2=acc_val)
def picture_prosess(features, targeted_column, classifier):
    """
    Train a classifier on the diabetes data and report its accuracy.

    Loads the data via data.diabetes_dataset(), trains with fitting.fit()
    and computes the accuracy on the training and validation sets. When
    exactly two features are selected, visualize.visualizer() is called for
    each set and the resulting scatter plot is rendered as PNG into an
    in-memory buffer and base64 encoded, so the web page can embed a fresh
    image on every request.

    args:
        features (list:String): list containing names of features(columns)
        targeted_column (String): name of column
        classifier (String): name of classifier

    returns:
        t_ac (float): accuracy on the training set
        v_ac (float): accuracy on the validation set
        img1 (string): base64 PNG of the training scatter plot, or None
        img2 (string): base64 PNG of the validation scatter plot, or None
    """
    _, training_set, validation_set = data.diabetes_dataset()
    trained_classifier = fitting.fit(training_set, classifier, features,
                                     targeted_column)

    train_pred = trained_classifier.predict(training_set[features])
    t_ac = metrics.accuracy_score(training_set[targeted_column], train_pred)

    val_pred = trained_classifier.predict(validation_set[features])
    v_ac = metrics.accuracy_score(validation_set[targeted_column], val_pred)

    # Scatter plots are only produced for a two-feature selection.
    if len(features) != 2:
        return t_ac, v_ac, None, None

    def as_base64_png(predictions, frame):
        # Render the plot into a fresh in-memory buffer and encode it.
        buf = BytesIO()
        visualize.visualizer(predictions, frame, features).savefig(
            buf, format="png")
        return base64.b64encode(buf.getbuffer()).decode("ascii")

    img1 = as_base64_png(train_pred, training_set)
    img2 = as_base64_png(val_pred, validation_set)
    return t_ac, v_ac, img1, img2
def render_metrics(model_names, features, svm_settings, knn_settings,
                   lda_settings):
    """Fit the requested models and render their metrics.

    :param model_names: names of the models to fit.
    :param features: feature column names; the ``'diabetes'`` label column
        is added internally and the caller's list is left untouched.
    :param svm_settings: settings forwarded to ``fitting.fit``.
    :param knn_settings: settings forwarded to ``fitting.fit``.
    :param lda_settings: settings forwarded to ``fitting.fit``.
    :return: the result of ``metrics_one`` for a single model, of
        ``metrics_multiple`` for several, or an error tuple when
        ``model_names`` is empty.
    """
    # Build a new list instead of appending to the argument: appending made
    # the caller's list grow by one 'diabetes' entry on every call.
    # The label column contains 0/1 or True/False values.
    columns = list(features) + ['diabetes']

    models, _training_data, _target_data, control_data = fitting.fit(
        'data/diabetes.csv', model_names, columns, svm_settings,
        knn_settings, lda_settings)

    # Split the control data into ground truth and model inputs.
    y_true = control_data['diabetes']
    control_data = control_data.drop(columns=['diabetes'])

    if len(model_names) == 1:
        return metrics_one(models, control_data, y_true)
    elif len(model_names) > 1:
        return metrics_multiple(models, control_data, y_true)
    else:
        return ('Something went wrong', 'error')
def test_oseen(core_radius, gamma, dist, xdrift, ydrift, u_advection,
               v_advection):
    """Fit a noisy synthetic vortex field with advection and plot the result.

    The field is generated with ``fitting.velocity_model`` on a ``dist`` x
    ``dist`` grid spanning [-1, 1] shifted by ``xdrift`` / ``ydrift``, the
    advection velocity is added, Gaussian noise with standard deviation 0.3
    is applied and ``fitting.fit`` is run on it. Fitted values, relative
    errors and the correlation with the data are printed, then
    ``fitting.plot_fit`` is called.
    """
    print('core_radius:', core_radius, 'Gamma', gamma, 'xdrift', xdrift,
          'ydrift', ydrift, 'u_advection', u_advection, 'v_advection',
          v_advection)

    # Square evaluation grid on [-1, 1] x [-1, 1].
    axis = np.linspace(-1, 1, dist)
    x_index, y_index = np.meshgrid(axis, axis)

    # The synthetic vortex sits at the origin.
    x_real = 0.0
    y_real = 0.0

    u_clean, v_clean = fitting.velocity_model(
        core_radius, gamma, x_real, y_real, u_advection, v_advection,
        x_index + xdrift, y_index + ydrift)

    # NOISE (u is drawn before v, keeping the random stream order)
    u_data = np.random.normal(u_clean + u_advection, 0.3)
    v_data = np.random.normal(v_clean + v_advection, 0.3)

    fitted = fitting.fit(core_radius, gamma, x_index, y_index, x_real,
                         y_real, u_data, v_data, u_advection, v_advection, 0)

    print('core_radius:', fitted[0], 'error(%):',
          (1 - (fitted[0]) / core_radius) * 100)
    print('gamma:', fitted[1], 'error(%):', (1 - (fitted[1]) / gamma) * 100)
    print('x_real:', fitted[2])
    print('y_real:', fitted[3])

    # Rebuild the field from the fitted parameters on the undrifted grid.
    u_model, v_model = fitting.velocity_model(
        fitted[0], fitted[1], fitted[2], fitted[3], fitted[4], fitted[5],
        x_index, y_index)
    corr = fitting.correlation_coef(u_data, v_data, u_model, v_model)
    print('correlation:', corr)
    print('---')
    fitting.plot_fit(x_index, y_index, u_data, v_data, u_model, v_model,
                     fitted[2], fitted[3], fitted[0], fitted[1], fitted[4],
                     fitted[5], corr, 0, 0, '.', 0, 'png')
def train(expr_in,
          vae_lr=1e-4,
          epochs=500,
          info_step=10,
          batch_size=50,
          latent_dim=2,
          f="nb",
          log=True,
          scale=True):
    """Preprocess an expression matrix and train a VAE on it.

    Preprocessing works on a private copy of ``expr_in``: negative values
    are clipped to 0, then optionally ``log2(x + 1)`` is applied and each
    row is divided by its maximum.

    :param expr_in: 2-D array, one row per sample.
    :param vae_lr: learning rate of the Adam optimizer.
    :param epochs: number of training epochs.
    :param info_step: losses are printed every ``info_step`` epochs.
    :param batch_size: minibatch size.
    :param latent_dim: latent dimension passed to ``model.VAE``.
    :param f: passed to ``fitting.fit`` and ``model.VAE``.
    :param log: apply ``log2(x + 1)`` when true.
    :param scale: divide every row by its maximum when true.
    :return: the trained VAE model.
    """
    # Preprocessing
    # Copy first: the original clipped (and, with log=False, scaled) the
    # caller's array in place.
    expr_in = np.array(expr_in)
    expr_in[expr_in < 0] = 0.0
    if log:
        expr_in = np.log2(expr_in + 1)
    if scale:
        row_max = expr_in.max(axis=1, keepdims=True)
        # An all-zero row has maximum 0; dividing by it produced NaN.
        # Such rows are left as zeros instead.
        row_max[row_max == 0] = 1
        expr_in = expr_in / row_max

    # Number of data samples
    n_sam = expr_in.shape[0]
    # Dimension of input data
    in_dim = expr_in.shape[1]

    # Build VAE model and its optimizer
    lmd = fitting.fit(expr_in, f)
    model_vae = model.VAE(in_dim=in_dim, latent_dim=latent_dim, f=f, lmd=lmd)
    optimizer_vae = tf.keras.optimizers.Adam(vae_lr)

    # Training
    for epoch in range(1, epochs + 1):
        # Minibatch for VAE training
        vae_train_set = tf.data.Dataset.from_tensor_slices(expr_in).shuffle(
            n_sam).batch(batch_size)

        # Batch training
        for vae_batch in vae_train_set:
            # Update VAE model
            rec_loss, kl_loss, rank_loss = update_model(
                model_vae, vae_batch, optimizer_vae, losses.vae_loss)

        # Print training info (losses of the last batch of this epoch)
        if epoch % info_step == 0:
            print("Epoch", epoch, " rec_loss: ", rec_loss.numpy(),
                  " kl_loss: ", kl_loss.numpy(), " rank_loss: ",
                  rank_loss.numpy())

    return model_vae
def visualize(feature_1, feature_2, classifier):
    """
    6.3: Creates a scatter plot of diabetes data, displaying areas of
    predicted negative/positive result.

    Args:
        feature_1 (string): The first feature to plot by
        feature_2 (string): The second feature to plot by
        classifier: The classifier specification handed to fitting.fit
    Returns:
        plt: the scatter plot
        float: the accuracy score on the training set
        float: the accuracy score on the validation set
    """
    trained_classifier, training_score, validation_score = fitting.fit(
        classifier, include_features=[feature_1, feature_2])

    plt = data.create_scatter_plot(feature_1, feature_2)
    X = data.data_frame[[feature_1, feature_2]].values

    # Mesh covering the data range with a margin of 1 on every side.
    step = 0.5
    x_min, x_max = X[:, 0].min() - 1, X[:, 0].max() + 1
    y_min, y_max = X[:, 1].min() - 1, X[:, 1].max() + 1
    xx, yy = np.meshgrid(np.arange(x_min, x_max, step),
                         np.arange(y_min, y_max, step))

    # Predict a class for every mesh point and restore the grid shape.
    Z = trained_classifier.predict(np.c_[xx.ravel(), yy.ravel()])
    Z = Z.reshape(xx.shape)

    plt.pcolormesh(xx, yy, Z, cmap=plt.cm.coolwarm)
    plt.xlim(xx.min(), xx.max())
    plt.ylim(yy.min(), yy.max())

    # Plot the first feature against the second. The original passed the
    # whole (n, 2) matrix as both x and y, scattering a flattened mix of
    # both features against itself.
    # NOTE(review): points are not coloured by class; the 'diabetes' labels
    # could be passed as `c=` once their encoding is confirmed.
    plt.scatter(X[:, 0], X[:, 1])
    #plt.show(block=True)
    return plt, training_score, validation_score
def fitPredPlot(t: pd.DataFrame,
                v: pd.DataFrame,
                feature1: str,
                feature2: str,
                include_error: bool = False,
                cf="knn"):
    """Fit a classifier, predict on the validation data and plot the result.

    The classifier is fitted on the training frame using the two chosen
    features, predictions are made for the validation frame, and the
    outcome is visualized together with the true values.

    Args:
        t: training dataframe
        v: validation dataframe
        feature1: feature used for x
        feature2: feature used for y
        include_error: flag for visualizing prediction errors in the plot
        cf: the classifier to use for fitting and prediction

    Returns:
        plt (matplotLib plot): the prediction plot
        acc: accuracy score of the prediction, as returned by
            metrics.accuracy_score
    """
    # Validation inputs and the 0/1-encoded ground truth
    chosen_columns = [feature1, feature2]
    validation_inputs = v[chosen_columns]
    truth = v["diabetes"].replace(["neg", "pos"], [0, 1])
    scatterplot(v, feature1, feature2)

    # Train on the training frame, predict for the validation inputs
    fitted_clf = fit(t, feature1, feature2, classifier=cf, max_iter=5000)
    predicted = fitted_clf.predict(validation_inputs)

    # Plot
    figure = visualize_clf(feature1, feature2, validation_inputs, truth,
                           predicted, include_error, fitted_clf)

    acc = metrics.accuracy_score(truth, predicted)
    print(f"Accuracy score for {cf}:{acc}")
    return figure, acc
def val_changed():
    """Re-render the plot page for the classifier and features just submitted.

    Reads ``classifiers``, ``x_features`` and ``y_features`` from the form,
    fits and plots the selection and computes its training and validation
    accuracy. When fitting, plotting or scoring raises ``AssertionError``,
    the default plot is rendered with the error flag set.
    """
    classifier = request.form["classifiers"]
    x_feature = request.form["x_features"]
    y_feature = request.form["y_features"]

    try:
        # Called for the plot it produces; the return value is not used here.
        visualize.plot_diabetes(
            data_training, data_validation,
            fitting.fit(data_training, data_validation, classifier,
                        x_feature, y_feature),
            x_feature, y_feature)
        # Score the submitted selection. The original always scored the
        # default classifier and features, so the displayed accuracy did
        # not belong to the displayed plot.
        acc_train, acc_val = fitting.getAccuracy(data_training,
                                                 data_validation,
                                                 classifier,
                                                 x_feature, y_feature)
    except AssertionError as e:
        print(e)
        return plot(error=True)

    return render_template('show_plot.html',
                           error=False,
                           classifiers=classifiers,
                           features=features,
                           p1=acc_train,
                           p2=acc_val)
y_min, y_max = X1.min() - 1, X1.max() + 1 stepsize = 0.8 xx, yy = np.meshgrid( np.arange(x_min, x_max, stepsize), np.arange(y_min, y_max, stepsize), ) Z = classifer.predict(np.c_[xx.ravel(), yy.ravel()]) train_pred = classifer.predict(test[features]) acc = accuracy_score(test["diabetes"], train_pred) fig = plt.figure() ax = fig.add_subplot(1, 1, 1) title = "Decision surface of {}, Accuracy {:.2%}".format( classifer.__class__.__name__, acc) Z = Z.reshape(xx.shape) ax.contourf(xx, yy, Z, cmap=plt.cm.coolwarm, alpha=0.8) ax.scatter(X0, X1, c=y, cmap=plt.cm.coolwarm, s=20, edgecolors="k") ax.set_ylabel(features[1]) ax.set_xlabel(features[0]) ax.set_title(title) return fig else: print("Cant scatter plot unless there is two features") return None if __name__ == "__main__": feaut = ["insulin", "glucose"] clf = ft.fit(feaut, eval("SVC(kernel='linear')"), "diabetes.csv") visualize(feaut, clf, "diabetes.csv") plt.show()
if e == 'pos': test.append(1) else: test.append(0) test = np.array(test) #plt.pcolormesh(np.expand_dims(x,0), np.expand_dims(y,1), test*np.eye(len(prediction))) """ for idx, row in df.groupby('diabetes'): plt.scatter(row[feature_set[0]], row[feature_set[1]], c=[colors[r] for r in row['diabetes']], label=idx) plt.xlabel(feature_set[0]) plt.ylabel(feature_set[1]) plt.legend(title="Diabetes") return plt if __name__ == '__main__': features = ['glucose','pressure'] targeted_column = 'diabetes' data_frame, training_set, validation_set = data.diabetes_dataset() trained_classifier = fitting.fit(training_set, 'KNN', features, targeted_column) prediction = trained_classifier.predict(validation_set[features]) sctplt = visualizer(prediction, validation_set, ['glucose','pressure'],trained_classifier) sctplt.show(block=True)
import fitting
import numpy as np

# Synthetic straight-line data: slope 3, intercept 4, sampled at 50 points.
t = np.linspace(0, 100, 50)
data = 3*t + 4

# Fit parameters with deliberately wrong starting values.
m = fitting.Parameter(2)
b = fitting.Parameter(5)


def fit_func(x):
    """Straight line evaluated with the current values of ``m`` and ``b``."""
    return (m()*x + b())


fitting.fit(fit_func, [m, b], data, t)

# Single-argument print() behaves the same on Python 2 and Python 3; the
# original print statements were a syntax error on Python 3.
print("m: %f" % m())
print("b: %f" % b())
def fit_data(data_dir, file_name, mode, fit_method):
    """Fit every spectrum of a datacube, store the results and plot them.

    The data are read from ``<data_dir>/<file_name>/<file_name>``, binned or
    sub-sampled depending on ``mode``, fitted one element at a time, pickled
    to ``<name>_data.p`` and compared with the theoretical model in several
    plots.

    :param data_dir: directory containing the data sets.
    :param file_name: name of the data set (used as directory and file stem).
    :param mode: ``'real'``, ``'noiseless'`` or ``'select'``; anything else
        terminates the program via ``sys.exit``.
    :param fit_method: fitting method passed to ``fit`` and ``extract``;
        ``'gaussian'`` uses a minimum SNR of 40, anything else 80.
    """
    name = '{}/{}/{}'.format(data_dir, file_name, file_name)

    if fit_method == 'gaussian':
        min_SNR = 40
    else:
        min_SNR = 80

    many_plots = 'True'

    # `noiseless` stays a 'True'/'False' string because it is handed to
    # prepare_data() in that form.
    if mode == 'real':
        noiseless = 'False'
    elif mode == 'noiseless':
        noiseless = 'True'
    elif mode == 'select':
        noiseless = 'True'
    else:
        sys.exit('Invalid "mode" given by user')

    # Convert the datacubes to a list of dictionaries. Each dictionary
    # contains information (e.g. radius, spectrum) for a single spaxel.
    data, wavel, in_wavel = prepare_data(name, noiseless)

    if mode == 'real':
        # For the real data, bin the spaxels to a minimum SNR. Each element
        # of the data list now represents a single annular bin.
        data = SNR_bin(data, min_SNR)
    elif mode == 'noiseless':
        # Bin data as above, but use the noiseless data (with the 'real'
        # errors).
        data = SNR_bin(data, min_SNR)
    elif mode == 'select':
        # From the noiseless data, just select num_noiseless individual
        # spaxels, equally spaced in radius.
        # NOTE(review): `reduce` and `num_noiseless` are not defined in this
        # function; presumably module-level names - confirm.
        data = reduce(data, num_noiseless)

    # Set spectrum error to 1 if using noiseless data (so it is irrelevant
    # in reduced chi fitting). The original compared the string against the
    # boolean True, so this never ran, and it indexed the list `data`
    # instead of the individual element.
    if noiseless == 'True':
        for item in data:
            item['spec_err'] = np.ones(len(wavel))

    # Create dictionary of functions that are used to calculate second
    # moment and partial derivatives of a Gauss-Hermite function.
    ghdict = gauss_moments()

    # THE FITTING
    for x in data:
        out, params, perr = fit(wavel, x['spec'], x['spec_err'], in_wavel,
                                fit_method)
        I, I_err, S, S_err = extract(out, fit_method, ghdict, in_wavel)
        x.update({
            'I': I,
            'I_err': I_err,
            'S': S,
            'S_err': S_err,
            'params': params,
            'perr': perr
        })

    # Convert fitted values in keys (and h4) to 1D arrays.
    keys = ['rad', 'S', 'S_err', 'S_mod', 'I', 'I_err', 'I_mod', 'j']
    rad, S, S_err, S_mod, I, I_err, I_mod, j = [
        np.asarray([x[attribute] for x in data]) for attribute in keys
    ]
    h4 = np.asarray([x['params']['h4'] for x in data])

    # For noiseless data set the error in the fitted velocity equal to '1.0'
    # (so it is irrelevant in Reduced Chi fitting).
    if noiseless == 'True':
        S_err = np.ones(len(S_err))

    # Store the fitted values to the data using pickle - this is the
    # completed data! The context manager closes the file handle.
    main_data = rad, I, I_err, S, S_err, j
    with open("{0}_data.p".format(name), "wb") as handle:
        pickle.dump(main_data, handle)

    # BELOW IS JUST ANALYTICS
    # Calculate average difference between theoretical model and the data
    # (as a fraction of the model).
    I_diff = (I - I_mod) / I_mod
    S_diff = (S - S_mod) / S_mod

    # Single-argument print() produces the same output on Python 2 and 3.
    print('Average absolute difference in dispersion: %s' %
          (np.sum((S - S_mod)) / len(S),))
    print('Average h4 is: %s' % (np.sum(h4) / len(h4),))

    # Check what fraction of fitted values are within two sigma of the
    # theoretical model (should be about 95%.)
    inside = (S_mod > (S - 2 * S_err)) & (S_mod < (S + 2 * S_err))
    within_err = int(np.sum(inside))
    print('Fraction within error is: %s' % (within_err / float(len(rad)),))

    # PLOTTING
    # Plot fitted dispersion with errorbars for real data, and without
    # errorbars for noiseless data.
    plt.figure()
    if noiseless == 'False':
        plt.errorbar(rad, S, yerr=S_err, ls='none')
    else:
        plt.plot(rad, S, 'b-x')
    plt.plot(rad, S_mod, 'g-')
    plt.ylabel('Dispersion \n $kms^{-1}$')
    plt.xlim(0, rad[-1])
    plt.title(min_SNR)
    plt.savefig("{}/{}/velocity.pdf".format(data_dir, file_name))

    # Plot fractional difference in dispersion between theoretical model and
    # fitted value.
    plt.figure()
    plt.plot(rad, S_diff, '.', markersize=2)
    plt.ylabel('Dispersion fractional difference')
    plt.xlabel('R / milliarcseconds')
    plt.xlim(0, rad[-1])

    # with Y = 0 line as a visual aid.
    Y = np.zeros(100)
    X = np.linspace(0, 1000, 100)
    plt.plot(X, Y)

    if many_plots == 'True':
        # Plot fractional difference in intensity between theoretical model
        # and fitted value.
        plt.figure()
        plt.plot(rad, I_diff, '.', markersize=2)
        plt.plot(X, Y)
        plt.ylabel('Intensity fractional difference')
        plt.xlabel('R / milliarcseconds')
        plt.xlim(0, rad[-1])

        # Plot h4.
        plt.figure()
        plt.plot(rad, h4, 'g.', markersize=3)
        plt.plot(X, Y)
        plt.ylabel('h4')
        plt.xlabel('R / milliarcseconds')
        plt.xlim(0, rad[-1])

        # Plot intensity. The backslash of \odot is escaped explicitly;
        # "\o" is not a valid escape sequence.
        plt.figure()
        plt.plot(rad, I)
        plt.ylabel('Intensity \n $L_{\\odot} per pixel$')
        plt.xlim(0, rad[-1])
def visualize(classifier, training_set, dataset, target, features):
    """Fit ``classifier`` on the training set and plot it over ``dataset``.

    :param classifier: classifier specification handed to ``fit``.
    :param training_set: data the classifier is trained on.
    :param dataset: data handed to ``plot_dataset``.
    :param target: name of the target column.
    :param features: feature column names used for training and plotting.
    """
    # Train on the same features/target that are plotted. The original
    # always trained on ["age", "pregnant"] / "diabetes" regardless of the
    # arguments, so the plot could show a model fitted on other columns.
    classifier = fit(training_set, classifier, features, target)
    plot_dataset(classifier, dataset, target, features)
import data
import fitting
import matplotlib.pyplot as plt
import numpy as np
# Polynomial orders to try and the loss recorded for each of them
scala = [i + 1 for i in range(3)]
train_loss = []
validation_loss = []
# k-th order fit of a single variable; for reasons not yet understood this
# only works up to the third order.
for i in scala:
    trainset,trainresultset,validationset,\
    varesultset=data.createdata("Olympic.txt",i)
    arg = fitting.fit(trainset, trainresultset)
    # Predictions on the training set (loss is the mean squared error)
    predictresult = trainset.dot(arg)
    train_loss.append(((trainresultset - predictresult)**2).mean())
    # Predictions on the validation set
    predictresult = validationset.dot(arg)
    validation_loss.append(((varesultset - predictresult)**2).mean())
print("trainning loss:\n", train_loss, '\n')
print("validation loss:\n", validation_loss, '\n')
# Set up a list of plot styles
#kind=[['b','o'],['r','*'],['g','^'],['k','+']]
# Plotting section
fig = plt.figure(1)
ax1 = fig.add_subplot(2, 2, 1)
ax2 = fig.add_subplot(2, 2, 2)
g = sum([topcolors[i][1] for i in range(0,n)]) / n b = sum([topcolors[i][2] for i in range(0,n)]) / n color_wb = (r,g,b) points_to_fit.append(((0.5+cbox)*boxwidth, (0.5+rbox)*boxheight, lum(color_wb))) # may be able to reduce time by only saving into a rbox by cbox sized array of color vals for r in range(rbox*boxheight, (rbox+1)*boxheight): for c in range(cbox*boxwidth, (cbox+1)*boxwidth): wb_im[r][c] = color_wb profile_sum += queTimer.getAndReset() num_profiles += 1 fit_func = fitting.fit(points_to_fit) max_val = 0 min_val = 255 for r in range(smaller_h): for c in range(smaller_w): wb_im[r][c] = [int(fit_func(c,r))] * 3 if wb_im[r][c][0] > max_val: max_val = wb_im[r][c][0] wb_im = array(Image.fromarray(wb_im).resize((width, height))) figure() imshow(wb_im) figure()
ax.set_title(title) fig.savefig(imagepath) return train_score, test_score if __name__ == '__main__': from data import get_numerical_columns from fitting import classifier_map train_table, test_table = get_split_table() train_labels, test_labels = get_labels(train_table, test_table) feature_options = get_numerical_columns() feature_set = np.random.choice(feature_options, size=2) fig = plt.figure() classifier_name = np.random.choice(list(classifier_map.keys())) classifier = fit(classifier_name, feature_set, train_table) train_score = classifier.score(train_table[feature_set], train_labels) test_score = classifier.score(test_table[feature_set], test_labels) ax = visualize_confidence(classifier, train_table, *feature_set) plot_with_columns( train_table, *feature_set, ax=ax, marker='+', label='train', ) plot_with_columns(test_table, *feature_set, ax=ax, label='test') ax.set_title( f'{classifier_name}: Test {test_score:.5f} Train {train_score:.5f}') ax.legend() plt.show()
def ransac(data,n,k,t,d,debug=False,return_all=False):
    """fit model parameters to data using the RANSAC algorithm

    This implementation written from pseudocode found at
    http://en.wikipedia.org/w/index.php?title=RANSAC&oldid=116358182

    {{{
    Given:
        data - a set of observed data points
        model - a model that can be fitted to data points
        n - the minimum number of data values required to fit the model
        k - the maximum number of iterations allowed in the algorithm
        t - a threshold value for determining when a data point fits a model
        d - the number of close data values required to assert that a model
            fits well to data
    Return:
        bestfit - model parameters which best fit the data (or nil if no
                  good model is found)
    iterations = 0
    bestfit = nil
    besterr = something really large
    while iterations < k {
        maybeinliers = n randomly selected values from data
        maybemodel = model parameters fitted to maybeinliers
        alsoinliers = empty set
        for every point in data not in maybeinliers {
            if point fits maybemodel with an error smaller than t
                 add point to alsoinliers
        }
        if the number of elements in alsoinliers is > d {
            % this implies that we may have found a good model
            % now test how good it is
            bettermodel = model parameters fitted to all points in
                          maybeinliers and alsoinliers
            thiserr = a measure of how well model fits these points
            if thiserr < besterr {
                bestfit = bettermodel
                besterr = thiserr
            }
        }
        increment iterations
    }
    return bestfit
    }}}

    In this implementation the model is the module-level ``fit`` function,
    which is used as returning a ``(model, errors)`` pair. When no candidate
    meets the acceptance criteria a ``ValueError`` is raised. With
    ``return_all`` the result is ``(bestfit, {'inliers': indices})``.
    """
    iterations = 0
    bestfit = None
    besterr = numpy.inf
    best_inlier_idxs = None
    while iterations < k:
        maybe_idxs, test_idxs = random_partition(n,data.shape[0])
        maybeinliers = data[maybe_idxs,:]
        test_points = data[test_idxs]
        maybemodel,_ = fit(maybeinliers)
        # NOTE(review): the test error comes from fitting the test points
        # themselves, not from evaluating `maybemodel` on them as the
        # pseudocode above describes - confirm this is intended.
        _,test_err = fit(test_points)
        # select indices of rows with accepted points
        also_idxs = test_idxs[test_err < t]
        alsoinliers = data[also_idxs,:]
        if debug:
            # Single-argument print() gives the same output on Python 2 and
            # 3; the original print statements do not parse on Python 3.
            print('test_err.min() %s' % (test_err.min(),))
            print('test_err.max() %s' % (test_err.max(),))
            print('numpy.mean(test_err) %s' % (numpy.mean(test_err),))
            print('iteration %d:len(alsoinliers) = %d'%(
                iterations,len(alsoinliers)))
        if len(alsoinliers) > d:
            # Enough support: refit on all inliers and keep the best model.
            betterdata = numpy.concatenate( (maybeinliers, alsoinliers) )
            bettermodel,better_errs = fit(betterdata)
            thiserr = numpy.mean( better_errs )
            if thiserr < besterr:
                bestfit = bettermodel
                besterr = thiserr
                best_inlier_idxs = numpy.concatenate( (maybe_idxs, also_idxs) )
        iterations+=1
    if bestfit is None:
        raise ValueError("did not meet fit acceptance criteria")
    if return_all:
        return bestfit, {'inliers':best_inlier_idxs}
    else:
        return bestfit
# Compare three step responses: two runs with hand-picked controller
# parameters and one with parameters derived from a fit.
t_sprung = 5
ufnc = stepfnc(t_sprung, 1)  # Input function: step time, step height
t_max = 30
dt = 5e-3  # Step size of the result vector
PID = [1, 1, 1, 1]  # PID controller parameters - T_i, T_d, T_n, K
t, b_out, S, IN, S_noise = Simulator(dt, t_max, ufnc, PT1**o, True, *PID, False)
y1 = b_out[S]
PID = [0.5, 1, 1, 1]  # PID controller parameters - T_i, T_d, T_n, K
t, b_out, S, IN, S_noise = Simulator(dt, t_max, ufnc, PT1**o, False, *PID, False)
y2 = b_out[S]
# Controller parameters derived from the fit result
[T_e, K_e], p2 = fit(G_abs, G_phi, w, f0, System)
PID = param([0, T_e, 0, 0, 0, 0, 0], K_e)
t, b_out, S, IN, S_noise = Simulator(dt, t_max, ufnc, PT1**o, False, *PID, False)
y3 = b_out[S]
figure(6)
# Legend labels are German: 'ungeregelt' = uncontrolled, 'geregelt' = controlled
plot(t, y1, 'b', label='ungeregelt')
plot(t, y2, '--g', label='geregelt default')
plot(t, y3, 'r', label='geregelt nach Reinisch')
legend(loc='lower right')
# OUTPUT ----------------------------------------------------------------------
#mp.rcParams.update({'font.size': 30})
#figure(3)