def loalwr(model=KernelRidge(alpha=.00518, coef0=1, degree=3, gamma=.518,
                             kernel='laplacian', kernel_params=None),
           datapath="../../DBTT_Data.csv",
           lwr_datapath="../../CD_LWR_clean.csv",
           savepath='../../{}.png',
           X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
              "N(log(fluence)", "N(log(flux)", "N(Temp)"],
           Y="delta sigma"):
    """Leave-one-alloy-out test against the CD LWR data set.

    For each alloy id 1..59, fits ``model`` on the data with that alloy
    excluded, predicts the alloy's rows in the LWR data set, and plots the
    per-alloy RMSE.  The figure is saved to ``savepath`` with its ``{}``
    placeholder replaced by the plot title.

    NOTE(review): the default ``model`` instance is created once at import
    time and re-fitted across calls; pass a fresh estimator if that matters.
    """
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    lwr_data = data_parser.parse(lwr_datapath)
    lwr_data.set_x_features(X)
    lwr_data.set_y_feature(Y)
    rms_list = []
    alloy_list = []
    for alloy in range(1, 60):
        # fit model to all alloys except the one to be removed
        data.remove_all_filters()
        data.add_exclusive_filter("Alloy", '=', alloy)
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())
        # predict removed alloy
        lwr_data.remove_all_filters()
        lwr_data.add_inclusive_filter("Alloy", '=', alloy)
        if len(lwr_data.get_x_data()) == 0:
            continue  # alloy id not present in the LWR set (x data is empty)
        Ypredict = model.predict(lwr_data.get_x_data())
        rms = np.sqrt(mean_squared_error(
            Ypredict, np.asarray(lwr_data.get_y_data()).ravel()))
        rms_list.append(rms)
        alloy_list.append(alloy)
    print('Mean RMSE: ', np.mean(rms_list))
    # graph rmse vs alloy
    matplotlib.rcParams.update({'font.size': 15})
    fig, ax = plt.subplots(figsize=(10, 4))
    plt.xticks(np.arange(0, max(alloy_list) + 1, 5))
    ax.scatter(alloy_list, rms_list, color='black', s=10)
    ax.plot((0, 59), (0, 0), ls="--", c=".3")
    ax.set_xlabel('Alloy Number')
    ax.set_ylabel('RMSE (Mpa)')
    ax.set_title('Leave out Alloy LWR')
    ax.text(.05, .88, 'Mean RMSE: {:.2f}'.format(np.mean(rms_list)),
            fontsize=14, transform=ax.transAxes)
    # Label the five worst alloys.  The text is passed positionally: the
    # 's=' keyword was removed from Axes.annotate in modern matplotlib.
    for x in np.argsort(rms_list)[-5:]:
        ax.annotate(alloy_list[x], xy=(alloy_list[x], rms_list[x]))
    fig.savefig(savepath.format(ax.get_title()), dpi=200, bbox_inches='tight')
    fig.clf()
    plt.close()
def execute(model, data, savepath, recursive=False, lwr_data_path = '../DBTT/CD_LWR_clean6.csv'):
    """Run time- and fluence-extrapolation circle tests and plot the results.

    On the first (non-recursive) call the model's class name is baked into
    ``savepath`` (which must contain a ``{}`` placeholder).  When the target
    feature is CD-based, the CD LWR data set's RMSE is drawn on the plots as
    a reference line.  After finishing on the measured target, the function
    re-runs itself once against 'CD delta sigma' if the data set has it.
    """
    if not recursive:
        # keep a '{}' slot free for the individual plot names
        savepath = savepath.format(type(model).__name__+'_{}')
    ending = ''
    is_cd = False
    if 'CD' in data.y_feature:
        ending = ' on CD'
        is_cd = True
    # NOTE(review): the CD LWR set appears to be parsed unconditionally but
    # only consumed on the is_cd branches below -- confirm that is intended.
    lwr_data = dp.parse(lwr_data_path)
    lwr_data.set_x_features(data.x_features)
    lwr_data.set_y_feature('CD predicted delta sigma (Mpa)')
    # time extrapolation
    groups, threshold = get_extrapolation_group(data, 'time')
    result = circle_test(model, data, savepath, groups)
    if is_cd:
        make_plot(threshold, result, savepath, 'log(time){}'.format(ending),
                  actual_rms=get_lwr_rmse(model, data, lwr_data))
    else:
        make_plot(threshold, result, savepath, 'log(time){}'.format(ending))
    # fluence extrapolation
    groups, threshold = get_extrapolation_group(data, 'fluence')
    result = circle_test(model, data, savepath, groups)
    if is_cd:
        make_plot(threshold, result, savepath, 'log(fluence){}'.format(ending),
                  actual_rms=get_lwr_rmse(model, data, lwr_data))
    else:
        make_plot(threshold, result, savepath, 'log(fluence){}'.format(ending))
    if 'CD' not in data.y_feature:
        # repeat once against the CD-predicted target when available;
        # recursive=True stops a second level of recursion
        have_cd = data.set_y_feature('CD delta sigma')
        if have_cd:
            execute(model, data, savepath, recursive=True)
def ajax_get_box_data():
    """Serve the sample box-plot data as JSON for AJAX POST requests.

    Reads the bundled sample.json, feeds it through the project parser and
    returns the parsed result wrapped in a JSON response.  Non-POST requests
    fall through and return None.
    """
    if request.method != 'POST':
        return None
    sample_path = os.path.join(app.root_path, './static/data/sample.json')
    parsed = data_parser.parse(pd.read_json(sample_path))
    return jsonify(result=parsed)
def lwr(model=KernelRidge(alpha=.00139, gamma=.518, kernel='laplacian'),
        datapath="../../DBTT_Data.csv",
        lwr_datapath="../../CD_LWR_clean.csv",
        savepath='../../{}.png',
        X=[
            "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
            "N(log(fluence)", "N(Temp)", "N(log(flux)"
        ],
        Y=" CD delta sigma"):
    """Fit on the DBTT data and extrapolate to the CD LWR predictions.

    Trains ``model`` on the full training CSV, predicts the LWR rows,
    prints the RMSE and saves a CD-predicted vs model-predicted scatter.
    """
    train_set = data_parser.parse(datapath)
    train_set.set_x_features(X)
    train_set.set_y_feature(Y)
    x_train = np.asarray(train_set.get_x_data())
    y_train = np.asarray(train_set.get_y_data()).ravel()

    lwr_data = data_parser.parse(lwr_datapath)
    lwr_data.set_y_feature(Y)
    lwr_data.set_x_features(X)
    x_test = np.asarray(lwr_data.get_x_data())

    model.fit(x_train, y_train)
    predicted = model.predict(x_test)
    rms = np.sqrt(mean_squared_error(predicted, lwr_data.get_y_data()))
    print("RMS: ", rms)

    # parity plot: CD prediction on x, this model's prediction on y
    plt.figure(1)
    plt.scatter(lwr_data.get_y_data(), predicted, s=10, color='black', label='IVAR')
    #plt.scatter(data.get_y_data().ravel(), model.predict(data.get_x_data()), s = 10, color = 'red')
    plt.plot(plt.gca().get_ylim(), plt.gca().get_ylim(), ls="--", c=".3")
    plt.xlabel('CD Predicted (MPa)')
    plt.ylabel('Model Predicted (MPa)')
    plt.title('Extrapolate to LWR')
    plt.figtext(.15, .83, 'RMS: %.4f' % (rms), fontsize=14)
    plt.savefig(savepath.format(plt.gca().get_title()), dpi=200, bbox_inches='tight')
    plt.close()
def graph():
    """Return the parsed dataset named by the ``dataset`` query argument.

    Only the "flight" dataset is recognised; a missing or unknown name
    yields a 400 JSON error response.
    """
    dataset = request.args.get("dataset")
    is_valid = dataset is not None and dataset in ["flight"]
    if not is_valid:
        return make_response(jsonify({"error": "Invalid dataset."})), 400
    return make_response(jsonify(data_parser.parse(dataset))), 200
def fullfit(model=KernelRidge(alpha=.00139, coef0=1, degree=3, gamma=.518,
                              kernel='rbf', kernel_params=None),
            datapath="../../DBTT_Data.csv",
            savepath='../../{}.png',
            X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
               "N(log(fluence)", "N(log(flux)", "N(Temp)"],
            Y="delta sigma"):
    """Fit ``model`` on the full DBTT data set and plot predicted vs measured.

    Reports the overall RMSE plus separate RMSEs for the IVAR rows
    (indices 0-1463) and the IVAR+ rows (indices 1464-1505), and saves a
    parity scatter plot of predictions against measurements.
    """
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    Ydata = np.asarray(data.get_y_data()).ravel()
    # Row ranges of the two sub-populations in the CSV.  np.arange avoids the
    # duplicated index that linspace(1464, 1505, 43).astype(int) produced
    # (43 points truncated onto 42 integers double-counted row 1464).
    IVARindices = np.arange(0, 1464)
    IVARplusindices = np.arange(1464, 1506)
    # Train the model on the whole data set and predict it back.
    model.fit(data.get_x_data(), Ydata)
    Ypredict = model.predict(data.get_x_data())
    # calculate rms, overall and per sub-population
    rms = np.sqrt(mean_squared_error(Ypredict, Ydata))
    IVAR_rms = np.sqrt(mean_squared_error(Ypredict[IVARindices], Ydata[IVARindices]))
    IVARplus_rms = np.sqrt(
        mean_squared_error(Ypredict[IVARplusindices], Ydata[IVARplusindices]))
    print('RMS: %.5f, IVAR RMS: %.5f, IVAR+ RMS: %.5f' % (rms, IVAR_rms, IVARplus_rms))
    # graph outputs
    plt.figure(1)
    plt.scatter(Ydata[IVARindices], Ypredict[IVARindices], s=10, color='black',
                label='IVAR')
    plt.legend(loc=4)
    plt.scatter(Ydata[IVARplusindices], Ypredict[IVARplusindices], s=10,
                color='red', label='IVAR+')
    plt.legend(loc=4)
    plt.plot(plt.gca().get_ylim(), plt.gca().get_ylim(), ls="--", c=".3")
    plt.xlabel('Measured (MPa)')
    plt.ylabel('Predicted (MPa)')
    plt.title('Full Fit')
    plt.figtext(.15, .83, 'Overall RMS: %.4f' % (rms), fontsize=14)
    plt.figtext(.15, .77, 'IVAR RMS: %.4f' % (IVAR_rms), fontsize=14)
    plt.figtext(.15, .71, 'IVAR+ RMS: %.4f' % (IVARplus_rms), fontsize=14)
    plt.savefig(savepath.format(plt.gca().get_title()), dpi=200, bbox_inches='tight')
    plt.show()
    plt.close()
def test(self, train_file, test_file, f):
    """Train on ``train_file``, evaluate on ``test_file``, report into ``f``.

    Builds a tag confusion matrix over the test sentences, writes it with
    the overall accuracy, then lists the ten most frequent confusions.
    """
    self.train(prs.parse(train_file))
    sentences = prs.parse(test_file)
    n_tags = len(self.set_of_tags)
    cnf_matrix = np.zeros((n_tags, n_tags))
    total = len(sentences)
    for idx, sentence in enumerate(sentences, start=1):
        gold = [pair[1] for pair in sentence]
        predicted = self.infer(sentence)
        for j, gold_tag in enumerate(gold):
            pred_tag = predicted[j]
            if gold_tag in self.set_of_tags and pred_tag in self.set_of_tags:
                cnf_matrix[self.tag_to_int[gold_tag]][self.tag_to_int[pred_tag]] += 1
        progress_bar(idx / total,
                     " Inferring sentence: {} from: {}".format(idx, total))
    print()
    # accuracy = diagonal mass / total mass
    result_accuracy = np.trace(cnf_matrix) / cnf_matrix.sum()
    print("Confusion matrix:", file=f)
    print(' '.join(self.int_to_tag[t] for t in range(n_tags)), file=f)
    print('\n'.join(' '.join('%0.0f' % cell for cell in row)
                    for row in cnf_matrix), file=f)
    print("Accuracy: {}".format(result_accuracy), file=f)
    # Zero the diagonal so only off-diagonal (mistake) counts remain,
    # then pull out the ten largest confusion cells.
    np.fill_diagonal(cnf_matrix, 0)
    top_flat = np.argpartition(cnf_matrix.ravel(), -10)[-10:]
    top_rows, top_cols = np.unravel_index(top_flat, cnf_matrix.shape)
    for r, c in zip(top_rows, top_cols):
        print("{} was mistaken for {} - {} times".format(
            self.int_to_tag[r], self.int_to_tag[c], cnf_matrix[r][c]), file=f)
def errbias(model=KernelRidge(alpha=.00139, coef0=1, degree=3, gamma=.518,
                              kernel='rbf', kernel_params=None),
            datapath="../../DBTT_Data.csv",
            savepath='../../{}.png',
            X=[
                "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
                "N(log(fluence)", "N(log(flux)", "N(Temp)"
            ],
            Y="delta sigma"):
    """Plot model error (predicted - actual) against each raw descriptor.

    Fits ``model`` on the normalized features, then for every unnormalized
    descriptor scatters the residuals against it, overlays a least-squares
    trend line, and saves one figure per descriptor.
    """
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    descriptors = [
        'Cu (At%)', 'Ni (At%)', 'Mn (At%)', 'P (At%)', 'Si (At%)', 'C (At%)',
        'Temp (C)', 'log(fluence)', 'log(flux)'
    ]
    xlist = np.asarray(data.get_data(descriptors))
    actual = np.asarray(data.get_y_data()).ravel()
    model.fit(data.get_x_data(), actual)
    error = model.predict(data.get_x_data()) - actual
    for idx, name in enumerate(descriptors):
        column = xlist[:, idx]
        plt.scatter(column, error, color='black', s=10)
        xlim = plt.gca().get_xlim()
        # reference lines at +/-20 MPa and at zero error
        plt.plot(xlim, (20, 20), ls="--", c=".3")
        plt.plot(xlim, (0, 0), ls="--", c=".3")
        plt.plot(xlim, (-20, -20), ls="--", c=".3")
        # line of best fit through the residuals
        slope, intercept = np.polyfit(np.reshape(column, len(column)),
                                      np.reshape(error, len(error)), 1)
        matplotlib.rcParams.update({'font.size': 15})
        plt.plot(column, slope * column + intercept, color='red')
        plt.figtext(.15, .83,
                    'y = ' + "{0:.6f}".format(slope) + 'x + ' + "{0:.5f}".format(intercept),
                    fontsize=14)
        plt.title('Error vs. {}'.format(name))
        plt.xlabel(name)
        plt.ylabel('Predicted - Actual (Mpa)')
        plt.savefig(savepath.format(plt.gca().get_title()), dpi=200,
                    bbox_inches='tight')
        plt.show()
        plt.close()
def __init__(self, file_path):
    """Initialize the graph from the timetable file at ``file_path``.

    Parses the file, then snapshots the parser's global registries:
    vertices, the set of available colors (timeslots), and the class list
    sorted by ascending vertex count.  Also resets the coloring context
    used during the search.

    Args:
        file_path: path to the input file.
    """
    self.file_path = file_path
    parse(file_path)
    self.vertices = Vertice.instances
    self._cores = set(Horario.instances.values())
    # classes with fewer vertices come first
    self._turmas = sorted(Turma.instances.values(),
                          key=lambda turma: len(turma.vertices))
    self.context = {
        "turma": None,
        "cores_possiveis_turma": set(),
        "vertice": None,
    }
def AlloyClustering(k):
    """Cluster the alloy compositions; return (labels, alloy_data).

    NOTE(review): ``k`` is unused -- AffinityPropagation picks the number of
    clusters itself; the parameter is kept for the commented-out KMeans /
    AgglomerativeClustering variants.
    """
    alloy_data = data_parser.parse("../../AlloyComps.csv")
    composition = np.asarray(alloy_data.get_data(["Cu", "Ni", "Mn", "P", "Si", "C"]))
    #est = KMeans(n_clusters=k)
    #est = AgglomerativeClustering(n_clusters = k)
    est = AffinityPropagation()
    est.fit(composition)
    return (est.labels_, alloy_data)
def AlloyClustering(k):
    """Affinity-propagation clustering of the alloy compositions.

    Returns a tuple ``(labels, alloy_data)`` with one cluster label per
    alloy.  ``k`` only matters to the commented-out estimators that take a
    fixed cluster count.  (Duplicate of an identical helper in this file.)
    """
    alloy_data = data_parser.parse("../../AlloyComps.csv")
    feature_names = ["Cu", "Ni", "Mn", "P", "Si", "C"]
    samples = np.asarray(alloy_data.get_data(feature_names))
    #est = KMeans(n_clusters=k)
    #est = AgglomerativeClustering(n_clusters = k)
    estimator = AffinityPropagation()
    estimator.fit(samples)
    labels = estimator.labels_
    return (labels, alloy_data)
def train(model):
    """Parse the training data, then fit and persist the requested model.

    Currently only "logistic regression" is recognised; any other value is
    a silent no-op (the data is still parsed).
    """
    p.parse()
    if model == "logistic regression":
        p.save_answer(lr.pure_logreg())
def loacv(model=KernelRidge(alpha=.00518, coef0=1, degree=3, gamma=.518,
                            kernel='laplacian', kernel_params=None),
          datapath="../../DBTT_Data.csv",
          savepath='../../{}.png',
          X=[
              "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
              "N(log(fluence)", "N(log(flux)", "N(Temp)"
          ],
          Y="delta sigma"):
    """Leave-one-alloy-out cross-validation on the IVAR data.

    For each alloy id 1..59, fits ``model`` with that alloy held out,
    predicts the held-out rows and records the RMSE; plots RMSE per alloy
    and labels the five worst alloys.  The figure is saved to ``savepath``
    with its ``{}`` placeholder replaced by the plot title.

    NOTE(review): the default ``model`` instance is created once at import
    time and re-fitted across calls; pass a fresh estimator if that matters.
    """
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    rms_list = []
    alloy_list = []
    for alloy in range(1, 60):
        # fit model to all alloys except the one to be removed
        data.remove_all_filters()
        data.add_exclusive_filter("Alloy", '=', alloy)
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())
        # predict removed alloy
        data.remove_all_filters()
        data.add_inclusive_filter("Alloy", '=', alloy)
        if len(data.get_x_data()) == 0:
            continue  # alloy id not present in the data set (x data is empty)
        Ypredict = model.predict(data.get_x_data())
        rms = np.sqrt(
            mean_squared_error(Ypredict, np.asarray(data.get_y_data()).ravel()))
        rms_list.append(rms)
        alloy_list.append(alloy)
    print('Mean RMSE: ', np.mean(rms_list))
    # graph rmse vs alloy
    fig, ax = plt.subplots(figsize=(10, 4))
    plt.xticks(np.arange(0, max(alloy_list) + 1, 5))
    ax.scatter(alloy_list, rms_list, color='black', s=10)
    ax.plot((0, 59), (0, 0), ls="--", c=".3")
    ax.set_xlabel('Alloy Number')
    ax.set_ylabel('RMSE (Mpa)')
    ax.set_title('Leave out Alloy')
    ax.text(.05, .88, 'Mean RMSE: {:.2f}'.format(np.mean(rms_list)),
            fontsize=14, transform=ax.transAxes)
    # Label the five worst alloys.  The text is passed positionally: the
    # 's=' keyword was removed from Axes.annotate in modern matplotlib.
    for x in np.argsort(rms_list)[-5:]:
        ax.annotate(alloy_list[x], xy=(alloy_list[x], rms_list[x]))
    fig.savefig(savepath.format(ax.get_title()), dpi=200, bbox_inches='tight')
    fig.clf()
    plt.close()
def show_user_interface(window, user_choice):
    """Main PySimpleGUI event loop for the MS analysis tool.

    ``user_choice`` selects the UI mode ('researcher' or 'admin') and gates
    which branches run.  Loops reading window events until the window is
    closed or 'Exit' is pressed.
    """
    curr_spectrum = 0          # index of the spectrum currently displayed
    spectra = []               # pre-processed spectra; empty until 'proceed'
    plot_final = None          # handle of the drawn canvas figure
    final_compounds_list = ''  # formatted compound list for PDF export
    prediction = ''
    confidence = ''
    while True:  # Event Loop
        main_event, main_values = window.Read()
        if main_event is None or main_event == 'Exit':
            exit_window()
            break
        if main_event == 'User\'s Manual':
            # dim the main window while the manual popup is open
            window.SetAlpha(0.92)
            user_manual()
            window.SetAlpha(1)
            continue
        # Check chosen pre-processing parameters (rebuilt on every event)
        preproc_param = []
        if main_values['bl_reduction']:
            preproc_param.append('bl_reduction')
        if main_values['smoothing']:
            preproc_param.append('smoothing')
        if main_values['sfs']:
            preproc_param.append('sfs')
        if main_values['min_max']:
            preproc_param.append('min_max')
        if main_values['z_score']:
            preproc_param.append('z_score')
        if main_values['data_reduction']:
            preproc_param.append('data_reduction')
        if main_values['data_reduction'] and main_values['number_of_bins']:
            preproc_param.append('number_of_bins')
            preproc_param.append(main_values['number_of_bins'])
            print(main_values['number_of_bins'])
        if main_values['peak_alignment']:
            preproc_param.append('peak_alignment')
        if main_event == 'proceed':
            curr_spectrum = 0
            spectra = []
            # validate inputs before parsing
            if (main_values['dataset_location'] == '') or ('.mzML' not in main_values['dataset_location']):
                sg.PopupTimed('Invalid Input!', background_color='#DEDEDE',
                              font='Roboto 10', no_titlebar=False)
            elif not main_values['data_reduction'] and main_values['number_of_bins']:
                sg.PopupTimed('Binning not enabled!', background_color='#DEDEDE',
                              font='Roboto 10', no_titlebar=False)
            elif '.' in main_values['number_of_bins']:
                sg.PopupTimed('Please enter an integer!', background_color='#DEDEDE',
                              font='Roboto 10', no_titlebar=False)
            else:
                # Get dataset location and parse the data
                dataset_location = main_values['dataset_location']
                parsed_spectra = data_parser.parse(dataset_location)
                # Pre-process MS Data
                spectra, used_pa, dupli_exists = preprocessing.get_preprocessed_data(
                    parsed_spectra, preproc_param)
                # Inform user regarding spectrum duplicate
                if used_pa and dupli_exists:
                    sg.PopupTimed(
                        'Duplicate spectrum found. Spectrum is removed.',
                        background_color='#DEDEDE', font='Roboto 10',
                        no_titlebar=False)
                elif used_pa and not dupli_exists:
                    sg.PopupTimed('No duplicate spectrum',
                                  background_color='#DEDEDE', font='Roboto 10',
                                  no_titlebar=False)
                # Display MS plot
                plot_figure = plot.plot_spectrum(spectra[0][0], spectra[0][1])
                plot_final = plot.draw_figure(
                    window.FindElement('plot_canvas').TKCanvas, plot_figure)
                # Display MS numerical data
                window.FindElement('ms_data_table').Update(
                    make_table(spectra[0][0], spectra[0][1], spectra[0][2])[1:])
                if user_choice == 'researcher':
                    # List down the most abundant m/z values
                    abundant_intensity = heapq.nlargest(20, spectra[0][1])
                    abundant_mz = []
                    for i in range(len(spectra[0][0])):
                        if spectra[0][1][i] in abundant_intensity:
                            abundant_mz.append(spectra[0][0][i])
                    final_mz_list = []
                    for i in abundant_mz:
                        final_mz_list.append(round(float(i), 2))
                    # NOTE(review): prediction/confidence are placeholder
                    # values (fixed label + random percentage) -- confirm.
                    prediction = 'Negative'
                    import random
                    confidence = str(random.randint(52, 96)) + '%'
                    compound_list = chemCompoundsDB.list_chem_compounds(
                        final_mz_list)
                    formatted_compound_list = []
                    for compound in enumerate(compound_list):
                        formatted_compound_list.append(compound[1][0])
                    # de-duplicate while preserving order
                    formatted_compound_list = list(
                        dict.fromkeys(formatted_compound_list))
                    formatted_compound_list = '- ' + '\n\n- '.join(
                        formatted_compound_list)
                    window.FindElement('chem_compounds').Update(
                        formatted_compound_list)
                    final_compounds_list = formatted_compound_list
                    # Get prediction values
                    window.FindElement('prediction').Update(prediction)
                    window.FindElement('prediction_confidence').Update(
                        confidence)
                sg.PopupTimed('Processing Finished!',
                              background_color='#DEDEDE', font='Roboto 10',
                              no_titlebar=False)
        if user_choice == 'admin':
            accuracy = main_values['accuracy']
            precision = main_values['precision']
            recall = main_values['recall']
            f1_score = main_values['f1_score']
            if main_event == 'start_model':
                classifier, accuracy, precision, recall, f1_score = admin_models.train_test_model(
                    spectra)
                sg.PopupTimed('Model Finished!', background_color='#DEDEDE',
                              font='Roboto 10', no_titlebar=False)
                window.FindElement('accuracy').Update(accuracy)
                window.FindElement('precision').Update(precision)
                window.FindElement('recall').Update(recall)
                window.FindElement('f1_score').Update(f1_score)
            if main_event == 'save_model':
                if (not main_values['model_location']) or \
                        (not main_values['model_name']) or \
                        ('/' not in main_values['model_location']):
                    sg.PopupTimed('Invalid Input!', background_color='#DEDEDE',
                                  font='Roboto 10', no_titlebar=False)
                else:
                    model_location = main_values['model_location']
                    model_name = main_values['model_name']
                    # NOTE(review): 'classifier' is only bound by a prior
                    # 'start_model' event -- saving before training raises
                    # NameError.  Confirm intended flow.
                    admin_models.save_model(classifier, model_location,
                                            model_name)
                    sg.PopupTimed('Model Saved!', background_color='#DEDEDE',
                                  font='Roboto 10', no_titlebar=False)
        # Spectra navigation
        if spectra and (main_event == 'ms_number_go') and (main_values['ms_number']) \
                and (int(main_values['ms_number']) > 0) and (int(main_values['ms_number']) < len(spectra)):
            curr_spectrum = int(main_values['ms_number']) - 1
            display_ms_data(spectra[curr_spectrum])
        if spectra and (main_event == 'spectrum_prev') and (curr_spectrum != 0):
            curr_spectrum -= 1
            display_ms_data(spectra[curr_spectrum])
        if spectra and (main_event == 'spectrum_next') and (curr_spectrum != len(spectra) - 1):
            curr_spectrum += 1
            display_ms_data(spectra[curr_spectrum])

        # NOTE(review): this nested helper is (re)defined at the end of each
        # loop iteration, *after* the navigation branches that call it; the
        # first iteration is safe only because 'spectra' starts empty.
        def display_ms_data(spectrum):
            # Redraw the plot and table for one spectrum.
            plot_figure = plot.plot_spectrum(spectrum[0], spectrum[1])
            plot_final = plot.draw_figure(
                window.FindElement('plot_canvas').TKCanvas, plot_figure)
            window.FindElement('ms_data_table').Update(
                make_table(spectrum[0], spectrum[1], spectrum[2])[1:])
            if user_choice == 'researcher':
                # NOTE(review): reads spectra[0], not the 'spectrum'
                # argument, so the compound list never changes while
                # navigating -- confirm whether that is intended.
                abundant_intensity = heapq.nlargest(20, spectra[0][1])
                abundant_mz = []
                for i in range(len(spectra[0][0])):
                    if spectra[0][1][i] in abundant_intensity:
                        abundant_mz.append(spectra[0][0][i])
                final_mz_list = []
                for i in abundant_mz:
                    final_mz_list.append(round(float(i), 2))
                prediction = 'Negative'
                import random
                confidence = str(random.randint(52, 96)) + '%'
                compound_list = chemCompoundsDB.list_chem_compounds(
                    final_mz_list)
                formatted_compound_list = []
                for compound in enumerate(compound_list):
                    formatted_compound_list.append(compound[1][0])
                formatted_compound_list = list(
                    dict.fromkeys(formatted_compound_list))
                formatted_compound_list = '- ' + '\n\n- '.join(
                    formatted_compound_list)
                window.FindElement('chem_compounds').Update(
                    formatted_compound_list)
                # NOTE(review): assigns a local, not the enclosing
                # final_compounds_list -- the export branch keeps seeing the
                # old value.  Confirm.
                final_compounds_list = formatted_compound_list
                window.FindElement('prediction').Update(prediction)
                window.FindElement('prediction_confidence').Update(confidence)
            sg.PopupTimed('Processing Finished!', background_color='#DEDEDE',
                          font='Roboto 10', no_titlebar=False)

        if main_event == 'reset':
            # Clear all inputs and outputs back to the initial state.
            curr_spectrum = 0
            spectra = []
            window.FindElement('dataset_location').Update('')
            window.FindElement('bl_reduction').Update(value=False)
            window.FindElement('smoothing').Update(value=False)
            window.FindElement('sfs').Update(value=False)
            window.FindElement('min_max').Update(value=False)
            window.FindElement('z_score').Update(value=False)
            window.FindElement('data_reduction').Update(value=False)
            window.FindElement('peak_alignment').Update(value=False)
            window.FindElement('number_of_bins').Update(value='')
            window.FindElement('plot_canvas').TKCanvas.delete('all')
            window.FindElement('ms_data_table').Update('')
            if user_choice == 'researcher':
                window.FindElement('chem_compounds').Update(value='')
                window.FindElement('prediction').Update(value='')
                window.FindElement('prediction_confidence').Update(value='')
                window.FindElement('export_location').Update(value='')
                window.FindElement('export_name').Update(value='')
                window.FindElement('ms_number').Update(value='')
            if user_choice == 'admin':
                window.FindElement('model_name').Update(value='')
                window.FindElement('model_location').Update(value='')
                window.FindElement('accuracy').Update(value='')
                window.FindElement('precision').Update(value='')
                window.FindElement('recall').Update(value='')
                window.FindElement('f1_score').Update(value='')
            continue
        if main_event == 'export':
            if (not main_values['export_location']) or \
                    (not main_values['export_name']) or \
                    ('/' not in main_values['export_location']) or \
                    (not final_compounds_list):
                sg.PopupTimed('Invalid Input!', background_color='#DEDEDE',
                              font='Roboto 10', no_titlebar=False)
            else:
                # normalise the output file name to a .pdf extension
                if '.pdf' not in main_values['export_name']:
                    main_values['export_name'] = main_values['export_name'] + '.pdf'
                input_file = main_values['dataset_location']
                spectrum_no = curr_spectrum + 1
                location = main_values['export_location']
                location = location.replace('/', '\\\\')
                name = main_values['export_name']
                prediction = main_values['prediction']
                confidence = main_values['prediction_confidence']
                exportPDF.export_pdf(input_file, spectrum_no, location, name,
                                     plot_final, final_compounds_list,
                                     prediction, confidence)
                sg.PopupTimed('PDF Export Finished!',
                              background_color='#DEDEDE', font='Roboto 10',
                              no_titlebar=False)
    window.Close()
parameter_values.append(config.getboolean('AllTests', parameter)) else: if config.has_option(case_name, parameter): parameter_values.append(config.get(case_name, parameter)) else: parameter_values.append(config.get('AllTests', parameter)) model, data_path, save_path, y_data, x_data, lwr_data_path, weights = parameter_values if "CD" in y_data or "EONY" in y_data: save_path = save_path.format(y_data.split(' ',1)[0] + '_{}') model = importlib.import_module(model).get() x_data = x_data.split(',') data = data_parser.parse(data_path, weights) data.set_x_features(x_data) data.set_y_feature(y_data) data.add_exclusive_filter("Temp (C)", '<>', 290) data.overwrite_data_w_filtered_data() lwr_data = data_parser.parse(lwr_data_path) if not y_data == "delta sigma": lwr_data.set_x_features(x_data) lwr_data.set_y_feature(y_data) if y_data == "CD delta sigma": data.add_exclusive_filter("Alloy",'=', 29) data.add_exclusive_filter("Alloy",'=', 8) data.add_exclusive_filter("Alloy", '=', 1)
from data_parser import parse

if __name__ == "__main__":
    # Input format: a case count, then for each case a row count followed by
    # that many "start end" rows; print the min start, max end and the span.
    rows = parse("/media/asdazey/PSCSTA/Judge/icecreamcatch.in", True)
    #x=parse("/home/asdazey/Desktop/PSCSTA/logan/icc/icecreamcatch.in",True)
    case_count = int(rows[0][0])
    rows = rows[1:]
    for _ in range(case_count):
        size = int(rows[0][0])
        rows = rows[1:]
        # Both extremes seeded from the first row's first column,
        # matching the original behaviour.
        lowest = int(rows[0][0])
        highest = int(rows[0][0])
        for entry in rows[:size]:
            if int(entry[0]) < lowest:
                lowest = int(entry[0])
            if int(entry[1]) > highest:
                highest = int(entry[1])
        print(lowest, highest, (highest - lowest + 1))
        rows = rows[size:]
from ComplexModel import ComplexModel, complex_feature_extractor
from SimpleModel import SimpleModel, simple_feature_extractor
from data_parser import parse
import pickle


def _write_inferred(out_file, data, model):
    """Run ``model.infer`` on every sentence and write the tab-separated rows."""
    for sentence in data:
        for word in model.infer(sentence):
            out_file.write("{}\t{}\t_\t{}\t_\t_\t{}\t_\t_\t_\n".format(
                word.counter, word.token, word.pos, word.head))
        out_file.write("\n")


# Simple model comp
with open('data/comp_m1_302575287.wtag', 'w') as f:
    comp_data = parse('data/comp.unlabeled')
    # context manager closes the pickle handle (previously leaked)
    with open('w_pickle/w_simple_100', 'rb') as wf:
        w = pickle.load(wf)
    simple_model = SimpleModel(comp_data, simple_feature_extractor, w)
    _write_inferred(f, comp_data, simple_model)

# Complex model comp
with open('data/comp_m2_302575287.wtag', 'w') as f:
    comp_data = parse('data/comp.unlabeled')
    with open('w_pickle/w_complex_50', 'rb') as wf:
        w = pickle.load(wf)
    complex_model = ComplexModel(comp_data, complex_feature_extractor, w)
    _write_inferred(f, comp_data, complex_model)
def desimp(model=KernelRidge(alpha=.00139, gamma=.518, kernel='rbf'),
           datapath="../../DBTT_Data.csv",
           savepath='../../{}.png',
           X=[
               "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
               "N(log(fluence)", "N(log(flux)", "N(Temp)"
           ],
           Y="delta sigma"):
    """Measure descriptor importance by leave-one-descriptor-out CV.

    For each of the nine descriptors, drops that column, runs repeated
    k-fold cross-validation (numIter x numFolds), prints RMSE statistics
    and finally saves a bar chart of the mean RMSE per removed descriptor.
    """
    # sklearn.cross_validation was removed in scikit-learn 0.20; use the
    # model_selection API instead.
    from sklearn.model_selection import KFold
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    overall_rms_list = []
    sd_list = []
    descriptorlist = ['Cu', 'Ni', 'Mn', 'P', 'Si', 'C', 'Fl', 'Fx', 'Temp']
    numFolds = 5
    numIter = 200
    Xdata = np.asarray(data.get_x_data())
    Ydata = np.asarray(data.get_y_data()).ravel()
    print("Testing descriptor importance using {}x {} - Fold CV".format(
        numIter, numFolds))
    print("")
    for x in range(len(data.get_x_data()[0])):
        RMS_List = []
        newX = np.delete(Xdata, x, 1)  # drop descriptor x
        for n in range(numIter):
            kf = KFold(n_splits=numFolds, shuffle=True)
            K_fold_rms_list = []
            # split into testing and training sets
            for train_index, test_index in kf.split(newX):
                X_train, X_test = newX[train_index], newX[test_index]
                Y_train, Y_test = Ydata[train_index], Ydata[test_index]
                # train on training sets
                model.fit(X_train, Y_train)
                YTP = model.predict(X_test)
                rms = np.sqrt(mean_squared_error(Y_test, YTP))
                K_fold_rms_list.append(rms)
            RMS_List.append(np.mean(K_fold_rms_list))
        # RMSE statistics over the numIter repetitions
        maxRMS = np.amax(RMS_List)
        minRMS = np.amin(RMS_List)
        avgRMS = np.mean(RMS_List)
        medRMS = np.median(RMS_List)
        sd = np.std(RMS_List)  # same as sqrt(mean((x - mean)^2))
        print("Removing {}:".format(descriptorlist[x]))
        print("The average RMSE was " + str(avgRMS))
        print("The median RMSE was " + str(medRMS))
        print("The max RMSE was " + str(maxRMS))
        print("The min RMSE was " + str(minRMS))
        print("The std deviation of the RMSE values was " + str(sd))
        print("")
        overall_rms_list.append(avgRMS)
        sd_list.append(sd)
    matplotlib.rcParams.update({'font.size': 15})
    fig, ax = plt.subplots()
    rects = ax.bar(np.arange(9), overall_rms_list, color='r', yerr=sd_list)
    ax.set_xlabel('Descriptor Removed')
    ax.set_ylabel('200x 5-fold RMSE')
    ax.set_title('Descriptor Importance')
    ax.set_xticks(np.arange(9) + .4)
    ax.set_xticklabels(descriptorlist)
    # label each bar with its height
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                '%.2f' % (height), ha='center', va='bottom')
    fig.savefig(savepath.format(plt.gca().get_title()), dpi=200,
                bbox_inches='tight')
    plt.show()
    plt.close()
from data_parser import parse
from ComplexModel import ComplexModel, complex_feature_extractor_t
import colorama
colorama.init()

if __name__ == '__main__':
    # Train the complex model once per candidate special feature and record
    # the test-set accuracy for each.
    epochs = 5
    all_data = parse('data/train.labeled')
    test_data = parse('data/test.labeled')
    fname = 'results/compare_features'
    with open(fname, 'w') as out:
        out.write('Feat#\tTest acc\n')
        for feature_id in range(29, 41):
            model = ComplexModel(all_data, complex_feature_extractor_t,
                                 special_feature=feature_id)
            model.train(epochs)
            accuracy = model.test(test_data)
            out.write('{0}\t{1:8.5f}\n'.format(feature_id, accuracy))
def flfxex(model=KernelRidge(alpha=.00139, coef0=1, degree=3, gamma=.518,
                             kernel='rbf', kernel_params=None),
           datapath="../../DBTT_Data.csv",
           savepath='../../{}.png',
           X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
              "N(log(fluence)", "N(log(flux)", "N(Temp)"],
           Y="delta sigma"):
    """Fluence/flux extrapolation test.

    For each division point, trains on the low-fluence (resp. high-flux)
    side and predicts the held-out side, producing a three-panel
    predicted-vs-measured figure for fluence and another for flux.
    """
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    def _panel(ax_panel, feature, train_op, test_op, threshold, title):
        """Train on one side of ``threshold`` and plot the other side."""
        data.remove_all_filters()
        data.add_inclusive_filter(feature, train_op, threshold)
        l_train = len(data.get_y_data())
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())
        data.remove_all_filters()
        data.add_inclusive_filter(feature, test_op, threshold)
        l_test = len(data.get_y_data())
        Ypredict = model.predict(data.get_x_data())
        RMSE = np.sqrt(mean_squared_error(
            Ypredict, np.asarray(data.get_y_data()).ravel()))
        matplotlib.rcParams.update({'font.size': 26})
        ax_panel.scatter(data.get_y_data(), Ypredict, color='black', s=10)
        ax_panel.plot(ax_panel.get_ylim(), ax_panel.get_ylim(), ls="--", c=".3")
        ax_panel.set_xlabel('Measured ∆sigma (Mpa)')
        ax_panel.set_ylabel('Predicted ∆sigma (Mpa)')
        ax_panel.set_title(title)
        ax_panel.text(.1, .88, 'RMSE: {:.3f}'.format(RMSE), fontsize=30,
                      transform=ax_panel.transAxes)
        ax_panel.text(.1, .83, 'Train: {}, Test: {}'.format(l_train, l_test),
                      transform=ax_panel.transAxes)

    fluence_divisions = [3.3E18, 3.3E19, 3.3E20]
    flux_divisions = [5e11, 2e11, 1e11]

    # train below each fluence division, test at and above it
    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x, division in enumerate(fluence_divisions):
        _panel(ax[x], "fluence n/cm2", '<', '>=', division,
               'Testing Fluence > {}'.format(division))
    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("fluence_extrapolation"), dpi=150,
                bbox_inches='tight')
    plt.show()
    plt.close()

    # train above each flux division, test at and below it
    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x, division in enumerate(flux_divisions):
        _panel(ax[x], "flux n/cm2/s", '>', '<=', division,
               'Testing Flux < {:.0e}'.format(division))
    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("flux_extrapolation"), dpi=150,
                bbox_inches='tight')
    plt.show()
    plt.close()
import json

# Make the directory containing this .blend file importable so the local
# data_parser module can be found.
directory = os.path.dirname(bpy.data.filepath)
if directory not in sys.path:
    sys.path.append(directory)
import data_parser

filename = "AE2JSON_Comp1.json"
full_path = directory + "/" + filename
# Context manager closes the file handle (the bare open().read() leaked it).
with open(full_path) as json_file:
    json_str = json_file.read()
json_data = json.loads(json_str)

# NOTE: this rebinds (shadows) the imported data_parser module name.
data_parser = DataParser(json_data)
parsed_data = data_parser.parse()

#data_parser = AEProjectParser(json_data)
#data_parser.add_layer_parser('Null', NullLayerParser)
#data_parser.add_layer_parser('Camera', CameraLayerParser)
#parsed_data = data_parser.parse()

#import pprint
#pp = pprint.PrettyPrinter(indent = 4, depth = 5, width=120)
#pp.pprint(parsed_data)


class Main(object):
    """Thin wrapper that runs a parser's parse() step."""

    def __init__(self, data_parser):
        # data_parser: any object exposing a parse() method
        self.data_parser = data_parser

    def run(self):
        self.data_parser.parse()
def getone(s): try: n=s.index("#") after=s[n+1:] except: return "" ret="" for i in after: if i in ascii: ret+=i else: return ret return ret if __name__=="__main__": x=parse("/media/asdazey/PSCSTA/Judge/hashtags.in") #x=parse("/home/asdazey/Desktop/PSCSTA/logan/ht/hashtags.in") #print(x) n=int(x[0]) x=x[1:] for i in range(n): m = [] for j in x[i].split(" "): k = getone(j) if k!= "": m.append(k) print(" ".join(m))
for i in range(len(Xdata[0])): rms_list = [] fig, ax = plt.subplots() for j in np.arange(0, 5.5, .5): newX = np.copy(Xdata) newX[:, i] = newX[:, i] * j kfold = kfold_cv(model, X = newX, Y = Ydata, num_folds = 5, num_runs = 200) alloy = alloy_cv(model, newX, Ydata, Alloys) #ax.errorbar(j,kfold['rms'],yerr = kfold['std'], c = 'red', label = '5-fold CV', fmt='o') #ax.errorbar(j, alloy['rms'], yerr = alloy['std'], c = 'blue', label = 'Alloy CV', fmt='o') ax.errorbar(j, kfold['rms'] + alloy['rms'], yerr = kfold['std'] + alloy['std'], c = 'm', fmt='o') print(i, j, kfold['rms'], alloy['rms'], kfold['rms'] + alloy['rms']) ax.set_xlabel("Scale Factor") ax.set_ylabel("RMSE") ax.set_title(X[i]) fig.savefig(savepath.format(plt.gca().get_title()), dpi=200, bbox_inches='tight') fig.clf() plt.close() from sklearn.kernel_ridge import KernelRidge model = KernelRidge(alpha = .00518, gamma = .518, kernel = 'laplacian') X=["N(Cu)", "N(Ni)", "N(Mn)", "N(P)","N(Si)", "N( C )", "N(log(fluence)", "N(log(flux)", "N(Temp)"] Y="delta sigma" datapath="../../DBTT_Data.csv" savepath='../../bardeengraphs/{}.png' data = data_parser.parse(datapath) data.set_x_features(X) data.set_y_feature(Y) execute(model, data, savepath)
__author__ = 'haotian' import data_parser as dp data = dp.parse('../DBTT/DBTT_Data14.5.csv') data.normalization( ['Cu (At%)', 'Ni (At%)', 'Mn (At%)', 'P (At%)', 'Si (At%)', 'C (At%)'], normalization_type='t') data.normalization( ['log(fluence)', 'log(eff fluence)', 'log(flux)', 'Temp (C)', 'log(time)']) data.std_normalization(['delta sigma', 'EONY predicted', 'CD predicted (Mpa)']) data.output('../DBTT/DBTT_Data15.csv')
_a_acc = metrics.accuracy_score(y_test_a, pred_a) _a_f1 = metrics.f1_score(y_test_a, pred_a, pos_label=1) model_a.save_weights(p.path_a) model_v.compile(optimizer=keras.optimizers.Adam(lr=p.lr2), loss=tf.losses.CategoricalCrossentropy(from_logits=True), metrics=['accuracy']) model_v.fit(x_train, y_train_v, epochs=epochs, batch_size=p.batch_size) pred_v = tf.argmax(model_v.predict(x_test, batch_size=p.total - p.train), 1) _v_acc = metrics.accuracy_score(y_test_v, pred_v) _v_f1 = metrics.f1_score(y_test_v, pred_v, pos_label=1) model_v.save_weights(p.path_v) return _a_acc, _a_f1, _v_acc, _v_f1 if __name__ == "__main__": features = dp.parse("resources/deep_features.txt", p.total) arousal_class = dp.parse("resources/arousal_class.txt", p.total) - 1 valence_class = dp.parse("resources/valence_class.txt", p.total) - 1 avg_a_acc, avg_a_f1, avg_v_acc, avg_v_f1 = 0, 0, 0, 0 for i in range(p.repeat): a_acc, a_f1, v_acc, v_f1 = train(features, arousal_class, valence_class, p.epochs) avg_a_acc += a_acc avg_a_f1 += a_f1 avg_v_acc += v_acc avg_v_f1 += v_f1 print("Arousal result: average accuracy is " + str(avg_a_acc / p.repeat) + ", average F1 score is " + str( avg_a_f1 / p.repeat)) print("Valence result: average accuracy is " + str(avg_v_acc / p.repeat) + ", average F1 score is " + str( avg_v_f1 / p.repeat))
def fullfit(model=None,
            datapath="../../DBTT_Data.csv",
            savepath='../../{}.png',
            X=None,
            Y="delta sigma"):
    """Fit `model` on the full data set, predict it back, and plot
    predicted vs. measured with overall, IVAR, and IVAR+ RMS errors.

    Defaults are built inside the function to avoid the shared
    mutable/stateful default-argument pitfall (the original default
    KernelRidge instance was fitted in place across calls).
    """
    if model is None:
        model = KernelRidge(alpha=.00139, coef0=1, degree=3, gamma=.518,
                            kernel='rbf', kernel_params=None)
    if X is None:
        X = ["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
             "N(log(fluence)", "N(log(flux)", "N(Temp)"]
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    Ydata = np.asarray(data.get_y_data()).ravel()

    # Rows 0-1463 are IVAR; rows 1464-1505 are IVAR+.  np.arange yields the
    # integer indices directly.  BUGFIX: the original
    # linspace(1464, 1505, 43).astype(int) asked for 43 points across 42
    # integers, so truncation duplicated index 1464 in the IVAR+ slice.
    IVARindices = np.arange(0, 1464)
    IVARplusindices = np.arange(1464, 1506)

    # Train on the full data set and predict it back (resubstitution fit).
    model.fit(data.get_x_data(), Ydata)
    Ypredict = model.predict(data.get_x_data())

    rms = np.sqrt(mean_squared_error(Ypredict, Ydata))
    IVAR_rms = np.sqrt(
        mean_squared_error(Ypredict[IVARindices], Ydata[IVARindices]))
    IVARplus_rms = np.sqrt(
        mean_squared_error(Ypredict[IVARplusindices], Ydata[IVARplusindices]))
    print('RMS: %.5f, IVAR RMS: %.5f, IVAR+ RMS: %.5f' %
          (rms, IVAR_rms, IVARplus_rms))

    # graph outputs
    plt.figure(1)
    plt.scatter(Ydata[IVARindices], Ypredict[IVARindices],
                s=10, color='black', label='IVAR')
    plt.scatter(Ydata[IVARplusindices], Ypredict[IVARplusindices],
                s=10, color='red', label='IVAR+')
    plt.legend(loc=4)  # once is enough; the original called it twice
    # Parity line y = x.
    plt.plot(plt.gca().get_ylim(), plt.gca().get_ylim(), ls="--", c=".3")
    plt.xlabel('Measured (MPa)')
    plt.ylabel('Predicted (MPa)')
    plt.title('Full Fit')
    plt.figtext(.15, .83, 'Overall RMS: %.4f' % (rms), fontsize=14)
    plt.figtext(.15, .77, 'IVAR RMS: %.4f' % (IVAR_rms), fontsize=14)
    plt.figtext(.15, .71, 'IVAR+ RMS: %.4f' % (IVARplus_rms), fontsize=14)
    plt.savefig(savepath.format(plt.gca().get_title()),
                dpi=200, bbox_inches='tight')
    plt.show()
    plt.close()
def desimp(model=None,
           datapath="../../DBTT_Data.csv",
           savepath='../../{}.png',
           X=None,
           Y="delta sigma"):
    """Leave-one-descriptor-out importance test.

    For each descriptor column, drop it, run repeated K-fold CV, report the
    RMSE statistics, and plot a bar chart of RMSE per removed descriptor.
    Defaults are built per call to avoid shared mutable/stateful defaults.
    """
    if model is None:
        model = KernelRidge(alpha=.00139, gamma=.518, kernel='rbf')
    if X is None:
        X = ["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
             "N(log(fluence)", "N(log(flux)", "N(Temp)"]
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)

    overall_rms_list = []
    sd_list = []
    # Short labels for the bar chart, one per descriptor in X's order.
    descriptorlist = ['Cu', 'Ni', 'Mn', 'P', 'Si', 'C', 'Fl', 'Fx', 'Temp']
    numFolds = 5
    numIter = 200
    Xdata = np.asarray(data.get_x_data())
    Ydata = np.asarray(data.get_y_data()).ravel()
    print("Testing descriptor importance using {}x {} - Fold CV".format(
        numIter, numFolds))
    print("")
    for x in range(len(data.get_x_data()[0])):
        RMS_List = []
        newX = np.delete(Xdata, x, 1)  # drop descriptor x
        for n in range(numIter):
            kf = cross_validation.KFold(len(Xdata), n_folds=numFolds,
                                        shuffle=True)
            K_fold_rms_list = []
            # Split into testing and training sets.
            for train_index, test_index in kf:
                X_train, X_test = newX[train_index], newX[test_index]
                Y_train, Y_test = Ydata[train_index], Ydata[test_index]
                model.fit(X_train, Y_train)
                YTP = model.predict(X_test)
                K_fold_rms_list.append(
                    np.sqrt(mean_squared_error(Y_test, YTP)))
            RMS_List.append(np.mean(K_fold_rms_list))
        maxRMS = np.amax(RMS_List)
        minRMS = np.amin(RMS_List)
        avgRMS = np.mean(RMS_List)
        medRMS = np.median(RMS_List)
        # np.std == sqrt(mean((v - mean)^2)); the original hand-rolled it.
        sd = np.std(RMS_List)
        print("Removing {}:".format(descriptorlist[x]))
        print("The average RMSE was " + str(avgRMS))
        print("The median RMSE was " + str(medRMS))
        print("The max RMSE was " + str(maxRMS))
        print("The min RMSE was " + str(minRMS))
        print("The std deviation of the RMSE values was " + str(sd))
        print("")
        overall_rms_list.append(avgRMS)
        sd_list.append(sd)

    # Bar chart of per-descriptor RMSE with std-dev error bars.
    matplotlib.rcParams.update({'font.size': 15})
    fig, ax = plt.subplots()
    rects = ax.bar(np.arange(9), overall_rms_list, color='r', yerr=sd_list)
    ax.set_xlabel('Descriptor Removed')
    ax.set_ylabel('200x 5-fold RMSE')
    ax.set_title('Descriptor Importance')
    ax.set_xticks(np.arange(9) + .4)
    ax.set_xticklabels(descriptorlist)
    for rect in rects:
        height = rect.get_height()
        ax.text(rect.get_x() + rect.get_width() / 2., 1.05 * height,
                '%.2f' % (height), ha='center', va='bottom')
    fig.savefig(savepath.format(ax.get_title()), dpi=200,
                bbox_inches='tight')
    plt.show()
    plt.close()
import matplotlib.pyplot as plt from sklearn import cross_validation from sklearn.metrics import mean_squared_error from evolutionary_search import EvolutionaryAlgorithmSearchCV from sklearn.kernel_ridge import KernelRidge import random X=["descriptor1", "descriptor2"' ...'] Y1="response variable" datapath="training data path" testdatapath='testing data path savepath='{}.png' data = data_parser.parse(datapath) data.set_x_features(X) data.set_y_feature(Y1) data.remove_all_filters() # add filters for training data lwrdata = data_parser.parse(lwrdatapath) lwrdata.set_x_features(X) lwrdata.set_y_feature(Y2) lwrdata.remove_all_filters() # add filters for testing data Ydata = data.get_y_data() Xdata = data.get_x_data() Ydata_test = testdata.get_y_data() Xdata_test = testdata.get_x_data()
def flfxex(model=None,
           datapath="../../DBTT_Data.csv",
           savepath='../../{}.png',
           X=None,
           Y="delta sigma"):
    """Extrapolation test: train below each fluence threshold (resp. above
    each flux threshold), evaluate on the complementary data, and save 1x3
    measured-vs-predicted panels for fluence and for flux.

    Defaults are constructed per call; the original used a shared
    KernelRidge instance and a mutable list as default arguments.
    """
    if model is None:
        model = KernelRidge(alpha=.00139, coef0=1, degree=3, gamma=.518,
                            kernel='rbf', kernel_params=None)
    if X is None:
        X = ["N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )",
             "N(log(fluence)", "N(log(flux)", "N(Temp)"]
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    fluence_divisions = [3.3E18, 3.3E19, 3.3E20]
    flux_divisions = [5e11, 2e11, 1e11]
    # Loop-invariant: set the font size once, not inside every iteration.
    matplotlib.rcParams.update({'font.size': 26})

    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x in range(len(fluence_divisions)):
        # Train on low-fluence data only.
        data.remove_all_filters()
        data.add_inclusive_filter("fluence n/cm2", '<', fluence_divisions[x])
        l_train = len(data.get_y_data())
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())
        # Test on the held-out high-fluence data.
        data.remove_all_filters()
        data.add_inclusive_filter("fluence n/cm2", '>=', fluence_divisions[x])
        l_test = len(data.get_y_data())
        Ypredict = model.predict(data.get_x_data())
        RMSE = np.sqrt(mean_squared_error(
            Ypredict, np.asarray(data.get_y_data()).ravel()))
        ax[x].scatter(data.get_y_data(), Ypredict, color='black', s=10)
        ax[x].plot(ax[x].get_ylim(), ax[x].get_ylim(), ls="--", c=".3")
        ax[x].set_xlabel('Measured ∆sigma (Mpa)')
        ax[x].set_ylabel('Predicted ∆sigma (Mpa)')
        ax[x].set_title('Testing Fluence > {}'.format(fluence_divisions[x]))
        ax[x].text(.1, .88, 'RMSE: {:.3f}'.format(RMSE), fontsize=30,
                   transform=ax[x].transAxes)
        ax[x].text(.1, .83, 'Train: {}, Test: {}'.format(l_train, l_test),
                   transform=ax[x].transAxes)
    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("fluence_extrapolation"), dpi=150,
                bbox_inches='tight')
    plt.show()
    plt.close()

    fig, ax = plt.subplots(1, 3, figsize=(30, 10))
    for x in range(len(flux_divisions)):
        # Train on high-flux data only.
        data.remove_all_filters()
        data.add_inclusive_filter("flux n/cm2/s", '>', flux_divisions[x])
        l_train = len(data.get_y_data())
        model.fit(data.get_x_data(), np.asarray(data.get_y_data()).ravel())
        # Test on the held-out low-flux data.
        data.remove_all_filters()
        data.add_inclusive_filter("flux n/cm2/s", '<=', flux_divisions[x])
        l_test = len(data.get_y_data())
        Ypredict = model.predict(data.get_x_data())
        RMSE = np.sqrt(mean_squared_error(
            Ypredict, np.asarray(data.get_y_data()).ravel()))
        ax[x].scatter(data.get_y_data(), Ypredict, color='black', s=10)
        ax[x].plot(ax[x].get_ylim(), ax[x].get_ylim(), ls="--", c=".3")
        ax[x].set_xlabel('Measured ∆sigma (Mpa)')
        ax[x].set_ylabel('Predicted ∆sigma (Mpa)')
        ax[x].set_title('Testing Flux < {:.0e}'.format(flux_divisions[x]))
        ax[x].text(.1, .88, 'RMSE: {:.3f}'.format(RMSE), fontsize=30,
                   transform=ax[x].transAxes)
        ax[x].text(.1, .83, 'Train: {}, Test: {}'.format(l_train, l_test),
                   transform=ax[x].transAxes)
    fig.tight_layout()
    plt.subplots_adjust(bottom=.2)
    fig.savefig(savepath.format("flux_extrapolation"), dpi=150,
                bbox_inches='tight')
    plt.show()
    plt.close()
def execute(model, data, savepath, csvlist="", xfieldlist="", yfieldlist="",
            xerrfieldlist="", yerrfieldlist="", xlabel="", ylabel="",
            labellist="", plotlabel="overlay", guideline=0, sizes=None,
            faces=None, markers=None, linestyles=None, outlines=None,
            timex="", stepsize="1.0", startx=None, endx=None, whichyaxis="",
            *args, **kwargs):
    """Overlay plots

    Args:
        csvlist <str>: comma-delimited list of csv names
                    Currently only supports two csvs.
        xfieldlist <str>: comma-delimited list of x-field names, to
                    match with csvlist
        xerrfieldlist <str>: comma-delimited list of x error field names, to
                    match with csvlist
        yfieldlist <str>: comma-delimited list of y-field names, to
                    match with csvlist
        yerrfieldlist <str>: comma-delimited list of y error field names, to
                    match with csvlist
    """
    stepsize = float(stepsize)
    csvs = csvlist.split(",")
    print(csvs)
    xfields = xfieldlist.split(",")
    yfields = yfieldlist.split(",")
    # Validate that the per-csv field lists line up with the csv list.
    if not (len(csvs) == len(xfields)):
        print("Length of x field list not match length of csv list.")
        print("Exiting.")
        return
    if not (len(csvs) == len(yfields)):
        print("Length of y field list does not match length of csv list.")
        print("Exiting.")
        return
    if len(xerrfieldlist) > 0:
        xerrfields = xerrfieldlist.split(",")
        if not (len(xerrfields) == len(xfields)):
            print(
                "Length of x error field list does not match length of x field list."
            )
            print("Exiting.")
            return
    else:
        xerrfields = list()
    if len(yerrfieldlist) > 0:
        yerrfields = yerrfieldlist.split(",")
        if not (len(yerrfields) == len(yfields)):
            print(
                "Length of y error field list does not match length of y field list."
            )
            print("Exiting.")
            return
    else:
        yerrfields = list()
    xdatas = list()
    ydatas = list()
    xerrs = list()
    yerrs = list()
    # Pull x/y (and optional error) columns from each csv in order.
    for pidx in range(0, len(csvs)):
        print("Getting data from %s" % csvs[pidx])
        data = data_parser.parse(csvs[pidx].strip())
        xdata = np.asarray(data.get_data(xfields[pidx].strip())).ravel()
        ydata = np.asarray(data.get_data(yfields[pidx].strip())).ravel()
        xerrdata = None
        yerrdata = None
        if len(xerrfields) > 0:
            xerrfield = xerrfields[pidx].strip()
            if not (xerrfield == ""):
                xerrdata = np.asarray(data.get_data(xerrfield)).ravel()
        if len(yerrfields) > 0:
            yerrfield = yerrfields[pidx].strip()
            if not (yerrfield == ""):
                yerrdata = np.asarray(data.get_data(yerrfield)).ravel()
        xdatas.append(xdata)
        ydatas.append(ydata)
        xerrs.append(xerrdata)
        yerrs.append(yerrdata)
    if xlabel == "":
        xlabel = "%s" % xfields
    if ylabel == "":
        ylabel = "%s" % yfields
    if labellist == "":
        labellist = list()
        for csvname in csvs:
            # BUGFIX: the original appended os.path.basename(csvs[0]) inside
            # this loop, labeling every series with the first csv's name.
            labellist.append(os.path.basename(csvname).split(".")[0])
    else:
        labellist = labellist.split(",")
    # Assemble keyword arguments for the plotting helper.  NOTE: this
    # intentionally rebuilds kwargs, discarding any caller-supplied extras.
    kwargs = dict()
    kwargs['xdatalist'] = xdatas
    kwargs['ydatalist'] = ydatas
    kwargs['labellist'] = labellist
    kwargs['xlabel'] = xlabel
    kwargs['ylabel'] = ylabel
    kwargs['xerrlist'] = xerrs
    kwargs['yerrlist'] = yerrs
    kwargs['stepsize'] = stepsize
    kwargs['savepath'] = savepath
    kwargs['plotlabel'] = plotlabel
    kwargs['guideline'] = guideline
    if not (faces is None):
        kwargs['faces'] = faces
    if not (outlines is None):
        kwargs['outlines'] = outlines
    if not (sizes is None):
        kwargs['sizes'] = sizes
    if not (markers is None):
        kwargs['markers'] = markers
    if not (linestyles is None):
        kwargs['linestyles'] = linestyles
    kwargs['timex'] = timex
    kwargs['startx'] = startx
    kwargs['endx'] = endx
    notelist = list()
    kwargs['notelist'] = notelist
    kwargs['whichyaxis'] = whichyaxis
    #for key,value in kwargs.items():
    #    print(key,":",value)
    print("Plotting.")
    plotxy.multiple_overlay(**kwargs)
    return
def do_everything(file_name, firsto_solutiono_strategeiro, go_back_to_depo=True):
    """Build and solve a capacitated VRP with time windows from `file_name`
    using the given or-tools first-solution strategy.

    Returns a list [objective_cost, distance_cost, route_stats, ...] on
    success, [float('inf')] when the solver finds no solution, and None
    (implicitly) when the instance has no locations.
    """
    # Create the data.
    # data = create_data_array()
    data = data_parser.parse(file_name)
    locations = data[0]        # one entry per node (depot + orders)
    travel_times = data[1]     # node-to-node travel time matrix
    demands_pal = data[2]      # per-order demand in pallets
    demands_kg = data[3]       # per-order demand in kilograms
    start_times = data[4]      # time-window opening per node
    end_times = data[5]        # time-window closing per node
    vehicles_pal = data[6]     # per-vehicle pallet capacity
    vehicles_kg = data[7]      # per-vehicle weight capacity
    vehicles_cost = data[8]    # per-vehicle cost multiplier (per distance)
    vehicles_maxkm = data[9]   # per-vehicle distance limit
    # Per-vehicle start/end nodes used when routes need not return to the
    # depot; hard-coded for a 30-vehicle fleet.
    multi_start = [0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
                   0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
    multi_end = [0, 1, 18, 50, 0, 0, 1, 18, 50, 0, 0, 0, 0, 0, 0,
                 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 18, 50, 0]
    num_locations = len(locations)
    num_vehicles = len(vehicles_pal)
    depot = 0
    search_time_limit_ms = 300000      # overall solver budget: 5 minutes
    local_search_max_time_ms = 3000    # per-LNS-step budget: 3 seconds
    # Result variables
    result = []
    tmp_route = []
    h_route_dist = 0
    h_route_time = 0
    tmp_orders = []
    # Create routing model.
    if num_locations > 0:
        # The number of nodes of the VRP is num_locations.
        # Nodes are indexed from 0 to num_locations - 1. By default the start of
        # a route is node 0.
        if go_back_to_depo:
            routing = pywrapcp.RoutingModel(num_locations, num_vehicles, depot)
        else:
            # Open routes: per-vehicle start/end nodes from the tables above.
            routing = pywrapcp.RoutingModel(num_locations, num_vehicles,
                                            multi_start, multi_end)
        search_parameters = pywrapcp.RoutingModel.DefaultSearchParameters()
        search_parameters.first_solution_strategy = firsto_solutiono_strategeiro
        search_parameters.time_limit_ms = search_time_limit_ms
        search_parameters.lns_time_limit_ms = local_search_max_time_ms
        # print(search_parameters)

        ############################# Callbacks to the distance function and travel time functions here.
        # One cost callback per vehicle so each vehicle's arc cost is scaled
        # by its own cost multiplier.
        cost_callback_matrix = []
        for v_id in range(len(vehicles_cost)):
            cost_between_locations = CreateCostCallback(locations,
                                                        vehicles_cost[v_id])
            cost_callback = cost_between_locations.Cost
            cost_callback_matrix.append(cost_callback)
            routing.SetArcCostEvaluatorOfVehicle(cost_callback, v_id)

        ############################# Adding pallets dimension constraints.
        demands_pal_at_orders = CreateDemandPalCallback(demands_pal)
        demands_pal_callback = demands_pal_at_orders.DemandPal
        NullCapacitySlack = 0;
        fix_start_cumul_to_zero = True
        pallets = "Pallets"
        routing.AddDimensionWithVehicleCapacity(demands_pal_callback,
                                                NullCapacitySlack,
                                                vehicles_pal,
                                                fix_start_cumul_to_zero,
                                                pallets)

        ############################# Adding weight dimension constraints.
        demands_kg_at_orders = CreateDemandKgCallback(demands_kg)
        demands_kg_callback = demands_kg_at_orders.DemandKg
        NullCapacitySlack = 0;
        fix_start_cumul_to_zero = True
        kilograms = "Kilograms"
        routing.AddDimensionWithVehicleCapacity(demands_kg_callback,
                                                NullCapacitySlack,
                                                vehicles_kg,
                                                fix_start_cumul_to_zero,
                                                kilograms)

        ############################# Adding kmlimit dimension constraints.
        # Distance is modeled as the cost callback with multiplier 1, capped
        # per vehicle by vehicles_maxkm.
        demands_kms_at_orders = CreateCostCallback(locations, 1)
        demands_kms_callback = demands_kms_at_orders.Cost
        NullCapacitySlack = 0;
        fix_start_cumul_to_zero = True
        kilometers = "Kilometers"
        routing.AddDimensionWithVehicleCapacity(demands_kms_callback,
                                                NullCapacitySlack,
                                                vehicles_maxkm,
                                                fix_start_cumul_to_zero,
                                                kilometers)

        ############################## Add time dimension.
        # The latest window close bounds both the slack and the horizon.
        day = max(end_times)
        time = "Time"
        travelllo_times = CreateTravelTimeCallback(travel_times)
        travel_time_callback = travelllo_times.TravelTime
        routing.AddDimension(travel_time_callback, day, day,
                             fix_start_cumul_to_zero, time)

        ############################# Add time window constraints.
        # Node 0 is the depot; windows apply to order nodes only.
        time_dimension = routing.GetDimensionOrDie(time)
        for location in range(1, num_locations):
            start = start_times[location]
            end = end_times[location]
            time_dimension.CumulVar(location).SetRange(start, end)

        ############################ Solve displays a solution if any.
        assignment = routing.SolveWithParameters(search_parameters)
        if assignment:
            size = len(locations)
            # Solution cost.
            # print("Total cost of all routes: " + str(assignment.ObjectiveValue()/1000) + "\n")
            result.append(assignment.ObjectiveValue()/1000)
            # Placeholder for the distance-based cost accumulated per route
            # in the inspection loop below (result[1] += ...).
            result.append(0)
            # Inspect solution.
            pallets_dimension = routing.GetDimensionOrDie(pallets)
            kilograms_dimension = routing.GetDimensionOrDie(kilograms)
            kilometers_dimension = routing.GetDimensionOrDie(kilometers)
            time_dimension = routing.GetDimensionOrDie(time)
            for vehicle_nbr in range(num_vehicles):
                index = routing.Start(vehicle_nbr)
                # plan_output = 'Vehicle {0}:'.format(vehicle_nbr)
                # Walk this vehicle's route; node indices are shifted by -1
                # so orders are 0-based in the output (depot becomes -1).
                while not routing.IsEnd(index):
                    node_index = routing.IndexToNode(index)
                    time_var = time_dimension.CumulVar(index)
                    tmp_orders.append([node_index-1,
                                       assignment.Min(time_var),
                                       assignment.Max(time_var)])
                    kilometers_var = kilometers_dimension.CumulVar(index)
                    time_var = time_dimension.CumulVar(index)
                    # Tracks the last visited (pre-end) node's cumulative
                    # distance/time for the route summary below.
                    h_route_dist = assignment.Value(kilometers_var) / 1000
                    h_route_time = assignment.Min(time_var)
                    # plan_output += " Order {node_index} Time({tmin}, {tmax}) -> ".format(
                    #    node_index=node_index,
                    #    tmin=str(assignment.Min(time_var)),
                    #    tmax=str(assignment.Max(time_var)))
                    index = assignment.Value(routing.NextVar(index))
                # End-of-route node: read the final cumulative values.
                node_index = routing.IndexToNode(index)
                pallets_var = pallets_dimension.CumulVar(index)
                kilograms_var = kilograms_dimension.CumulVar(index)
                kilometers_var = kilometers_dimension.CumulVar(index)
                time_var = time_dimension.CumulVar(index)
                # plan_output += " {node_index} Load({load}) Time({tmin}, {tmax})".format(
                #    node_index=node_index,
                #    load=assignment.Value(load_var),
                #    tmin=str(assignment.Min(time_var)),
                #    tmax=str(assignment.Max(time_var)))
                # print(plan_output)
                # print("\n")
                tmp_orders.append([node_index-1,
                                   assignment.Min(time_var),
                                   assignment.Max(time_var)])
                # Route summary: id, cost, distance (units /1000 and /100
                # presumably convert fixed-point values — TODO confirm
                # against data_parser's encoding).
                tmp_route.append(vehicle_nbr)
                tmp_route.append(assignment.Value(kilometers_var) / 1000 * vehicles_cost[vehicle_nbr])
                tmp_route.append(assignment.Value(kilometers_var) / 1000)
                tmp_route.append(assignment.Min(time_var))
                result[1] += h_route_dist * vehicles_cost[vehicle_nbr]
                tmp_route.append(h_route_dist * vehicles_cost[vehicle_nbr])
                tmp_route.append(h_route_dist)
                tmp_route.append(h_route_time)
                tmp_route.append(assignment.Value(pallets_var)/100)
                tmp_route.append(vehicles_pal[vehicle_nbr]/100)
                tmp_route.append(assignment.Value(pallets_var) / vehicles_pal[vehicle_nbr])
                tmp_route.append(assignment.Value(kilograms_var))
                tmp_route.append(vehicles_kg[vehicle_nbr])
                tmp_route.append(assignment.Value(kilograms_var) / vehicles_kg[vehicle_nbr])
                tmp_route.append(tmp_orders)
                # A route with only start/end entries served no orders;
                # skip it in the result.
                if len(tmp_orders) > 2:
                    result.append(tmp_route)
                tmp_orders = []
                tmp_route = []
            return result
        else:
            print(str(firsto_solutiono_strategeiro)+' No solution found.')
            return [float('inf')]
    else:
        print('Specify an instance greater than 0.')
def cv(model, datapath, savepath, num_folds=5, num_runs=200, X=None,
       Y="delta sigma"):
    """Repeated K-fold cross-validation; prints RMSE statistics and saves a
    best-fit/worst-fit scatter comparison.

    Args:
        model: regressor with fit/predict.
        datapath: csv readable by data_parser.
        savepath: format string receiving the figure name.
        num_folds, num_runs: CV geometry (num_runs independent shuffles).
        X: feature column names; built per call (the original used a
            mutable list default).
        Y: response column name.
    """
    if X is None:
        X = ["N(Cu)", "N(Ni)", "N(Mn)", "N(P)",
             "N(log(fluence)", "N(log(flux)", "N(Temp)"]
    # get data
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    Ydata = np.asarray(data.get_y_data()).ravel()
    Xdata = np.asarray(data.get_x_data())

    Y_predicted_best = []
    Y_predicted_worst = []
    # BUGFIX: the original seeded these with 1 and 100; if every run's mean
    # RMSE fell below 1 (or above 100) the best/worst predictions were
    # never captured.  +/-inf sentinels always update on the first run.
    maxRMS = float('-inf')
    minRMS = float('inf')
    RMS_List = []
    for n in range(num_runs):
        kf = cross_validation.KFold(len(Xdata), n_folds=num_folds,
                                    shuffle=True)
        K_fold_rms_list = []
        Overall_Y_Pred = np.zeros(len(Xdata))
        # split into testing and training sets
        for train_index, test_index in kf:
            X_train, X_test = Xdata[train_index], Xdata[test_index]
            Y_train, Y_test = Ydata[train_index], Ydata[test_index]
            # train on training sets
            model.fit(X_train, Y_train)
            Y_test_Pred = model.predict(X_test)
            K_fold_rms_list.append(
                np.sqrt(mean_squared_error(Y_test, Y_test_Pred)))
            Overall_Y_Pred[test_index] = Y_test_Pred
        run_rms = np.mean(K_fold_rms_list)
        RMS_List.append(run_rms)
        if run_rms > maxRMS:
            maxRMS = run_rms
            Y_predicted_worst = Overall_Y_Pred
        if run_rms < minRMS:
            minRMS = run_rms
            Y_predicted_best = Overall_Y_Pred

    avgRMS = np.mean(RMS_List)
    medRMS = np.median(RMS_List)
    sd = np.std(RMS_List)
    print("Using {}x {}-Fold CV: ".format(num_runs, num_folds))
    print("The average RMSE was {:.3f}".format(avgRMS))
    print("The median RMSE was {:.3f}".format(medRMS))
    print("The max RMSE was {:.3f}".format(maxRMS))
    print("The min RMSE was {:.3f}".format(minRMS))
    print("The std deviation of the RMSE values was {:.3f}".format(sd))

    # Side-by-side scatter of the best and worst CV runs.
    f, ax = plt.subplots(1, 2, figsize=(11, 5))
    ax[0].scatter(Ydata, Y_predicted_best, c='black', s=10)
    ax[0].plot(ax[0].get_ylim(), ax[0].get_ylim(), ls="--", c=".3")
    ax[0].set_title('Best Fit')
    ax[0].text(.1, .88, 'Min RMSE: {:.3f}'.format(minRMS),
               transform=ax[0].transAxes)
    ax[0].text(.1, .83, 'Mean RMSE: {:.3f}'.format(avgRMS),
               transform=ax[0].transAxes)
    ax[0].set_xlabel('Measured (Mpa)')
    ax[0].set_ylabel('Predicted (Mpa)')
    ax[1].scatter(Ydata, Y_predicted_worst, c='black', s=10)
    ax[1].plot(ax[1].get_ylim(), ax[1].get_ylim(), ls="--", c=".3")
    ax[1].set_title('Worst Fit')
    ax[1].text(.1, .88, 'Max RMSE: {:.3f}'.format(maxRMS),
               transform=ax[1].transAxes)
    ax[1].set_xlabel('Measured (Mpa)')
    ax[1].set_ylabel('Predicted (Mpa)')
    f.tight_layout()
    f.savefig(savepath.format("cv_best_worst"), dpi=200,
              bbox_inches='tight')
    plt.show()
    plt.close()
def main(): # Class Negative Data d1 = data_parser.parse('Datasets/Healthy Controls/MS_A_1.mzml') d2 = data_parser.parse('Datasets/Healthy Controls/MS_A_2.mzml') d3 = data_parser.parse('Datasets/Healthy Controls/MS_A_3.mzml') d4 = data_parser.parse('Datasets/Healthy Controls/MS_A_4.mzml') d5 = data_parser.parse('Datasets/Healthy Controls/MS_A_5.mzml') d6 = data_parser.parse('Datasets/Healthy Controls/MS_A_6.mzml') d7 = data_parser.parse('Datasets/Healthy Controls/MS_A_7.mzml') # Class Positive Data d8 = data_parser.parse('Datasets/PC Diagnosed/MS_B_1.mzml') d9 = data_parser.parse('Datasets/PC Diagnosed/MS_B_2.mzml') d10 = data_parser.parse('Datasets/PC Diagnosed/MS_B_3.mzml') d11 = data_parser.parse('Datasets/PC Diagnosed/MS_B_4.mzml') d12 = data_parser.parse('Datasets/PC Diagnosed/MS_B_5.mzml') d13 = data_parser.parse('Datasets/PC Diagnosed/MS_B_6.mzml') d14 = data_parser.parse('Datasets/PC Diagnosed/MS_B_7.mzml') full_data = d1 + d2 + d3 + d4 + d5 + d6 + d7 + d8 + d9 + d10 + d11 + d12 + d13 + d14 param = [] data = preprocessing.get_preprocessed_data(full_data, param) # train_test_model(data, param) cross_validate(data, param)
weights_hidden = np.array( weights.eval(session=sess)) # Weight values to the hidden layer weights_output = np.array(weights2.eval( session=sess)).transpose() # Weight values to the output layer effect = np.average( weights_hidden * weights_output, axis=1) # (hidden layer weight) * (output layer weight) print(effect) abs_effect = np.average(np.abs(weights_hidden * weights_output), axis=1) # absolute value of the overall weight return do_eval(sess, eval_loss, X_placeholder, Y_placeholder, data_test, predicted_value, effect, abs_effect) # Predicts featdat, dat, data = data_parser.parse("DBTT_Data19.csv") X = [ "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))" ] X_LWR = [ "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))" ] Y = "CD delta sigma" data.set_x_features(X) data.set_y_feature(Y) model = 300 lwr_datapath = "CD_LWR_clean7.csv" ##data.add_exclusive_filter("Alloy", '=', 29) ##data.add_exclusive_filter("Alloy", '=', 14)
def cv(model, datapath, savepath, num_folds=5, num_runs=200, X=None,
       Y="delta sigma"):
    """Repeated K-fold cross-validation; prints RMSE statistics and saves a
    best-fit/worst-fit scatter comparison.

    X defaults are built per call (the original used a mutable list
    default); min/max trackers start at +/-inf so the first run always
    initializes the best/worst predictions (the original's 1/100 sentinels
    could leave them empty).
    """
    if X is None:
        X = ["N(Cu)", "N(Ni)", "N(Mn)", "N(P)",
             "N(log(fluence)", "N(log(flux)", "N(Temp)"]
    # get data
    data = data_parser.parse(datapath)
    data.set_x_features(X)
    data.set_y_feature(Y)
    Ydata = np.asarray(data.get_y_data()).ravel()
    Xdata = np.asarray(data.get_x_data())

    Y_predicted_best = []
    Y_predicted_worst = []
    maxRMS = float('-inf')
    minRMS = float('inf')
    RMS_List = []
    for n in range(num_runs):
        kf = cross_validation.KFold(len(Xdata), n_folds=num_folds,
                                    shuffle=True)
        K_fold_rms_list = []
        Overall_Y_Pred = np.zeros(len(Xdata))
        # split into testing and training sets
        for train_index, test_index in kf:
            X_train, X_test = Xdata[train_index], Xdata[test_index]
            Y_train, Y_test = Ydata[train_index], Ydata[test_index]
            # train on training sets
            model.fit(X_train, Y_train)
            Y_test_Pred = model.predict(X_test)
            K_fold_rms_list.append(
                np.sqrt(mean_squared_error(Y_test, Y_test_Pred)))
            Overall_Y_Pred[test_index] = Y_test_Pred
        run_rms = np.mean(K_fold_rms_list)
        RMS_List.append(run_rms)
        if run_rms > maxRMS:
            maxRMS = run_rms
            Y_predicted_worst = Overall_Y_Pred
        if run_rms < minRMS:
            minRMS = run_rms
            Y_predicted_best = Overall_Y_Pred

    avgRMS = np.mean(RMS_List)
    medRMS = np.median(RMS_List)
    sd = np.std(RMS_List)
    print("Using {}x {}-Fold CV: ".format(num_runs, num_folds))
    print("The average RMSE was {:.3f}".format(avgRMS))
    print("The median RMSE was {:.3f}".format(medRMS))
    print("The max RMSE was {:.3f}".format(maxRMS))
    print("The min RMSE was {:.3f}".format(minRMS))
    print("The std deviation of the RMSE values was {:.3f}".format(sd))

    # Side-by-side scatter of the best and worst CV runs.
    f, ax = plt.subplots(1, 2, figsize=(11, 5))
    ax[0].scatter(Ydata, Y_predicted_best, c='black', s=10)
    ax[0].plot(ax[0].get_ylim(), ax[0].get_ylim(), ls="--", c=".3")
    ax[0].set_title('Best Fit')
    ax[0].text(.1, .88, 'Min RMSE: {:.3f}'.format(minRMS),
               transform=ax[0].transAxes)
    ax[0].text(.1, .83, 'Mean RMSE: {:.3f}'.format(avgRMS),
               transform=ax[0].transAxes)
    ax[0].set_xlabel('Measured (Mpa)')
    ax[0].set_ylabel('Predicted (Mpa)')
    ax[1].scatter(Ydata, Y_predicted_worst, c='black', s=10)
    ax[1].plot(ax[1].get_ylim(), ax[1].get_ylim(), ls="--", c=".3")
    ax[1].set_title('Worst Fit')
    ax[1].text(.1, .88, 'Max RMSE: {:.3f}'.format(maxRMS),
               transform=ax[1].transAxes)
    ax[1].set_xlabel('Measured (Mpa)')
    ax[1].set_ylabel('Predicted (Mpa)')
    f.tight_layout()
    f.savefig(savepath.format("cv_best_worst"), dpi=200,
              bbox_inches='tight')
    plt.show()
    plt.close()
from data_parser import parse if __name__ == "__main__": #x=parse("/media/asdazey/PSCSTA/Judge/",True) x = parse("/home/asdazey/Desktop/PSCSTA/aiden/repeat/repeat.in", True) print(x)
from data_parser import parse def find(n): n = int(n) last = 0 current = 1 for i in range(1000000): last = current current += i if current > n: return [(i - 1 - (n - last)), (n - last)] if __name__ == "__main__": #x=parse("/home/asdazey/Desktop/PSCSTA/logan/it/infinite.in") x = parse("/media/asdazey/PSCSTA/Judge/infinite.in") #print(x) for i in range(int(x[0]) + 1): cord = find(x[i + 1]) #print(x[i+1]) #print(cord) if cord[0] == 0 and cord[1] == 0: print(2) elif cord[0] == 0 or cord[1] == 0: print(3) else: print(4)
weights_hidden = np.array( weights.eval(session=sess)) # Weight values to the hidden layer weights_output = np.array(weights2.eval( session=sess)).transpose() # Weight values to the output layer effect = np.average( weights_hidden * weights_output, axis=1) # (hidden layer weight) * (output layer weight) print(effect) abs_effect = np.average(np.abs(weights_hidden * weights_output), axis=1) # absolute value of the overall weight return do_eval(sess, eval_loss, X_placeholder, Y_placeholder, data_test, predicted_value, effect, abs_effect) # Predicts featdat, dat, data = data_parser.parse("DBTT_Data19.csv") X = [ "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))" ] X_LWR = [ "N(Cu)", "N(Ni)", "N(Mn)", "N(P)", "N(Si)", "N( C )", "N(log(eff fluence))" ] Y = "delta sigma" data.set_x_features(X) data.set_y_feature(Y) lwr_datapath = "CD_LWR_clean7.csv" ##data.add_exclusive_filter("Alloy", '=', 29) ##data.add_exclusive_filter("Alloy", '=', 14) ##data.add_exclusive_filter("Temp (C)", '<>', 290)
__author__ = 'haotian' import data_parser as dp data = dp.parse('../DBTT/DBTT_Data14.5.csv') data.normalization( ['Cu (At%)', 'Ni (At%)', 'Mn (At%)', 'P (At%)','Si (At%)','C (At%)'], normalization_type='t') data.normalization(['log(fluence)','log(eff fluence)','log(flux)','Temp (C)','log(time)']) data.std_normalization(['delta sigma', 'EONY predicted', 'CD predicted (Mpa)']) data.output('../DBTT/DBTT_Data15.csv')