def verify():
    """Run the full log-load pipeline, bracketing it with log markers.

    Executes, in order: mail download, model extraction, and data load,
    then records completion.
    """
    write_log('$$ Log Load Started $$')
    # Pipeline stages, executed strictly in this order.
    for stage in (mail_downloader, model_extract, data_load):
        stage()
    write_log('Log Sent')
    write_log('$$ Log Load Ended $$')
def run():
    """Run the TF-IDF build pipeline, creating each artifact only if its
    output file does not already exist.

    Creates the ``./res`` output directory when missing, then walks the
    stages: raw data load -> content clean -> stop-word filter ->
    stemming -> term list -> IDF -> TF-IDF.

    :return: None
    """
    # Create the output directory if it does not exist yet.
    if not os.path.exists('./res'):
        os.makedirs('res')
    config = get_config()
    # BUG FIX: the original condition was
    #   os.path.exists(config['title'] or not os.path.exists(config['content']))
    # i.e. a misplaced parenthesis made it test the truthiness of the
    # config VALUE rather than the existence of each path, and silently
    # skipped the content-path check. Each path is now tested on its own.
    if (not os.path.exists(config['url'])
            or not os.path.exists(config['title'])
            or not os.path.exists(config['content'])):
        data_load(config)
    if not os.path.exists(config['content_clean']):
        data_clean_content(config)
    if not os.path.exists(config['content_filter']):
        filter_stop_word(config)
    if not os.path.exists(config['content_stemming']):
        stemming(config)
    if not os.path.exists(config['term_list']):
        create_term_list(config)
    documents = get_content(config)
    tf_documents = get_tf(documents)
    if not os.path.exists(config['idf']):
        create_idf(config, documents)
    idf_documents = get_idf(config)
    if not os.path.exists(config['tf_idf']):
        create_tf_idf(config, tf_documents, idf_documents, documents)
def main():
    """Load the dataset, split it 80/20 by position, then train and
    evaluate the estimator once.

    Reads ``args.file_path`` / ``args.nrows`` from the module-level
    ``args`` namespace.
    """
    df = data_load(args.file_path, args.nrows)
    # Positional 80/20 train/validation split (no shuffling).
    num = df.shape[0] * 4 // 5
    train_df = df[:num]
    valid_df = df[num:]
    estimator = get_estimator()
    # The original wrapped the train/evaluate pair in `for _ in range(1):`,
    # which ran exactly once; the no-op loop is removed.
    logging.info('==== Start to train ===>')
    estimator.train(input_fn=lambda: read_csv_data(train_df.values))
    logging.info('==== Start to evaluate ===>')
    estimator.evaluate(input_fn=lambda: read_csv_data(valid_df.values))
def main():
    """Driver for the ttW-vs-ttbar NN classifier workflow.

    Dispatches on the module-level ``process_type`` flag:
      * ``'plot'``  -- plot input variables only.
      * ``'apply'`` -- reload the trained model/scaler and attach a
        ``'score'`` column to every sample DataFrame, then plot scans.
      * ``'read'`` / ``'train'`` -- build train/test splits, then train a
        new model or reload the saved one, and evaluate/report it.

    NOTE(review): relies on module-level names (``sample_list``,
    ``doclean``, ``process_type``, ``debug``, ``samples``, and the
    ``dl``/``hp``/``md``/``pl`` helper modules) -- confirm against the
    rest of the file.
    """
    print("load data")
    # dfs: dict keyed by sample name ('ttW', 'ttbar', ...) -> DataFrame
    # (presumably; verify dl.data_load's return against its definition).
    dfs = dl.data_load(sample_list, doclean)
    print(dfs['ttW'].columns)
    if process_type == 'plot':
        do_plot(dfs, sample_list)
    elif process_type == 'apply':
        print("apply mode")
        # Reload the trained network plus the fitted scaler and the
        # training-time variable list so inference matches training.
        model = load_model('Outputs/training/model_nn_v0.h5')
        with open('Outputs/training/scaler.pickle', 'rb') as f:
            sc = pickle.load(f)
        with open('Outputs/training/vl.pickle', 'rb') as f:
            vl = pickle.load(f)
        #def model_create_feature():
        var_list = dl.sel_vars()
        cat_list = ['l0_id', 'l1_id', 'dileptype']  #,'mjjctag'
        for s in sample_list:
            #print(dfs[s].columns)
            df_trans = dfs[s][var_list]
            # NOTE(review): the line below immediately overwrites the
            # var_list selection above with the categorical-encoded full
            # frame -- confirm the preceding assignment is intentional.
            df_trans = hp.val_to_cat(dfs[s], cat_list)
            print("df_trans columns: ", df_trans.columns)
            #df_trans = dfs[s][var_list]
            #print("before:\n",df_trans.head())
            # Reorder columns to the training-time list, then scale.
            df_trans = pd.DataFrame(df_trans, columns=vl)
            df_trans = sc.transform(df_trans)
            print("after transformation:\n", df_trans[:5])
            predictScore = model.predict(df_trans)
            # Seed the new column from an existing one (gets the right
            # index), then overwrite it with the NN score.
            dfs[s].loc[:, 'score'] = dfs[s].loc[:, 'Njets']
            dfs[s].loc[:, 'score'] = predictScore
            #print("with score:",dfs[s].head())
        # Scan thresholds 0.0 .. 0.9 and plot at each cut value.
        for i in range(0, 10):
            print(i / 10)
            #pl.plot_var(dfs,sample_list,'mjj',True,i/10)
            #pl.plot_var(dfs,sample_list,'Njets',True,i/10)
            #pl.plot_var(dfs,sample_list,'score',True,i/10)
            pl.plot_var(dfs, sample_list, 'ctaglj0', False, i / 10)
            #pl.plot_var(dfs,sample_list,'mjj',False,i/10)
    elif process_type == 'read' or process_type == 'train':
        if dfs:
            print("prepare for training:")
            print(" - transform input :")
            cat_list = ['l0_id', 'l1_id', 'dileptype']  #,'mjjctag'
            # Non-categorical columns = everything else in the frame.
            noncat_list = list(set(dfs['ttW'].columns) - set(cat_list))
            #for s in sample_list:
            #    dfs[s]=hp.val_to_cat(dfs[s],cat_list)
            #    dfs[s][noncat_list]=hp.norm_gev(dfs[s][noncat_list])
            #var_list= list(dfs['ttW'].columns)
            #print(var_list)
            #print(dfs['ttW'].head())
            print(" - split samples to train/test features/targets :")
            #X_train, X_test, y_train, y_test, w_train, w_test = hp.pred_ds(dfs)
            X_train, X_test, y_train, y_test, cl_weight, var_list = hp.pred_ds(
                dfs, cat_list, noncat_list)
            #var_list=
            # NOTE(review): statement below discards its result -- looks
            # like the remnant of the commented-out assignment above.
            list(X_train.columns)
            print(type(X_train), " <- type X_train")
            print(type(y_train), " <- type y_train")
            #fl = md.create_feat(noncat_list)
            #print(X_test[:3])
            # Training hyper-parameters.
            learning_rate = 0.001
            nepochs = 500
            batch_size = 512
            validation_split = 0.2
            if debug:
                # Dry-run mode: stop before any training/evaluation.
                return 0
            if process_type == 'train':
                model = md.create_model(learning_rate, var_list)
                epochs, hist = md.train_model(
                    model,
                    X_train,
                    y_train,
                    # w_train,
                    cl_weight,
                    nepochs,
                    batch_size,
                    validation_split)
                # Loss and accuracy curves, train vs. validation.
                list_of_metrics_to_plot = ['loss', 'val_loss']
                hp.plot_curve(epochs, hist, list_of_metrics_to_plot)
                list_of_metrics_to_plot = ['acc', 'val_acc']
                hp.plot_curve(epochs, hist, list_of_metrics_to_plot)
                print("\n Train set:")
                score_tr = model.evaluate(X_train, y_train,
                                          batch_size=batch_size)
                print(score_tr)
                print("\n Evaluate the new model against the test set:")
                score = model.evaluate(X_test, y_test, batch_size=batch_size)
                print(score)
                model.save('Outputs/training/model_nn_v0.h5')
            elif process_type == 'read':
                model = load_model('Outputs/training/model_nn_v0.h5')
            if model and (process_type != 'apply'):
                # X_test=X_test.to_numpy()
                # y_test=y_test.to_numpy()
                # X_train=X_train.to_numpy()
                # y_train=y_train.to_numpy()
                testPredict = model.predict(X_test)
                xt_p = {}
                print(X_test, y_test)
                # Split test set into signal/background by label.
                x_sig, x_bkg = hp.sig_bkg_ds_separate(X_test, y_test)
                print("predict xsig")
                xt_p['ttW'] = model.predict(x_sig)
                xt_p['ttbar'] = model.predict(x_bkg)
                # Same separation on the training set for overlay plots.
                x_p = {}
                x_sig, x_bkg = hp.sig_bkg_ds_separate(X_train, y_train)
                x_p['ttW'] = model.predict(x_sig)
                x_p['ttbar'] = model.predict(x_bkg)
                # 80 uniform bins on [0, 1] for the score histograms.
                bins = [i / 80 for i in range(80)]
                bins.append(1.)
                # Overlay predicted (test) vs. train score distributions.
                plt.figure("response")
                for i in sample_list:
                    plt.hist(xt_p[i],
                             bins,
                             alpha=0.5,
                             label=i + ' Predict',
                             density=True,
                             color=samples[i]['color'])
                    plt.hist(x_p[i],
                             bins,
                             alpha=1,
                             label=i + ' Train',
                             density=True,
                             color=samples[i]['color'],
                             histtype='step')
                plt.legend(loc="upper right")
                plt.savefig("Outputs/training/classPred_NN_ttw_ttbar.png",
                            transparent=True)
                plt.close("response")
                print(
                    classification_report(y_test,
                                          testPredict.round(),
                                          target_names=["ttbar", "ttW"]))
                #
                auc = roc_auc_score(y_test, testPredict)
                print("Area under ROC curve: %.4f" % (auc))
                hp.get_roc(y_test, testPredict)
                # Dump hyper-parameters and metrics to a text summary.
                print_summary = True
                if print_summary:
                    with open("Outputs/training/summary.txt", "w") as f:
                        f.write("Parameters:\n")
                        #f.write(" classifier_model: {}\n".format(model.get_config()))
                        f.write(
                            "LR {}, epochs {}, batch_size {}, VS {} \n".format(
                                learning_rate, nepochs, batch_size,
                                validation_split))
                        f.write(": {}\n".format(
                            classification_report(
                                y_test,
                                testPredict.round(),
                                target_names=["signal", "background"])))
                        f.write("\nAUC:{}\n".format(auc))
                        f.write("model.summary() :{}\n".format(
                            model.summary()))
# else: # q = np.arccos(q[0]) * q[1:] / np.linalg.norm(q[1:]) # return q vo_stats = {} #seq_num = [1, 2, 3, 4] vo_stats[seq] = {'R': np.eye(3), 't': np.zeros(3), 's': 1} mean_t = np.zeros(3) # optionally, use the ps dictionary to calc stats std_t = np.ones(3) train_img, train_pose = data_load(data_dir, img_width, img_height, seq_num, mean_t=mean_t, std_t=std_t, align_R=vo_stats[seq]['R'], align_t=vo_stats[seq]['t'], align_s=vo_stats[seq]['s']) a = np.empty([0, 6]) b = [1, 2, 3, 4, 5, 6] a = np.append(a, np.array([b]), axis=0) a = np.append(a, np.array([b]), axis=0) train_img = np.array(train_img).astype('float32') train_pose = np.array(train_pose).astype('float32') print(train_img.shape)
import numpy as np
from data_loader import data_load
from model import Model

# Load the (image, label) pair lists for both splits.
train, test = data_load()
m = train.shape[0]
pix = train[1][0].shape[1]

# Separate each split into parallel feature/label arrays.
train_x = np.asarray([sample[0] for sample in train])
train_y = np.asarray([sample[1] for sample in train])
test_x = np.asarray([sample[0] for sample in test])
test_y = np.asarray([sample[1] for sample in test])

# Flatten every image into a column (features x examples) and rescale
# pixel intensities from [0, 255] into [0, 1].
train_x = train_x.reshape(train_x.shape[0], -1).T / 255
test_x = test_x.reshape(test_x.shape[0], -1).T / 255