def check_support_data_batch_performance(train_op, exclude_list, final_train_fea, data_merged, support_size, train_steps, aug_rate):
    # Train on the next incremental batch, optionally rehearsing a small
    # "support set" selected from previously seen data, then report accuracy
    # over everything seen so far.
    #
    # Args (as used below; confirm against callers):
    #   train_op:        TF training op to run.
    #   exclude_list:    class indices removed from the global `data_all`
    #                    when building the new batch.
    #   final_train_fea: features of the previous training set, used by the
    #                    SVM-based support-sample selection.
    #   data_merged:     accumulated dataset of everything seen so far.
    #   support_size:    number of rehearsal samples; 0 disables rehearsal.
    #   train_steps:     iterations forwarded to train_model.
    #   aug_rate:        augmentation multiplier for the support set.
    #
    # Returns: (features of the merged training set, the merged dataset) for
    # chaining into the next incremental step.
    #
    # NOTE(review): relies on the module-level global `data_all`. A negative
    # support_size falls through both branches and leaves `used_data`
    # undefined (NameError) -- confirm callers never pass one.
    if support_size > 0:
        # construct the support data for the first and second batch
        support_data_index = select_support_data_svm(final_train_fea, data_merged[0], support_size)
        support_data = get_support_data(data_merged, support_data_index)
        # load the second batch data
        data_2 = exclude_data(data_all, exclude_list)
        data_2 = append_old_feature_new_data(data_2)
        data_merged = merge_data(support_data, data_2)
        # train on the augmented support samples plus the new batch
        used_data = merge_data(augmentation(support_data, aug_rate), data_2)
    if support_size == 0:
        # no rehearsal: train on the new batch alone and fold it into the
        # accumulated dataset for evaluation
        used_data = exclude_data(data_all, exclude_list)
        used_data = append_old_feature_new_data(used_data)
        data_merged = merge_data(used_data, data_merged)
    train_model(used_data, train_op, train_steps)
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_merged)
    # data_merged[5] = test labels, data_merged[0] = train labels (10-tuple
    # dataset layout used throughout this module)
    evaluate_model(data_merged[5], np.array(predict_label))
    evaluate_model(data_merged[0], np.array(predict_label_train))
    return final_train_fea, data_merged
def check_fix_rep_batch_performance(train_op, exclude_list, data_merged):
    """Train on the newly revealed classes (fixed-representation setting) and
    report accuracy over the merged old-plus-new dataset.

    Relies on the module-level globals ``data_all`` and ``train_steps``.
    """
    # Build the fresh batch by dropping the excluded class indices.
    new_batch = exclude_data(data_all, exclude_list)
    data_merged = merge_data(new_batch, data_merged)
    # Fit only on the fresh batch; evaluation covers everything seen so far.
    train_model(new_batch, train_op, train_steps)
    train_pred, test_pred, _train_fea, _test_fea = whole_set_check(data_merged)
    # Index 5 holds test labels, index 0 holds train labels in the dataset
    # tuple layout used by this module.
    evaluate_model(data_merged[5], np.array(test_pred))
    evaluate_model(data_merged[0], np.array(train_pred))
def check_feature_representation(f_size, f_lam, s_size):
    """Dump the learned feature representation of every checkpoint of an
    incremental-training run, for later t-SNE visualisation.

    For each saved stage (class 2 through 6) the checkpoint is restored, the
    initial two-class dataset is pushed through the network, accuracy is
    reported, and the resulting training features are collected.

    Args:
        f_size: fisher-regularizer size encoded in the checkpoint file name.
        f_lam:  fisher-regularizer lambda encoded in the checkpoint file name.
        s_size: support-set size encoded in the checkpoint file name.

    Side effects:
        Writes ``for_tsne_<f_lam>.pickle`` holding
        ``[train_labels, features_stage2, ..., features_stage6]``.
    """
    # The same two-class dataset probes every checkpoint.
    data_1 = exclude_data(data_all, range(2, 6))
    feature_list = [data_1[0]]  # first entry: the ground-truth train labels
    # The original code duplicated the stage-2 restore/check before the loop;
    # folding it into the loop removes the copy-paste.
    for s_class in range(2, 7):
        print('load model: fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(
            f_size, f_lam, s_size, s_class))
        saver_load.restore(
            sess, '../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(
                f_size, f_lam, s_size, s_class))
        predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
            data_1)
        evaluate_model(data_1[5], np.array(predict_label))
        evaluate_model(data_1[0], np.array(predict_label_train))
        feature_list.append(final_train_fea)
    # BUG FIX: pickle streams must be written in binary mode; text mode
    # corrupts the stream on Windows and breaks binary pickle protocols.
    with open('for_tsne_{}.pickle'.format(f_lam), 'wb') as f:
        cPickle.dump(feature_list, f)
def restart_from_ckpt(f_size, f_lam, s_size, s_class):
    # Resume an interrupted EWC + support-set run from a saved checkpoint and
    # perform the next incremental class step.
    #
    # NOTE(review): a later function in this module is also named
    # `restart_from_ckpt` (single-argument version); the definition that
    # appears last shadows the other at import time -- rename one of them.
    if s_class==1:
        # fresh start: train the base model on the first two classes only
        data_1 = exclude_data(data_all, range(2,6))
        train_model(data_1, train_op, train_steps)
        # check performance
        predict_label_train,predict_label,final_train_fea,final_test_fea=whole_set_check(data_1)
        evaluate_model(data_1[5],np.array(predict_label))
        evaluate_model(data_1[0],np.array(predict_label_train))
        # NOTE(review): this branch neither continues training nor saves a
        # checkpoint -- confirm that is intended.
    if s_class>=2:
        print('load model: fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size, f_lam, s_size, s_class))
        saver_load.restore(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size, f_lam, s_size, s_class))
        data_1 = exclude_data(data_all, range(s_class,6))
        predict_label_train,predict_label,final_train_fea,final_test_fea=whole_set_check(data_1)
        evaluate_model(data_1[5],np.array(predict_label))
        evaluate_model(data_1[0],np.array(predict_label_train))
        # continue
        # NOTE: `range(...) + range(...)` is Python 2 only (ranges do not
        # support + on Python 3).
        new_step = set_fisher_regularizer(f_lam, data_1, f_size)
        train_loss, data_merged = check_support_data_batch_performance(new_step, range(s_class)+range(s_class+1,6), final_train_fea, data_1, s_size, train_steps, 2)
        saver_load.save(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size, f_lam, s_size, s_class+1))
def train_model(data, train_op, train_steps):
    # Run the TensorFlow training loop for `train_steps` mini-batch steps,
    # logging summaries and periodically checking accuracy.
    #
    # `data` follows the 10-tuple dataset layout used throughout this module:
    #   [0] train labels            [5] test labels
    #   [1] train one-hot labels    [6] test one-hot labels
    #   [2] train PSSM features     [7] test PSSM features
    #   [3] train encodings         [8] test encodings
    #   [4] train domain encodings  [9] test domain encodings
    #
    # Relies on module-level globals: sess, merged, predicted_label,
    # cross_entropy, y_conv, accuracy, pssm, encoding, domain, y_, keep_prob,
    # batch_size, output_step, test_writer, train_writer,
    # generate_feeding_batch, whole_set_check_simple, evaluate_model.
    train_label = data[0]
    train_label_categorical = data[1]
    train_pssm_full_length = data[2]
    train_encoding_full_length = data[3]
    train_functional_domain_encoding =data[4]
    test_label = data[5]
    test_label_categorical = data[6]
    test_pssm_full_length = data[7]
    test_encoding_full_length = data[8]
    test_functional_domain_encoding = data[9]
    for i in range(train_steps):
        batch=generate_feeding_batch(train_pssm_full_length,train_encoding_full_length, train_functional_domain_encoding,train_label_categorical,batch_size)
        if i%output_step == 0:
            # evaluation pass on the current training batch (keep_prob=1.0:
            # dropout disabled for evaluation)
            summary,predicted_label_output,cross_entropy_output,y_conv_output,acc = sess.run([ merged, predicted_label,cross_entropy,y_conv,accuracy],feed_dict={pssm: batch[0], encoding: batch[1], domain: batch[2], y_: batch[3], keep_prob: 1.0})
            print("step %d, training accuracy %g"%(i, acc))
            # evaluation pass on a fresh test batch
            batch_test=generate_feeding_batch(test_pssm_full_length,test_encoding_full_length, test_functional_domain_encoding,test_label_categorical,batch_size)
            summary,acc= sess.run([merged,accuracy],feed_dict={ pssm: batch_test[0], encoding: batch_test[1], domain: batch_test[2], y_: batch_test[3], keep_prob: 1.0})
            test_writer.add_summary(summary,i)
            print("step %d, test accuracy %g"%(i, acc))
            print('cross_entropy: %g'%cross_entropy_output)
            print(predicted_label_output)
        if i%1000==0 and i!=0:
            # periodic full-test-set check (truncated to the number of
            # predictions whole_set_check_simple actually produced)
            print('Step %d whole set check'%i)
            predict_label,_=whole_set_check_simple(data)
            evaluate_model(test_label[:len(predict_label)],np.array(predict_label))
        # if i%2000==0:
        #     save_path=saver.save(sess,'./model_level_'+str(level)+'.ckpt')
        # the actual gradient step (keep_prob=0.5: dropout enabled)
        summary,cross_entropy_output,_ = sess.run([merged,cross_entropy,train_op],feed_dict={ pssm: batch[0], encoding: batch[1], domain: batch[2], y_: batch[3], keep_prob: 0.5})
        train_writer.add_summary(summary,i)
def test_evaluate_model_with_empty_list(self) -> None:  # pylint: disable=R0201
    """ Test evaluate_model() with no data. """
    # (docstring fix: this test exercises evaluate_model, not train_model)
    # Given: a placeholder model and an empty validation set
    model: Model = "TODO"
    validate_files: List[pd.DataFrame] = []
    # When
    metrics: Mapping[str, Any] = evaluate_model(model=model, samples=validate_files)
    # Then
    self.assertGreater(metrics['auc'], 0.8)
def run_iteration(conf, ds, datasets_bin, sanity):
    """
    Run one self-training (pseudo-labelling) iteration.

    Trains a fresh model on ds["train"], evaluates it, pseudo-labels the
    unlabelled pool ds["unlab"], folds the most confident predictions back
    into the per-class bins, and shrinks the unlabelled pool accordingly.

    Args:
        conf: configuration dict (num_epochs, steps, verbosity,
            class_limit, ...).
        ds: dict of datasets with keys "train", "test", "unlab".
        datasets_bin: per-class sample bins extended with new pseudo-labels.
        sanity: bookkeeping state passed through update_sanity().

    NOTE(review): the reassigned `datasets_bin`, `sanity` and `conf` are not
    returned, so those updates are lost unless the callees mutate their
    arguments in place -- confirm against the callers.
    """
    model = create_model(conf)
    callbacks = create_callbacks(conf)
    class_weights = get_class_weights(ds["train"], conf)
    start_time = time.time()
    history = model.fit(
        ds["train"],
        steps_per_epoch = conf["steps"]["train"],
        epochs = conf["num_epochs"],
        validation_data = ds["test"],
        validation_steps = conf["steps"]["test"],
        validation_freq = 1,
        class_weight = class_weights,
        callbacks = callbacks,
        verbose = 1
    )
    if conf["verbosity"]:
        # NOTE(review): np.round is applied to seconds *before* dividing by
        # 60; the printed value is correct to ~1/60 min but the rounding is
        # redundant given the {:.2f} format.
        print ("Time spent on training: {:.2f} minutes.".format(np.round(time.time() - start_time)/60))
    evaluate_model(model, history, ds, conf)
    count = {"findings": 0, "total": 0}
    pseudo = {"pred_list": [], "lab_list": [], "name_list": []}
    pseudo, count = generate_labels(pseudo, count, ds["unlab"], model, conf)
    # Sort in order of highest confidence to lowest
    pseudo_sorted = custom_sort(pseudo)
    checkout_findings(pseudo_sorted, conf, show=False)
    datasets_bin, added_samples = resample_and_combine(ds, conf, pseudo, pseudo_sorted, datasets_bin, limit=conf["class_limit"])
    # Update unlab_ds
    ds["unlab"] = reduce_dataset(ds["unlab"], remove=added_samples)
    sanity, conf = update_sanity(sanity, len(added_samples), datasets_bin, conf)
def support_with_ewc(f_size, f_lam, s_size):
    """Full EWC + support-set incremental training run over classes 2..6.

    Trains a base model on the first two classes, then adds one class at a
    time with a Fisher (EWC) regularizer and a rehearsal support set, saving
    a checkpoint after every stage.

    Args:
        f_size: fisher-regularizer size (also encoded in checkpoint names).
        f_lam:  fisher-regularizer lambda (also encoded in checkpoint names).
        s_size: support-set size passed to the incremental step.
    """
    # get partial data: start with the first two classes only
    data_1 = exclude_data(data_all, range(2, 6))
    train_model(data_1, train_op, train_steps)
    # check performance of the base model
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(data_1)
    evaluate_model(data_1[5], np.array(predict_label))
    evaluate_model(data_1[0], np.array(predict_label_train))
    # Incrementally add classes 2..5, saving checkpoints named class 3..6.
    # `features` carries the training-set features used to select support
    # samples for the next stage (the original code misleadingly named this
    # `train_loss`). The original four copy-pasted stanzas are one loop.
    features, data_merged = final_train_fea, data_1
    for s_class in range(2, 6):
        new_step = set_fisher_regularizer(f_lam, data_merged, f_size)
        # list(range(...)) + list(range(...)) keeps the original Python-2
        # `range + range` semantics while also working on Python 3.
        exclude_list = list(range(s_class)) + list(range(s_class + 1, 6))
        features, data_merged = check_support_data_batch_performance(
            new_step, exclude_list, features, data_merged, s_size, train_steps, 2)
        saver_load.save(sess, '../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(
            f_size, f_lam, s_size, s_class + 1))
def restart_from_ckpt(s_class):
    """Resume the fixed-representation incremental run at class stage
    ``s_class`` and perform the next step.

    NOTE(review): this collides with the 4-argument ``restart_from_ckpt``
    defined earlier in this module -- the definition that appears last
    shadows the other; one of them should be renamed.

    Args:
        s_class: 1 to re-check the pretrained base model; >= 2 to restore
            ``fix_rep_class_<s_class>.ckpt`` and train on the next class.
    """
    if s_class == 1:
        data_1 = exclude_data(data_all, range(2, 6))
        saver_load.restore(
            sess, '../model/fsize_100_flam_1000_ssize_2000_class_2.ckpt')
        # check performance of the restored base model
        predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
            data_1)
        evaluate_model(data_1[5], np.array(predict_label))
        evaluate_model(data_1[0], np.array(predict_label_train))
    if s_class >= 2:
        print('load model: fix_rep_class_{}.ckpt'.format(s_class))
        saver_load.restore(sess, '../model/fix_rep_class_{}.ckpt'.format(s_class))
        data_1 = exclude_data(data_all, range(s_class, 6))
        # FIX: list(range(...)) + list(range(...)) keeps the Python-2
        # `range + range` behavior while remaining valid on Python 3.
        exclude_list = list(range(s_class)) + list(range(s_class + 1, 6))
        check_fix_rep_batch_performance(train_op, exclude_list, data_1)
        saver_load.save(sess, '../model/fix_rep_class_{}.ckpt'.format(s_class + 1))
def test_all_pipeline_with_none(self) -> None:  # pylint: disable=R0201
    """ Test the prepare/build/train/evaluate pipeline with minimal data. """
    # Given
    # BUG FIX: pandas rejects set literals in the DataFrame constructor
    # ("Set type is unordered"); a list keeps the fixture deterministic.
    input_raw: pd.DataFrame = pd.DataFrame(data=[1, 2, 3])
    train_inputs: List[pd.DataFrame] = []
    validate_files: List[str] = []
    # When
    train_inputs.append(build_features(prepare_dataset(input_raw=input_raw)))
    model: Model = train_model(inputs=train_inputs, epochs=1, batch_size=1)
    metrics: Mapping[str, Any] = evaluate_model(model, validate_files)
    # Then
    self.assertGreater(metrics['auc'], 0.8)
def incremental_all_data():
    """Naive incremental-learning baseline: add one class at a time and
    retrain on ALL data seen so far (full rehearsal, no regularizer),
    reporting test/train accuracy after every stage.

    Relies on module-level globals `data_all`, `train_op`, `train_steps`.
    The original five copy-pasted train/evaluate stanzas are one loop.
    """
    def _check(data):
        # Forward pass over the whole set; report test then train accuracy.
        predict_label_train, predict_label, _train_fea, _test_fea = whole_set_check(data)
        evaluate_model(data[5], np.array(predict_label))
        evaluate_model(data[0], np.array(predict_label_train))

    # get partial data: start with classes 0-1 only
    data_merged = exclude_data(data_all, list(range(2, 6)))
    data_merged = append_old_feature_new_data(data_merged)
    train_model(data_merged, train_op, train_steps)
    _check(data_merged)
    # add one further class per stage, retraining on everything accumulated
    for new_class in range(2, 6):
        # list(range(...)) + list(range(...)) keeps the original Python-2
        # `range + range` semantics while also working on Python 3
        exclude_list = list(range(new_class)) + list(range(new_class + 1, 6))
        data_2 = exclude_data(data_all, exclude_list)
        data_2 = append_old_feature_new_data(data_2)
        data_merged = merge_data(data_merged, data_2)
        train_model(data_merged, train_op, train_steps)
        _check(data_merged)
def perform_icarl(examplar_size, s_class):
    """One iCaRL incremental step: restore the previous stage, build an
    exemplar (rehearsal) set, train on the new class plus exemplars, report
    both classifier and nearest-mean-of-exemplars (NME) accuracy, and save
    the checkpoint for the next stage.

    Args:
        examplar_size: number of exemplars kept per stage (encoded in the
            checkpoint file names).
        s_class: current stage; 1 bootstraps from the EWC baseline
            checkpoint, otherwise the previous iCaRL checkpoint is restored.
    """
    if s_class == 1:
        # first step bootstraps from the EWC baseline checkpoint
        saver_load.restore(sess, '../model/fsize_10_flam_0.0001_ssize_2000_class_2.ckpt')
    else:
        print('load model: icarl_ssize_{}_class_{}.ckpt'.format(examplar_size, s_class))
        saver_load.restore(sess, '../model/icarl_ssize_{}_class_{}.ckpt'.format(examplar_size, s_class))
    data_1 = exclude_data(data_all, range(s_class, 6))
    # construct examplar set from the classes seen so far
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(data_1)
    evaluate_model(data_1[5], np.array(predict_label))
    evaluate_model(data_1[0], np.array(predict_label_train))
    print('Here is the result of NME:')
    test_predict = nme_pred(final_train_fea, final_test_fea, data_1[0])
    evaluate_model(data_1[5], np.array(test_predict))
    examplar_index = construct_examplar(final_train_fea, data_1[0], examplar_size, all_train_label)
    examplar = get_support_data(data_1, examplar_index)
    # perform training on the new class plus the exemplar set.
    # FIX: list(range(...)) + list(range(...)) keeps the Python-2
    # `range + range` behavior while remaining valid on Python 3.
    data_2 = exclude_data(data_all, list(range(s_class)) + list(range(s_class + 1, 6)))
    used_data = merge_data(examplar, data_2)
    train_model(used_data, train_op, train_steps)
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(used_data)
    evaluate_model(used_data[5], np.array(predict_label))
    evaluate_model(used_data[0], np.array(predict_label_train))
    print('Here is the result of NME:')
    test_predict = nme_pred(final_train_fea, final_test_fea, used_data[0])
    evaluate_model(used_data[5], np.array(test_predict))
    # save model for the next incremental step
    saver_load.save(sess, '../model/icarl_ssize_{}_class_{}.ckpt'.format(examplar_size, s_class + 1))