Example #1
def check_support_data_batch_performance(train_op, exclude_list,
                                         final_train_fea, data_merged,
                                         support_size, train_steps, aug_rate):
    if support_size > 0:
        # construct the support data for the first and second batch
        support_data_index = select_support_data_svm(final_train_fea,
                                                     data_merged[0],
                                                     support_size)
        support_data = get_support_data(data_merged, support_data_index)

        # load the second batch data
        data_2 = exclude_data(data_all, exclude_list)
        data_2 = append_old_feature_new_data(data_2)
        data_merged = merge_data(support_data, data_2)
        used_data = merge_data(augmentation(support_data, aug_rate), data_2)
    if support_size == 0:
        used_data = exclude_data(data_all, exclude_list)
        used_data = append_old_feature_new_data(used_data)
        data_merged = merge_data(used_data, data_merged)

    train_model(used_data, train_op, train_steps)
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_merged)
    evaluate_model(data_merged[5], np.array(predict_label))
    evaluate_model(data_merged[0], np.array(predict_label_train))

    return final_train_fea, data_merged
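
The helper above delegates the choice of replay samples to select_support_data_svm(), whose implementation is not part of this example. A minimal sketch of one plausible reading, assuming it keeps, for each class, the samples that lie closest to a linear-SVM decision boundary (the function name used here, the per-class budget and the margin criterion are all assumptions):

import numpy as np
from sklearn.svm import LinearSVC

def select_support_data_svm_sketch(features, labels, support_size):
    """Hypothetical: per class, keep the `support_size` samples closest to the
    SVM decision boundary, on the idea that boundary samples are the most
    informative ones to replay in later batches."""
    features = np.asarray(features)
    labels = np.asarray(labels)
    clf = LinearSVC(max_iter=5000).fit(features, labels)
    scores = clf.decision_function(features)
    if scores.ndim == 1:
        # binary case: decision_function is already the signed margin
        margins = np.abs(scores)
    else:
        # multi-class: distance to the hyperplane of the sample's own class
        class_index = np.searchsorted(clf.classes_, labels)
        margins = np.abs(scores[np.arange(len(labels)), class_index])
    selected = []
    for c in np.unique(labels):
        idx = np.where(labels == c)[0]
        selected.extend(idx[np.argsort(margins[idx])][:support_size])
    return np.array(selected)
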
Example #2
def check_fix_rep_batch_performance(train_op, exclude_list, data_merged):
    used_data = exclude_data(data_all, exclude_list)
    data_merged = merge_data(used_data, data_merged)

    train_model(used_data, train_op, train_steps)
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_merged)
    evaluate_model(data_merged[5], np.array(predict_label))
    evaluate_model(data_merged[0], np.array(predict_label_train))
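
Note that these examples mix two different evaluate_model() signatures: most call it positionally as evaluate_model(true_labels, predictions), while the unit tests further down call evaluate_model(model=..., samples=...) and read an 'auc' entry from the returned mapping. A minimal sketch of the first style, assuming it only reports overall accuracy and a confusion matrix (the real helper may print per-class metrics instead):

import numpy as np
from sklearn.metrics import accuracy_score, confusion_matrix

def evaluate_model_sketch(true_labels, predicted_labels):
    """Hypothetical (labels, predictions) variant of evaluate_model()."""
    true_labels = np.asarray(true_labels)
    predicted_labels = np.asarray(predicted_labels)
    acc = accuracy_score(true_labels, predicted_labels)
    print('accuracy: %.4f' % acc)
    print(confusion_matrix(true_labels, predicted_labels))
    return acc
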
Example #3
def check_feature_representation(f_size, f_lam, s_size):
    # load the initial data and find the original support data using the first model
    print('load model: fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(
        f_size, f_lam, s_size, 2))
    saver_load.restore(
        sess, '../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(
            f_size, f_lam, s_size, 2))
    data_1 = exclude_data(data_all, range(2, 6))
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_1)
    evaluate_model(data_1[5], np.array(predict_label))
    evaluate_model(data_1[0], np.array(predict_label_train))

    feature_list = list()
    feature_list.append(data_1[0])
    feature_list.append(final_train_fea)
    for s_class in range(3, 7):
        print('load model: fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(
            f_size, f_lam, s_size, s_class))
        saver_load.restore(
            sess, '../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(
                f_size, f_lam, s_size, s_class))

        predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
            data_1)
        evaluate_model(data_1[5], np.array(predict_label))
        evaluate_model(data_1[0], np.array(predict_label_train))
        feature_list.append(final_train_fea)
    with open('for_tsne_{}.pickle'.format(f_lam), 'w') as f:
        cPickle.dump(feature_list, f)
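
check_feature_representation() only dumps the per-checkpoint feature matrices; the file name suggests they are meant for a t-SNE comparison of the learned representations. A sketch of how that pickle might be consumed, assuming entry 0 holds the training labels and the remaining entries are feature matrices (the function name and the embedding step are assumptions):

import cPickle
import numpy as np
from sklearn.manifold import TSNE

def embed_saved_features(f_lam):
    """Hypothetical consumer of the for_tsne_<f_lam>.pickle file written above."""
    with open('for_tsne_{}.pickle'.format(f_lam)) as f:
        feature_list = cPickle.load(f)
    labels = np.asarray(feature_list[0])
    # One 2-D t-SNE embedding per restored checkpoint, ready to scatter-plot
    # against `labels` to compare representations across incremental steps.
    embeddings = [TSNE(n_components=2).fit_transform(np.asarray(fea))
                  for fea in feature_list[1:]]
    return labels, embeddings
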
Example #4
def restart_from_ckpt(f_size, f_lam, s_size, s_class):
    if s_class==1:
        data_1 = exclude_data(data_all, range(2,6))

        train_model(data_1, train_op, train_steps)

        # check performance
        predict_label_train,predict_label,final_train_fea,final_test_fea=whole_set_check(data_1)
        evaluate_model(data_1[5],np.array(predict_label))
        evaluate_model(data_1[0],np.array(predict_label_train))

    if s_class>=2:
        print('load model: fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size,
            f_lam, s_size, s_class))
        saver_load.restore(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size,
            f_lam, s_size, s_class))

        data_1 = exclude_data(data_all, range(s_class,6))
        predict_label_train,predict_label,final_train_fea,final_test_fea=whole_set_check(data_1)
        evaluate_model(data_1[5],np.array(predict_label))
        evaluate_model(data_1[0],np.array(predict_label_train))

        # continue
        new_step = set_fisher_regularizer(f_lam, data_1, f_size)
        train_loss, data_merged = check_support_data_batch_performance(new_step,
            range(s_class)+range(s_class+1,6), 
            final_train_fea, data_1, s_size, train_steps, 2)

    saver_load.save(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size,
        f_lam, s_size, s_class+1))
Example #5
def train_model(data, train_op, train_steps):
    train_label = data[0]
    train_label_categorical = data[1]
    train_pssm_full_length = data[2]
    train_encoding_full_length = data[3]
    train_functional_domain_encoding = data[4]
    test_label = data[5]
    test_label_categorical = data[6]
    test_pssm_full_length = data[7]
    test_encoding_full_length = data[8]
    test_functional_domain_encoding = data[9]
    for i in range(train_steps):
        batch=generate_feeding_batch(train_pssm_full_length,train_encoding_full_length,
        	train_functional_domain_encoding,train_label_categorical,batch_size)
        if i%output_step == 0:
            summary,predicted_label_output,cross_entropy_output,y_conv_output,acc = sess.run([
            	merged, predicted_label,cross_entropy,y_conv,accuracy],feed_dict={pssm: batch[0], 
                encoding: batch[1], domain: batch[2], y_: batch[3], keep_prob: 1.0})
            print("step %d, training accuracy %g"%(i, acc))
            batch_test=generate_feeding_batch(test_pssm_full_length,test_encoding_full_length,
            	test_functional_domain_encoding,test_label_categorical,batch_size)
            summary,acc= sess.run([merged,accuracy],feed_dict={
            	pssm: batch_test[0], 
            	encoding: batch_test[1], 
            	domain: batch_test[2], 
            	y_: batch_test[3], 
            	keep_prob: 1.0})
            test_writer.add_summary(summary,i)
            print("step %d, test accuracy %g"%(i, acc))
            print('cross_entropy: %g'%cross_entropy_output)
            print(predicted_label_output)
        if i%1000==0 and i!=0:
            print('Step %d whole set check'%i)
            predict_label,_=whole_set_check_simple(data)
            evaluate_model(test_label[:len(predict_label)],np.array(predict_label))
        # if i%2000==0:
        #     save_path=saver.save(sess,'./model_level_'+str(level)+'.ckpt')
        summary,cross_entropy_output,_ = sess.run([merged,cross_entropy,train_op],feed_dict={
        	pssm: batch[0], 
            encoding: batch[1], 
            domain: batch[2], 
            y_: batch[3], 
            keep_prob: 0.5})
        train_writer.add_summary(summary,i)
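
train_model() builds each feed_dict from generate_feeding_batch(), which is not shown in this example. A plausible sketch, assuming it simply draws aligned random rows from the four parallel training arrays (the name and the sampling strategy are assumptions):

import numpy as np

def generate_feeding_batch_sketch(pssm, encoding, domain, labels_categorical, batch_size):
    """Hypothetical: one random mini-batch of aligned rows for the feed_dict above."""
    idx = np.random.choice(len(labels_categorical), size=batch_size, replace=False)
    return [np.asarray(pssm)[idx], np.asarray(encoding)[idx],
            np.asarray(domain)[idx], np.asarray(labels_categorical)[idx]]
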
Example #6
    def test_evaluate_model_with_empty_list(self) -> None:  # pylint: disable=R0201
        """ Test evaluate_model() with no data.
        """
        # Given
        model: Model = "TODO"
        validate_files: List[pd.DataFrame] = []

        # When
        metrics: Mapping[str,Any] = evaluate_model(model=model, samples=validate_files)

        # Then
        self.assertGreater(metrics['auc'], 0.8)
Example #7
def run_iteration(conf, ds, datasets_bin, sanity):
    """
    """
    model = create_model(conf)
    callbacks = create_callbacks(conf)
    class_weights = get_class_weights(ds["train"], conf)

    start_time = time.time()
    history = model.fit(
            ds["train"],
            steps_per_epoch = conf["steps"]["train"],
            epochs = conf["num_epochs"],
            validation_data = ds["test"],
            validation_steps = conf["steps"]["test"],
            validation_freq = 1,
            class_weight = class_weights,
            callbacks = callbacks,
            verbose = 1
    )
    if conf["verbosity"]:
        print ("Time spent on training: {:.2f} minutes.".format(np.round(time.time() - start_time)/60))

    evaluate_model(model, history, ds, conf)

    count = {"findings": 0, "total": 0}
    pseudo = {"pred_list": [], "lab_list": [], "name_list": []}

    pseudo, count = generate_labels(pseudo, count, ds["unlab"], model, conf)

    # Sort in order of highest confidence to lowest
    pseudo_sorted = custom_sort(pseudo)

    checkout_findings(pseudo_sorted, conf, show=False)

    datasets_bin, added_samples = resample_and_combine(ds, conf, pseudo, pseudo_sorted, datasets_bin, limit=conf["class_limit"])

    # Update unlab_ds
    ds["unlab"] = reduce_dataset(ds["unlab"], remove=added_samples)

    sanity, conf = update_sanity(sanity, len(added_samples), datasets_bin, conf)
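
custom_sort() is only described by the comment above it ("Sort in order of highest confidence to lowest"). A minimal sketch under that assumption, ordering the pseudo-labelled samples by their top softmax probability (the returned tuple layout is an assumption):

import numpy as np

def custom_sort_sketch(pseudo):
    """Hypothetical: order pseudo-labelled samples from most to least confident."""
    confidences = np.array([float(np.max(p)) for p in pseudo["pred_list"]])
    order = np.argsort(confidences)[::-1]  # highest confidence first
    return [(pseudo["pred_list"][i], pseudo["lab_list"][i], pseudo["name_list"][i])
            for i in order]
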
Example #8
def support_with_ewc(f_size, f_lam, s_size):
    # get partial data
    data_1 = exclude_data(data_all, range(2,6))

    train_model(data_1, train_op, train_steps)

    # check performance
    predict_label_train,predict_label,final_train_fea,final_test_fea=whole_set_check(data_1)
    evaluate_model(data_1[5],np.array(predict_label))
    evaluate_model(data_1[0],np.array(predict_label_train))

    # set the regularizer
    new_step = set_fisher_regularizer(f_lam, data_1, f_size)
    train_loss, data_merged = check_support_data_batch_performance(new_step,range(2)+range(3,6), 
        final_train_fea, data_1, s_size, train_steps, 2)

    saver_load.save(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size,
        f_lam, s_size, 3))

    new_step = set_fisher_regularizer(f_lam, data_merged, f_size)
    train_loss, data_merged = check_support_data_batch_performance(new_step,range(3)+range(4,6), 
        train_loss, data_merged, s_size, train_steps, 2)

    saver_load.save(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size,
        f_lam, s_size, 4))

    new_step = set_fisher_regularizer(f_lam, data_merged, f_size)
    train_loss, data_merged = check_support_data_batch_performance(new_step,range(4)+range(5,6), 
        train_loss, data_merged, s_size, train_steps, 2)

    saver_load.save(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size,
        f_lam, s_size, 5))

    new_step = set_fisher_regularizer(f_lam, data_merged, f_size)
    train_loss, data_merged = check_support_data_batch_performance(new_step,range(5), 
        train_loss, data_merged, s_size, train_steps, 2)

    saver_load.save(sess,'../model/fsize_{}_flam_{}_ssize_{}_class_{}.ckpt'.format(f_size,
        f_lam, s_size, 6))
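
support_with_ewc() combines support-data replay with set_fisher_regularizer(), whose name suggests an Elastic Weight Consolidation (EWC) style penalty. Its implementation is not shown; below is a minimal TF 1.x sketch of that idea, where fisher_diagonals and anchor_values are hypothetical per-variable lists holding the diagonal Fisher estimates and the parameter snapshot taken after the previous task:

import tensorflow as tf  # TF 1.x style, matching the sess.run/feed_dict usage in these examples

def ewc_regularized_step(cross_entropy, fisher_diagonals, anchor_values, f_lam, lr=1e-4):
    """Hypothetical EWC-style train op: a quadratic penalty keeps the weights close
    to their post-previous-task values, weighted by the Fisher diagonal."""
    penalty = tf.add_n([tf.reduce_sum(f * tf.square(v - a))
                        for f, v, a in zip(fisher_diagonals,
                                           tf.trainable_variables(),
                                           anchor_values)])
    total_loss = cross_entropy + (f_lam / 2.0) * penalty
    return tf.train.AdamOptimizer(lr).minimize(total_loss)
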
Example #9
def restart_from_ckpt(s_class):
    if s_class == 1:
        data_1 = exclude_data(data_all, range(2, 6))
        saver_load.restore(
            sess, '../model/fsize_100_flam_1000_ssize_2000_class_2.ckpt')
        # check performance
        predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
            data_1)
        evaluate_model(data_1[5], np.array(predict_label))
        evaluate_model(data_1[0], np.array(predict_label_train))

    if s_class >= 2:
        print('load model: fix_rep_class_{}.ckpt'.format(s_class))
        saver_load.restore(sess,
                           '../model/fix_rep_class_{}.ckpt'.format(s_class))

        data_1 = exclude_data(data_all, range(s_class, 6))

        check_fix_rep_batch_performance(train_op,
                                        range(s_class) + range(s_class + 1, 6),
                                        data_1)

    saver_load.save(sess, '../model/fix_rep_class_{}.ckpt'.format(s_class + 1))
Example #10
    def test_all_pipeline_with_none(self) -> None:  # pylint: disable=R0201
        """ Test prepare_dataset() with no data.
        """
        # Given
        input_raw: pd.DataFrame = pd.DataFrame(
            data={1, 2, 3})
        train_inputs: List[pd.DataFrame] = []
        validate_files: List[str] = []

        # When
        train_inputs.append(build_features(prepare_dataset(input_raw=input_raw)))
        model: Model = train_model(inputs=train_inputs,
                                 epochs=1,
                                 batch_size=1)
        metrics: Mapping[str,Any] = evaluate_model(model, validate_files)

        # Then
        self.assertGreater(metrics['auc'], 0.8)
Example #11
def incremental_all_data():
    # get partial data
    data_1 = exclude_data(data_all, range(2, 6))
    data_1 = append_old_feature_new_data(data_1)

    train_model(data_1, train_op, train_steps)

    # check performance
    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_1)
    evaluate_model(data_1[5], np.array(predict_label))
    evaluate_model(data_1[0], np.array(predict_label_train))

    # get further data
    data_2 = exclude_data(data_all, range(2) + range(3, 6))
    data_2 = append_old_feature_new_data(data_2)
    data_merged = merge_data(data_1, data_2)
    train_model(data_merged, train_op, train_steps)

    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_merged)
    evaluate_model(data_merged[5], np.array(predict_label))
    evaluate_model(data_merged[0], np.array(predict_label_train))

    # get further data
    data_2 = exclude_data(data_all, range(3) + range(4, 6))
    data_2 = append_old_feature_new_data(data_2)
    data_merged = merge_data(data_merged, data_2)
    train_model(data_merged, train_op, train_steps)

    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_merged)
    evaluate_model(data_merged[5], np.array(predict_label))
    evaluate_model(data_merged[0], np.array(predict_label_train))

    # get further data
    data_2 = exclude_data(data_all, range(4) + range(5, 6))
    data_2 = append_old_feature_new_data(data_2)
    data_merged = merge_data(data_merged, data_2)
    train_model(data_merged, train_op, train_steps)

    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_merged)
    evaluate_model(data_merged[5], np.array(predict_label))
    evaluate_model(data_merged[0], np.array(predict_label_train))

    data_2 = exclude_data(data_all, range(5))
    data_2 = append_old_feature_new_data(data_2)
    data_merged = merge_data(data_merged, data_2)
    train_model(data_merged, train_op, train_steps)

    predict_label_train, predict_label, final_train_fea, final_test_fea = whole_set_check(
        data_merged)
    evaluate_model(data_merged[5], np.array(predict_label))
    evaluate_model(data_merged[0], np.array(predict_label_train))
Example #12
def perform_icarl(examplar_size, s_class):
    if s_class==1:
        saver_load.restore(sess, '../model/fsize_10_flam_0.0001_ssize_2000_class_2.ckpt')
    else:
        print('load model: icarl_ssize_{}_class_{}.ckpt'.format(examplar_size, s_class))
        saver_load.restore(sess,
            '../model/icarl_ssize_{}_class_{}.ckpt'.format(examplar_size, s_class))

        data_1 = exclude_data(data_all, range(s_class,6))
        # train_model(data_1, train_op, train_steps)
        # construct examplar
        predict_label_train,predict_label,final_train_fea,final_test_fea=whole_set_check(data_1)
        evaluate_model(data_1[5],np.array(predict_label))
        evaluate_model(data_1[0],np.array(predict_label_train))
        print('Here is the result of NME:')
        test_predict = nme_pred(final_train_fea, final_test_fea, data_1[0])
        evaluate_model(data_1[5],np.array(test_predict))
        examplar_index = construct_examplar(final_train_fea,
            data_1[0], examplar_size, all_train_label)
        examplar = get_support_data(data_1, examplar_index)

        # perform training
        data_2 = exclude_data(data_all, range(s_class)+range(s_class+1,6))
        used_data = merge_data(examplar, data_2)
        train_model(used_data, train_op, train_steps)
        predict_label_train,predict_label,final_train_fea,final_test_fea=whole_set_check(used_data)
        evaluate_model(used_data[5],np.array(predict_label))
        evaluate_model(used_data[0],np.array(predict_label_train))
        print('Here is the result of NME:')
        test_predict = nme_pred(final_train_fea, final_test_fea, used_data[0])
        evaluate_model(used_data[5],np.array(test_predict))
    # save model
    saver_load.save(sess,
        '../model/icarl_ssize_{}_class_{}.ckpt'.format(examplar_size, s_class+1))
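
perform_icarl() also reports a nearest-mean-of-exemplars (NME) prediction via nme_pred(), which is not shown here. The sketch below follows the standard iCaRL recipe under the assumption that each test sample is assigned to the class with the closest normalised mean feature:

import numpy as np

def nme_pred_sketch(train_features, test_features, train_labels):
    """Hypothetical nearest-mean-of-exemplars classifier (iCaRL style)."""
    train_features = np.asarray(train_features, dtype=np.float64)
    test_features = np.asarray(test_features, dtype=np.float64)
    train_labels = np.asarray(train_labels)

    classes = np.unique(train_labels)
    means = np.stack([train_features[train_labels == c].mean(axis=0) for c in classes])
    means /= np.linalg.norm(means, axis=1, keepdims=True)  # normalise each class mean

    # Euclidean distance from every test sample to every class mean; predict the closest
    dists = np.linalg.norm(test_features[:, None, :] - means[None, :, :], axis=2)
    return classes[np.argmin(dists, axis=1)]
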