Example #1
def lr_solver(train_data, train_label, validation, test, unlabel, feature_extract, feature_handler):
    """
    """
    logging.info('begin to train the lr classifier')

    # train_data = train_data[:100,:]
    # validation = validation[:100,:]
    # test = test[:100,:]
    # train_label = train_label[:100]
    train_data, validation, test, unlabel = feature_extract(train_data, train_label, validation, test, unlabel)
    # print new_train_data.shape
    train_data, validation, test, unlabel = feature_handler(train_data, validation, test, unlabel)
    """
    lr = LogisticRegression ()
    params_test = {"penalty":['l1','l2'],
                 "C":[0.1,0.2,0.3,0.5,0.7,1,3,5],
                 "tol":[0.001,0.003,0.005,0.01,0.05,0.1,0.5],
                 "random_state":[1000000007]}
    rand_search_result = GridSearchCV (lr, param_grid = params_test, n_jobs = 3, cv = 3, scoring='roc_auc')
    rand_search_result.fit (train_data , train_label)
    params = evaluate.report (rand_search_result.grid_scores_)
    print params
    """

    print train_data.shape[1]
    params = {'penalty': 'l1', 'C':0.1 , 'random_state': 1000000007, 'tol': 0.001, 'warm_start' : True}

    lr = LogisticRegression(**params)
    lr.fit(train_data, train_label)
    joblib.dump(lr, ROOT + '/result/lr.pkl')
    evaluate.get_auc(lr.predict_proba(validation)[:, 1])
    return lr.predict_proba(train_data)[:, 1]
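The commented-out tuning block above relies on grid_scores_, which newer scikit-learn releases have removed. A minimal sketch of the same logistic-regression search against the current API (best_params_ / cv_results_), assuming train_data and train_label are the arrays prepared above:

from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import GridSearchCV

# liblinear supports both the 'l1' and 'l2' penalties used in the original grid
lr = LogisticRegression(solver='liblinear', random_state=1000000007)
params_test = {"penalty": ['l1', 'l2'],
               "C": [0.1, 0.2, 0.3, 0.5, 0.7, 1, 3, 5],
               "tol": [0.001, 0.003, 0.005, 0.01, 0.05, 0.1, 0.5]}
search = GridSearchCV(lr, param_grid=params_test, n_jobs=3, cv=3, scoring='roc_auc')
search.fit(train_data, train_label)
print(search.best_params_)   # cv_results_ holds the full table that grid_scores_ used to provide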
Example #2
File: gbdt.py Project: cxlove/RPPredict
def gbdt_solver(train_data, train_label, validation, test, unlabel, dimreduce=decomposition.undo):
    """
    """
    # train_data = train_data[:100,:]
    # train_label = train_label[:100]

    logging.info("begin to train the gbdt classifier")
    new_train_data, new_val, new_test, new_unlabel = dimreduce(train_data, train_label, validation, test, unlabel)
    logging.info("finished feature extracting")

    """
    gb = GradientBoostingClassifier ()
    params_gbdt = {"n_estimators":[100,200,500,1000],
                 "learning_rate":[0.02,0.03,0.05,0.1],
                 "max_depth":[3,5,7,9],
                 "random_state":[1000000007]}"""

    # rand_search_result = GridSearchCV (gb, param_grid = params_gbdt , n_jobs = 3  , cv = 3, scoring = 'roc_auc')
    # rand_search_result = RandomizedSearchCV (gb, param_distributions = params_gbdt, n_jobs = 3, cv = 3, n_iter = 100, scoring = 'roc_auc')
    # rand_search_result.fit (new_train_data , train_label)
    # params = tools.report (rand_search_result.grid_scores_)

    params = {
        "n_estimators": 600,
        "learning_rate": 0.03,
        "random_state": 1000000007,
        "max_depth": 2,
        "warm_start": True,
    }
    gb = GradientBoostingClassifier(**params)
    gb.fit(new_train_data, train_label)
    joblib.dump(gb, ROOT + "/result/gbdt.pkl")
    evaluate.get_auc(gb.predict_proba(new_val)[:, 1])
    return gb.predict_proba(new_test)[:, 1]
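For reference, a minimal sketch of the randomized search hinted at in the commented-out block, written against the current scikit-learn API (the grid_scores_ attribute consumed by tools.report no longer exists); new_train_data and train_label are the arrays prepared above:

from sklearn.ensemble import GradientBoostingClassifier
from sklearn.model_selection import RandomizedSearchCV

params_gbdt = {"n_estimators": [100, 200, 500, 1000],
               "learning_rate": [0.02, 0.03, 0.05, 0.1],
               "max_depth": [3, 5, 7, 9]}
gb = GradientBoostingClassifier(random_state=1000000007)
# the full grid has only 4 * 4 * 4 = 64 combinations, so keep n_iter modest
search = RandomizedSearchCV(gb, param_distributions=params_gbdt, n_iter=20,
                            n_jobs=3, cv=3, scoring='roc_auc', random_state=1000000007)
search.fit(new_train_data, train_label)
print(search.best_params_)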
Example #3
def ssl_solver(train, label, validation, test, unlabel, dimreduce, classifier=LabelSpreading):
    """Fit a graph-based semi-supervised classifier on the labeled and unlabeled data and return test-set probabilities."""

    train, validation, test, unlabel = dimreduce(train, label, validation, test, unlabel)

    data = np.vstack([train, unlabel])
    label = np.hstack([label, [-1] * unlabel.shape[0]])
    assert data.shape[0] == len(label)

    cf = classifier(kernel='knn', n_neighbors=100, max_iter=3)
    # cf = classifier(kernel='rbf', gamma=0.3, max_iter=3)

    cf.fit(data, label)
    evaluate.get_auc(cf.predict_proba(validation)[:, 1])
    return cf.predict_proba(test)[:, 1]
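A self-contained illustration of the -1 labeling convention used above (scikit-learn's semi-supervised estimators treat -1 as "unlabeled"); the data here is synthetic and purely for demonstration:

import numpy as np
from sklearn.datasets import make_classification
from sklearn.semi_supervised import LabelSpreading

X, y = make_classification(n_samples=500, random_state=0)
y_partial = y.copy()
y_partial[100:] = -1                      # pretend only the first 100 rows are labeled

model = LabelSpreading(kernel='knn', n_neighbors=10, max_iter=30)
model.fit(X, y_partial)
print(model.predict_proba(X[:5])[:, 1])   # positive-class probabilities, as in the solver above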
Example #4
def nb_solver(train_data, train_label, validation, test, classifier, dimreduce, convertbinary):
    """
    """
    logging.info('begin to train the naive bayes classifier')

    # train_data = train_data[:100,:]
    # validation = validation[:100,:]
    # test = test[:100,:]
    # train_label = train_label[:100]
    train_data, validation, test = dimreduce(train_data, train_label, validation, test)
    # print new_train_data.shape
    train_data, validation, test = convertbinary(train_data, validation, test)

    nb = classifier()
    nb.fit(train_data, train_label)
    evaluate.get_auc(nb.predict_proba(validation)[:, 1])
    return nb.predict_proba(test)[:, 1]
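The convertbinary step suggests the classifier passed in is a Bernoulli-style naive Bayes over binary features. A minimal, purely illustrative sketch with scikit-learn's BernoulliNB on synthetic data:

import numpy as np
from sklearn.naive_bayes import BernoulliNB

rng = np.random.RandomState(0)
X = (rng.rand(200, 20) > 0.5).astype(int)   # binary feature matrix
y = rng.randint(0, 2, size=200)

nb = BernoulliNB()
nb.fit(X, y)
print(nb.predict_proba(X[:3])[:, 1])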
Example #5
def rf_solver(train_data, train_label, validation, test, unlabel, feature_extract, feature_handler):
    """
    """
    logging.info("begin to train the random forest classifier")

    # train_data = train_data[:100,:]
    # validation = validation[:100,:]
    # test = test[:100,:]
    # train_label = train_label[:100]
    train_data, validation, test, unlabel = feature_extract(train_data, train_label, validation, test, unlabel)
    # print new_train_data.shape
    train_data, validation, test, unlabel = feature_handler(train_data, validation, test, unlabel)

    rf = RandomForestClassifier(warm_start=True, n_jobs=2, n_estimators=2000, max_depth=3, min_samples_split=50)
    rf.fit(train_data, train_label)
    # joblib.dump (rf, ROOT + '/result/rf.pkl')
    evaluate.get_auc(rf.predict_proba(validation)[:, 1])
    return rf.predict_proba(train_data)[:, 1]
Example #6
File: sgd.py Project: cxlove/RPPredict
def sgd_solver(train_data, train_label, validation, test, unlabel, feature_extract, feature_handler):
    """
    """
    logging.info('begin to train the sgd classifier')

    # train_data = train_data[:100,:]
    # validation = validation[:100,:]
    # test = test[:100,:]
    # train_label = train_label[:100]
    train_data, validation, test, unlabel = feature_extract(train_data, train_label, validation, test, unlabel)
    # print new_train_data.shape
    train_data, validation, test, unlabel = feature_handler(train_data, validation, test, unlabel)

    sgd = SGDClassifier(loss='modified_huber', alpha=0.0001, average=False, class_weight=None, epsilon=0.1,
                        eta0=0.0, fit_intercept=True, l1_ratio=0.15,
                        learning_rate='optimal', n_iter=5, n_jobs=2,
                        penalty='l2', power_t=0.5, random_state=1000000007, shuffle=True,
                        verbose=0, warm_start=True)
    sgd.fit(train_data, train_label)
    joblib.dump(sgd, ROOT + '/result/sgd.pkl')
    evaluate.get_auc(sgd.predict_proba(validation)[:, 1])
    return sgd.predict_proba(train_data)[:, 1]
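Two details worth noting: loss='modified_huber' (or a log loss) is what makes predict_proba available on SGDClassifier, and the n_iter argument was deprecated in scikit-learn 0.19 and later replaced by max_iter. A minimal sketch against the current API, assuming train_data, train_label and validation are the arrays prepared above:

from sklearn.linear_model import SGDClassifier

sgd = SGDClassifier(loss='modified_huber', penalty='l2', alpha=0.0001,
                    max_iter=5, random_state=1000000007, warm_start=True)
sgd.fit(train_data, train_label)
proba = sgd.predict_proba(validation)[:, 1]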
Example #7
def s3vm_solver(train_data, train_label, validation, test, unlabel, feature_extract, feature_handler):
    """
    """
    logging.info('begin to train the s3vm classifier')

    #unlabel = unlabel[:100,:]
    #train_data = train_data[:100,:]
    #validation = validation[:100,:]
    #test = test[:100,:]
    #train_label = train_label[:100]

    train_data, validation, test, unlabel = feature_extract(train_data, train_label, validation, test, unlabel)
    # print new_train_data.shape
    train_data, validation, test, unlabel = feature_handler(train_data, validation, test, unlabel)

    data = np.vstack([train_data, unlabel])
    label = np.hstack([train_label, [-1] * unlabel.shape[0]])
    assert data.shape[0] == len(label)
    s3vm = methods.scikitTSVM.SKTSVM(kernel='linear')

    s3vm.fit(data, label)
    evaluate.get_auc(s3vm.predict_proba(validation)[:, 1])
    return s3vm.predict_proba(train_data)[:, 1]
Example #8
File: main.py Project: Sabokrou/GAID
def test_with_patch_image(ugan):

    ugan.get_test_data()
    img_name, y_true, res_loss, dis_loss, y_score = ugan.test(FLAGS, True)

    print('[*] testing ...')

    roc_auc = get_auc(y_true, y_score, True)

    print("ROC curve area: %.4f" % roc_auc)

    for idx in range(np.shape(y_true)[0]):
        print("image name: [%s] anomaly score: %.2f, actual label: %.d, generator loss: %.2f, discriminator loss: %.2f" \
              % (str(re.split('/|[.]|\\\\', img_name[idx])[-2]), y_score[idx],
                 y_true[idx], res_loss[idx], dis_loss[idx]))

    test_res = list(zip(y_score, y_true))
    np.savetxt("score" + str(FLAGS.patch_size) + ".csv", test_res, header="score,label", delimiter=",")
Example #9
def cotraining(model_one, model_two, n_iter=100):
    """Co-train model_one and model_two, alternately growing each model's training set
    with pseudo-labeled samples drawn from the unlabeled pool.
    """
    data, train_number, val_number, test_number, unlabel_number, label, uid = datahandler.clean_data()

    train = data[:train_number, :]
    validation = data[train_number:train_number + val_number, :]
    test = data[train_number + val_number:-unlabel_number, :]
    unlabel = data[-unlabel_number:, :]

    train, validation, test, unlabel = decomposition.gbdt_dimreduce_threshold(train, label, validation, test, unlabel)
    # train, validation, test, unlabel = split.split_continuum_value_tvt (train, validation, test, unlabel)

#    train_number = 100
#    unlabel_number = 1000
#
#    train = train[:100,:]
#    unlabel = unlabel[:1000,:]
#    label = label[:100]

    train_one = copy.deepcopy(train)
    label_one = copy.deepcopy(label)
    train_two = copy.deepcopy(train)
    label_two = copy.deepcopy(label)

    model_one.fit(train_one, label_one)
    model_two.fit(train_two, label_two)

    for iter in xrange(1, n_iter + 1, 1):
        logging.info('#%d iter for co-training :' % iter)

        unlabel_label = [-1] * unlabel_number
        unlabel_index = range(0, unlabel_number)
        step = 0
        while len(unlabel_index) > 0:
            step += 1
            logging.info('co-training step #%d, remaining unlabel: %d' % (step, len(unlabel_index)))
            model_one, model_two, unlabel_label, unlabel_index, train_two, label_two = training(model_one, model_two, unlabel, unlabel_label, unlabel_index, train_two, label_two)
            model_two, model_one, unlabel_label, unlabel_index, train_one, label_one = training(model_two, model_one, unlabel, unlabel_label, unlabel_index, train_one, label_one)

            evaluate.get_auc(model_one.predict_proba(validation)[:, 1])
            evaluate.get_auc(model_two.predict_proba(validation)[:, 1])
            evaluate.get_auc((model_one.predict_proba(validation)[:, 1] + model_two.predict_proba(validation)[:, 1]) / 2.0)

            joblib.dump(model_one, ROOT + '/result/model/model_one_%d_%d.pkl' % (iter, step))
            joblib.dump(model_two, ROOT + '/result/model/model_two_%d_%d.pkl' % (iter, step))

            evaluate.output(uid, (model_one.predict_proba(test)[:, 1] + model_two.predict_proba(test)[:, 1]) / 2.0, ROOT + '/result/predict/cotraining_%d_%d.csv' % (iter, step))
            evaluate.output(uid, model_one.predict_proba(test)[:, 1], ROOT + '/result/predict/model_one_%d_%d.csv' % (iter, step))
            evaluate.output(uid, model_two.predict_proba(test)[:, 1], ROOT + '/result/predict/model_two_%d_%d.csv' % (iter, step))
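The per-step training helper is not shown in this listing. As a rough, hypothetical sketch of the usual co-training step (the first model pseudo-labels the unlabeled samples it is most confident about, those samples move into the second model's training set, and the second model is refit), under the assumption that the project follows this standard recipe:

import numpy as np

def training(teacher, student, unlabel, unlabel_label, unlabel_index, train, label,
             n_pick=100, threshold=0.9):
    """Hypothetical co-training step; not the project's actual implementation."""
    idx = np.array(unlabel_index)
    proba = teacher.predict_proba(unlabel[idx])[:, 1]
    # keep the samples the teacher is most confident about
    confident = np.where((proba >= threshold) | (proba <= 1 - threshold))[0]
    if len(confident) == 0:
        confident = np.argsort(np.abs(proba - 0.5))[::-1]
    confident = confident[:n_pick]
    picked = idx[confident]
    pseudo = (proba[confident] >= 0.5).astype(int)

    # move the picked samples from the unlabeled pool into the student's training set
    train = np.vstack([train, unlabel[picked]])
    label = np.hstack([label, pseudo])
    for i, p in zip(picked, pseudo):
        unlabel_label[i] = p
    picked_set = set(picked.tolist())
    unlabel_index = [i for i in unlabel_index if i not in picked_set]

    student.fit(train, label)
    return teacher, student, unlabel_label, unlabel_index, train, label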
Example #10
def test_with_patch_image(gaid):

    img_name, y_true, y_score = gaid.test(FLAGS, True)

    print('[*] testing ...')



    roc_auc = get_auc(y_true, y_score, True)

    print("ROC curve area: %.4f" % roc_auc)

    
    for idx in range(np.shape(y_true)[0]):
        print("image name: [%s] anomaly score: %.2f, actual label: %.d" \
              % (str(re.split('/|[.]|\\\\', img_name[idx])[-2]), y_score[idx],
                 y_true[idx]))
    
    test_res = list(zip(y_score, y_true))
    np.savetxt("score-" + str(FLAGS.test_dir) + "-" + str(FLAGS.patch_size) + ".csv", test_res, header="score,label", delimiter=",")
Example #11
File: model.py Project: thkarag/GAID
    def train(self, config):
        """Train the reconstructor (G) and representation-matching discriminator (D), checkpointing the epoch with the best AUC."""

        print(" [*] train model ...")

        # output log
        if os.path.exists("output_log.txt"):
            os.remove("output_log.txt")
        logging.basicConfig(filename='output_log.txt', level=logging.INFO)

        # Optimizer
        d_optim = tf.train.AdamOptimizer(config.g_learning_rate, beta1=config.beta) \
            .minimize(self.d_loss, var_list=self.d_vars)
        g_optim = tf.train.AdamOptimizer(config.d_learning_rate, beta1=config.beta) \
            .minimize(self.g_loss, var_list=self.g_vars)

        # Initialize global var.
        try:
            tf.global_variables_initializer().run()
        except:
            tf.initialize_all_variables().run()

        # Merge summary
        self.g_sum = merge_summary([self.d__sum, self.g_sum, self.g_loss_real_sum, self.g_loss_rec_sum,
                                    self.g_loss_sum, self.d_loss_fake_sum])
        self.d_sum = merge_summary([self.d_sum, self.d_loss_sum, self.d_loss_real_sum])
        self.writer = SummaryWriter("./logs", self.sess.graph)

        # Load data sample
        sample_files = self.data[0:self.sample_num]
        sample = [get_image(sample_file,
                    input_height=self.input_height,
                    input_width=self.input_width,
                    resize_height=self.output_height,
                    resize_width=self.output_width,
                    crop=self.crop,
                    grayscale=self.grayscale) for sample_file in sample_files]
        if self.grayscale:
            sample_inputs = np.array(sample).astype(np.float32)[:, :, :, None]
        else:
            sample_inputs = np.array(sample).astype(np.float32)

        # Load checkpoint
        counter = 1
        could_load, checkpoint_counter = self.load(self.checkpoint_dir)
        if could_load:
            counter = checkpoint_counter
            print(" [*] Load SUCCESS")
        else:
            print(" [!] Load failed...")

        start_run_time = time.time()
        total_batch_time = 0
        best_auc = 0.0
        sample_save = True

        for epoch in xrange(config.epoch):

            self.data = glob(os.path.join(config.data_dir, config.dataset, config.train_dir, self.input_fname_pattern))

            np.random.shuffle(self.data)

            batch_idxs = min(len(self.data), config.train_size) // config.batch_size

            for idx in xrange(0, batch_idxs):

                start_batch_time = time.time()

                batch_files = self.data[idx * config.batch_size:(idx + 1) * config.batch_size]
                batch = [get_image(batch_file,
                        input_height=self.input_height,
                        input_width=self.input_width,
                        resize_height=self.output_height,
                        resize_width=self.output_width,
                        crop=self.crop,
                        grayscale=self.grayscale) for batch_file in batch_files]
                if self.grayscale:
                    batch_images = np.array(batch).astype(np.float32)[:, :, :, None]
                else:
                    batch_images = np.array(batch).astype(np.float32)

                # Update D network
                _, summary_str = self.sess.run([d_optim, self.d_sum], feed_dict={self.inputs: batch_images})

                self.writer.add_summary(summary_str, counter)

                # Update G network
                _, summary_str = self.sess.run([g_optim, self.g_sum], feed_dict={self.inputs: batch_images})

                self.writer.add_summary(summary_str, counter)

                errD = self.d_loss.eval({self.inputs: batch_images})

                errG = self.g_loss.eval({self.inputs: batch_images})

                counter += 1
                end_batch_time = time.time()
                time_batch =  (end_batch_time - start_batch_time)*1000
                total_batch_time += time_batch

                hours, rem = divmod(end_batch_time - start_run_time, 3600)
                minutes, seconds = divmod(rem, 60)

                print(
                    "Epoch: [%2d/%2d] [%4d/%4d] time: %02d:%02d:%02d , G (Reconstructor) loss: %.8f, "
                    "D (Representation matching) loss: %.8f , Avg Run Time (ms/batch): %.8f ,(it/s): %.8f"
                    % (epoch+1, config.epoch, idx+1, batch_idxs, int(hours), int(minutes),
                       seconds, errG, errD, total_batch_time/counter, counter/(total_batch_time/1000)))

                if np.mod(counter, batch_idxs*2)== 1:
                    try:
                        samples, d_loss, g_loss = self.sess.run([self.sampler, self.d_loss, self.g_loss],
                                                                feed_dict={self.inputs: sample_inputs})

                        save_images(samples, image_manifold_size(samples.shape[0]),
                                    './{}/train_{:02d}_{:04d}.png'.format(config.sample_dir, epoch, idx))
                        if sample_save:
                            sample_save = False
                            save_images(sample_inputs, image_manifold_size(sample_inputs.shape[0]),
                                        './{}/train_sample_inputs_{:02d}_{:04d}.png'.format(config.sample_dir, epoch, idx))
                        print("[Sample] D (Representation matching) loss: %.8f, G (Reconstructor) loss: %.8f" % (d_loss, g_loss))

                    except:
                        print("one pic error!...")

            # test
            _,y_true, y_score = self.test(config)

            roc_auc = get_auc(y_true, y_score)

            if best_auc < roc_auc:
                best_auc = roc_auc
                self.save(config.checkpoint_dir, epoch)

            logging.info("Epoch: [%2d/%2d] , AUC: %.8f, Best AUC: %.8f,  Avg run time: %.8f" % (epoch+1, config.epoch,
                         roc_auc, best_auc,  total_batch_time/counter))
            print("Epoch: [%2d/%2d], AUC: %.8f, Best AUC: %.8f,  Avg run time: %.8f" % (epoch+1, config.epoch,
                        roc_auc, best_auc,  total_batch_time/counter))