Example #1
File: main.py Project: yzspku/BiCNN
def evaluate(logger, models, train_loaders, validation_loaders, test_loaders):
    logger.info('')
    logger.info(
        'evaluating model on multiple sets combining both global-level and object-level models\' predictions'
    )
    logger.info('predict weights: ' + str(predict_weights[0]) + ', ' +
                str(predict_weights[1]))
    begin_time = time.time()

    helper.evaluate(logger=logger,
                    models=models,
                    data_loaders=train_loaders,
                    set_name='train set',
                    predict_weights=predict_weights)
    helper.evaluate(logger=logger,
                    models=models,
                    data_loaders=validation_loaders,
                    set_name='validation set',
                    predict_weights=predict_weights)
    helper.evaluate(logger=logger,
                    models=models,
                    data_loaders=test_loaders,
                    set_name='test set',
                    predict_weights=predict_weights)

    logger.info('evaluation has been done! total time: %.4fs' %
                (time.time() - begin_time))
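Note: helper.evaluate is defined in the project's helper module and is not shown in this snippet. A minimal sketch of a weighted two-model evaluation with the same call signature (the internals below are illustrative assumptions, not BiCNN's actual code; the per-model loaders are assumed to yield aligned (inputs, targets) batches):

import torch

def evaluate(logger, models, data_loaders, set_name, predict_weights):
    # Put every model in eval mode before scoring.
    for model in models:
        model.eval()
    correct, total = 0, 0
    with torch.no_grad():
        # Iterate the per-model loaders in lockstep; batches are assumed aligned.
        for batches in zip(*data_loaders):
            targets = batches[0][1]
            # Weighted sum of each model's softmax scores.
            combined = sum(w * torch.softmax(m(x), dim=1)
                           for w, m, (x, _) in zip(predict_weights, models, batches))
            correct += (combined.argmax(dim=1) == targets).sum().item()
            total += targets.size(0)
    logger.info('%s accuracy: %.4f' % (set_name, correct / total))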
Example #2
def test():
    print("Loading test data...")
    start_time = time.time()
    x_test, y_test = process_file(test_dir, word_to_id, cat_to_id,
                                  config.seq_length)

    session = tf.Session()
    session.run(tf.global_variables_initializer())
    saver = tf.train.Saver()
    saver.restore(sess=session, save_path=save_path)  # restore the saved model

    print('Testing...')
    loss_test, acc_test = evaluate(session, x_test, y_test)
    msg = 'Test Loss: {0:>6.2}, Test Acc: {1:>7.2%}'
    print(msg.format(loss_test, acc_test))

    batch_size = 128
    data_len = len(x_test)
    num_batch = int((data_len - 1) / batch_size) + 1

    y_test_cls = np.argmax(y_test, 1)
    y_pred_cls = np.zeros(shape=len(x_test), dtype=np.int32)  # holds the predicted class ids
    for i in range(num_batch):  # process the test set batch by batch
        start_id = i * batch_size
        end_id = min((i + 1) * batch_size, data_len)
        feed_dict = {
            model.input_x: x_test[start_id:end_id],
            model.keep_prob: 1.0
        }
        y_pred_cls[start_id:end_id] = session.run(model.y_pred_cls,
                                                  feed_dict=feed_dict)

    # Evaluation
    print("Precision, Recall and F1-Score...")
    print(
        metrics.classification_report(y_test_cls,
                                      y_pred_cls,
                                      target_names=categories))

    # Confusion matrix
    print("Confusion Matrix...")
    cm = metrics.confusion_matrix(y_test_cls, y_pred_cls)
    print(cm)

    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    session.close()
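Note: the evaluate called above is defined elsewhere in this file. A plausible minimal version, assuming the module-level model (with a model.input_y placeholder analogous to model.input_x, plus the loss/acc tensors used in train() below) and a batch_iter helper, averages loss and accuracy over batches:

def evaluate(sess, x_, y_):
    """Average loss and accuracy over the full dataset, batch by batch."""
    data_len = len(x_)
    total_loss, total_acc = 0.0, 0.0
    for x_batch, y_batch in batch_iter(x_, y_, 128):
        batch_len = len(x_batch)
        feed_dict = {model.input_x: x_batch, model.input_y: y_batch,
                     model.keep_prob: 1.0}
        loss, acc = sess.run([model.loss, model.acc], feed_dict=feed_dict)
        total_loss += loss * batch_len
        total_acc += acc * batch_len
    return total_loss / data_len, total_acc / data_len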
Example #3
def binary_prediction(train, test, train_labels, test_labels, model):
    """
    Runs the requested model for binary classification and evaluates its predictions.

    :param train (pandas data frame): The training set of the data containing the features.
    :param test (pandas data frame): The testing set of the data containing the features.
    :param train_labels (pandas data frame): The training set of the data containing the labels.
    :param test_labels (pandas data frame): The testing set of the data containing the labels.
    :param model (string): The requested model to run.

    :return: string containing the evaluation metrics values.

    """

    preds, evaluation = [], ''  # defaults so an unknown model name cannot raise a NameError
    t0 = time.time()

    if model == 'SVM':
        preds = predict_with_one_class_SVM(train, test, train_labels,
                                           test_labels)

    elif model == 'GNB':
        preds = predict_with_GNB(train, test, train_labels, test_labels)

    elif model == 'XGB':
        train_labels = change_minority_class_label_to_zero(train_labels)
        test_labels = change_minority_class_label_to_zero(test_labels)
        preds = predict_with_XGB(train, test, train_labels, test_labels)

    t1 = time.time()

    if len(preds) > 0:
        class_metrics = evaluate(test_labels, preds, n_decimals)
        evaluation = print_evaluation(class_metrics)

    runtime = str(round((t1 - t0) / secs, n_decimals))

    return evaluation + runtime
Example #4
def multi_prediction(train, test, train_labels, test_labels, model):
    """
    Runs the requested model for multi-label classification and evaluates its predictions.

    :param train (pandas data frame): The training set of the data containing the features.
    :param test (pandas data frame): The testing set of the data containing the features.
    :param train_labels (pandas data frame): The training set of the data containing the labels.
    :param test_labels (pandas data frame): The testing set of the data containing the labels.
    :param model (string): The requested model to run.

    :return: string containing the evaluation metrics values.

    """

    preds, evaluation = [], ''  # defaults so an unknown model name cannot raise a NameError
    t0 = time.time()
    if model == 'SVM':
        preds = predict_with_one_class_SVM_multi(train, test, train_labels,
                                                 test_labels)

    elif model == 'GNB':
        preds = predict_with_GNB_multi(train, test, train_labels, test_labels)

    elif model == 'XGB':
        train_labels = create_one_col_for_labels(train_labels)
        test_labels = create_one_col_for_labels(test_labels)
        preds = predict_with_XGB_multi(train, test, train_labels, test_labels)

    t1 = time.time()

    if len(preds) > 0:
        class_metrics = evaluate(test_labels, preds, n_decimals, False)
        evaluation = print_evaluation(class_metrics, False)

    runtime = str(round((t1 - t0) / secs, n_decimals))

    return evaluation + runtime
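Note: evaluate and print_evaluation are project helpers not shown in these two snippets. A rough sketch of a metrics helper consistent with the calls evaluate(test_labels, preds, n_decimals) and evaluate(test_labels, preds, n_decimals, False) — the scikit-learn functions are real, everything else is an illustrative guess:

from sklearn import metrics

def evaluate(test_labels, preds, n_decimals, binary=True):
    # 'binary' averaging for the two-class task, 'macro' for the multi-label one.
    average = 'binary' if binary else 'macro'
    return {
        'accuracy': round(metrics.accuracy_score(test_labels, preds), n_decimals),
        'precision': round(metrics.precision_score(test_labels, preds, average=average), n_decimals),
        'recall': round(metrics.recall_score(test_labels, preds, average=average), n_decimals),
        'f1': round(metrics.f1_score(test_labels, preds, average=average), n_decimals),
    }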
Example #5
addLabel(models_graph)

plt.savefig('Bar_Charts_of_Models_and_their_Accuracy.png', dpi=300, transparent=True)

plt.show()


# ### Evaluating Models

# In[8]:


# Model Evaluation
for model_name, model in models:
    helper.evaluate(X_test, y_test, model_name, model)


# ### Cross Validating Models

# #### Cross Validating with a single metric

# In[9]:


# Splitting data into 10 folds
cv_kfold = model_selection.KFold(n_splits=10, shuffle=True, random_state=23)
scorer = "r2"

model_names = []
cv_mean_scores = []
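Note: the snippet cuts off before the scoring loop. A typical continuation with these names (X_train and y_train are assumed from earlier cells) would be:

for model_name, model in models:
    scores = model_selection.cross_val_score(model, X_train, y_train,
                                             cv=cv_kfold, scoring=scorer)
    model_names.append(model_name)
    cv_mean_scores.append(scores.mean())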
Example #6
def main(_):
    # initialize from external files
    WordIndex = Word_Index(FLAGS.word2id_path)
    TarIndex = Tar_Tag_Index()
    OpiIndex = Opi_Tag_Index()
    FLAGS.num_word = len(WordIndex.word2idex)
    FLAGS.num_class = TarIndex.num_class

    df_test = pd.read_csv(FLAGS.test_file, sep='#', skip_blank_lines=False, dtype={'len': np.int32})
    df_train = pd.read_csv(FLAGS.train_file, sep='#', skip_blank_lines=False, dtype={'len': np.int32})
    df_train = df_train.iloc[np.random.permutation(len(df_train))].reset_index()
    eval_size = int(len(df_train) * FLAGS.dev_rate)
    df_eval = df_train.iloc[-eval_size:]
    df_train = df_train.iloc[:-eval_size]
    print('train size: ' + str(len(df_train)))
    train_data_itor = DataItor(df_train)
    eval_data_itor = DataItor(df_eval)
    test_data_itor = DataItor(df_test)
    FLAGS.check_every_point = int(train_data_itor.size / FLAGS.batch_size)
    word2id, id2word = helper.loadMap(FLAGS.word2id_path)
    if os.path.exists(FLAGS.pretrain_file):
        FLAGS.pretrain_emb = initial_embedding_yelp_bin(word2id)
    else:
        FLAGS.pretrain_emb = None

    myconfig = tf.ConfigProto(allow_soft_placement = True)
    with tf.Session(config=myconfig) as sess:
        model = BILSTM(FLAGS)
        sess.run(tf.global_variables_initializer())
        w_xs_eval, y_tuple_eval, couple_eval, lens_eval = eval_data_itor.next_all(FLAGS.use_couple)
        w_xs_test, y_tuple_test, couple_test, lens_test = test_data_itor.next_all_no_padding(FLAGS.use_couple)
        print('eval data size %d' % len(w_xs_eval))



        # _, id2label = helper.loadMap(FLAGS.label2id_path)
        saver = tf.train.Saver(max_to_keep=2)
        previous_best_valid_f1_score = 0
        previous_best_epoch = -1
        bad_count = 0
        heap_target, heap_opword = [], []
        while train_data_itor.epoch < FLAGS.num_epochs:
            x_train_batch, y_train_batch, couple_train_batch = train_data_itor.next_batch(FLAGS.batch_size, FLAGS.use_couple)
            train_step, train_loss = model.train_model_union(sess, x_train_batch, y_train_batch, couples=couple_train_batch)
            if train_data_itor.batch_time % FLAGS.check_every_point == 0:
                print("current batch_time: %d" % (train_data_itor.batch_time))
                opword_y_eval_pred, target_y_eval_pred, eval_loss, _, _ = model.inference_for_cpu(sess, w_xs_eval, y_tuple_eval, couple_eval)
                # print('every loss:%f,%f' % (ent_loss, opi_loss))
                precison, recall, target_f1_eval = helper.evaluate(w_xs_eval, y_tuple_eval[0],
                                                                   target_y_eval_pred, id2word=id2word, seq_lens=lens_eval,
                                                                   label_type='target')
                print('evaluation on eval data, target_eval_loss:%.3f, precision:%.3f, recall:%.3f, fscore:%.3f' % (eval_loss, precison, recall, target_f1_eval))

                opword_y_test_pred, target_y_test_pred, test_loss, all_ent_att_scores, all_opi_att_scores = model.inference_for_cpu(sess, w_xs_test, y_tuple_test, couple_test)
                precison1, recall1, target_f1_test = helper.evaluate(w_xs_test, y_tuple_test[0],
                                                                     target_y_test_pred, id2word=id2word, seq_lens=lens_test,
                                                                     label_type='target')
                print('evaluation on test data, target_eval_loss:%.3f, precision:%.3f, recall:%.3f, fscore:%.3f' % (test_loss, precison1, recall1, target_f1_test))

                opword_precison, opword_recall, opword_f1_eval = helper.evaluate(w_xs_eval, y_tuple_eval[1],
                                                                                 opword_y_eval_pred, id2word=id2word, seq_lens=lens_eval,
                                                                                 label_type='opword')
                print('evaluation on eval data, opword_eval_loss:%.3f, precision:%.3f, recall:%.3f, fscore:%.3f' % (eval_loss, opword_precison, opword_recall, opword_f1_eval))
                # opword_y_test_pred, opword_test_loss = model.decode_opinion(sess, test_datas, y_opinions_test)
                opword_precison, opword_recall, opword_f1_test = helper.evaluate(w_xs_test, y_tuple_test[1], opword_y_test_pred,
                                                                                 id2word=id2word, seq_lens=lens_test,
                                                                                 label_type='opword')
                print('evaluation on test data, opword_eval_loss:%.3f, precision:%.3f, recall:%.3f, fscore:%.3f' % (test_loss, opword_precison, opword_recall, opword_f1_test))

                if len(heap_target) < 5:
                    heapq.heappush(heap_target, (target_f1_eval, train_data_itor.epoch, target_f1_test,opword_f1_test))
                else:
                    if target_f1_eval > heap_target[0][0]:
                        _, delete_file_epoch,_,_ = heapq.heappop(heap_target)
                        heapq.heappush(heap_target, (target_f1_eval, train_data_itor.epoch, target_f1_test,opword_f1_test))

                if len(heap_opword) < 5:
                    heapq.heappush(heap_opword,(opword_f1_eval, train_data_itor.epoch, opword_f1_test))
                else:
                    if opword_f1_eval > heap_opword[0][0]:
                        _, delete_file_epoch, _ = heapq.heappop(heap_opword)
                        heapq.heappush(heap_opword, (opword_f1_eval, train_data_itor.epoch, opword_f1_test))
                # early stop
                if target_f1_eval > previous_best_valid_f1_score:
                    previous_best_valid_f1_score = target_f1_eval
                    bad_count = 0
                    store_weights(all_ent_att_scores)

                else:
                    bad_count += 1

                if bad_count >= FLAGS.patients:
                    print('early stop!')
                    break
        print('Train Finished!!')
        # writer = codecs.open(exp_paths[task_id], 'a', 'utf-8')

        # writer.close()
        show_result(heap_target)
        # show_result(heap_opword)


    pass
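Note: helper.evaluate here scores sequence-labeling output span by span. A minimal span-level precision/recall/F1 sketch over BIO-style integer tags (the tag scheme below — 1 = B, 2 = I, 0 = O — is an assumption; the project's real helper also uses id2word and label_type):

def span_f1(y_true, y_pred, seq_lens):
    """Exact-span P/R/F1 for BIO tags encoded as 1 = B, 2 = I, 0 = O."""
    def spans(tags, length):
        out, start = set(), None
        for i in range(length):
            if tags[i] == 1:                          # B opens a new span
                if start is not None:
                    out.add((start, i))
                start = i
            elif tags[i] != 2 and start is not None:  # anything but I closes it
                out.add((start, i))
                start = None
        if start is not None:
            out.add((start, length))
        return out

    tp = fp = fn = 0
    for t, p, n in zip(y_true, y_pred, seq_lens):
        gold, pred = spans(t, n), spans(p, n)
        tp += len(gold & pred)
        fp += len(pred - gold)
        fn += len(gold - pred)
    precision = tp / (tp + fp) if tp + fp else 0.0
    recall = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * precision * recall / (precision + recall) if precision + recall else 0.0
    return precision, recall, f1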
Example #7
gt_csv_path = 'crater_data/gt/gt_tile1_24.csv'

path1 = 'results/cnn/evaluations/' + removal_method1 + '/west_train_west_test_1_24_cnn_noduplicates.csv'
path2 = 'results/cnn/evaluations/' + removal_method2 + '/west_train_west_test_1_24_cnn_noduplicates.csv'

save_path = 'results/cnn/evaluations/NMS_BIRCH/west_train_west_test_1_24_cnn'
testset_name = 'tile1_24'

# the image for drawing rectangles
img_path = os.path.join('crater_data', 'images', testset_name + '.pgm')
gt_img = cv.imread(img_path)


gt = pd.read_csv(gt_csv_path, header=None)

no_dup_data1 = pd.read_csv(path1, header=None)
no_dup_data2 = pd.read_csv(path2, header=None)

start_time = time.time()

# compare the results of two duplicate removal methods
#evaluate_cmp(no_dup_data1, no_dup_data2, gt, gt_img, 64, True, save_path, param)
evaluate(gt, gt, gt_img, 64, True, save_path, param)
#img = draw_craters_rectangles(img_path, merge, show_probs=False)
#img = draw_craters_circles(img_path, merge, show_probs=False)
#cv.imwrite("%s.jpg" % (csv_path.split('.')[0]), img, [int(cv.IMWRITE_JPEG_QUALITY), 100])


end_time = time.time()
time_dif = end_time - start_time
print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
Example #8
    resize_size=224, batch_size=32, object_boxes_dict=bounding_boxes)
logger.info('loading dataset costs %.4fs' % (time.time() - begin_time))

logger.info('loading models')

begin_time = time.time()
model_glb_name = 'resnet152'
model_glb = helper.get_model_by_name(model_glb_name, pretrained=False)
helper.replace_model_fc(model_glb_name, model_glb)
model_glb.load_state_dict(torch.load(model_glb_path))

model_obj_name = 'densenet161'
model_obj = helper.get_model_by_name(model_obj_name, pretrained=False)
helper.replace_model_fc(model_obj_name, model_obj)
model_obj.load_state_dict(torch.load(model_obj_path))
logger.info('loading models costs %.4fs' % (time.time() - begin_time))

models = [model_glb, model_obj]
validation_loaders = [valid_loader_glb, valid_loader_obj]
test_loaders = [test_loader_glb, test_loader_obj]

helper.evaluate(logger=logger,
                models=models,
                data_loaders=validation_loaders,
                set_name='validation set',
                predict_weights=predict_weights)
helper.evaluate(logger=logger,
                models=models,
                data_loaders=test_loaders,
                set_name='test set',
                predict_weights=predict_weights)
Example #9
def train():
    # Configure TensorBoard; each run's results go to a folder named with the current date and time.
    print("Configuring TensorBoard and Saver...")
    tensorboard_dir = 'tensorboard/textcnn' + '/' + time.strftime(
        '%Y%m%d%H%M', time.localtime(time.time()))
    if not os.path.exists(tensorboard_dir):
        os.makedirs(tensorboard_dir)

    tf.summary.scalar("loss", model.loss)
    tf.summary.scalar("accuracy", model.acc)
    merged_summary = tf.summary.merge_all()
    writer = tf.summary.FileWriter(tensorboard_dir)

    # Load the training and validation sets
    print("Loading training and validation data...")
    start_time = time.time()
    x_train, y_train, x_val, y_val = load_data(temp_dir, train_dir, val_dir,
                                               word_to_id, cat_to_id,
                                               config.seq_length)
    time_dif = get_time_dif(start_time)
    print("Time usage:", time_dif)

    total_batch = tf.Variable(0, trainable=False)  # total batch counter (non-trainable)

    # Create the session
    session = tf.Session()
    # Load saved weights
    saver = load_model(session, save_dir)
    # Write the graph to TensorBoard
    writer.add_graph(session.graph)

    print('Training and evaluating...')
    start_time = time.time()
    best_acc_val = 0.0  # best validation accuracy so far
    last_improved = session.run(total_batch)  # batch index of the last improvement
    require_improvement = 5000  # stop training early after 5000 batches without improvement

    flag = False
    for epoch in range(config.num_epochs):
        print('Epoch:', epoch + 1)
        batch_train = batch_iter(x_train, y_train, config.batch_size)

        for x_batch, y_batch in batch_train:
            feed_dict = feed_data(model, x_batch, y_batch,
                                  config.dropout_keep_prob)

            if session.run(total_batch) % config.save_per_batch == 0:
                # write training summaries to TensorBoard every save_per_batch batches
                s = session.run(merged_summary, feed_dict=feed_dict)
                writer.add_summary(s, session.run(total_batch))

            if session.run(total_batch) % config.print_per_batch == 0:
                # report training and validation performance every print_per_batch batches
                loss_train, F1_train, _, _ = evaluate(session, model, x_train,
                                                      y_train)
                loss_val, F1_val, _, _ = evaluate(session, model, x_val,
                                                  y_val)  # todo

                if F1_val > best_acc_val:
                    # save the best result so far
                    best_acc_val = F1_val
                    last_improved = session.run(total_batch)
                    saver.save(sess=session, save_path=save_path)
                    improved_str = '*'
                else:
                    improved_str = ''

                time_dif = get_time_dif(start_time)
                msg = 'Iter: {0:>6}, Train Loss: {1:>6.2}, Train F1: {2:>7.2%},' \
                      + ' Val Loss: {3:>6.2}, Val F1: {4:>7.2%}, Time: {5} {6}'
                print(
                    msg.format(session.run(total_batch), loss_train, F1_train,
                               loss_val, F1_val, time_dif, improved_str))

            session.run(model.optim, feed_dict=feed_dict)  # run one optimization step
            session.run(tf.assign(total_batch, total_batch + 1))  # tf.assign lets the saver record total_batch across checkpoints

            if session.run(total_batch) - last_improved > require_improvement:
                # validation accuracy has not improved for a long time; stop training early
                print("No optimization for a long time, auto-stopping...")
                flag = True
                break  # exit the batch loop
        if flag:  # propagate the early stop to the epoch loop
            break
    session.close()
Example #10
    today_lst = [test_dct[stock][d] for stock in stock_lst]
    total_value = np.array(shares).dot(np.array(today_lst))
    print("Test day {}, cash is {:.2f}$, portfolio value is {:.2f}$".format(
        d, cash, total_value + cash))
    # print(shares)
    print(actions)
    res.append(total_value + cash)

np.save('./data/DQNAgent_vision.npy', np.array(res))
res = np.load('./data/DQNAgent_vision.npy', allow_pickle=True)

agent = np.load('./data/DQNAgent.npy', allow_pickle=True)
agent = agent[25:]
benchmark = np.array(benchmark)[1:]
benchmark = benchmark[25:]
rate_avg, sharpe = evaluate(res)
res = res[25:]
rate2 = (res[-1] - res[0]) / res[0]
rate3 = (benchmark[-1] - benchmark[0]) / benchmark[0]
print(
    "total return {}, rate of return avg {}, sharpe ratio {}, benchmark return {}"
    .format(rate2, rate_avg, sharpe, rate3))
x = [i + 1 for i in range(len(res))]
sns.lineplot(x, benchmark / benchmark[0], label="SP500")
sns.lineplot(x, res / res[0], label="Agent_Vision")
sns.lineplot(x, agent / agent[0], label="Agent")
plt.xlabel("Days Testing")
plt.ylabel("Value (standardized)")
plt.legend()
plt.show()
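Note: evaluate(res) above returns an average rate of return and a Sharpe ratio. A minimal sketch (risk-free rate assumed zero, 252 trading days per year; illustrative, not the project's exact formula):

import numpy as np

def evaluate(values, periods_per_year=252):
    """Mean per-period return and annualized Sharpe ratio of a value series."""
    values = np.asarray(values, dtype=float)
    returns = np.diff(values) / values[:-1]  # simple per-period returns
    rate_avg = returns.mean()
    sharpe = np.sqrt(periods_per_year) * rate_avg / returns.std()
    return rate_avg, sharpe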
Example #11
            output = model(Variable(input_tensor[seq_n]),
                           Variable(cap_tensor[seq_n]),
                           Variable(suf_tensor[seq_n]))
            acc_output[:, seq_n, :] = output
        for batch_i, batch_out in enumerate(acc_output):
            best_score, best_path = model.viterbi_decode(batch_out)
            for i, tag_i in enumerate(best_path):
                word = words_input[i][args.window_size][batch_i]
                real_tag = dataset.index_to_tag(target_tensor[batch_i][i])
                predicted_tag = dataset.index_to_tag(tag_i)
                tagged_file.write("{} {} {}\n".format(word, real_tag,
                                                      predicted_tag))

    tagged_file.close()
    acc, prec, rec, f1 = evaluate("tmp/dev_colo.annotated")
    if f1 > dev_best_f1:
        dev_best_f1 = f1
        dev_best_epoch = epoch
        # run test
        tagged_file = open("tmp/test_colo.annotated", "w")
        for i, (words_input, tags) in enumerate(test_loader):
            input_tensor, target_tensor, cap_tensor, suf_tensor = model.get_train_tensors(
                words_input,
                tags,
                word_to_index,
                dataset.tag_to_index,
                suffix2id=dataset.suffix_to_index)
            acc_output = Variable(
                v_type(1, len(input_tensor), dataset.get_num_tags()))
            # loop sequence
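Note: evaluate at the top of this snippet reads the "word gold predicted" file written by the tagging loop. A minimal token-level reader returning (acc, prec, rec, f1), assuming 'O' marks the outside tag (illustrative; the project's scorer may work at the span level instead):

def evaluate(path):
    """Token accuracy and micro P/R/F1 from a 'word gold pred' annotated file."""
    correct = total = tp = fp = fn = 0
    with open(path) as f:
        for line in f:
            parts = line.split()
            if len(parts) != 3:
                continue  # skip blank or separator lines
            _, gold, pred = parts
            total += 1
            correct += gold == pred
            if pred != 'O':
                tp += pred == gold
                fp += pred != gold
            if gold != 'O' and pred != gold:
                fn += 1
    acc = correct / total if total else 0.0
    prec = tp / (tp + fp) if tp + fp else 0.0
    rec = tp / (tp + fn) if tp + fn else 0.0
    f1 = 2 * prec * rec / (prec + rec) if prec + rec else 0.0
    return acc, prec, rec, f1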
Example #12
# import random
import numpy as np
import networkx as nx
from matplotlib import pyplot as plt
from helper import getNormalizedMatrix_np
from helper import evaluate

# In[L2]

allSC = np.load('pred0128/raw_sc_test_RESTING_L2.npy')
trueFC = np.load('pred0128/raw_fc_test_RESTING_L2.npy')
predFC = np.load(
    'pred0128/prediction_SC_RESTINGSTATE_partialcorrelationFC_L2_set3_lm1000.0.npy'
)[:, :, :, 0]
# predFC = np.load('pred0128/raw_prediction_SC_RESTINGSTATE_partialcorrelationFC_L2_set3_lm1000.0.npy')[:,:,:,0]
pearson, diff = evaluate(predFC, trueFC, normalize=True)
print(np.mean(pearson), np.mean(diff))

# bestIdx = np.argmax(pearson)

minMSE = 100
for idx in range(allSC.shape[0]):
    sc = getNormalizedMatrix_np(allSC[idx])
    tfc = getNormalizedMatrix_np(trueFC[idx])
    pfc = getNormalizedMatrix_np(predFC[idx])
    # mse = np.linalg.norm(tfc-pfc)
    # # mse = np.sum(np.abs(tfc-pfc))
    # print(idx,mse)
    # if mse<minMSE:
    #     sc_best = sc
    #     tfc_best = tfc
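Note: helper.evaluate is imported at the top of this snippet but not shown. A plausible sketch matching the call evaluate(predFC, trueFC, normalize=True): per-subject Pearson correlation and mean absolute difference between predicted and true connectivity matrices (illustrative; getNormalizedMatrix_np comes from the same helper module):

import numpy as np

def evaluate(pred, true, normalize=False):
    """Per-sample Pearson r and mean absolute difference between matrix stacks."""
    pearson, diff = [], []
    for p, t in zip(pred, true):
        if normalize:
            p = getNormalizedMatrix_np(p)
            t = getNormalizedMatrix_np(t)
        pearson.append(np.corrcoef(p.ravel(), t.ravel())[0, 1])
        diff.append(np.mean(np.abs(p - t)))
    return np.array(pearson), np.array(diff)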
Example #13
def main(_):
    # initialize from external files
    WordIndex = Word_Index(FLAGS.word2id_path)
    Index = [Tar_Tag_Index(), Opi_Tag_Index()]
    object_tag_Index = Index[FLAGS.object_id]

    FLAGS.dev_rate = 0.1
    FLAGS.num_word = len(WordIndex.word2idex)
    FLAGS.num_class = object_tag_Index.num_class

    df_test = pd.read_csv(FLAGS.test_file,
                          sep='#',
                          skip_blank_lines=False,
                          dtype={'len': np.int32})
    df_train = pd.read_csv(FLAGS.train_file,
                           sep='#',
                           skip_blank_lines=False,
                           dtype={'len': np.int32})
    random_index = np.random.permutation(len(df_train))
    df_train = df_train.iloc[random_index].reset_index()
    print(random_index)
    for i in range(len(random_index)):
        print(random_index[i])

    eval_size = int(len(df_train) * FLAGS.dev_rate)
    df_eval = df_train.iloc[-eval_size:]

    df_train = df_train.iloc[:-eval_size]
    print('train size: ' + str(len(df_train)))
    train_data_itor = DataItor(df_train)
    eval_data_itor = DataItor(df_eval)
    test_data_itor = DataItor(df_test)
    FLAGS.check_every_point = int(train_data_itor.size / FLAGS.batch_size)
    word2id, id2word = helper.loadMap(FLAGS.word2id_path)
    if os.path.exists(FLAGS.pretrain_file):
        FLAGS.pretrain_emb = initial_embedding_yelp_bin(word2id)
    else:
        FLAGS.pretrain_emb = None

    myconfig = tf.ConfigProto(allow_soft_placement=True)
    with tf.Session(config=myconfig) as sess:
        model = BILSTM(FLAGS)
        sess.run(tf.global_variables_initializer())
        w_xs_eval, y_tuple_eval, _, lens_eval = eval_data_itor.next_all(False)
        w_xs_test, y_tuple_test, _, lens_test = test_data_itor.next_all_no_padding(
            False)
        print('eval data size %d' % len(w_xs_eval))

        # _, id2label = helper.loadMap(FLAGS.label2id_path)
        saver = tf.train.Saver(max_to_keep=2)
        previous_best_valid_f1_score = 0
        previous_best_epoch = -1
        bad_count = 0
        heap_target, heap_opword = [], []
        while train_data_itor.epoch < FLAGS.num_epochs:
            x_train_batch, y_train_batch, _ = train_data_itor.next_batch(
                FLAGS.batch_size)
            train_step, train_loss = model.train_model(
                sess, x_train_batch, y_train_batch[FLAGS.object_id])
            if train_data_itor.batch_time % FLAGS.check_every_point == 0:
                print("current batch_time: %d" % (train_data_itor.batch_time))
                y_eval_pred, eval_loss = model.inference_for_single(
                    sess, w_xs_eval, y_tuple_eval[FLAGS.object_id])
                precison, recall, target_f1_eval = helper.evaluate(
                    w_xs_eval,
                    y_tuple_eval[FLAGS.object_id],
                    y_eval_pred,
                    id2word=id2word,
                    seq_lens=lens_eval,
                    label_type=types[FLAGS.object_id])

                print(
                    'evaluation on eval data, target_eval_loss:%.3f, precision:%.3f, recall:%.3f, fscore:%.3f'
                    % (eval_loss, precison, recall, target_f1_eval))

                y_test_pred, test_loss = model.inference_for_single(
                    sess, w_xs_test, y_tuple_test[FLAGS.object_id])
                precison1, recall1, target_f1_test = helper.evaluate(
                    w_xs_test,
                    y_tuple_test[FLAGS.object_id],
                    y_test_pred,
                    id2word=id2word,
                    seq_lens=lens_test,
                    label_type=types[FLAGS.object_id])
                print(
                    'evaluation on test data, target_eval_loss:%.3f, precision:%.3f, recall:%.3f, fscore:%.3f'
                    % (test_loss, precison1, recall1, target_f1_test))

                if len(heap_target) < 5:
                    heapq.heappush(heap_target,
                                   (target_f1_eval, train_data_itor.epoch,
                                    target_f1_test))
                else:
                    if target_f1_eval > heap_target[0][0]:
                        _, delete_file_epoch, _ = heapq.heappop(heap_target)
                        heapq.heappush(heap_target,
                                       (target_f1_eval, train_data_itor.epoch,
                                        target_f1_test))

                # early stop
                if target_f1_eval > previous_best_valid_f1_score:
                    previous_best_valid_f1_score = target_f1_eval
                    bad_count = 0
                else:
                    bad_count += 1

                if bad_count >= FLAGS.patients:
                    print('early stop!')
                    break
        print('Train Finished!!')

        show_result(heap_target)

    pass
Example #14
# In[40]:


model_parameters = pd.DataFrame(history.params)
model_parameters.to_csv("neural_network_parameters.csv", index=True)
model_parameters


# ### Model Evaluation

# In[16]:


model_name = 'Neural Network'
helper.evaluate(X_test, y_test, model_name, neural_network_model)
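Note: helper.evaluate(X_test, y_test, model_name, model) is the same project helper seen in Example #5. A minimal regression-flavored sketch consistent with the "r2" scoring used there (illustrative only; the real helper likely reports more metrics):

from sklearn import metrics

def evaluate(X_test, y_test, model_name, model):
    # Score a fitted regressor on the held-out set.
    y_pred = model.predict(X_test)
    print('%s - R2: %.4f, MAE: %.4f' % (
        model_name,
        metrics.r2_score(y_test, y_pred),
        metrics.mean_absolute_error(y_test, y_pred)))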


# ### Prediction

# In[17]:


y_pred = neural_network_model.predict(X_test)


# In[18]:


model_prediction_results = pd.DataFrame({
    'actual_generation' : list(y_test),
Example #15
# the image for drawing rectangles
img_path = os.path.join('crater_data', 'images', testset_name + '.pgm')
gt_img = cv.imread(img_path)

data = pd.read_csv(csv_path, header=None)
gt = pd.read_csv(gt_csv_path, header=None)

threshold = 0.75

start_time = time.time()

# first pass, remove duplicates for points of same window size
df1 = {}
merge = pd.DataFrame()
for ws in data[2].unique():
    df1[ws] = data[(data[3] > threshold) & (data[2] == ws)]  # keep only detections at or above the confidence threshold
    merge = pd.concat([merge, df1[ws]])

nodup = Banderia_duplicate_removal(merge)

# save the no duplicate csv file
nodup[[0,1,2]].to_csv("%s_noduplicates.csv" % save_path, header=False, index=False)
craters = nodup[[0,1,2]]

# evaluate with gt and draw it on final image.
dr, fr, qr, bf, f_measure, tp, fp, fn  = evaluate(craters, gt, gt_img, 64, True, save_path, param)


end_time = time.time()
time_dif = end_time - start_time
print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))
Example #16
File: train.py Project: victor8733/cule
def worker(gpu, ngpus_per_node, callback, args):
    args.gpu = gpu

    if args.distributed:
        args.seed += args.gpu
        torch.cuda.set_device(args.gpu)

        args.rank = int(os.environ['RANK']) if 'RANK' in os.environ else 0
        if args.multiprocessing_distributed:
            args.rank = args.rank * ngpus_per_node + args.gpu

        torch.distributed.init_process_group(backend='nccl', init_method='tcp://127.0.0.1:8632',
                                             world_size=args.world_size, rank=args.rank)
    else:
        args.rank = 0

    if args.lr_scale:
        scaled_lr = args.lr * math.sqrt((args.num_ales * args.world_size) / 16)
        if args.rank == 0:
            print('Scaled learning rate from {:4.4f} to {:4.4f}'.format(args.lr, scaled_lr))
        args.lr = scaled_lr

    args.use_cuda_env = args.use_cuda_env and torch.cuda.is_available()
    args.no_cuda_train = (not args.no_cuda_train) and torch.cuda.is_available()
    args.verbose = args.verbose and (args.rank == 0)

    np.random.seed(args.seed)
    torch.manual_seed(np.random.randint(1, 10000))
    if args.use_cuda_env or (args.no_cuda_train == False):
        torch.cuda.manual_seed(np.random.randint(1, 10000))

    env_device = torch.device('cuda', args.gpu) if args.use_cuda_env else torch.device('cpu')
    train_device = torch.device('cuda', args.gpu) if (args.no_cuda_train == False) else torch.device('cpu')

    if args.rank == 0:
        if args.output_filename:
            train_csv_file = open(args.output_filename, 'w', newline='')
            train_csv_writer = csv.writer(train_csv_file, delimiter=',')
            train_csv_writer.writerow(['frames','fps','total_time',
                                       'rmean','rmedian','rmin','rmax','rstd',
                                       'lmean','lmedian','lmin','lmax','lstd',
                                       'entropy','value_loss','policy_loss'])

            eval_output_filename = '.'.join([''.join(args.output_filename.split('.')[:-1] + ['_test']), 'csv'])
            eval_csv_file = open(eval_output_filename, 'w', newline='')
            eval_csv_file.write(json.dumps(vars(args)))
            eval_csv_file.write('\n')
            eval_csv_writer = csv.writer(eval_csv_file, delimiter=',')
            eval_csv_writer.writerow(['frames','total_time',
                                       'rmean','rmedian','rmin','rmax','rstd',
                                       'lmean','lmedian','lmin','lmax','lstd'])
        else:
            train_csv_file, train_csv_writer = None, None
            eval_csv_file, eval_csv_writer = None, None

        if args.plot:
            from tensorboardX import SummaryWriter
            current_time = datetime.now().strftime('%b%d_%H-%M-%S')
            log_dir = os.path.join(args.log_dir, current_time + '_' + socket.gethostname())
            writer = SummaryWriter(log_dir=log_dir)
            for k, v in vars(args).items():
                writer.add_text(k, str(v))

        print()
        print('PyTorch  : {}'.format(torch.__version__))
        print('CUDA     : {}'.format(torch.version.cuda))
        print('CUDNN    : {}'.format(torch.backends.cudnn.version()))
        print('APEX     : {}'.format('.'.join([str(i) for i in apex.amp.__version__.VERSION])))
        print()

    if train_device.type == 'cuda':
        print(cuda_device_str(train_device.index), flush=True)

    if args.use_openai:
        train_env = create_vectorize_atari_env(args.env_name, args.seed, args.num_ales,
                                               episode_life=args.episodic_life, clip_rewards=False,
                                               max_frames=args.max_episode_length)
        observation = torch.from_numpy(train_env.reset()).squeeze(1)
    else:
        train_env = AtariEnv(args.env_name, args.num_ales, color_mode='gray', repeat_prob=0.0,
                             device=env_device, rescale=True, episodic_life=args.episodic_life,
                             clip_rewards=False, frameskip=4)
        train_env.train()
        observation = train_env.reset(initial_steps=args.ale_start_steps, verbose=args.verbose).squeeze(-1)

    if args.use_openai_test_env:
        test_env = create_vectorize_atari_env(args.env_name, args.seed, args.evaluation_episodes,
                                              episode_life=False, clip_rewards=False)
        test_env.reset()
    else:
        test_env = AtariEnv(args.env_name, args.evaluation_episodes, color_mode='gray', repeat_prob=0.0,
                            device='cpu', rescale=True, episodic_life=False, clip_rewards=False, frameskip=4)

    model = ActorCritic(args.num_stack, train_env.action_space, normalize=args.normalize, name=args.env_name)
    model = model.to(train_device).train()

    if args.rank == 0:
        print(model)
        args.model_name = model.name()

    if args.use_adam:
        optimizer = optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)
    else:
        optimizer = optim.RMSprop(model.parameters(), lr=args.lr, eps=args.eps, alpha=args.alpha)

    model, optimizer = amp.initialize(model, optimizer,
                                      opt_level=args.opt_level,
                                      loss_scale=args.loss_scale
                                     )

    if args.distributed:
        model = DDP(model, delay_allreduce=True)

    num_frames_per_iter = args.num_ales * args.num_steps
    total_steps = math.ceil(args.t_max / (args.world_size * num_frames_per_iter))

    shape = (args.num_steps + 1, args.num_ales, args.num_stack, *train_env.observation_space.shape[-2:])
    states = torch.zeros(shape, device=train_device, dtype=torch.float32)
    states[0, :, -1] = observation.to(device=train_device, dtype=torch.float32)

    shape = (args.num_steps + 1, args.num_ales)
    values  = torch.zeros(shape, device=train_device, dtype=torch.float32)
    returns = torch.zeros(shape, device=train_device, dtype=torch.float32)

    shape = (args.num_steps, args.num_ales)
    rewards = torch.zeros(shape, device=train_device, dtype=torch.float32)
    masks = torch.zeros(shape, device=train_device, dtype=torch.float32)
    actions = torch.zeros(shape, device=train_device, dtype=torch.long)

    # These variables are used to compute average rewards for all processes.
    episode_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    final_rewards = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    episode_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)
    final_lengths = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)

    if args.use_gae:
        gae = torch.zeros(args.num_ales, device=train_device, dtype=torch.float32)

    maybe_npy = lambda a: a.numpy() if args.use_openai else a

    torch.cuda.synchronize()

    iterator = range(total_steps)
    if args.rank == 0:
        iterator = tqdm(iterator)
        total_time = 0
        evaluation_offset = 0

    for update in iterator:

        T = args.world_size * update * num_frames_per_iter
        if (args.rank == 0) and (T >= evaluation_offset):
            evaluation_offset += args.evaluation_interval
            eval_lengths, eval_rewards = evaluate(args, T, total_time, model, test_env, eval_csv_writer, eval_csv_file)

            if args.plot:
                writer.add_scalar('eval/rewards_mean', eval_rewards.mean().item(), T, walltime=total_time)
                writer.add_scalar('eval/lengths_mean', eval_lengths.mean().item(), T, walltime=total_time)

        start_time = time.time()

        with torch.no_grad():

            for step in range(args.num_steps):
                value, logit = model(states[step])

                # store values
                values[step] = value.squeeze(-1)

                # convert actions to numpy and perform next step
                probs_action = F.softmax(logit, dim=1).multinomial(1).to(env_device)
                observation, reward, done, info = train_env.step(maybe_npy(probs_action))

                if args.use_openai:
                    # convert back to pytorch tensors
                    observation = torch.from_numpy(observation)
                    reward = torch.from_numpy(reward)
                    done = torch.from_numpy(done.astype(np.uint8))
                else:
                    observation = observation.squeeze(-1).unsqueeze(1)

                # move back to training memory
                observation = observation.to(device=train_device)
                reward = reward.to(device=train_device, dtype=torch.float32)
                done = done.to(device=train_device)
                probs_action = probs_action.to(device=train_device, dtype=torch.long)

                not_done = 1.0 - done.float()

                # update rewards and actions
                actions[step].copy_(probs_action.view(-1))
                masks[step].copy_(not_done)
                rewards[step].copy_(reward.sign())

                # update next observations
                states[step + 1, :, :-1].copy_(states[step, :, 1:].clone())
                states[step + 1] *= not_done.view(-1, *[1] * (observation.dim() - 1))
                states[step + 1, :, -1].copy_(observation.view(-1, *states.size()[-2:]))

                # update episodic reward counters
                episode_rewards += reward
                final_rewards[done] = episode_rewards[done]
                episode_rewards *= not_done

                episode_lengths += not_done
                final_lengths[done] = episode_lengths[done]
                episode_lengths *= not_done

            returns[-1] = values[-1] = model(states[-1])[0].data.squeeze(-1)

            if args.use_gae:
                gae.zero_()
                for step in reversed(range(args.num_steps)):
                    delta = rewards[step] + (args.gamma * values[step + 1] * masks[step]) - values[step]
                    gae = delta + (args.gamma * args.tau * masks[step] * gae)
                    returns[step] = gae + values[step]
            else:
                for step in reversed(range(args.num_steps)):
                    returns[step] = rewards[step] + (args.gamma * returns[step + 1] * masks[step])

        value, logit = model(states[:-1].view(-1, *states.size()[-3:]))

        log_probs = F.log_softmax(logit, dim=1)
        probs = F.softmax(logit, dim=1)

        action_log_probs = log_probs.gather(1, actions.view(-1).unsqueeze(-1))
        dist_entropy = -(log_probs * probs).sum(-1).mean()

        advantages = returns[:-1].view(-1).unsqueeze(-1) - value

        value_loss = advantages.pow(2).mean()
        policy_loss = -(advantages.clone().detach() * action_log_probs).mean()

        loss = value_loss * args.value_loss_coef + policy_loss - dist_entropy * args.entropy_coef
        optimizer.zero_grad()
        with amp.scale_loss(loss, optimizer) as scaled_loss:
            scaled_loss.backward()
        torch.nn.utils.clip_grad_norm_(amp.master_params(optimizer), args.max_grad_norm)
        optimizer.step()

        states[0].copy_(states[-1])

        torch.cuda.synchronize()

        if args.rank == 0:
            iter_time = time.time() - start_time
            total_time += iter_time

            if args.plot:
                writer.add_scalar('train/rewards_mean', final_rewards.mean().item(), T, walltime=total_time)
                writer.add_scalar('train/lengths_mean', final_lengths.mean().item(), T, walltime=total_time)
                writer.add_scalar('train/learning_rate', scheduler.get_lr()[0], T, walltime=total_time)
                writer.add_scalar('train/value_loss', value_loss, T, walltime=total_time)
                writer.add_scalar('train/policy_loss', policy_loss, T, walltime=total_time)
                writer.add_scalar('train/entropy', dist_entropy, T, walltime=total_time)

            progress_data = callback(args, model, T, iter_time, final_rewards, final_lengths,
                                     value_loss.item(), policy_loss.item(), dist_entropy.item(),
                                     train_csv_writer, train_csv_file)
            iterator.set_postfix_str(progress_data)

    if args.plot:
        writer.close()

    if args.use_openai:
        train_env.close()
    if args.use_openai_test_env:
        test_env.close()
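Note: the evaluate called inside the update loop runs the current policy on test_env for args.evaluation_episodes episodes and logs the results. A much-simplified, single-environment sketch of that idea (cule's real implementation is vectorized over episodes and also writes the eval CSV row):

import torch
import torch.nn.functional as F

def evaluate_policy(model, env, episodes, device='cpu'):
    """Greedy rollouts; returns per-episode lengths and total rewards."""
    model.eval()
    lengths, rewards = [], []
    with torch.no_grad():
        for _ in range(episodes):
            obs, done, total, steps = env.reset(), False, 0.0, 0
            while not done:
                # The model returns (value, logit) as in the training loop above.
                value, logit = model(torch.as_tensor(obs, dtype=torch.float32,
                                                     device=device).unsqueeze(0))
                action = F.softmax(logit, dim=1).argmax(dim=1).item()
                obs, reward, done, _ = env.step(action)
                total += float(reward)
                steps += 1
            lengths.append(steps)
            rewards.append(total)
    model.train()
    return (torch.tensor(lengths, dtype=torch.float32),
            torch.tensor(rewards, dtype=torch.float32))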
Example #17
param = Param.Param()

method_list = ["birch", "exp"]
#gt_list = ["1_24", "1_25", "2_24", "2_25", "3_24", "3_25"]
gt_list = ["1_24"]

for method in method_list:
    print("evaluation of " + method + " approach")
    for gt in gt_list:
        # the image for drawing rectangles
        print("working on tile" + gt)
        tile_name = "tile" + gt
        img_path = os.path.join('crater_data', 'tiles', tile_name + '.pgm')
        gt_img = cv.imread(img_path)
        gt_csv_path = os.path.join('crater_data', 'gt', gt + '_gt.csv')
        gt_data = pd.read_csv(gt_csv_path, header=None)

        # read detection from csv file.
        dt_csv_path = os.path.join('results', 'crater-ception', method,
                                   gt + '_sw_' + method + '.csv')
        craters = pd.read_csv(dt_csv_path, header=None)

        # save results path
        save_path = 'results/crater-ception/' + method + '/evaluations/' + gt + '_sw_' + method
        craters = craters[[0, 1, 2]]
        # evaluate with gt and draw it on final image.
        evaluate(craters, gt_data, gt_img, 64, True, save_path, param)

end_time = time.time()
time_dif = end_time - start_time
print("Time usage: " + str(timedelta(seconds=int(round(time_dif)))))