Example #1
def run_train():
    """Train CAPTCHA for a number of steps."""

    test_data = dataset.read_data_sets(
        dataset_dir='/home/sw/Documents/rgb-nir2/nirscene1/field_2ch.npz')
    with tf.Graph().as_default():

        images_pl1, images_pl2, labels_pl = placeholder_inputs(BATCH_SIZE)
        conv_features1, features1 = model.get_features(images_pl1, reuse=False)
        conv_features2, features2 = model.get_features(images_pl2, reuse=True)
        predicts = tf.sqrt(
            tf.reduce_sum(tf.square(features1 - features2), axis=1))

        saver = tf.train.Saver()
        sess = tf.Session()

        saver.restore(sess, "ckpt/model.ckpt-479000")

        print('Test Data Eval:')
        do_eval(sess,
                predicts,
                images_pl1,
                images_pl2,
                labels_pl,
                test_data,
                name='notredame')

        sess.close()
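Note: `placeholder_inputs`, `do_eval`, and `model` are project helpers not shown in these snippets. A minimal sketch of what `placeholder_inputs` might look like for this two-branch (Siamese) setup, assuming 64x64 single-channel patches (the shape is an assumption, not taken from the source):

import tensorflow as tf

def placeholder_inputs(batch_size, patch_size=64, channels=1):
    # One placeholder per Siamese branch, plus the match/non-match labels.
    images_pl1 = tf.placeholder(
        tf.float32, [batch_size, patch_size, patch_size, channels], name='images1')
    images_pl2 = tf.placeholder(
        tf.float32, [batch_size, patch_size, patch_size, channels], name='images2')
    labels_pl = tf.placeholder(tf.int32, [batch_size], name='labels')
    return images_pl1, images_pl2, labels_pl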
Example #2
def run_test():
  """Train CAPTCHA for a number of steps."""
  test_data = dataset.read_data_sets(dataset_dir = '/home/sw/Documents/rgb-nir2/nirscene1/water_2ch.npz')
  with tf.Graph().as_default():
    
    images_pl1, images_pl2, labels_pl = placeholder_inputs(BATCH_SIZE)
    conv_features1, features1 = model.get_features(images_pl1, reuse=False)
    conv_features2, features2 = model.get_features(images_pl2, reuse=True)
    predicts = tf.sqrt(tf.reduce_sum(tf.square(features1 - features2), axis=1))
    
    saver = tf.train.Saver()
    sess = tf.Session()
    saver.restore(sess, "ckpt/model.ckpt-479000")

    outputs = []
    labels = []
    
    steps_per_epoch = test_data.num_examples // BATCH_SIZE
    num_examples = steps_per_epoch * BATCH_SIZE
    
    for step in range(steps_per_epoch):
      feed_dict, label = fill_feed_dict(test_data, images_pl1, images_pl2, labels_pl, shuffle=False)
      predicts_value = sess.run(predicts, feed_dict=feed_dict)
      # Convert the L2 distance into a similarity score (assuming L2-normalized
      # features, the distance lies in [0, 2], so smaller distance -> higher score).
      predicts_value = 2 - predicts_value
      outputs.extend(predicts_value)
      labels.extend(label)

      view_bar('processing:', step, steps_per_epoch)

    draw_roc(outputs, labels)
    sess.close()
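`draw_roc` is another helper defined elsewhere. A minimal sketch using scikit-learn and matplotlib, assuming `labels` holds binary match/non-match ground truth and a higher score in `outputs` means "more similar":

import matplotlib.pyplot as plt
from sklearn.metrics import auc, roc_curve

def draw_roc(outputs, labels):
    # The scores were flipped to similarities above, so they can be used directly.
    fpr, tpr, _ = roc_curve(labels, outputs)
    plt.plot(fpr, tpr, label='ROC (AUC = %.4f)' % auc(fpr, tpr))
    plt.xlabel('False positive rate')
    plt.ylabel('True positive rate')
    plt.legend()
    plt.show()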
Example #3
def _build_model(inputs_queue, clone_batch_size):
    """Builds a clone of train model.

  Args:
    inputs_queue: A prefetch queue for images and labels.
  Returns:
    A dictionary of logits names to logits.
  """
    samples = inputs_queue.dequeue()
    batch_size = clone_batch_size * FLAGS.num_classes
    inputs = tf.identity(samples['image'], name='image')
    labels = tf.identity(samples['label'], name='label')
    model_options = common.ModelOptions(output_stride=FLAGS.output_stride)
    net, end_points = model.get_features(
        inputs,
        model_options=model_options,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)
    logits, _ = model.classification(net,
                                     end_points,
                                     num_classes=FLAGS.num_classes,
                                     is_training=True)
    if FLAGS.multi_label:
        with tf.name_scope('Multilabel_logits'):
            logits = slim.softmax(logits)
            half_batch_size = batch_size / 2
            for i in range(1, FLAGS.num_classes):
                class_logits = tf.identity(logits[:, i],
                                           name='class_logits_%02d' % (i))
                class_labels = tf.identity(labels[:, i],
                                           name='class_labels_%02d' % (i))
                num_positive = tf.reduce_sum(class_labels)
                num_negative = batch_size - num_positive
                weights = tf.where(
                    tf.equal(class_labels, 1.0),
                    tf.tile([half_batch_size / num_positive], [batch_size]),
                    tf.tile([half_batch_size / num_negative], [batch_size]))
                train_utils.focal_loss(class_labels,
                                       class_logits,
                                       weights=weights,
                                       scope='class_loss_%02d' % (i))
    else:
        logits = slim.softmax(logits)
        train_utils.focal_loss(labels, logits, scope='cls_loss')

    if (FLAGS.dataset == 'protein') and FLAGS.add_counts_logits:
        counts = tf.identity(samples['counts'] - 1, name='counts')
        one_hot_counts = slim.one_hot_encoding(counts, 5)
        counts_logits, _ = model.classification(net,
                                                end_points,
                                                num_classes=5,
                                                is_training=True,
                                                scope='Counts_logits')
        counts_logits = slim.softmax(counts_logits)
        train_utils.focal_loss(one_hot_counts,
                               counts_logits,
                               scope='counts_loss')
        return logits, counts_logits
    return logits
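The multi-label branch weights each example so that, per class, positives and negatives contribute equally to the focal loss: positives receive half_batch_size / num_positive and negatives half_batch_size / num_negative, so each group's weights sum to half the batch. A small NumPy illustration of the same arithmetic:

import numpy as np

batch_size = 8
class_labels = np.array([1., 0., 0., 1., 0., 0., 0., 0.])  # 2 positives, 6 negatives
half = batch_size / 2
num_pos = class_labels.sum()
num_neg = batch_size - num_pos
weights = np.where(class_labels == 1.0, half / num_pos, half / num_neg)
print(weights)  # positives get 2.0 each, negatives ~0.667 each
print(weights[class_labels == 1].sum(), weights[class_labels == 0].sum())  # 4.0 4.0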
Example #4
def evaluate_model(the_model,
                   all_stats,
                   bet_info,
                   historical_games_by_tuple,
                   moving_averages,
                   transform_params,
                   bet_threshold,
                   cv_percent=0.8,
                   cv_runs=100,
                   start_date=SEASON_1415_START,
                   end_date=SEASON_1415_END):
    prediction_by_game_tuple = {}
    overunder_by_game_tuple = {}
    for game in bet_info:
        if not start_date <= game['date'] <= end_date:
            continue
        features = get_features(all_stats,
                                game['home'],
                                game['away'],
                                game['date'],
                                moving_averages,
                                transform_params=transform_params)
        if features is not None:
            prediction = the_model.predict(numpy.array([features]))
            game_tuple = tuple((game['date'], game['home'], game['away']))
            prediction_by_game_tuple[game_tuple] = prediction
            overunder_by_game_tuple[game_tuple] = game['overunder']

    winnings_list = []
    for _ in range(cv_runs):
        win = 0
        loss = 0
        for game_tuple, prediction in prediction_by_game_tuple.items():
            if game_tuple not in historical_games_by_tuple:
                continue
            actual_score = historical_games_by_tuple[game_tuple]
            overunder = overunder_by_game_tuple[game_tuple]
            if numpy.random.uniform(0, 1) > cv_percent:
                continue
            if abs(prediction - overunder) < bet_threshold:
                continue
            if prediction < overunder and actual_score < overunder:
                win += 1
            elif prediction > overunder and actual_score > overunder:
                win += 1
            else:
                loss += 1

        winnings = win * WIN_MONEY - loss * LOSS_MONEY
        winnings_list.append(winnings)

    winnings_avg = numpy.mean(numpy.array(winnings_list))
    winnings_std = numpy.std(numpy.array(winnings_list))

    print "Avg winnings = {0} +/- {1}".format(
        winnings_avg,
        winnings_std,
    )
    return winnings_avg, winnings_std
Example #5
def evaluate_model(
    the_model,
    all_stats,
    bet_info,
    historical_games_by_tuple,
    moving_averages,
    transform_params,
    bet_threshold,
    cv_percent=0.8,
    cv_runs=100,
    start_date=SEASON_1415_START,
    end_date=SEASON_1415_END,
):
    prediction_by_game_tuple = {}
    overunder_by_game_tuple = {}
    for game in bet_info:
        if not start_date <= game["date"] <= end_date:
            continue
        features = get_features(
            all_stats, game["home"], game["away"], game["date"], moving_averages, transform_params=transform_params
        )
        if features is not None:
            prediction = the_model.predict(numpy.array([features]))
            game_tuple = tuple((game["date"], game["home"], game["away"]))
            prediction_by_game_tuple[game_tuple] = prediction
            overunder_by_game_tuple[game_tuple] = game["overunder"]

    winnings_list = []
    for _ in range(cv_runs):
        win = 0
        loss = 0
        for game_tuple, prediction in prediction_by_game_tuple.items():
            if game_tuple not in historical_games_by_tuple:
                continue
            actual_score = historical_games_by_tuple[game_tuple]
            overunder = overunder_by_game_tuple[game_tuple]
            if numpy.random.uniform(0, 1) > cv_percent:
                continue
            if abs(prediction - overunder) < bet_threshold:
                continue
            if prediction < overunder and actual_score < overunder:
                win += 1
            elif prediction > overunder and actual_score > overunder:
                win += 1
            else:
                loss += 1

        winnings = win * WIN_MONEY - loss * LOSS_MONEY
        winnings_list.append(winnings)

    winnings_avg = numpy.mean(numpy.array(winnings_list))
    winnings_std = numpy.std(numpy.array(winnings_list))

    print "Avg winnings = {0} +/- {1}".format(winnings_avg, winnings_std)
    return winnings_avg, winnings_std
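WIN_MONEY, LOSS_MONEY, SEASON_1415_START, and SEASON_1415_END are module-level constants that are not shown in either version of this function. Plausible placeholder values (assumptions for illustration only, not taken from the source) might be:

import datetime

WIN_MONEY = 100    # assumed profit on a winning bet at standard -110 odds
LOSS_MONEY = 110   # assumed stake lost on a losing bet
# Assumed date type; game["date"] must be comparable to these bounds.
SEASON_1415_START = datetime.date(2014, 10, 28)
SEASON_1415_END = datetime.date(2015, 4, 15)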
Example #6
def main(unused_argv):
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.logging.info('Prepare to export model to: %s', FLAGS.export_path)

    with tf.Graph().as_default():
        input_image = tf.placeholder(tf.float32,
                                     FLAGS.input_shape,
                                     name=_INPUT_NAME)
        inputs = data_augmentation.preprocess_image(input_image,
                                                    FLAGS.image_size,
                                                    FLAGS.image_size,
                                                    is_training=False)
        if FLAGS.channel:
            inputs = inputs[:, :, :FLAGS.channel]
            # inputs = inputs[:,:,3:]
        inputs = tf.expand_dims(inputs, 0)
        model_options = common.ModelOptions(output_stride=FLAGS.output_stride)
        net, end_points = model.get_features(inputs,
                                             model_options=model_options,
                                             is_training=False,
                                             fine_tune_batch_norm=False)

        if FLAGS.hierarchical_cls:
            end_points = model.hierarchical_classification(net,
                                                           end_points,
                                                           is_training=False)
        else:
            _, end_points = model.classification(net,
                                                 end_points,
                                                 num_classes=FLAGS.num_classes,
                                                 is_training=False)

        prediction = tf.identity(end_points['Logits_Predictions'],
                                 name=_OUTPUT_NAME)

        saver = tf.train.Saver(tf.model_variables())

        tf.gfile.MakeDirs(os.path.dirname(FLAGS.export_path))
        freeze_graph.freeze_graph_with_def_protos(
            tf.get_default_graph().as_graph_def(add_shapes=True),
            saver.as_saver_def(),
            FLAGS.checkpoint_path,
            _OUTPUT_NAME,
            restore_op_name=None,
            filename_tensor_name=None,
            output_graph=FLAGS.export_path,
            clear_devices=True,
            initializer_nodes=None)
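Once exported, the frozen GraphDef can be loaded and run without any of the original model code. A minimal sketch, assuming _INPUT_NAME is 'input', _OUTPUT_NAME is 'predictions', and a 224x224 RGB input (all assumptions):

import numpy as np
import tensorflow as tf

with tf.gfile.GFile('exported/frozen_model.pb', 'rb') as f:  # i.e. FLAGS.export_path
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

with tf.Graph().as_default() as g:
    tf.import_graph_def(graph_def, name='')
    image_t = g.get_tensor_by_name('input:0')        # assumed _INPUT_NAME
    output_t = g.get_tensor_by_name('predictions:0')  # assumed _OUTPUT_NAME
    with tf.Session(graph=g) as sess:
        dummy = np.zeros((224, 224, 3), np.float32)  # shape assumption
        preds = sess.run(output_t, feed_dict={image_t: dummy})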
Example #7
import cv2
from sklearn.linear_model import LogisticRegression
import pandas as pd
from time import time

df = pd.read_csv("data.csv", header=None)
X = transform(df[0])  # `transform` is a feature-extraction helper defined elsewhere
y = df[1]

t0 = time()
clf = LogisticRegression()
clf.fit(X, y)

print("Trained in", time() - t0)

cap = cv2.VideoCapture(0)
while True:
	ret, frame = cap.read()
	if not ret:
		print("Not ret: Something went wrong!")
		break
	gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
	features = get_features(gray)
	pred = clf.predict([features])
	print(pred)
	cv2.imshow("win", frame)
	key = cv2.waitKey(1) & 0xff
	if key == ord('q'):
		break

cap.release()
cv2.destroyAllWindows()
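`get_features` is also undefined in this snippet; a minimal sketch of a version that would fit the pipeline, using a downscaled, flattened grayscale patch as the feature vector (the 32x32 size and the feature choice are assumptions):

import cv2
import numpy as np

def get_features(gray):
    # Resize so every frame yields a fixed-length vector; this must match
    # whatever features the classifier was trained on.
    small = cv2.resize(gray, (32, 32))
    return (small.astype(np.float32) / 255.0).ravel()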
Example #8
            tup_list.append((tup.text, tup.tag_))
        data.append(tup_list)
#print(data)


def get_labels(doc):
    return [label for (token, postag, label) in doc]


def get_token(doc):
    return [token for (token, postag) in doc]


# test_data = get_test_data_unlabel(input_file)

X_test = [model.get_features(course_doc) for course_doc in data]

tagger = pycrfsuite.Tagger()
tagger.open(modelM)
y_pred = [tagger.tag(xseq) for xseq in X_test]  # list[list[str]]

#print(y_pred)


def write_txt(X_test, y_pred, test_data):
    #     y_pred_copy = y_pred[:]
    output, wordList = [], []
    pos = 0
    wordList = [get_token(course_doc) for course_doc in test_data]
    flat_list = [item for sublist in wordList for item in sublist]
    #     print(flat_list)
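`model.get_features` turns each document (a list of (token, POS-tag) tuples) into per-token feature dicts for the CRF tagger. A minimal sketch in the usual python-crfsuite style (the exact feature set is an assumption):

def word2features(doc, i):
    token, postag = doc[i][0], doc[i][1]
    features = {
        'bias': 1.0,
        'word.lower': token.lower(),
        'word.istitle': token.istitle(),
        'postag': postag,
    }
    if i > 0:
        features['-1:word.lower'] = doc[i - 1][0].lower()
        features['-1:postag'] = doc[i - 1][1]
    else:
        features['BOS'] = True  # beginning of sequence
    return features

def get_features(doc):
    return [word2features(doc, i) for i in range(len(doc))]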
Example #9
def main():
    dir_photos = "./data/flickr8k/Flicker8k_photos/"
    file_annot = "./data/flickr8k/Flickr8k_text/Flickr8k.token.txt"

    jpg_files = ds.images_info(dir_photos)
    ann_dframe = ds.annots_info(file_annot, df=True)
    print(
        "Dataset overview\n-------------------------------------------------------------------------------------------------------------\n"
    )
    print(ann_dframe)
    print(
        "\n-------------------------------------------------------------------------------------------------------------\n"
    )

    ## Prepare captions
    print("Preparing caption data for images")
    word_count = ds.word_freq(ann_dframe)
    # print(word_count)

    ## Clean text
    print("Cleaning text ... ", end="")
    for i, cpt in enumerate(ann_dframe.caption.values):
        ann_dframe["caption"].iloc[i] = ds.clean_text(cpt)
    print("done.")
    print(ann_dframe)
    word_count = ds.word_freq(ann_dframe)
    # print(word_count)

    ## Add start and end sequence token
    ann_dframe_orig = copy(ann_dframe)
    ann_dfrm = ds.add_start_end_tokens(ann_dframe)
    print(ann_dfrm)

    vgg_net = vis.models.vgg16(pretrained="imagenet", progress=True)
    for p in vgg_net.parameters():
        p.requires_grad = False
    ## Load model parameters from path
    # vgg_net.load_state_dict(torch.load('./models/vgg16-397923af.pth'))
    ## Features in the last layer
    num_ftrs = vgg_net.classifier[-1].in_features
    print(num_ftrs)
    print(vgg_net)
    ## Remove the last classifier layer: Softmax, ReLU, Dropout
    vgg_net.classifier = vgg_net.classifier[:-1]
    # ## Net architecture
    # summary(vgg_net, input_size=(3, 224, 224))
    print(vgg_net)
    # ## Features in the last layer
    # num_ftrs = vgg_net.classifier[-1].in_features
    # print(num_ftrs)
    #
    ## Read images with specified transforms
    print("Reading images ... ", end='')
    images = ds.read_image(jpg_files,
                           dir_photos,
                           normalize=True,
                           resize=224,
                           tensor=True)
    print("done.")
    # print(images.keys())
    ## Get feature map for image tensor through VGG-16
    img_featrs = OD()
    print("Gathering images' features from last conv layer ... ", end='')
    for i, jpg_name in enumerate(images.keys()):
        with torch.no_grad():
            print(i, jpg_name)
            img_featrs[jpg_name] = vgg_net(images[jpg_name].unsqueeze(0))
    print("done.")
    # print(img_featrs, img_featrs[jpg_name].size(), sep='\n')
    print(img_featrs.keys())

    # Get features for images in our dataset from pretrained VGG-16
    features = mdl.get_features(dir_photos, read=True, download=False)
    print(features)

    ## Prep image tensor
    print("Prepping image tensor ... ", end="")
    fnames = []
    img_tns_list = []
    cap_list = []
    for i, jpg_name in enumerate(ann_dfrm.filename.values):
        if (i % 5) == 0:
            if jpg_name in img_featrs.keys():
                fnames.append(jpg_name)
                img_tns_list.append(img_featrs[jpg_name])
                cap_list.append(ann_dfrm.iloc[i]["caption"])
    print("done.")
    print(len(img_tns_list), len(cap_list))
    img_tns = torch.cat(img_tns_list)
    print(img_tns.shape)
    print(
        "Saving filenames list, image tensor list, captions tensor list ... ",
        end="")
    torch.save(fnames, 'fnames.pkl')
    torch.save(img_tns_list, 'image_tns_list.pkl')
    torch.save(cap_list, 'captions_list.pkl')
    print("done.")

    print("Loading fnames, image tensor list and captions tensor list ... ",
          end="")
    fnames = torch.load('fnames.pkl')
    img_tns_list = torch.load('image_tns_list.pkl')
    img_tns = torch.cat(img_tns_list)
    cap_list = torch.load('captions_list.pkl')
    # print(len(fnames), cap_list)
    print("done.")

    cap_seq, vocab_size, cap_max_len, tokens = ds.tokenizer(cap_list)
    n_cap = len(cap_seq)
    vald_prop, test_prop = 0.2, 0.2
    n_vald = int(n_cap * vald_prop)
    n_test = int(n_cap * test_prop)

    train_cap, valid_cap, evaln_cap = ds.split_dset(cap_seq, n_vald, n_test)
    train_ims, valid_ims, evaln_ims = ds.split_dset(img_tns, n_vald, n_test)
    # train_fnm, valid_fnm, evaln_fnm = ds.split_dset(fnames, n_vald, n_test)

    print(len(train_cap), len(valid_cap), len(evaln_cap))
    print(len(train_ims), len(valid_ims), len(evaln_ims))
    # print(len(train_fnm), len(valid_fnm), len(evaln_fnm))

    images_train, captions_train, target_caps_train = ds.prep_data(
        train_ims, train_cap, vocab_size, cap_max_len)
    images_valid, captions_valid, target_caps_valid = ds.prep_data(
        valid_ims, valid_cap, vocab_size, cap_max_len)

    ## Dataloader
    bs = 64
    trainset = ds.Flickr8k(images_train, captions_train, target_caps_train)
    validset = ds.Flickr8k(images_valid, captions_valid, target_caps_valid)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=bs,
                                              shuffle=True)
    validloader = torch.utils.data.DataLoader(validset, batch_size=bs)

    #
    # ## Device: CPU or GPU?
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print("Using " + device)

    ## Model
    model = mdl.CapNet(vocab_size, cap_max_len).to(device)
    criterion = nn.CrossEntropyLoss()

    ## Optimizer
    optimizer = optim.Adam(model.parameters(), lr=0.001)
    scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=20, gamma=0.1)
    max_n_epochs = 5

    # ## Training
    print("Starting training ... ")

    epoch_train_loss, epoch_valid_loss = [], []
    min_val_loss = 100
    for epoch in range(1, max_n_epochs + 1):
        print("-------------------- Epoch: [%d / %d] ----------------------" %
              (epoch, max_n_epochs))
        training_loss, validation_loss = 0.0, 0.0
        ## Batch training
        for i, data in enumerate(trainloader):
            images, captions, target_caps = data[0].to(device), data[1].to(
                device), data[2].to(device)
            optimizer.zero_grad()
            out = model(images, captions.t())
            loss = criterion(out, target_caps)
            loss.backward()
            optimizer.step()
            training_loss += loss.item()
        epoch_train_loss.append(training_loss / len(trainloader))
        print("Training loss: %f" % (epoch_train_loss[-1]), end=" ")
        for i, data in enumerate(validloader):
            with torch.set_grad_enabled(False):
                images, captions, target_caps = data[0].to(device), data[1].to(
                    device), data[2].to(device)
                out = model(images, captions.t())
                loss = criterion(out, target_caps)
                validation_loss += loss.item()
        epoch_valid_loss.append(validation_loss / len(validloader))
        print("Validation loss: %f" % (epoch_valid_loss[-1]))
        scheduler.step()

        if epoch_valid_loss[-1] < min_val_loss:
            print("Found best model.")
            best_model = deepcopy(model)
            min_val_loss = epoch_valid_loss[-1]

    plt.plot(list(range(max_n_epochs)),
             epoch_train_loss,
             label="Training loss")
    plt.plot(list(range(max_n_epochs)),
             epoch_valid_loss,
             label="Validation loss")
    plt.xlabel("Number of epochs")
    plt.ylabel("Loss")
    plt.title("Number of epochs vs loss")
    plt.legend()
    plt.show()

    ###########
    # Save model
    print("Saving best model ... ")
    torch.save(best_model, 'best_model.pkl')
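torch.save(best_model, ...) pickles the entire module, which ties the checkpoint to the exact class definition and file layout. A more portable variant (a sketch; the filename is arbitrary) saves only the weights:

torch.save(best_model.state_dict(), 'best_model_state.pkl')

# Loading side: rebuild the architecture, then restore the weights.
model = mdl.CapNet(vocab_size, cap_max_len).to(device)
model.load_state_dict(torch.load('best_model_state.pkl'))
model.eval()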
Example #10
                                           flag=1,
                                           transform=transform_train)
test_ds = vision.ImageFolderDataset(input_str + test_dir,
                                    flag=1,
                                    transform=transform_test)

loader = gluon.data.DataLoader
train_data = loader(train_ds, batch_size, shuffle=True, last_batch='keep')
valid_data = loader(valid_ds, batch_size, shuffle=True, last_batch='keep')
train_valid_data = loader(train_valid_ds,
                          batch_size,
                          shuffle=True,
                          last_batch='keep')
test_data = loader(test_ds, batch_size, shuffle=False, last_batch='keep')

net = get_features(mx.gpu())
net.hybridize()


def SaveNd(data, net, name):
    x = []
    y = []
    print('Extracting features: %s' % name)
    for feat1, feat2, label in tqdm(data):
        feat1 = feat1.as_in_context(mx.gpu())
        feat2 = feat2.as_in_context(mx.gpu())
        out = net(feat1, feat2).as_in_context(mx.cpu())
        x.append(out)
        y.append(label)
    x = nd.concat(*x, dim=0)
    y = nd.concat(*y, dim=0)
    # (Truncated in the source; presumably followed by something like
    # nd.save(name, [x, y]) to write the features to disk.)
Example #11
def run_train():
    """Train CAPTCHA for a number of steps."""

    test_data = dataset.read_data_sets(
        dataset_dir='/home/sw/Documents/rgb-nir2/qd_fang2_9_8/field_2ch.npz')
    with tf.Graph().as_default():
        train_reader = Reader(
            '/home/sw/Documents/rgb-nir2/qd_fang2_9_8/country_2ch.tfrecord',
            name='train_data',
            batch_size=BATCH_SIZE)
        leftIMG, rightIMG, labels_op = train_reader.feed()  #[64,128]

        images_pl1, images_pl2, labels_pl = placeholder_inputs(BATCH_SIZE)
        conv_features1, features1 = model.get_features(images_pl1, reuse=False)
        conv_features2, features2 = model.get_features(images_pl2, reuse=True)
        predicts = tf.sqrt(
            tf.reduce_sum(tf.square(features1 - features2), axis=1))

        total_loss = model.caculate_loss(conv_features1, conv_features2,
                                         features1, features2)
        tf.summary.scalar('sum_loss', total_loss)
        train_op = model.training(total_loss)

        summary = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=50)
        #    init_op = tf.global_variables_initializer()

        sess = tf.Session()
        summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
        #    sess.run(init_op)
        saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            max_step = 500000
            for step in range(390380, max_step):
                start_time = time.time()
                lefts, rights, batch_labels = sess.run(
                    [leftIMG, rightIMG, labels_op])
                _, summary_str, loss_value = sess.run(
                    [train_op, summary, total_loss],
                    feed_dict={images_pl1: lefts,
                               images_pl2: rights,
                               labels_pl: batch_labels})

                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
                duration = time.time() - start_time
                if step % 10 == 0:
                    logging.info(
                        '>> Step %d run_train: loss = %.4f  (%.3f sec)' %
                        (step, loss_value, duration))
                    #-------------------------------
                if step % 1000 == 0:
                    logging.info('>> %s Saving in %s' %
                                 (datetime.now(), checkpoint_dir))

                    saver.save(sess, checkpoint_file, global_step=step)

                    logging.info('Test Data Eval:')
                    do_eval(sess,
                            step,
                            predicts,
                            images_pl1,
                            images_pl2,
                            labels_pl,
                            test_data,
                            name='notredame')

        except KeyboardInterrupt:
            print('INTERRUPTED')
            coord.request_stop()

        finally:
            saver.save(sess, checkpoint_file, global_step=step)
            print('\rModel saved in file :%s' % checkpoint_dir)
            coord.request_stop()
            coord.join(threads)

        sess.close()
Example #12
def main():
    dir_photos = "./data/flickr8k/Flicker8k_photos/"
    file_annot = "./data/flickr8k/Flickr8k_text/Flickr8k.token.txt"

    jpg_files = ds.images_info(dir_photos)
    ann_dframe = ds.annots_info(file_annot, df=True)
    print("Dataset overview\n-------------------------------------------------------------------------------------------------------------\n")
    print(ann_dframe)
    print("\n-------------------------------------------------------------------------------------------------------------\n")


    ## Prepare captions
    print("Preparing caption data for images")
    word_count = ds.word_freq(ann_dframe)
    # print(word_count)


    ## Clean text
    print("Cleaning text ... ", end="")
    for i, cpt in enumerate(ann_dframe.caption.values):
        ann_dframe["caption"].iloc[i] = ds.clean_text(cpt)
    print("done.")
    print(ann_dframe)
    word_count = ds.word_freq(ann_dframe)
    # print(word_count)

    ## Add start and end sequence token
    ann_dframe_orig = copy(ann_dframe)
    ann_dfrm = ds.add_start_end_tokens(ann_dframe)
    print(ann_dfrm)


    vgg_net = vis.models.vgg16(pretrained="imagenet", progress=True)
    for p in vgg_net.parameters():
        p.requires_grad = False
    ## Load model parameters from path
    # vgg_net.load_state_dict(torch.load('./models/vgg16-397923af.pth'))
    ## Features in the last layer
    num_ftrs = vgg_net.classifier[-1].in_features
    print(num_ftrs)
    print(vgg_net)
    ## Remove the last classifier layer: Softmax, ReLU, Dropout
    vgg_net.classifier = vgg_net.classifier[:-1]
    # ## Net architecture
    # summary(vgg_net, input_size=(3, 224, 224))
    print(vgg_net)
    # ## Features in the last layer
    # num_ftrs = vgg_net.classifier[-1].in_features
    # print(num_ftrs)
    #
    ## Read images with specified transforms
    print("Reading images ... ", end='')
    images = ds.read_image(jpg_files, dir_photos, normalize=True, resize=224, tensor=True)
    print("done.")
    # print(images.keys())
    ## Get feature map for image tensor through VGG-16
    img_featrs = OD()
    print("Gathering images' features from last conv layer ... ", end='')
    for i, jpg_name in enumerate(images.keys()):
        with torch.no_grad():
            print(i, jpg_name)
            img_featrs[jpg_name] = vgg_net(images[jpg_name].unsqueeze(0))
    print("done.")
    # print(img_featrs, img_featrs[jpg_name].size(), sep='\n')
    print(img_featrs.keys())

    # Get features for images in our dataset from pretrained VGG-16
    features = mdl.get_features(dir_photos, read=True, download=False)
    print(features)
    
    ## Prep image tensor
    print("Prepping image tensor ... ", end="")
    fnames = []
    img_tns_list = []
    cap_list = []
    for i, jpg_name in enumerate(ann_dfrm.filename.values):
        if (i % 5) == 0:
            if jpg_name in img_featrs.keys():
                fnames.append(jpg_name)
                img_tns_list.append(img_featrs[jpg_name])
                cap_list.append(ann_dfrm.iloc[i]["caption"])
    print("done.")
    print(len(img_tns_list), len(cap_list))
    img_tns = torch.cat(img_tns_list)
    print(img_tns.shape)
    print("Saving filenames list, image tensor list, captions tensor list ... ", end="")
    torch.save(fnames, 'fnames.pkl')
    torch.save(img_tns_list, 'image_tns_list.pkl')
    torch.save(cap_list, 'captions_list.pkl')
    print("done.")
    
    print("Loading fnames, image tensor list and captions tensor list ... ", end="")
    fnames = torch.load('fnames.pkl')
    img_tns_list = torch.load('image_tns_list.pkl')
    img_tns = torch.cat(img_tns_list)
    cap_list = torch.load('captions_list.pkl')
    # print(len(fnames), cap_list)
    print("done.")

    cap_seq, vocab_size, cap_max_len, tokens = ds.tokenizer(cap_list)
    n_cap = len(cap_seq)
    vald_prop, test_prop = 0.2, 0.2
    n_vald = int(n_cap * vald_prop)
    n_test = int(n_cap * test_prop)

    train_cap, valid_cap, evaln_cap = ds.split_dset(cap_seq, n_vald, n_test)
    train_ims, valid_ims, evaln_ims = ds.split_dset(img_tns, n_vald, n_test)
    # train_fnm, valid_fnm, evaln_fnm = ds.split_dset(fnames, n_vald, n_test)

    print(len(train_cap), len(valid_cap), len(evaln_cap))
    print(len(train_ims), len(valid_ims), len(evaln_ims))

    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print("Using " + device)

    print("Loading model ...")
    model = torch.load('best_model.pkl')
    print(model)
    model.eval()

    # print(fnames)

    preds = []
    for feat in evaln_ims:
        preds.append(predict_caption(model, feat, cap_max_len, tokens, device))

    best_targets = []
    for p, t in zip(preds, cap_list[:n_test]):
        pred = p.split(" ")
        targ = [t.split(" ")]
        z = sentence_bleu(targ, pred, weights=(1, 0, 0, 0))
        if z > 0.50:
            print(p, t, z, sep='\n')
            print("\n")
            best_targets.append(t)
    print(best_targets)

    for cap in best_targets:
        rows = ann_dfrm.loc[ann_dfrm["caption"] == cap, "filename"]
        print(rows)
Example #13
def build_model():
  """Builds graph for model to train with rewrites for quantization.
  Returns:
    g: Graph with fake quantization ops and batch norm folding suitable for
    training quantized weights.
    train_tensor: Train op for execution during training.
  """
  g = tf.Graph()
  with g.as_default(), tf.device(
      tf.train.replica_device_setter(FLAGS.ps_tasks)):
    samples, _ = get_dataset.get_dataset(FLAGS.dataset, FLAGS.dataset_dir,
                                         split_name=FLAGS.train_split,
                                         is_training=True,
                                         image_size=[FLAGS.image_size, FLAGS.image_size],
                                         batch_size=FLAGS.batch_size,
                                         channel=FLAGS.input_channel)

    inputs = tf.identity(samples['image'], name='image')
    labels = tf.identity(samples['label'], name='label')
    model_options = common.ModelOptions(output_stride=FLAGS.output_stride)
    net, end_points = model.get_features(
        inputs,
        model_options=model_options,
        weight_decay=FLAGS.weight_decay,
        is_training=True,
        fine_tune_batch_norm=FLAGS.fine_tune_batch_norm)
    logits, _ = model.classification(net, end_points, 
                                     num_classes=FLAGS.num_classes,
                                     is_training=True)
    logits = slim.softmax(logits)
    focal_loss_tensor = train_utils.focal_loss(labels, logits, weights=1.0)
    # f1_loss_tensor = train_utils.f1_loss(labels, logits, weights=1.0)
    # cls_loss = f1_loss_tensor
    cls_loss = focal_loss_tensor

    # Gather update_ops
    update_ops = tf.get_collection(tf.GraphKeys.UPDATE_OPS)
    # Gather initial summaries.
    summaries = set(tf.get_collection(tf.GraphKeys.SUMMARIES))

    global_step = tf.train.get_or_create_global_step()
    learning_rate = train_utils.get_model_learning_rate(
          FLAGS.learning_policy, FLAGS.base_learning_rate,
          FLAGS.learning_rate_decay_step, FLAGS.learning_rate_decay_factor,
          FLAGS.number_of_steps, FLAGS.learning_power,
          FLAGS.slow_start_step, FLAGS.slow_start_learning_rate)
    opt = tf.train.AdamOptimizer(learning_rate)
    # opt = tf.train.RMSPropOptimizer(learning_rate, momentum=FLAGS.momentum)
    summaries.add(tf.summary.scalar('learning_rate', learning_rate))

    for loss in tf.get_collection(tf.GraphKeys.LOSSES):
      summaries.add(tf.summary.scalar('sub_losses/%s' % loss.op.name, loss))
    classification_loss = tf.identity(cls_loss, name='classification_loss')
    summaries.add(tf.summary.scalar('losses/classification_loss', classification_loss))
    regularization_loss = tf.get_collection(tf.GraphKeys.REGULARIZATION_LOSSES)
    regularization_loss = tf.add_n(regularization_loss, name='regularization_loss')
    summaries.add(tf.summary.scalar('losses/regularization_loss', regularization_loss))

    total_loss = tf.add(cls_loss, regularization_loss, name='total_loss')
    grads_and_vars = opt.compute_gradients(total_loss)

    total_loss = tf.check_numerics(total_loss, 'LossTensor is inf or nan.')
    summaries.add(tf.summary.scalar('losses/total_loss', total_loss))

    grad_updates = opt.apply_gradients(grads_and_vars, global_step=global_step)
    update_ops.append(grad_updates)
    update_op = tf.group(*update_ops, name='update_barrier')
    with tf.control_dependencies([update_op]):
      train_tensor = tf.identity(total_loss, name='train_op')

  # Merge all summaries together.
  summary_op = tf.summary.merge(list(summaries))
  return g, train_tensor, summary_op
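A graph/train-op pair like this is typically handed to slim's training loop. A minimal sketch of the call site (the logdir is an assumption; FLAGS.number_of_steps comes from the snippet above):

g, train_tensor, summary_op = build_model()
slim.learning.train(
    train_tensor,
    logdir='./train_logs',  # assumed output directory
    graph=g,
    number_of_steps=FLAGS.number_of_steps,
    summary_op=summary_op,
    save_summaries_secs=60,
    save_interval_secs=600)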
Example #14
def run_train():
    """Train CAPTCHA for a number of steps."""

    with tf.Graph().as_default():
        train_reader = Reader(
            '/home/sw/Documents/rgb-nir2/qd_fang2_9_8/country_2ch.tfrecord',
            name='train_data',
            batch_size=BATCH_SIZE)
        leftIMG, rightIMG, labels_op = train_reader.feed()  #[64,128]

        conv_features1, features1 = model.get_features(leftIMG, reuse=False)
        conv_features2, features2 = model.get_features(rightIMG, reuse=True)
        predicts = tf.sqrt(
            tf.reduce_sum(tf.square(features1 - features2), axis=1))

        total_loss = model.caculate_loss(conv_features1, conv_features2,
                                         features1, features2)
        eval_all = model.evaluation(features1, features2, labels_op, 1)  #train

        tf.summary.scalar('sum_loss', total_loss)
        tf.summary.scalar('roc/tp', eval_all['tp'])
        tf.summary.scalar('roc/fp', eval_all['fp'])
        tf.summary.scalar('roc/tpr',
                          eval_all['tp'] / (eval_all['tp'] + eval_all['fn']))
        tf.summary.scalar('roc/precision', eval_all['precision'])
        train_op = model.training(total_loss)

        summary = tf.summary.merge_all()
        saver = tf.train.Saver(max_to_keep=50)
        init_op = tf.global_variables_initializer()

        sess = tf.Session()
        summary_writer = tf.summary.FileWriter(train_dir, sess.graph)
        sess.run(init_op)
        #    saver.restore(sess, tf.train.latest_checkpoint(checkpoint_dir))

        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(sess=sess, coord=coord)

        try:
            max_step = 100000
            for step in range(1, max_step):
                start_time = time.time()
                _, summary_str, loss_value, predicts_value, f1, f2 = sess.run([
                    train_op, summary, total_loss, predicts, features1,
                    features2
                ])
                summary_writer.add_summary(summary_str, step)
                summary_writer.flush()
                duration = time.time() - start_time
                if step % 10 == 0:
                    logging.info(
                        '\r>> Step %d run_train: loss = %.4f  (%.3f sec)' %
                        (step, loss_value, duration))
                    #-------------------------------
                if step % 1000 == 0:
                    logging.info('>> %s Saving in %s' %
                                 (datetime.now(), checkpoint_dir))
                    saver.save(sess, checkpoint_file, global_step=step)

        except KeyboardInterrupt:
            print('INTERRUPTED')
            coord.request_stop()

        finally:
            saver.save(sess, checkpoint_file, global_step=step)
            print('\rModel saved in file :%s' % checkpoint_dir)
            coord.request_stop()
            coord.join(threads)

        sess.close()
Example #15
def main():
    dir_photos = "./data/Flickr8k/Flickr8k_Dataset/Flicker8k_Dataset/"
    file_annot = "./data/Flickr8k/Flickr8k_text/Flickr8k.token.txt"

    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Get basic dataset info
    print("DATASET INFO")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    jpg_files = ds.images_info(dir_photos)
    print("Number of photos in Flickr8k: %d" % (len(jpg_files)))
    ann_dframe = ds.annots_info(file_annot, df=True)
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Visualize data overview
    print("DATASET OVERVIEW")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    print(ann_dframe)
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Prepare captions
    print("CURATE CAPTIONS")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    word_count = ds.word_freq(ann_dframe)
    # print(word_count)

    ## Clean text
    start = time.time()
    print("Cleaning text ... ", end="")
    for i, cpt in enumerate(ann_dframe.caption.values):
        ann_dframe["caption"].iloc[i] = ds.clean_text(cpt)
    print("done.")
    # print(ann_dframe)
    # word_count = ds.word_freq(ann_dframe)
    # print(word_count)

    ## Add start and end sequence token
    ann_dframe_orig = copy(ann_dframe)
    print("Adding start and end tokens ... ", end="")
    ann_dfrm = ds.add_start_end_tokens(ann_dframe)
    print("done.")
    elapsed = time.time() - start
    print("\nTime to preprocess {} captions: {:.2f} \
            seconds".format(i, elapsed))
    # print(ann_dfrm)
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    # ## Read images with specified transforms
    print("READ IMAGES & EXTRACT FEATURES")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    mean = [0.485, 0.456, 0.406]
    stdv = [0.229, 0.224, 0.225]
    transforms = vis.transforms.Compose([
        vis.transforms.Resize(256),
        vis.transforms.CenterCrop(224),
        vis.transforms.ToTensor(),
        vis.transforms.Normalize(mean=mean, std=stdv)
    ])
    print("Reading images ... ", end='')
    images = ds.read_image(dir_photos, transforms)
    print("done.")

    # Get feature maps for image tensor through VGG-16
    features_dict, features_fname = mdl.get_features(images,
                                                     download_wts=False,
                                                     save=True,
                                                     cuda=True)
    # print(features_dict)

    ## Load feature maps
    features_dict = torch.load(features_fname)
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Prep image tensor
    print("PREP IMAGE TENSOR")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    ann_dfrm = ann_dfrm.loc[ann_dfrm["idx"].values == "0", :]
    print(ann_dfrm)
    ds.word_freq(ann_dfrm)
    fnames = []
    img_tns_list = []
    cap_list = []
    for i, jpg_name in enumerate(ann_dfrm.filename.values):
        if jpg_name in features_dict.keys():
            fnames.append(jpg_name)
            img_tns_list.append(features_dict[jpg_name])
            cap_list.append(ann_dfrm.iloc[i]["caption"])
    print(len(img_tns_list), len(cap_list))
    img_tns = torch.cat(img_tns_list)
    print(img_tns.shape)
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Text tokenize
    print("TEXT TOKENIZE")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    tokens, cap_seq, vocab_size, cap_max_len = ds.tokenizer(cap_list)
    print("Vocab size: ", vocab_size)
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Dataset splits
    print("DATASET SPLIT")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    n_cap = len(cap_seq)
    vald_prop, test_prop = 0.2, 0.2
    n_vald = int(n_cap * vald_prop)
    n_test = int(n_cap * test_prop)
    train_cap, valid_cap, evaln_cap = ds.split_dset(cap_seq, n_vald, n_test)
    train_ims, valid_ims, evaln_ims = ds.split_dset(img_tns, n_vald, n_test)
    train_fnm, valid_fnm, evaln_fnm = ds.split_dset(fnames, n_vald, n_test)

    print(len(train_cap), len(valid_cap), len(evaln_cap))
    print(len(train_ims), len(valid_ims), len(evaln_ims))
    print(len(train_fnm), len(valid_fnm), len(evaln_fnm))
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Prep data for training and validation
    print("FINAL PREP FOR TRAINING & VALIDATION")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )
    images_train, captions_train, target_caps_train = ds.prep_data(
        train_ims, train_cap, cap_max_len)
    images_valid, captions_valid, target_caps_valid = ds.prep_data(
        valid_ims, valid_cap, cap_max_len)
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## TRAINING
    print("TRAINING")
    print(
        "---------------------------------------------------------------------------------------------------------\n"
    )

    ## Hyperparameters
    bs = 64
    lr = 0.001
    lr_steps = 20
    gamma = 0.1
    max_n_epochs = 5

    ## Dataloader
    print("DATALOADERS")
    trainset = ds.Flickr8k(images_train, captions_train, target_caps_train)
    validset = ds.Flickr8k(images_valid, captions_valid, target_caps_valid)
    trainloader = torch.utils.data.DataLoader(trainset,
                                              batch_size=bs,
                                              shuffle=True)
    validloader = torch.utils.data.DataLoader(validset, batch_size=bs)

    ## Device: CPU or GPU?
    print("DEVICE:", end=" ")
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    print("Using " + device)

    ## Model
    print("MODEL:")
    model = mdl.CapNet(vocab_size, cap_max_len).to(device)

    # Criterion
    criterion = nn.CrossEntropyLoss()

    ## Optimizer
    optimizer = optim.Adam(model.parameters(), lr=lr)
    scheduler = optim.lr_scheduler.StepLR(optimizer,
                                          step_size=lr_steps,
                                          gamma=gamma)

    ## Training
    print("\nStarting training ... ")

    epoch_train_loss, epoch_valid_loss = [], []
    min_val_loss = 100
    for epoch in range(1, max_n_epochs + 1):
        print("-------------------- Epoch: [%d / %d] ----------------------" %
              (epoch, max_n_epochs))
        training_loss, validation_loss = 0.0, 0.0
        ## Batch training
        for i, data in enumerate(trainloader):
            tr_images, tr_captions, tr_target_caps = data[0].to(
                device), data[1].to(device), data[2].to(device)
            optimizer.zero_grad()
            tr_out = model(tr_images, tr_captions.t())
            tr_loss = criterion(tr_out, tr_target_caps)
            tr_loss.backward()
            optimizer.step()
            training_loss += tr_loss.item()
        epoch_train_loss.append(training_loss / len(trainloader))
        print("Training loss: %f" % (epoch_train_loss[-1]), end=" || ")
        for i, data in enumerate(validloader):
            with torch.set_grad_enabled(False):
                vl_images, vl_captions, vl_target_caps = data[0].to(
                    device), data[1].to(device), data[2].to(device)
                vl_out = model(vl_images, vl_captions.t())
                vl_loss = criterion(vl_out, vl_target_caps)
                validation_loss += vl_loss.item()
        epoch_valid_loss.append(validation_loss / len(validloader))
        print("Validation loss: %f" % (epoch_valid_loss[-1]))
        scheduler.step(epoch=epoch)

        if epoch_valid_loss[-1] < min_val_loss:
            print("Found best model.")
            best_model = deepcopy(model)
            min_val_loss = epoch_valid_loss[-1]

    plt.plot(list(range(max_n_epochs)),
             epoch_train_loss,
             label="Training loss")
    plt.plot(list(range(max_n_epochs)),
             epoch_valid_loss,
             label="Validation loss")
    plt.xlabel("Number of epochs")
    plt.ylabel("Loss")
    plt.title("Number of epochs vs loss")
    plt.legend()
    plt.show()

    ## Save model
    print("Saving best model ... ")
    torch.save(best_model, 'best_model.pkl')
    print(
        "\n-------------------------------------------------------------------------------------------------------\n"
    )

    ## Check output
    print("Loading model ...")
    model = torch.load('best_model.pkl')
    print(model)

    model.eval()
    preds = []
    for feat in evaln_ims:
        preds.append(model.prediction(feat, tokens, device))

    best_targets = []
    bleu_scores = []
    for p, t in zip(preds, cap_list[:n_test]):
        pred = p.split(" ")
        targ = [t.split(" ")]
        z = sentence_bleu(targ, pred, weights=(1, 0, 0, 0))
        bleu_scores.append(z)
        if z > 0.50:
            print(p, t, z, sep='\n')
            print("\n")
            best_targets.append((p, t, z))
    for i, tgt in enumerate(best_targets):
        print("{}: {}".format(i, tgt))
    print("MEAN BLEU SCORE: %3f" % np.mean(bleu_scores))
Example #16
def eval_model():
    """Evaluates model."""
    tf.logging.set_verbosity(tf.logging.INFO)
    tf.gfile.MakeDirs(FLAGS.eval_dir)
    tf.logging.info('Evaluating on %s set', FLAGS.eval_split)
    g = tf.Graph()
    with g.as_default():
        samples, num_samples = get_dataset.get_dataset(
            FLAGS.dataset,
            FLAGS.dataset_dir,
            split_name=FLAGS.eval_split,
            is_training=False,
            image_size=[FLAGS.image_size, FLAGS.image_size],
            batch_size=FLAGS.batch_size,
            channel=FLAGS.input_channel)
        inputs = tf.identity(samples['image'], name='image')
        labels = tf.identity(samples['label'], name='label')
        model_options = common.ModelOptions(output_stride=FLAGS.output_stride)
        net, end_points = model.get_features(inputs,
                                             model_options=model_options,
                                             is_training=False,
                                             fine_tune_batch_norm=False)

        _, end_points = model.classification(net,
                                             end_points,
                                             num_classes=FLAGS.num_classes,
                                             is_training=False)
        eval_ops = metrics(end_points, labels)
        #num_samples = 1000
        num_batches = math.ceil(num_samples / float(FLAGS.batch_size))
        tf.logging.info('Eval num images %d', num_samples)
        tf.logging.info('Eval batch size %d and num batch %d',
                        FLAGS.batch_size, num_batches)
        # session_config = tf.ConfigProto(device_count={'GPU': 0})
        session_config = tf.ConfigProto(allow_soft_placement=True)
        session_config.gpu_options.allow_growth = True
        if FLAGS.use_slim:
            num_eval_iters = None
            if FLAGS.max_number_of_evaluations > 0:
                num_eval_iters = FLAGS.max_number_of_evaluations
            slim.evaluation.evaluation_loop(
                FLAGS.master,
                FLAGS.checkpoint_dir,
                logdir=FLAGS.eval_dir,
                num_evals=num_batches,
                eval_op=eval_ops,
                session_config=session_config,
                max_number_of_evaluations=num_eval_iters,
                eval_interval_secs=FLAGS.eval_interval_secs)
        else:
            with tf.Session(config=session_config) as sess:
                init_op = tf.group(tf.global_variables_initializer(),
                                   tf.local_variables_initializer())
                sess.run(init_op)
                saver_fn = get_checkpoint_init_fn(FLAGS.checkpoint_dir)
                saver_fn(sess)
                coord = tf.train.Coordinator()
                threads = tf.train.start_queue_runners(sess=sess, coord=coord)
                try:
                    i = 0
                    all_pres = []
                    predictions_custom_list = []
                    all_labels = []
                    while not coord.should_stop():
                        logits_np, labels_np = sess.run(
                            [end_points['Logits_Predictions'], labels])
                        logits_np = logits_np[0]
                        labels_np = labels_np[0]
                        all_labels.append(labels_np)
                        labels_id = np.where(labels_np == 1)[0]
                        predictions_id = list(
                            np.where(logits_np > (_THRESHOULD))[0])
                        predictions_np = np.where(logits_np > (_THRESHOULD), 1,
                                                  0)
                        if np.sum(predictions_np) == 0:
                            max_id = np.argmax(logits_np)
                            predictions_np[max_id] = 1
                            predictions_id.append(max_id)
                        predictions_custom_list.append(predictions_np)
                        i += 1
                        sys.stdout.write(
                            'Image[{0}]--> labels:{1}, predictions: {2}\n'.
                            format(i, labels_id, predictions_id))
                        sys.stdout.flush()

                        predictions_image_list = []
                        for thre in range(1, FLAGS.threshould, 1):
                            predictions_id = list(
                                np.where(logits_np > (thre / 100000000))[0])
                            predictions_np = np.where(
                                logits_np > (thre / 100000000), 1, 0)
                            if np.sum(predictions_np) == 0:
                                max_id = np.argmax(logits_np)
                                predictions_np[max_id] = 1
                                predictions_id.append(max_id)
                            predictions_image_list.append(predictions_np)
                        all_pres.append(predictions_image_list)
                except tf.errors.OutOfRangeError:
                    coord.request_stop()
                    coord.join(threads)
                finally:
                    sys.stdout.write('\n')
                    sys.stdout.flush()
                    pred_rows = []
                    all_labels = np.stack(all_labels, 0)
                    pres_custom = np.stack(predictions_custom_list, 0)
                    eval_custom = metric_eval(all_labels, pres_custom)
                    sys.stdout.write(
                        'Eval[f1_score, precision, recall]: {}\n'.format(
                            eval_custom['All']))
                    sys.stdout.flush()
                    pred_rows.append(eval_custom)
                    all_pres = np.transpose(all_pres, (1, 0, 2))
                    for pre, thre in zip(all_pres,
                                         range(1, FLAGS.threshould, 1)):
                        pred_rows.append(metric_eval(all_labels, pre, thre))
                    columns = ['Thre'] + list(
                        PROTEIN_CLASS_NAMES.values()) + ['All']
                    submission_df = pd.DataFrame(pred_rows)[columns]
                    submission_df.to_csv(os.path.join('./result/protein',
                                                      'protein_eval.csv'),
                                         index=False)