Example #1
def main():
    import opts
    import misc.utils as utils
    from dataloader import DataLoader  # assumed: project-local module providing DataLoader

    opt = opts.parse_opt()
    opt.caption_model = 'topdown'
    opt.batch_size = 10
    opt.id = 'topdown'
    opt.learning_rate = 5e-4
    opt.learning_rate_decay_start = 0
    opt.scheduled_sampling_start = 0
    opt.save_checkpoint_every = 25  # 11500
    opt.val_images_use = 5000
    opt.max_epochs = 40
    opt.start_from = None
    opt.input_json = 'data/meta_coco_en.json'
    opt.input_label_h5 = 'data/label_coco_en.h5'
    opt.input_image_h5 = 'data/coco_image_512.h5'
    opt.use_att = utils.if_use_att(opt.caption_model)
    opt.ccg = False
    loader = DataLoader(opt)
    opt.vocab_size = loader.vocab_size
    opt.seq_length = loader.seq_length
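    # pull one batch from each split as a quick sanity check;
    # note that the second get_batch call overwrites `data`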
    data = loader.get_batch('train')
    
    data = loader.get_batch('val')
Example #2
def main():
    opt = opts.parse_opt()
    print(opt)

    assert (opt.dataset == "imdb")
    train_iter, dev_iter, test_iter, syn_data = imdb_make_synthesized_iter(opt)

    train(opt, train_iter, dev_iter, test_iter, syn_data)
Example #3
def main():
    global total
    global correct
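    # total / correct are module-level counters, presumably updated inside generate()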
    opt = opts.parse_opt()
    # load vocab
    # get model
    # opt.load_epoch = 3
    bart = BART(opt)
    bart.load_model('models/3_model.pt')
    # generate(opt, model, SRC, TRG, opt.beam_size)
    generate(opt, bart)
    print(correct / total)
Example #4
File: train.py Project: xixiareone/SGRAF
def main():
    opt = opts.parse_opt()
    logging.basicConfig(format='%(asctime)s %(message)s', level=logging.INFO)
    tb_logger.configure(opt.logger_name, flush_secs=5)

    # Load Vocabulary Wrapper
    vocab = deserialize_vocab(
        os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(opt.data_name, vocab,
                                                opt.batch_size, opt.workers,
                                                opt)

    # Construct the model
    model = SGRAF(opt)

    # Train the Model
    best_rsum = 0

    for epoch in range(opt.num_epochs):
        print(opt.logger_name)
        print(opt.model_name)

        adjust_learning_rate(opt, model.optimizer, epoch)

        # train for one epoch
        train(opt, train_loader, model, epoch, val_loader)

        # evaluate on validation set
        r_sum = validate(opt, val_loader, model)

        # remember best R@ sum and save checkpoint
        is_best = r_sum > best_rsum
        best_rsum = max(r_sum, best_rsum)

        if not os.path.exists(opt.model_name):
            os.mkdir(opt.model_name)
        save_checkpoint(
            {
                'epoch': epoch + 1,
                'model': model.state_dict(),
                'best_rsum': best_rsum,
                'opt': opt,
                'Eiters': model.Eiters,
            },
            is_best,
            filename='checkpoint_{}.pth.tar'.format(epoch),
            prefix=opt.model_name + '/')
Example #5
def main():

    opt = opts.parse_opt()

    opt.src_data = '../data/visualstorytelling/train.pkl'
    opt.test_src_data = '../data/visualstorytelling/test.pkl'

    dataset = get_loader(opt.src_data, opt.batch_size, train=True)
    test_dataset = get_loader(opt.test_src_data, opt.batch_size, train=False, shuffle=False)

    # get model
    bart = BART(opt)

    train_model(opt, bart, dataset, test_dataset, 10)
Example #6
def get_en_opts():
    opt = opts.parse_opt()
    opt.caption_model = 'cross_topdown'
    opt.batch_size = 10
    #Pretrain
    opt.id = 'topdown'
    opt.learning_rate = 5e-4
    opt.learning_rate_decay_start = 0
    opt.scheduled_sampling_start = 0
    opt.save_checkpoint_every = 1300  #11500
    opt.val_images_use = 5000
    opt.max_epochs = 40
    opt.start_from = None
    opt.input_json = '/home/andyweizhao/wabywang/010/data/dataset/coco_processed.json'
    opt.input_label_h5 = 'data/dataset/coco_label.h5'
    opt.input_fc_dir = '/media/andyweizhao/Elements/CVPR/cocotalk_fc'
    opt.input_att_dir = 'data/cocotalk_att'
    return opt
Example #7
def main():
    opt = opts.parse_opt()
    opt.caption_model = 'topdown'
    opt.batch_size = 10
    opt.id = 'topdown'
    opt.learning_rate = 5e-5
    opt.learning_rate_decay_start = -1
    opt.scheduled_sampling_start = -1
    opt.save_checkpoint_every = 5000
    opt.val_images_use = 5000
    opt.max_epochs = 60
    opt.start_from = 'save/multitask_pretrain'  #"save" #None
    opt.language_eval = 1
    opt.input_json = 'data/meta_coco_en.json'
    opt.input_label_h5 = 'data/label_coco_en.h5'
    opt.self_critical_after = 25
    opt.finetune_cnn_after = 0
    opt.ccg = False
    opt.input_image_h5 = 'data/coco_image_512.h5'
    opt.checkpoint_path = 'save/multitask_pretrain_rl'
    train(opt)
Example #8
import datetime
import json

import numpy as np
import torch
from torchvision import transforms

from dataset.dataset import COCO_Search18, COCO_Search18_evaluation, COCO_Search18_rl
from models.baseline_attention_multihead import baseline
from models.loss import CrossEntropyLoss, DurationSmoothL1Loss, MLPRayleighDistribution, MLPLogNormalDistribution, \
    LogAction, LogDuration, NSS, CC, KLD, CC_MatchLoss, CC_terms
from utils.checkpointing import CheckpointManager
from utils.recording import RecordManager
from utils.evaluation import human_evaluation, evaluation, pairs_eval_scanmatch
from utils.logger import Logger
from opts import parse_opt
from utils.evaltools.scanmatch import ScanMatch
from models.sampling import Sampling

args = parse_opt()

# For reproducibility - refer https://pytorch.org/docs/stable/notes/randomness.html
# These five lines control all the major sources of randomness.
np.random.seed(args.seed)
torch.manual_seed(args.seed)
torch.cuda.manual_seed_all(args.seed)
torch.backends.cudnn.benchmark = False
torch.backends.cudnn.deterministic = True

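# standard ImageNet preprocessing: resize to the configured size, convert to a tensor, normalize with ImageNet mean/std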
transform = transforms.Compose([
    transforms.Resize((args.height, args.width)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])
Example #9
# coding:utf8
import torch
import skimage.io

from opts import parse_opt
from models.decoder import Decoder
from models.encoder import Encoder

opt = parse_opt()
assert opt.test_model, 'please input test_model'
assert opt.image_file, 'please input image_file'

encoder = Encoder(opt.resnet101_file)
encoder.to(opt.device)
encoder.eval()

img = skimage.io.imread(opt.image_file)
with torch.no_grad():
    img = encoder.preprocess(img)
    img = img.to(opt.device)
    fc_feat, att_feat = encoder(img)

print("====> loading checkpoint '{}'".format(opt.test_model))
chkpoint = torch.load(opt.test_model, map_location=lambda s, l: s)
decoder = Decoder(chkpoint['idx2word'], chkpoint['settings'])
decoder.load_state_dict(chkpoint['model'])
print("====> loaded checkpoint '{}', epoch: {}, train_mode: {}".format(
    opt.test_model, chkpoint['epoch'], chkpoint['train_mode']))
decoder.to(opt.device)
decoder.eval()
Example #10
def main():
    # Hyper Parameters
    opt = opts.parse_opt()

    device_id = opt.gpuid
    device_count = len(str(device_id).split(","))
    #assert device_count == 1 or device_count == 2
    print("use GPU:", device_id, "GPUs_count", device_count, flush=True)
    os.environ['CUDA_VISIBLE_DEVICES'] = str(device_id)
    device_id = 0
    torch.cuda.set_device(0)

    # Load Vocabulary Wrapper
    vocab = deserialize_vocab(os.path.join(opt.vocab_path, '%s_vocab.json' % opt.data_name))
    opt.vocab_size = len(vocab)

    # Load data loaders
    train_loader, val_loader = data.get_loaders(
        opt.data_name, vocab, opt.batch_size, opt.workers, opt)

    # Construct the model
    model = SCAN(opt)
    model.cuda()
    model = nn.DataParallel(model)
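    # DataParallel wraps the model, so the underlying network is reached via model.module (as evalrank does below)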

    # Loss and Optimizer
    criterion = ContrastiveLoss(opt=opt, margin=opt.margin, max_violation=opt.max_violation)
    mse_criterion = nn.MSELoss(reduction="mean")  # "batchmean" is only valid for KLDivLoss, not MSELoss
    optimizer = torch.optim.Adam(model.parameters(), lr=opt.learning_rate)

    # optionally resume from a checkpoint
    if not os.path.exists(opt.model_name):
        os.makedirs(opt.model_name)
    start_epoch = 0
    best_rsum = 0

    if opt.resume:
        if os.path.isfile(opt.resume):
            print("=> loading checkpoint '{}'".format(opt.resume))
            checkpoint = torch.load(opt.resume)
            start_epoch = checkpoint['epoch']
            best_rsum = checkpoint['best_rsum']
            model.load_state_dict(checkpoint['model'])
            print("=> loaded checkpoint '{}' (epoch {}, best_rsum {})"
                  .format(opt.resume, start_epoch, best_rsum))
        else:
            print("=> no checkpoint found at '{}'".format(opt.resume))
    evalrank(model.module, val_loader, opt)

    print(opt, flush=True)
    
    # Train the Model
    for epoch in range(start_epoch, opt.num_epochs):
        message = "epoch: %d, model name: %s\n" % (epoch, opt.model_name)
        log_file = os.path.join(opt.logger_name, "performance.log")
        logging_func(log_file, message)
        print("model name: ", opt.model_name, flush=True)
        adjust_learning_rate(opt, optimizer, epoch)
        run_time = 0
        for i, (images, captions, lengths, masks, ids, _) in enumerate(train_loader):
            start_time = time.time()
            model.train()

            optimizer.zero_grad()

            if device_count != 1:
                images = images.repeat(device_count, 1, 1)

            score = model(images, captions, lengths, masks, ids)
            loss = criterion(score)

            loss.backward()
            if opt.grad_clip > 0:
                clip_grad_norm_(model.parameters(), opt.grad_clip)
            optimizer.step()
            run_time += time.time() - start_time
            # validate at every val_step
            if i % 100 == 0:
                log = "epoch: %d; batch: %d/%d; loss: %.4f; time: %.4f" % (epoch, 
                            i, len(train_loader), loss.data.item(), run_time / 100)
                print(log, flush=True)
                run_time = 0
            if (i + 1) % opt.val_step == 0:
                evalrank(model.module, val_loader, opt)

        print("-------- performance at epoch: %d --------" % (epoch))
        # evaluate on validation set
        rsum = evalrank(model.module, val_loader, opt)
        #rsum = -100
        filename = 'model_' + str(epoch) + '.pth.tar'
        # remember best R@ sum and save checkpoint
        is_best = rsum > best_rsum
        best_rsum = max(rsum, best_rsum)
        save_checkpoint({
            'epoch': epoch + 1,
            'model': model.state_dict(),
            'best_rsum': best_rsum,
            'opt': opt,
        }, is_best, filename=filename, prefix=opt.model_name + '/')
Example #11
                    cPickle.dump(infos, f)
                with open(
                        os.path.join(
                            opt['source'].checkpoint_path, 'histories_' +
                            opt['source'].id + str(iteration) + '.pkl'),
                        'wb') as f:
                    cPickle.dump(histories, f)

                if best_flag:
                    checkpoint_path = os.path.join(
                        opt['source'].checkpoint_path, 'model-best.pth')
                    torch.save(model.state_dict(), checkpoint_path)
                    print("model saved to {}".format(checkpoint_path))
                    with open(
                            os.path.join(
                                opt['source'].checkpoint_path,
                                'infos_' + opt['source'].id + '-best.pkl'),
                            'wb') as f:
                        cPickle.dump(infos, f)

        # Stop if reaching max epochs
        if epoch >= opt['source'].max_epochs and opt['source'].max_epochs != -1:
            break


opt = {}
opt['source'] = opts.parse_opt()
opt['target'] = opts.parse_opt('target')

train(opt)
Example #12
def main():
    mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    train_data = mnist.train.images * 2.0 - 1.0
    train_label = mnist.train.labels

    test_data = mnist.test.images * 2.0 - 1.0
    test_label = mnist.test.labels

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt = opts.parse_opt()
    batch_size = opt.batch_size

    # Changing the options here.
    opt.input_data = "MNIST"
    opt.input_c_dim = 1
    opt.output_c_dim = 1
    opt.input_dim = x_dim
    opt.label_dim = y_dim
    # Running arguments
    opt.c = 1.
    opt.ld = 500.
    opt.H_lambda = 10.
    opt.cgan_flag = True
    opt.patch_flag = True
    opt.G_lambda = 10.
    opt.s_l = 0
    opt.t_l = 1

    # batch_size = opt.batch_size

    # Running a session to load the saved model.
    with tf.Session() as sess:
        model_store = opt.model_restore
        print 'MNIST model is stored at %s' % model_store
        whitebox_model = MNISTModel(model_store)
        # initialize advGAN
        model = advGAN(whitebox_model, model_store, opt, sess)

        best_model_path = './GAN/save/best.ckpt'
        print 'advGAN is stored at %s' % best_model_path
        model.load(best_model_path)

        # tvars = tf.trainable_variables()
        # tvars_vals = sess.run(tvars)

        # for var, val in zip(tvars, tvars_vals):
        #     if 'generator' not in var.name:
        #         continue
        #     print(var.name, val.shape)  # Prints the name of the variable alongside its value.

        # We have to load a batch of images, then create the fake ones.
        # They should be identical.
        num_images = 10
        images = train_data[:num_images]
        fake_images = sess.run([model.fake_images_sample],
                               {model.source: images})

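        # sess.run returns a list because the fetches were a list; element 0 is the generated batch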
        plt.imshow(np.reshape(fake_images[0], [28, 28]))
        plt.show()
Example #13
                infos['iter'] = iteration
                infos['epoch'] = epoch
                infos['iterators'] = loader.iterators
                infos['split_ix'] = loader.split_ix
                infos['best_val_score'] = best_val_score
                infos['opt'] = opt
                infos['vocab'] = loader.get_vocab()

                histories['val_result_history'] = val_result_history
                histories['loss_history'] = loss_history
                histories['lr_history'] = lr_history
                histories['ss_prob_history'] = ss_prob_history
                with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'.pkl'), 'wb') as f:
                    cPickle.dump(infos, f)
                with open(os.path.join(opt.checkpoint_path, 'histories_'+opt.id+'.pkl'), 'wb') as f:
                    cPickle.dump(histories, f)

                if best_flag:
                    checkpoint_path = os.path.join(opt.checkpoint_path, 'model-best.pth')
                    torch.save(model.state_dict(), checkpoint_path)
                    print("model saved to {}".format(checkpoint_path))
                    with open(os.path.join(opt.checkpoint_path, 'infos_'+opt.id+'-best.pkl'), 'wb') as f:
                        cPickle.dump(infos, f)

        # Stop if reaching max epochs
        if epoch >= opt.max_epochs and opt.max_epochs != -1:
            break

opt = opts.parse_opt()
train(opt)
Example #14
def test_point_wise():
    train, dev, test = load_wiki(FLAGS.data, filter=FLAGS.clean)  #wiki
    # train, test, dev = load(FLAGS.data, filter=FLAGS.clean) #trec
    q_max_sent_length = max(
        map(lambda x: len(x), train['question'].str.split()))
    a_max_sent_length = max(map(lambda x: len(x), train['answer'].str.split()))
    print(q_max_sent_length)
    print(a_max_sent_length)
    print(len(train))
    print('train question unique:{}'.format(len(train['question'].unique())))
    print('train length', len(train))
    print('test length', len(test))
    print('dev length', len(dev))

    alphabet, embeddings, embeddings_complex = prepare(
        [train, test, dev],
        max_sent_length=a_max_sent_length,
        dim=FLAGS.embedding_dim,
        is_embedding_needed=True,
        fresh=True)
    print(embeddings_complex)
    print('alphabet:', len(alphabet))
    opt = opts.parse_opt(q_max_sent_length, a_max_sent_length, alphabet,
                         embeddings, embeddings_complex)
    with tf.Graph().as_default():
        with tf.device("/gpu:0"):
            session_conf = tf.ConfigProto()
            session_conf.allow_soft_placement = FLAGS.allow_soft_placement
            session_conf.log_device_placement = FLAGS.log_device_placement
            session_conf.gpu_options.allow_growth = True
        sess = tf.Session(config=session_conf)
        with sess.as_default(), open(precision, "w") as log:
            s = ('embedding_dim:  ' + str(FLAGS.embedding_dim) + '\n' +
                 'dropout_keep_prob:  ' + str(FLAGS.dropout_keep_prob) + '\n' +
                 'l2_reg_lambda:  ' + str(FLAGS.l2_reg_lambda) + '\n' +
                 'learning_rate:  ' + str(FLAGS.learning_rate) + '\n' +
                 'batch_size:  ' + str(FLAGS.batch_size) + '\n' +
                 'trainable:  ' + str(FLAGS.trainable) + '\n' +
                 'num_epochs:  ' + str(FLAGS.num_epochs) + '\n' +
                 'data:  ' + str(FLAGS.data) + '\n')
            log.write(str(s) + '\n')
            # train,test,dev = load("trec",filter=True)
            # alphabet,embeddings = prepare([train,test,dev],is_embedding_needed = True)
            # cnn = model(opt
            #     max_input_left=q_max_sent_length,
            #     max_input_right=a_max_sent_length,
            #     vocab_size=len(alphabet),
            #     embedding_size=FLAGS.embedding_dim,
            #     batch_size=FLAGS.batch_size,
            #     embeddings=embeddings,
            #     embeddings_complex=embeddings_complex,
            #     dropout_keep_prob=FLAGS.dropout_keep_prob,
            #     filter_sizes=list(map(int, FLAGS.filter_sizes.split(","))),
            #     num_filters=FLAGS.num_filters,
            #     l2_reg_lambda=FLAGS.l2_reg_lambda,
            #     is_Embedding_Needed=True,
            #     trainable=FLAGS.trainable,
            #     overlap_needed=FLAGS.overlap_needed,
            #     position_needed=FLAGS.position_needed,
            #     pooling=FLAGS.pooling,
            #     hidden_num=FLAGS.hidden_num,
            #     extend_feature_dim=FLAGS.extend_feature_dim)
            cnn = setup(opt)
            cnn.build_graph()
            # Define Training procedure
            global_step = tf.Variable(0, name="global_step", trainable=False)
            starter_learning_rate = FLAGS.learning_rate
            learning_rate = tf.train.exponential_decay(starter_learning_rate,
                                                       global_step, 100, 0.96)
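            # note: the decayed learning_rate above is never used; the Adam optimizer below takes the constant FLAGS.learning_rate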
            optimizer = tf.train.AdamOptimizer(FLAGS.learning_rate)
            # optimizer =  tf.train.GradientDescentOptimizer(learning_rate)

            grads_and_vars = optimizer.compute_gradients(cnn.loss)
            train_op = optimizer.apply_gradients(grads_and_vars,
                                                 global_step=global_step)
            saver = tf.train.Saver(tf.global_variables(), max_to_keep=20)
            sess.run(tf.global_variables_initializer())
            map_max = 0.65
            now = int(time.time())
            timeArray = time.localtime(now)
            timeStamp = time.strftime("%Y%m%d%H%M%S", timeArray)
            timeDay = time.strftime("%Y%m%d", timeArray)
            print(timeStamp)
            for i in range(FLAGS.num_epochs):
                d = get_overlap_dict(train,
                                     alphabet,
                                     q_len=q_max_sent_length,
                                     a_len=a_max_sent_length)
                datas = batch_gen_with_point_wise(train,
                                                  alphabet,
                                                  FLAGS.batch_size,
                                                  q_len=q_max_sent_length,
                                                  a_len=a_max_sent_length)
                for data in datas:
                    if opt.model in ('QA_quantum', 'CNNQLM_I', 'CNNQLM_I_Flat', 'CNNQLM_Vocab', 'CNNQLM_Dim'):
                        feed_dict = {
                            cnn.question: data[0],
                            cnn.answer: data[1],
                            cnn.input_y: data[2],
                            cnn.q_position: data[3],
                            cnn.a_position: data[4],
                            cnn.overlap: data[5],
                            cnn.q_overlap: data[6],
                            cnn.a_overlap: data[7]
                        }
                        _, step, loss, accuracy, pred, scores, input_y = sess.run(
                            [
                                train_op, global_step, cnn.loss, cnn.accuracy,
                                cnn.predictions, cnn.scores, cnn.input_y
                            ], feed_dict)
                    else:
                        feed_dict = {
                            cnn.question: data[0],
                            cnn.answer: data[1],
                            cnn.input_y: data[2],
                            cnn.q_overlap: data[3],
                            cnn.a_overlap: data[4],
                            cnn.q_position: data[5],
                            cnn.a_position: data[6]
                        }
                        _, step, loss, accuracy, pred, scores = sess.run([
                            train_op, global_step, cnn.loss, cnn.accuracy,
                            cnn.predictions, cnn.scores
                        ], feed_dict)
                    time_str = datetime.datetime.now().isoformat()
                    print("{}: step {}, loss {:g}, acc {:g}  ".format(
                        time_str, step, loss, accuracy))
                now = int(time.time())
                timeArray = time.localtime(now)
                timeStamp = time.strftime("%Y%m%d%H%M%S", timeArray)
                timeDay = time.strftime("%Y%m%d", timeArray)
                print(timeStamp)
                predicted = predict(sess, cnn, train, alphabet,
                                    FLAGS.batch_size, q_max_sent_length,
                                    a_max_sent_length)
                predicted_label = np.argmax(predicted, 1)
                map_mrr_train = evaluation.evaluationBypandas(
                    train, predicted[:, -1])
                predicted_test = predict(sess, cnn, test, alphabet,
                                         FLAGS.batch_size, q_max_sent_length,
                                         a_max_sent_length)
                predicted_label = np.argmax(predicted_test, 1)
                map_mrr_test = evaluation.evaluationBypandas(
                    test, predicted_test[:, -1])
                if map_mrr_test[0] > map_max:
                    map_max = map_mrr_test[0]
                    timeStamp = time.strftime("%Y%m%d%H%M%S",
                                              time.localtime(int(time.time())))
                    folder = 'runs/' + timeDay
                    out_dir = folder + '/' + timeStamp + \
                        '__' + FLAGS.data + str(map_mrr_test[0])
                    if not os.path.exists(folder):
                        os.makedirs(folder)
                    #save_path = saver.save(sess, out_dir)
                print("{}:train epoch:map mrr {}".format(i, map_mrr_train))
                print("{}:test epoch:map mrr {}".format(i, map_mrr_test))
                line2 = " {}:epoch: map_test{}".format(i, map_mrr_test)
                log.write(line2 + '\n')
                log.flush()
            log.close()
Example #15
def train():
    opt = opts.parse_opt()
    opt.input_data = "MNIST"

    img_size = (opt.img_dim, opt.img_dim)
    print 'Dimension of images:', img_size
    train_data, train_label, id_gender = \
        get_30_people_chunk(opt.image_path, 1, gender_meta=True, img_size=img_size)
    test_data, test_label = get_30_people_chunk(opt.image_path,
                                                2,
                                                img_size=img_size)
    names = get_people_names(opt.image_path, 30)

    if opt.balance_data:
        ratio = opt.balance_ratio
        print 'Balancing dataset with ratio %f' % ratio
        train_data, train_label = balance_dataset(train_data, train_label)
        test_data, test_label = balance_dataset(test_data, test_label)

    if opt.balance_gender:
        print train_data.shape, train_label.shape
        print test_data.shape, test_label.shape
        print 'Balancing genders'
        selected_people = []
        for i in range(id_gender.shape[1]):
            indices, = np.where(id_gender[:, i] == 1)
            selected_people.append(np.random.choice(indices, 5, replace=False))
        selected_people = np.concatenate(selected_people)

        print 'Selected people are:'
        print np.array(names)[selected_people]

        selected_imgs = train_label[:, selected_people].sum(axis=1) != 0
        train_data = train_data[selected_imgs, :]
        train_label = train_label[selected_imgs, :]

        selected_imgs = test_label[:, selected_people].sum(axis=1) != 0
        test_data = test_data[selected_imgs, :]
        test_label = test_label[selected_imgs, :]

    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tMax, Min Train: %.4f, %.4f' % (np.max(train_data),
                                            np.min(train_data))
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)
    print '\tMax, Min Test: %.4f, %.4f' % (np.max(test_data),
                                           np.min(test_data))

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt.input_c_dim = 3
    opt.output_c_dim = 3
    opt.input_dim = x_dim
    opt.label_dim = y_dim
    input_shape = (x_dim, x_dim, opt.input_c_dim)

    batch_size = opt.batch_size
    print 'Batch size: %d' % batch_size

    NUM_REPR = 5
    NUM_SAMPLES_EACH = int(batch_size / NUM_REPR / 2)
    output_samples = get_output_samples(train_data, train_label, id_gender,
                                        NUM_REPR, NUM_SAMPLES_EACH)

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    iteration_time = []
    with tf.Session(config=tf_config) as sess:

        id_model_path = '%s_%d_id_0' % (opt.lfw_base_path, x_dim)
        print '\tRetrieving evil model from "%s"' % id_model_path
        evil_model = FaceRecognizer(id_model_path, train_label.shape[1],
                                    input_shape, opt.input_c_dim)

        gender_model_path = '%s_%d_gender_0' % (opt.lfw_base_path, x_dim)
        print '\tRetrieving good model from "%s"' % gender_model_path
        good_model = FaceRecognizer(gender_model_path, 2, input_shape,
                                    opt.input_c_dim)
        model = advGAN(good_model, evil_model, opt, sess, mnist=False)

        iteration = 0
        if opt.resnet_gen:
            generator_mode = 'ResNet'
        else:
            generator_mode = 'Regular'
        summary_dir = "logs/LFW/g_%d_ld_%d_gl_%d_L2_%.2f_lr_%.4f_%s/" % (
            opt.G_lambda, opt.ld, opt.good_loss_coeff, opt.L2_lambda,
            opt.learning_rate, generator_mode)
        if os.path.isdir(summary_dir) is False:
            print 'Creating directory %s for logs.' % summary_dir
            os.mkdir(summary_dir)
        # else:
        #     print 'Removing all files in %s' % (summary_dir + '*')
        #     shutil.rmtree(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)
        print 'Training data loaded.'

        print 'Maximum iterations: %d' % opt.max_iteration
        max_acc_diff = -1.0
        while iteration < opt.max_iteration:
            # this function returns (data, label, np.array(target)).
            feed_data, evil_labels, real_data = loader.next_batch(
                batch_size, negative=False)
            good_labels = id_gender[np.argmax(evil_labels, axis=1)]

            feed = {
                model.source: feed_data,
                model.target: real_data,
                model.good_labels: good_labels,
                model.evil_labels: evil_labels
            }

            # Training G once.
            summary_str, G_loss, _ = sess.run(
                [model.total_loss_merge_sum, model.g_loss, model.G_train_op],
                feed)
            writer.add_summary(summary_str, iteration)

            # Training G twice.
            summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                    model.total_loss_merge_sum,
                    model.g_loss,
                    model.gan_loss,
                    model.hinge_loss,
                    model.l1_loss,
                    model.l2_loss,
                    model.good_fn_loss,
                    model.evil_fn_loss,
                    model.adv_loss,
                    model.total_loss,
                    model.G_train_op], feed)
            writer.add_summary(summary_str, iteration)

            # Training D.
            summary_str, D_loss, _ = \
                sess.run([model.total_loss_merge_sum, model.d_loss, model.D_pre_train_op], feed)
            writer.add_summary(summary_str, iteration)

            if iteration % opt.losses_log_every == 0:
                print "iteration: ", iteration
                print '\tD: %.4f, G: %.4f\n\thinge(%.2f): %.4f, L1(%.2f): %.4f, L2(%.2f): %.4f' % (
                    D_loss, G_loss, opt.H_lambda, hinge_loss, opt.L1_lambda,
                    l1_loss, opt.L2_lambda, l2_loss)
                print '\t\tGAN total loss: %.4f' % gan_loss
                print '\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss)
                print '\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss)

                new_test_data = []
                new_pred_data = []
                head = 0
                last_batch = False
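                # iterate in fixed-size batches; the final batch is right-aligned
                # to the end of the data and the overlap is trimmed off below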
                while head < test_data.shape[0]:
                    if head + batch_size <= test_data.shape[0]:
                        tail = head + batch_size
                    else:
                        tail = test_data.shape[0]
                        head = test_data.shape[0] - batch_size
                        last_batch = True
                    cur_data, pred_data = sess.run(
                        [model.fake_images_output, model.prediction_ready],
                        {model.source: test_data[head:tail, :]})

                    if last_batch:
                        new_test_data.append(
                            cur_data[-(test_data.shape[0] % batch_size):, :])
                        new_pred_data.append(
                            pred_data[-(test_data.shape[0] % batch_size):, :])
                    else:
                        new_test_data.append(cur_data)
                        new_pred_data.append(pred_data)
                    head += batch_size
                new_test_data = np.concatenate(new_test_data)
                new_pred_data = np.concatenate(new_pred_data)

                good_pred = np.argmax(
                    model.good_model.model.predict(new_pred_data), axis=1)
                evil_pred = np.argmax(
                    model.evil_model.model.predict(new_pred_data), axis=1)
                evil_true = np.argmax(test_label, axis=1)
                good_true = np.argmax(id_gender[evil_true, :], axis=1)

                good_accuracy = accuracy_score(good_true, good_pred)
                evil_accuracy = accuracy_score(evil_true, evil_pred)
                total_good_confusion = confusion_matrix(good_true, good_pred)
                total_evil_confusion = confusion_matrix(
                    evil_true, evil_pred, labels=range(opt.evil_label_num))

                print '\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                    good_accuracy, evil_accuracy)
                print '\tAccuracy diff: %f' % (good_accuracy - evil_accuracy)
                print 'Good confusion matrix:'
                print total_good_confusion
                evil_misclass = total_evil_confusion.sum(
                    axis=0) - np.diag(total_evil_confusion)
                evil_idxs = np.argsort(-evil_misclass)
                print 'Top 3 Misclassifications:'
                print np.array(names)[evil_idxs][:3]
                print evil_misclass[evil_idxs][:3]
                evil_tp = np.diag(total_evil_confusion)
                evil_idxs = np.argsort(-evil_tp)
                print 'Top 3 True classifications:'
                print np.array(names)[evil_idxs][:3]
                print evil_tp[evil_idxs][:3]

                # print 'Selected people are:'
                # print names[evil_idxs].tolist()
                # print evil_tp
                # print total_evil_confusion
                # print evil_idxs

                fake_samples, fake_noise = sess.run(
                    [model.fake_images_output, model.fake_noise_output],
                    {model.source: output_samples})

                fakes = merge(fake_samples, [2 * NUM_REPR, NUM_SAMPLES_EACH])
                original = merge(output_samples,
                                 [2 * NUM_REPR, NUM_SAMPLES_EACH])
                noise = merge(fake_noise, [2 * NUM_REPR, NUM_SAMPLES_EACH])
                final_image = np.concatenate([fakes, noise, original], axis=1)

                scipy_imsave('snapshot_%d.png' % iteration, final_image)

                if (good_accuracy - evil_accuracy) > max(0.5, max_acc_diff):
                    print '\tSaving new training data at accuracy diff: %.4f' % (
                        good_accuracy - evil_accuracy),
                    max_acc_diff = good_accuracy - evil_accuracy

                    # other_good = FaceRecognizer('%s_%d_gender_0' % (opt.lfw_base_path, x_dim),
                    #                             2, input_shape, opt.input_c_dim)

                    # other_pred = np.argmax(other_good.model.predict(new_pred_data), axis=1)
                    # print 'Other Good accuracy: %.4f' % accuracy_score(good_true, other_pred)

                    # other_pred = np.argmax(other_good.model.predict(
                    #     preprocess_images(new_test_data * 255.0)), axis=1)
                    #     print '\tTest data processed accuracy: %.4f' % \
                    #     accuracy_score(good_true, other_pred)

                    # other_evil = FaceRecognizer('%s_%d_id_0' % (opt.lfw_base_path, x_dim),
                    #                             34, input_shape, opt.input_c_dim)
                    # other_pred = np.argmax(other_evil.model.predict(new_pred_data), axis=1)
                    # print 'Other Evil accuracy: %.4f' % accuracy_score(evil_true, other_pred)
                    # other_pred = np.argmax(other_evil.model.predict(
                    #     preprocess_images(new_test_data * 255.0)), axis=1)
                    #     print '\tTest data processed accuracy: %.4f' % \
                    #     accuracy_score(evil_true, other_pred)

                    new_train_data = []
                    head = 0
                    last_batch = False
                    while head < train_data.shape[0]:
                        if head + batch_size <= train_data.shape[0]:
                            tail = head + batch_size
                        else:
                            tail = train_data.shape[0]
                            head = train_data.shape[0] - batch_size
                            last_batch = True
                        cur_data = sess.run(
                            model.fake_images_output,
                            {model.source: train_data[head:tail, :]})

                        if last_batch:
                            new_train_data.append(
                                cur_data[-(train_data.shape[0] %
                                           batch_size):, :])
                        else:
                            new_train_data.append(cur_data)
                        head += batch_size
                    new_train_data = np.concatenate(new_train_data)

                    np.savez_compressed(opt.output_path,
                                        train_data=new_train_data,
                                        org_train_data=train_data,
                                        train_label=train_label,
                                        test_data=new_test_data,
                                        org_test_data=test_data,
                                        test_label=test_label,
                                        id_gender=id_gender)
                    print '\t[DONE]'

            iteration += 1
Example #16
def parse_opt():
    args = opts.parse_opt()
    print('* parse_opt : ', vars(args))
    return args  # return the parsed options so callers can actually use them
Example #17
def main():
    opt = opts.parse_opt()
    data_dir = opt.input_data_dir

    # Data augmentation and normalization for training
    # Just normalization for validation
    data_transforms = {
        'train':
        transforms.Compose([
            transforms.Resize((256, 256)),
            transforms.RandomResizedCrop(224),
            transforms.RandomHorizontalFlip(),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
        'val':
        transforms.Compose([
            transforms.Resize((224, 224)),
            # transforms.CenterCrop(224),
            transforms.ToTensor(),
            transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
        ]),
    }
    batch_size = opt.batch_size
    image_datasets = {
        x: VisualConceptDataset(os.path.join(data_dir, x),
                                os.path.join(opt.input_label_dir, x),
                                data_transforms[x])
        for x in ['train', 'val']
    }
    dataloaders = {
        x: torch.utils.data.DataLoader(image_datasets[x],
                                       batch_size=batch_size,
                                       shuffle=True,
                                       num_workers=4)
        for x in ['train', 'val']
    }

    dataset_sizes = {x: len(image_datasets[x]) for x in ['train', 'val']}
    # device = torch.device("cuda:2" if torch.cuda.is_available() else "cpu")
    log_every = opt.losses_log_every
    print_every = opt.print_every
    checkpoint_path = opt.checkpoint_path
    if not os.path.exists(checkpoint_path):
        os.mkdir(checkpoint_path)
    TIMESTAMP = "{0:%Y-%m-%dT%H-%M-%S/}".format(datetime.now())
    tb_summary_writer = tb and tb.SummaryWriter(
        os.path.join(checkpoint_path, TIMESTAMP))

    def train_model(model, criterion, optimizer, scheduler, num_epochs=25):
        since = time.time()

        best_model_wts = copy.deepcopy(model.state_dict())
        best_acc = 0.0

        iteration = 0

        for epoch in range(num_epochs):
            print('Epoch {}/{}'.format(epoch, num_epochs - 1))
            print('-' * 100)

            # Each epoch has a training and validation phase
            for phase in ['val', 'train']:
                if phase == 'train':
                    scheduler.step()
                    model.train()  # Set model to training mode
                else:
                    model.eval()  # Set model to evaluate mode

                running_loss = 0.0
                running_corrects = 0
                running_prec = 0.0
                running_recall = 0.0
                f1score_sum = 0.0

                # Iterate over data.
                for inputs, labels in dataloaders[phase]:
                    # inputs = inputs.to(device)
                    inputs = inputs.cuda()
                    # labels = labels.float().to(device)
                    labels = labels.to(torch.float).cuda()
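                    # binary_labels marks the classes actually present (label > 0); used for accuracy/precision below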
                    binary_labels = torch.gt(labels, 0).to(torch.int)
                    # print('in:', inputs.size(), labels.size())

                    # zero the parameter gradients
                    optimizer.zero_grad()

                    # forward
                    # track history if only in train
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        preds = torch.gt(outputs, 0).to(torch.int)
                        # the loss only considers positions predicted for label 1; label-0 predictions are ignored
                        if opt.label_smoothing:
                            # label smoothing: weight the logits by the soft labels
                            # (Tensor.mul is out-of-place; the original code discarded its result)
                            outputs = outputs.mul(labels)
                        else:
                            # no label smoothing: mask the logits with the binary labels
                            outputs = outputs.mul(binary_labels.to(torch.float))
                        loss = criterion(outputs, labels)

                        # backward + optimize only if in training phase
                        if phase == 'train':
                            loss.backward()
                            optimizer.step()

                    # statistics
                    running_loss += loss.item() * inputs.size(0)
                    running_corrects += torch.sum(preds == binary_labels.data)
                    '''
                    pred  = [1, 1, 0, 0, 1]
                    label = [1, 0, 0, 1, 1]
                    pred == label -> [1, 0, 1, 0, 1]
                    tp = torch.sum([1, 0, 0, 0, 1])
                    prec   = tp / torch.sum(pred)
                    recall = tp / torch.sum(label)
                    '''
                    tmp1 = (preds == binary_labels.data).to(torch.int)
                    tmp2 = binary_labels.mul(tmp1)
                    tp = torch.sum(tmp2).to(torch.float)
                    prec = torch.div(tp,
                                     torch.sum(preds).to(torch.float) + 1e-8)
                    recall = torch.div(
                        tp,
                        torch.sum(binary_labels.to(torch.float)) + 1e-8)
                    f1score = torch.div(2 * prec * recall,
                                        prec + recall + 1e-8)

                    running_prec += prec
                    running_recall += recall
                    f1score_sum += f1score

                    # print('pred:', preds.size(), labels.size())

                    iteration += 1
                    add_summary_value(tb_summary_writer, 'train_loss', loss,
                                      iteration)
                    add_summary_value(tb_summary_writer, 'running_loss',
                                      running_loss / iteration, iteration)
                    add_summary_value(tb_summary_writer, 'running_corrects',
                                      running_corrects / iteration, iteration)
                    add_summary_value(tb_summary_writer, 'running_tp', tp,
                                      iteration)
                    add_summary_value(tb_summary_writer, 'running_prec', prec,
                                      iteration)
                    add_summary_value(tb_summary_writer, 'running_recall',
                                      recall, iteration)
                    add_summary_value(tb_summary_writer, 'running_f1score',
                                      f1score, iteration)

                    if (iteration % print_every == 0):
                        print(
                            '{} : Epoch {} Iteration {} Loss: {:.4f}/10000 running_loss: {:.4f}, Acc: {:.4f}'
                            .format(phase, epoch, iteration, loss * 10000,
                                    running_loss / iteration,
                                    running_corrects / batch_size))
                        print(
                            'TP: {}, Prec: {}, Recall: {} F1_score: {}'.format(
                                tp.data / batch_size, prec.data / batch_size,
                                recall.data / batch_size,
                                f1score.data / batch_size))

                epoch_loss = running_loss / dataset_sizes[phase]
                epoch_acc = running_corrects.double() / dataset_sizes[phase]
                epoch_prec = running_prec / dataset_sizes[phase]
                epoch_recall = running_recall / dataset_sizes[phase]
                epoch_f1score = f1score_sum / dataset_sizes[phase]

                if phase == 'train':
                    add_summary_value(tb_summary_writer, 'train_epoch_loss',
                                      epoch_loss, epoch)
                    add_summary_value(tb_summary_writer,
                                      'train_epoch_corrects', epoch_acc, epoch)
                    add_summary_value(tb_summary_writer, 'train_epoch_f1score',
                                      epoch_f1score, epoch)
                    add_summary_value(tb_summary_writer, 'train_epoch_prec',
                                      epoch_prec, epoch)
                    add_summary_value(tb_summary_writer, 'train_epoch_recall',
                                      epoch_recall, epoch)
                else:
                    add_summary_value(tb_summary_writer, 'val_epoch_loss',
                                      epoch_loss, epoch)
                    add_summary_value(tb_summary_writer, 'val_epoch_corrects',
                                      epoch_acc, epoch)
                    add_summary_value(tb_summary_writer, 'val_epoch_f1score',
                                      epoch_f1score, epoch)
                    add_summary_value(tb_summary_writer, 'val_epoch_prec',
                                      epoch_prec, epoch)
                    add_summary_value(tb_summary_writer, 'val_epoch_recall',
                                      epoch_recall, epoch)

                print()
                print('{} : Loss: {:.4f} Acc: {:.4f} '.format(
                    phase, epoch_loss, epoch_acc))
                print('prec: {:.4f} recall: {:.4f} f1score: {:.4f}\n'.format(
                    epoch_prec, epoch_recall, epoch_f1score))

                # deep copy the model
                # if phase == 'val' and epoch_acc > best_acc:
                if phase == 'val' and epoch_f1score > best_acc:
                    print('epoch_f1score: %f , history_best_score: %f' %
                          (epoch_f1score, best_acc))
                    # best_acc = epoch_acc
                    best_acc = epoch_f1score
                    best_model_wts = copy.deepcopy(model.state_dict())
                    torch.save(
                        model.state_dict(),
                        os.path.join(checkpoint_path,
                                     '%s-model-best.pth' % opt.id))
                    torch.save(
                        optimizer.state_dict(),
                        os.path.join(checkpoint_path,
                                     '%s-info-best.path' % opt.id))

                    print("model save to %s/%s-model-best.pth" %
                          (checkpoint_path, opt.id))
                    print()

            print()

        time_elapsed = time.time() - since
        print('Training complete in {:.0f}m {:.0f}s'.format(
            time_elapsed // 60, time_elapsed % 60))
        print('Best val Acc: {:4f}'.format(best_acc))

        # load best model weights
        model.load_state_dict(best_model_wts)
        return model

    # build the model
    model_ft = models.resnet101(pretrained=True)
    # number of input features of the final fc layer
    fc_features = model_ft.fc.in_features
    # replace the classifier head so it predicts num_classes labels
    num_classes = opt.num_classes
    model_ft.fc = nn.Linear(fc_features, num_classes)

    # model_ft = myResnet(model, num_classes)
    # model_ft = model_ft.to(device)
    model_ft = model_ft.cuda()
    '''
    For example, if a dataset contains 100 positive and 300 negative examples of a single class,
    then pos_weight for the class should be equal to 300/100 = 3.
    The loss would then act as if the dataset contained 3×100 = 300 positive examples.
    '''
    criterion = nn.BCEWithLogitsLoss(pos_weight=torch.tensor(
        opt.pos_weight).cuda())  # average caption length is 9.5
    # criterion = nn.BCEWithLogitsLoss(reduction='sum')
    # criterion = nn.MultiLabelMarginLoss()
    # criterion = NEG_loss(num_classes, )

    # Observe that all parameters are being optimized
    if opt.optim == 'sgd':
        optimizer_ft = optim.SGD(model_ft.parameters(),
                                 lr=opt.learning_rate,
                                 momentum=opt.momentum)
    elif opt.optim == 'adam':
        optimizer_ft = optim.Adam(model_ft.parameters(), lr=opt.learning_rate)
    else:
        # fail fast instead of silently leaving optimizer_ft undefined
        raise ValueError('%s is not supported yet' % opt.optim)

    # Decay LR by a factor of 0.1 every 7 epochs
    exp_lr_scheduler = lr_scheduler.StepLR(
        optimizer_ft,
        step_size=opt.learning_rate_decay_every,
        gamma=opt.learning_rate_decay_factor)

    if opt.label_smoothing:
        print('Using Label Smoothing.')

    model_ft = train_model(model_ft,
                           criterion,
                           optimizer_ft,
                           exp_lr_scheduler,
                           num_epochs=opt.max_epochs)

    torch.save(model_ft.state_dict(),
               os.path.join(checkpoint_path, '%s-model-best.pth' % opt.id))
    print("model save to %s/%s-model-best.pth" % (checkpoint_path, opt.id))
Example #18
import numpy as np
import pandas as pd
from data_augmentation import random_transform
from random import randint
from collections import Counter
from sklearn.utils import shuffle
from PIL import Image
import opts

args = opts.parse_opt()

if args.input_size == 128:
    resize_shape = (128, 128, 3)
else:
    resize_shape = (256, 256, 3)


def get_image(file,
              shape=(resize_shape[0], resize_shape[1]),
              location='data/train/'):
    image = Image.open(location + file)
    image = image.resize(shape)
    image = np.array(image)
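    # grayscale images come back as 2-D arrays; replicate the channel to get 3-channel RGB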
    if len(image.shape) == 2:
        image = np.stack([image] * 3, axis=2)
    return image


def get_data():
    data = pd.read_csv("data/train.csv")
    data = shuffle(data)
Example #19
def train():
    flatten_flag = True  # flatten output of G or not?
    opt = opts.parse_opt()
    opt.input_data = "MNIST"
    # mapping [0,1] -> [-1,1]
    # load data
    # mnist = input_data.read_data_sets('MNIST_data', one_hot=True)
    # train_data = mnist.train.images * 2.0 - 1.0
    # train_label = mnist.train.labels

    # test_data = mnist.test.images * 2.0 - 1.0
    # test_label = mnist.test.labels

    loaded = np.load('MNIST_data/B.npz')
    train_data, train_label, test_data, test_label = \
        loaded['train_data'], loaded['train_label'], \
        loaded['test_data'], loaded['test_label']

    # We create the label clues here.
    if opt.cgan_gen:
        label_clue = np.zeros((train_label.shape[1], opt.img_dim, opt.img_dim,
                               train_label.shape[1]))
        for lbl in range(train_label.shape[1]):
            label_clue[lbl, :, :, lbl] = 1
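        # label_clue[lbl] is an (img_dim, img_dim) plane of ones in channel lbl: a spatial one-hot encoding of the class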

    if opt.cgan_gen:
        output_samples, output_labels = output_sample(test_data, test_label,
                                                      True)
    else:
        output_samples = output_sample(test_data, test_label)
    print output_samples.shape

    print 'Shape of data:'
    print '\tTraining data: ' + str(train_data.shape)
    print '\tTraining label: ' + str(train_label.shape)
    print '\tTest data: ' + str(test_data.shape)
    print '\tTest label: ' + str(test_label.shape)

    x_dim = train_data.shape[1]
    y_dim = train_label.shape[1]

    opt.input_c_dim = 1
    opt.output_c_dim = 1
    opt.input_dim = x_dim
    opt.label_dim = y_dim

    batch_size = opt.batch_size

    NUM_THREADS = 2
    tf_config = tf.ConfigProto()
    tf_config.intra_op_parallelism_threads = NUM_THREADS
    tf_config.gpu_options.allow_growth = True

    with tf.Session(config=tf_config) as sess:
        # Initialize the variables, restore them from a checkpoint if one exists,
        # and initialize the summary writer.
        global_step = 0

        print '\tRetrieving evil model from "%s"' % opt.evil_model_path
        evil_model = MNISTModel(opt.evil_model_path)
        print '\tRetrieving good model from "%s"' % opt.good_model_path
        good_model = OddEvenMNIST(opt.good_model_path)
        # model = advGAN(whitebox_model, model_store, opt, sess)
        model = advGAN(good_model, evil_model, opt, sess)

        min_adv_accuracy = 10e10
        max_accuracy_diff = -np.inf

        # summary_dir = "logs/MNIST/g_%d_ld_%d_gl_%d_L2_%.2f_dn_%d" % (
        #     opt.G_lambda, opt.ld, opt.good_loss_coeff,
        #     opt.L2_lambda, opt.d_train_num)

        summary_dir = "logs/MNIST/dn_%d_gn_%d" % (opt.d_train_num,
                                                  opt.g_train_num)

        duplicate_num = 0
        while os.path.isdir(summary_dir + '_' + str(duplicate_num) + '/'):
            duplicate_num += 1
        summary_dir += '_' + str(duplicate_num) + '/'
        print 'Creating directory %s for logs.' % summary_dir
        os.mkdir(summary_dir)

        writer = tf.summary.FileWriter(summary_dir, sess.graph)
        loader = Dataset2(train_data, train_label)
        print 'Training data loaded.'

        best_evil_accuracy = -1.0
        best_res_epoch = -1
        best_res = None
        for epoch_num in range(opt.max_epoch):
            print 'Epoch %d' % epoch_num

            # Randomly shuffle the data.
            random_indices = np.arange(train_data.shape[0])
            np.random.shuffle(random_indices)
            train_data = train_data[random_indices, :]
            train_label = train_label[random_indices, :]

            real_buckets = []
            for lbl in range(train_label.shape[1]):
                real_buckets.append(np.where(train_label[:, lbl] == 1)[0])

            # Mini-batch Gradient Descent.
            batch_no = 0
            while (batch_no * batch_size) < train_data.shape[0]:
                head = batch_no * batch_size
                if head + batch_size <= train_data.shape[0]:
                    tail = head + batch_size
                else:
                    tail = train_data.shape[0]
                    head = train_data.shape[0] - batch_size

                feed_data = train_data[head:tail, :]
                evil_labels = train_label[head:tail, :]
                good_labels = odd_even_labels(evil_labels)

                # Finding randomly sampled real data.
                real_data = np.zeros_like(feed_data)
                # Indices of training batch with specific label.
                # label_indices[i] = indices of feed data, that have evil_label[i] == 1.
                label_indices = [np.where(evil_labels[:, lbl] == 1)[0] \
                    for lbl in range(evil_labels.shape[1])]

                for lbl in range(evil_labels.shape[1]):
                    # We take a random sample of size |label_indices[lbl]|
                    # from the real bucket of `lbl`.
                    selected_real_data = np.random.choice(
                        real_buckets[lbl], label_indices[lbl].shape[0])

                    # We put this random sample in the same index of their
                    # corresponding batch training data.
                    real_data[label_indices[lbl], :] = train_data[
                        selected_real_data, :]

                feed = {
                    model.source: feed_data,
                    model.target: real_data,
                    model.good_labels: good_labels,
                    model.evil_labels: evil_labels
                }

                # Train G.
                for _ in range(opt.g_train_num):
                    summary_str, G_loss, gan_loss, hinge_loss, l1_loss, l2_loss, \
                        good_fn_loss, evil_fn_loss, adv_loss, total_loss, _ = sess.run([
                            model.total_loss_merge_sum,
                            model.g_loss,
                            model.gan_loss,
                            model.hinge_loss,
                            model.l1_loss,
                            model.l2_loss,
                            model.good_fn_loss,
                            model.evil_fn_loss,
                            model.adv_loss,
                            model.total_loss,
                            model.G_train_op], feed)
                    writer.add_summary(summary_str, global_step)

                # Train D.
                for _ in range(opt.d_train_num):
                    summary_str, D_loss, _ = sess.run([
                        model.total_loss_merge_sum, model.d_loss,
                        model.D_pre_train_op
                    ], feed)
                    writer.add_summary(summary_str, global_step)

                global_step += 1
                batch_no += 1

            # Validation after each training epoch.
            print('\tD: %.4f, G: %.4f\n\thinge(%.1f): %.4f, L1(%.1f): %.4f, L2(%.1f): %.4f' % (
                D_loss, G_loss, opt.H_lambda, hinge_loss, opt.L1_lambda,
                l1_loss, opt.L2_lambda, l2_loss))
            print('\t\tGAN total loss: %.4f' % gan_loss)
            print('\tGood: %.4f, Evil: %.4f' % (good_fn_loss, evil_fn_loss))
            print('\tAdv: %.4f, Total: %.4f' % (adv_loss, total_loss))

            new_pred_data = []
            head = 0
            last_batch = False
            while head < test_data.shape[0]:
                if head + batch_size <= test_data.shape[0]:
                    tail = head + batch_size
                else:
                    tail = test_data.shape[0]
                    head = test_data.shape[0] - batch_size
                    last_batch = True
                if opt.cgan_gen:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.evil_labels: test_label[head:tail, :]})
                else:
                    cur_data = sess.run(
                        model.fake_images_sample,
                        {model.source: test_data[head:tail, :]})

                if last_batch:
                    new_pred_data.append(
                        cur_data[-(test_data.shape[0] % batch_size):, :])
                else:
                    new_pred_data.append(cur_data)
                head += batch_size
            new_pred_data = np.concatenate(new_pred_data)

            good_pred = np.argmax(
                model.good_model.model.predict(new_pred_data), axis=1)
            evil_pred = np.argmax(
                model.evil_model.model.predict(new_pred_data), axis=1)
            evil_true = np.argmax(test_label, axis=1)
            good_true = np.argmax(odd_even_labels(test_label), axis=1)

            good_accuracy = accuracy_score(good_true, good_pred)
            evil_accuracy = accuracy_score(evil_true, evil_pred)
            total_good_confusion = confusion_matrix(good_true, good_pred)
            total_evil_confusion = confusion_matrix(evil_true,
                                                    evil_pred,
                                                    labels=range(
                                                        opt.evil_label_num))

            print('\tGood Accuracy: %.4f, Evil Accuracy: %.4f' % (
                good_accuracy, evil_accuracy))
            print('\tAccuracy diff: %f' % (good_accuracy - evil_accuracy))
            print('Good confusion matrix:')
            print(total_good_confusion)
            print('Evil confusion matrix:')
            print(total_evil_confusion)

            # Creating snapshots to save.
            if opt.cgan_gen:
                fake_samples = sess.run(model.fake_images_sample,
                                        {model.evil_labels: output_labels})
            else:
                fake_samples, fake_noise = sess.run(
                    [model.fake_images_sample, model.sample_noise],
                    {model.source: output_samples})
            max_accuracy_diff = good_accuracy - evil_accuracy

            fakes = merge(fake_samples[:100, :], [10, 10])
            separator = np.ones((280, 2))
            original = merge(output_samples[:100].reshape(-1, 28, 28, 1),
                             [10, 10])

            if opt.cgan_gen:
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, separator, original], axis=1))
            else:
                noise = merge(fake_noise[:100], [10, 10])
                scipy.misc.imsave(
                    'snapshot_%d.png' % epoch_num,
                    np.concatenate([fakes, noise, original], axis=1))

            # Only for the purpose of finding best D and G training times.
            if evil_accuracy > best_evil_accuracy:
                best_evil_accuracy = evil_accuracy
                best_res_epoch = epoch_num
                if opt.cgan_gen:
                    best_res = np.concatenate([fakes, separator, original],
                                              axis=1)
                else:
                    best_res = np.concatenate([fakes, noise, original], axis=1)

        best_image_path = 'best_dn_%d_gn_%d_%d_epoch_%d.png' % \
            (opt.d_train_num, opt.g_train_num, duplicate_num, best_res_epoch)
        scipy.misc.imsave(best_image_path, best_res)

        # We can transform the training and test data given in the beginning here.
        # This is only half the actual data.
        if opt.save_data:
            # if opt.cgan_gen:
            raise NotImplementedError(
                'Saving data for CGAN_GEN is not yet implemented.')
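
# The snippet above relies on two project helpers, odd_even_labels and merge,
# that are defined elsewhere. The following is a minimal sketch of what they
# plausibly do, assuming the "good" task is even/odd MNIST digit
# classification and that merge tiles a batch into a single image grid;
# both bodies are reconstructions, not the project's actual code.
import numpy as np

def odd_even_labels(labels):
    # Collapse one-hot digit labels (N, 10) into one-hot even/odd
    # labels (N, 2); which column means "odd" is an assumption.
    digits = np.argmax(labels, axis=1)
    good = np.zeros((labels.shape[0], 2), dtype=labels.dtype)
    good[np.arange(labels.shape[0]), digits % 2] = 1
    return good

def merge(images, size):
    # Tile N images into a size[0] x size[1] grid; accepts flattened
    # (N, 784) or shaped (N, 28, 28, 1) MNIST batches.
    images = np.asarray(images).reshape(-1, 28, 28)
    h, w = images.shape[1], images.shape[2]
    grid = np.zeros((h * size[0], w * size[1]))
    for idx, image in enumerate(images[:size[0] * size[1]]):
        row, col = idx // size[1], idx % size[1]
        grid[row * h:(row + 1) * h, col * w:(col + 1) * w] = image
    return grid
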
def main():
    import opts
    import misc.utils as utils
    opt = opts.parse_opt()
    opt.caption_model = 'topdown'
    opt.batch_size = 10  #512#32*4*4
    opt.id = 'topdown'
    opt.learning_rate = 5e-4
    opt.learning_rate_decay_start = 0
    opt.scheduled_sampling_start = 0
    opt.save_checkpoint_every = 5000  #450#5000#11500
    opt.val_images_use = 5000
    opt.max_epochs = 50  #30
    opt.start_from = 'save/rt'  #"save" #None
    opt.language_eval = 1
    opt.input_json = 'data/meta_coco_en.json'
    opt.input_label_h5 = 'data/label_coco_en.h5'
    #    opt.input_json='data/coco_ccg.json' #'data/meta_coco_en.json'
    #    opt.input_label_h5='data/coco_ccg_label.h5' #'data/label_coco_en.h5'
    #    opt.input_fc_dir='/nlp/andyweizhao/self-critical.pytorch-master/data/cocotalk_fc'
    #    opt.input_att_dir='/nlp/andyweizhao/self-critical.pytorch-master/data/cocotalk_att'
    opt.finetune_cnn_after = 0
    opt.ccg = False
    opt.input_image_h5 = 'data/coco_image_512.h5'

    opt.use_att = utils.if_use_att(opt.caption_model)

    from dataloader import DataLoader  # just-in-time generated features
    loader = DataLoader(opt)

    #    from dataloader_fixcnn import DataLoader # load pre-processed features
    #    loader = DataLoader(opt)

    opt.vocab_size = loader.vocab_size
    opt.vocab_ccg_size = loader.vocab_ccg_size
    opt.seq_length = loader.seq_length

    import models
    model = models.setup(opt)
    cnn_model = utils.build_cnn(opt)
    cnn_model.cuda()
    model.cuda()

    data = loader.get_batch('train')
    images = data['images']

    #    _fc_feats_2048 = []
    #    _fc_feats_81 = []
    #    _att_feats = []
    #    for i in range(loader.batch_size):
    #        x = Variable(torch.from_numpy(images[i]), volatile=True).cuda()
    #        x = x.unsqueeze(0)
    #        att_feats, fc_feats_81 = cnn_model(x)
    #        fc_feats_2048 = att_feats.mean(3).mean(2).squeeze()
    #        att_feats = F.adaptive_avg_pool2d(att_feats,[14,14]).squeeze().permute(1, 2, 0)#(0, 2, 3, 1)
    #        _fc_feats_2048.append(fc_feats_2048)
    #        _fc_feats_81.append(fc_feats_81)
    #        _att_feats.append(att_feats)
    #    _fc_feats_2048 = torch.stack(_fc_feats_2048)
    #    _fc_feats_81 = torch.stack(_fc_feats_81)
    #    _att_feats = torch.stack(_att_feats)
    #    att_feats = _att_feats.unsqueeze(1).expand(*((_att_feats.size(0), loader.seq_per_img,) + \
    #                                                   _att_feats.size()[1:])).contiguous().view(*((_att_feats.size(0) * loader.seq_per_img,) + \
    #                                                   _att_feats.size()[1:]))
    #    fc_feats_2048 = _fc_feats_2048.unsqueeze(1).expand(*((_fc_feats_2048.size(0), loader.seq_per_img,) + \
    #                                                  _fc_feats_2048.size()[1:])).contiguous().view(*((_fc_feats_2048.size(0) * loader.seq_per_img,) + \
    #                                                  _fc_feats_2048.size()[1:]))
    #    fc_feats_81 = _fc_feats_81
    #
    #    att_feats = Variable(att_feats, requires_grad=False).cuda()
    #    Variable(fc_feats_81)

    crit = utils.LanguageModelCriterion()
    eval_kwargs = {'split': 'val', 'dataset': opt.input_json, 'verbose': True}
    eval_kwargs.update(vars(opt))
    val_loss, predictions, lang_stats = eval_split(cnn_model, model, crit,
                                                   loader, eval_kwargs, True)
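
# utils.if_use_att (used above) just decides whether the loader must return
# spatial attention features for the chosen caption model. A plausible
# one-liner, assuming only the global-feature decoders skip attention (the
# exact model list is an assumption):
def if_use_att(caption_model):
    return caption_model not in ('show_tell', 'fc')
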
def main():
    opt = opts.parse_opt()
    if opt.path_opt is not None:
        with open(opt.path_opt, 'r') as handle:
            options_yaml = yaml.load(handle, Loader=yaml.FullLoader)
        utils.update_values(options_yaml, vars(opt))

    opt.checkpoint_path = opt.checkpoint_path + opt.exp_name
    print('=============')
    print(opt.exp_name)
    print('=============')

    opt.input_json = opt.data_path + opt.input_json
    opt.input_dic = opt.data_path + opt.input_dic
    opt.seg_feature_root = opt.data_path + opt.seg_feature_root
    opt.feature_root = opt.data_path + opt.feature_root
    opt.proposal_h5 = opt.data_path + opt.proposal_h5
    opt.densecap_references = [
        opt.data_path + reference for reference in opt.densecap_references
    ]

    opt.test_mode = (opt.val_split == 'testing')
    if opt.enable_BUTD:
        assert opt.att_input_mode == 'region', 'region attention only under the BUTD mode'

    cudnn.benchmark = True

    torch.manual_seed(opt.seed)
    np.random.seed(opt.seed)
    random.seed(opt.seed)
    if opt.cuda:
        torch.cuda.manual_seed_all(opt.seed)

    if opt.dataset == 'anet':
        from misc.dataloader_anet import DataLoader
    else:
        raise Exception('only support anet!')

    if not os.path.exists(opt.checkpoint_path):
        os.makedirs(opt.checkpoint_path)

    # open the detection json file.
    print('DataLoader loading proposal file: ', opt.proposal_h5)
    h5_proposal_file = h5py.File(opt.proposal_h5, 'r', driver='core')
    num_proposals = h5_proposal_file['dets_num'][:]
    label_proposals = h5_proposal_file['dets_labels'][:]
    h5_proposal_file.close()

    # Data Loader
    dataset = DataLoader(opt,
                         split=opt.train_split,
                         seq_per_img=opt.seq_per_img,
                         num_proposals=num_proposals,
                         label_proposals=label_proposals)
    dataloader = torch.utils.data.DataLoader(dataset,
                                             batch_size=opt.batch_size,
                                             shuffle=True,
                                             num_workers=opt.num_workers)

    dataset_val = DataLoader(opt,
                             split=opt.val_split,
                             seq_per_img=opt.seq_per_img,
                             num_proposals=num_proposals,
                             label_proposals=label_proposals)
    dataloader_val = torch.utils.data.DataLoader(dataset_val,
                                                 batch_size=opt.batch_size,
                                                 shuffle=False,
                                                 num_workers=opt.num_workers)
    # ======================================================================

    # Build the Model
    opt.vocab_size = dataset.vocab_size
    opt.detect_size = dataset.detect_size
    opt.seq_length = dataset.seq_length
    opt.glove_w = torch.from_numpy(dataset.glove_w).float()
    opt.glove_vg_cls = torch.from_numpy(dataset.glove_vg_cls).float()
    opt.glove_clss = torch.from_numpy(dataset.glove_clss).float()

    opt.wtoi = dataset.wtoi
    opt.itow = dataset.itow
    opt.itod = dataset.itod
    opt.ltow = dataset.ltow
    opt.itoc = dataset.itoc
    opt.wtol = dataset.wtol
    opt.wtod = dataset.wtod
    opt.vg_cls = dataset.vg_cls

    if opt.att_model == 'cyclical':
        model = build_model(opt, device)
    else:
        raise ValueError('Unknown captioning model: {}'.format(opt.att_model))

    infos = {}
    histories = {}
    # if opt.start_from is not None:
    if opt.resume:
        if opt.load_best_score == 1:
            model_path = os.path.join(opt.checkpoint_path, 'model-best.pth')
            info_path = os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '-best.pkl')
        else:
            model_path = os.path.join(opt.checkpoint_path, 'model.pth')
            info_path = os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '.pkl')

        # open old infos and check if models are compatible
        with open(info_path, 'rb') as f:
            infos = pickle.load(f)
            saved_model_opt = infos['opt']

        # opt.learning_rate = saved_model_opt.learning_rate
        print('========================================')
        print('Loading the model %s...' % (model_path))
        if opt.inference_only:
            print('Running Inference only ...')
        print('========================================')
        # model.load_state_dict(torch.load(model_path))
        if not is_code_development():
            model.load_state_dict(torch.load(model_path))
        else:
            model.load_state_dict(
                torch.load(model_path,
                           map_location=lambda storage, loc: storage))

        if os.path.isfile(
                os.path.join(opt.checkpoint_path,
                             'histories_' + opt.id + '.pkl')):
            with open(
                    os.path.join(opt.checkpoint_path,
                                 'histories_' + opt.id + '.pkl'), 'rb') as f:
                histories = pickle.load(f)

    best_val_score = infos.get('best_val_score', None)
    iteration = infos.get('iter', 0)

    if opt.resume_decoder_exp_name != '' and not opt.resume:
        start_epoch = opt.start_epoch
    else:
        start_epoch = infos.get('epoch', 0)

    val_result_history = histories.get('val_result_history', {})
    loss_history = histories.get('loss_history', {})
    lr_history = histories.get('lr_history', {})
    ss_prob_history = histories.get('ss_prob_history', {})

    model = nn.DataParallel(model).to(device)

    params = []
    for key, value in dict(model.named_parameters()).items():
        if value.requires_grad:
            if ('ctx2pool_grd' in key) or ('vis_embed' in key):
                print('Finetune param: {}'.format(key))
                params += [{
                    'params': [value],
                    'lr': opt.learning_rate * 0.1,  # finetune the fc7 layer
                    'weight_decay': opt.weight_decay,
                    'betas': (opt.optim_alpha, opt.optim_beta)
                }]
            else:
                params += [{
                    'params': [value],
                    'lr': opt.learning_rate,
                    'weight_decay': opt.weight_decay,
                    'betas': (opt.optim_alpha, opt.optim_beta)
                }]

    print("Use %s as optmization method" % (opt.optim))
    optimizer = None
    if opt.optim == 'sgd':
        optimizer = optim.SGD(params, lr=opt.learning_rate, momentum=0.9)
    elif opt.optim == 'adam':
        optimizer = optim.Adam(params)
    elif opt.optim == 'adamax':
        optimizer = optim.Adamax(params)
    else:
        raise ValueError('Unknown optimizer: {}'.format(opt.optim))

    # set up tensorboard logger
    tb_logger = utils.set_tb_logger(
        opt.tb_log_dir, opt.exp_name,
        opt.resume) if not opt.inference_only else None

    # set up trainer
    trainer = Trainer(opt, dataset, model, optimizer, dataloader,
                      dataloader_val)

    # set up LR scheduler
    scheduler = ReduceLROnPlateau(optimizer,
                                  'max',
                                  patience=opt.patience,
                                  min_lr=opt.min_lr)

    best_score = {
        "Bleu_1": 0.0,
        "Bleu_2": 0.0,
        "Bleu_3": 0.0,
        "Bleu_4": 0.0,
        "METEOR": 0.0,
        "ROUGE_L": 0.0,
        "CIDEr": 0.0,
        "SPICE": 0.0
    }

    for epoch in range(start_epoch, opt.max_epochs):
        if not opt.inference_only:
            trainer.train(epoch, tb_logger=tb_logger)

        if epoch % opt.val_every_epoch == 0:
            with torch.no_grad():
                lang_stats = trainer.eval(epoch, tb_logger=tb_logger)

            if opt.inference_only:
                break

            # update learning rate by monitoring CIDEr score
            scheduler.step(lang_stats['CIDEr'], epoch)

            # Save model if is improving on validation result
            current_score = lang_stats['CIDEr']

            best_flag = False
            if best_val_score is None or current_score > best_val_score:
                best_val_score = current_score
                best_flag = True
            checkpoint_path = os.path.join(opt.checkpoint_path, 'model.pth')
            if opt.mGPUs:
                torch.save(model.module.state_dict(), checkpoint_path)
            else:
                torch.save(model.state_dict(), checkpoint_path)
            print("model saved to {}".format(checkpoint_path))

            # Dump miscellaneous information
            infos['iter'] = iteration
            infos['epoch'] = epoch
            infos['best_val_score'] = best_val_score
            infos['opt'] = opt
            infos['vocab'] = dataset.itow

            histories['val_result_history'] = val_result_history
            histories['loss_history'] = loss_history
            histories['lr_history'] = lr_history
            histories['ss_prob_history'] = ss_prob_history
            with open(
                    os.path.join(opt.checkpoint_path,
                                 'infos_' + opt.id + '.pkl'), 'wb') as f:
                pickle.dump(infos, f)
            with open(
                    os.path.join(opt.checkpoint_path,
                                 'histories_' + opt.id + '.pkl'), 'wb') as f:
                pickle.dump(histories, f)

            if best_flag:
                checkpoint_path = os.path.join(opt.checkpoint_path,
                                               'model-best.pth')
                if opt.mGPUs:
                    torch.save(model.module.state_dict(), checkpoint_path)
                else:
                    torch.save(model.state_dict(), checkpoint_path)

                print("model saved to {} with best cider score {:.3f}".format(
                    checkpoint_path, best_val_score))
                with open(
                        os.path.join(opt.checkpoint_path,
                                     'infos_' + opt.id + '-best.pkl'),
                        'wb') as f:
                    pickle.dump(infos, f)

                # update best scores
                for metric, _ in best_score.items():
                    best_score[metric] = lang_stats[metric]

            print("===================================")
            print("--> Highest scores on {} set at epoch {}".format(
                opt.val_split, epoch))
            for metric, score in sorted(best_score.items()):
                print('{}: {:.4f}'.format(metric, score))
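
# The loop above lowers the learning rate when the CIDEr score plateaus.
# A self-contained sketch of that ReduceLROnPlateau pattern, with a dummy
# model and made-up scores (not the project's values):
import torch.nn as nn
import torch.optim as optim
from torch.optim.lr_scheduler import ReduceLROnPlateau

toy_model = nn.Linear(8, 8)
toy_optim = optim.Adam(toy_model.parameters(), lr=5e-4)
toy_sched = ReduceLROnPlateau(toy_optim, 'max', patience=2, min_lr=1e-6)

# CIDEr stalls after epoch 1, so the LR is cut once `patience` runs out.
for ep, cider in enumerate([0.40, 0.55, 0.55, 0.55, 0.55, 0.55]):
    toy_sched.step(cider)
    print(ep, toy_optim.param_groups[0]['lr'])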
Example #22
File: train.py  Project: Doragd/NIC_model
def train():
    opt = parse_opt()
    train_mode = opt.train_mode
    idx2word = json.load(open(opt.idx2word, 'r'))
    captions = json.load(open(opt.captions, 'r'))

    # Model
    decoder = Decoder(idx2word, opt.settings)
    decoder.to(opt.device)
    lr = opt.learning_rate
    optimizer, xe_criterion = decoder.get_optim_and_crit(lr)
    if opt.resume:
        print("====> loading checkpoint '{}'".format(opt.resume))
        chkpoint = torch.load(opt.resume, map_location=lambda s, l: s)
        assert opt.settings == chkpoint['settings'], \
            'opt.settings and resume model settings are different'
        assert idx2word == chkpoint['idx2word'], \
            'idx2word and resume model idx2word are different'
        decoder.load_state_dict(chkpoint['model'])
        if chkpoint['train_mode'] == train_mode:
            optimizer.load_state_dict(chkpoint['optimizer'])
            lr = optimizer.param_groups[0]['lr']
        print("====> loaded checkpoint '{}', epoch: {}, train_mode: {}".format(
            opt.resume, chkpoint['epoch'], chkpoint['train_mode']))
    elif train_mode == 'rl':
        raise Exception('"rl" mode need resume model')

    print('====> process image captions begin')
    word2idx = {}
    for i, w in enumerate(idx2word):
        word2idx[w] = i
    captions_id = {}
    for split, caps in captions.items():
        print('convert %s captions to index' % split)
        captions_id[split] = {}
        for fn, seqs in tqdm.tqdm(caps.items(), ncols=100):
            tmp = []
            for seq in seqs:
                tmp.append(
                    [decoder.sos_id] +
                    [word2idx.get(w, word2idx['<UNK>'])
                     for w in seq] + [decoder.eos_id])
            captions_id[split][fn] = tmp
    captions = captions_id
    print('====> process image captions end')

    train_data = get_dataloader(opt.img_feats, captions['train'],
                                decoder.pad_id, opt.max_seq_len,
                                opt.batch_size, opt.num_workers)
    val_data = get_dataloader(opt.img_feats,
                              captions['val'],
                              decoder.pad_id,
                              opt.max_seq_len,
                              opt.batch_size,
                              opt.num_workers,
                              shuffle=False)
    test_captions = {}
    for fn in captions['test']:
        test_captions[fn] = [[]]
    test_data = get_dataloader(opt.img_feats,
                               test_captions,
                               decoder.pad_id,
                               opt.max_seq_len,
                               opt.batch_size,
                               opt.num_workers,
                               shuffle=False)

    if train_mode == 'rl':
        rl_criterion = RewardCriterion()
        ciderd_scorer = get_ciderd_scorer(captions, decoder.sos_id,
                                          decoder.eos_id)

    def forward(data, training=True, ss_prob=0.0):
        decoder.train(training)
        loss_val = 0.0
        reward_val = 0.0
        for fns, fc_feats, (caps_tensor,
                            lengths), ground_truth in tqdm.tqdm(data,
                                                                ncols=100):
            fc_feats = fc_feats.to(opt.device)
            caps_tensor = caps_tensor.to(opt.device)

            if training and train_mode == 'rl':
                sample_captions, sample_logprobs, seq_masks = decoder(
                    fc_feats,
                    sample_max=0,
                    max_seq_len=opt.max_seq_len,
                    mode=train_mode)
                decoder.eval()
                with torch.no_grad():
                    greedy_captions, _, _ = decoder(
                        fc_feats,
                        sample_max=1,
                        max_seq_len=opt.max_seq_len,
                        mode=train_mode)
                decoder.train(training)
                reward = get_self_critical_reward(sample_captions,
                                                  greedy_captions, fns,
                                                  ground_truth, decoder.sos_id,
                                                  decoder.eos_id,
                                                  ciderd_scorer)
                loss = rl_criterion(
                    sample_logprobs, seq_masks,
                    torch.from_numpy(reward).float().to(opt.device))
                reward_val += float(np.mean(reward[:, 0]))
            else:
                pred = decoder(fc_feats, caps_tensor, ss_prob=ss_prob)
                loss = xe_criterion(pred, caps_tensor[:, 1:], lengths)

            loss_val += float(loss)
            if training:
                optimizer.zero_grad()
                loss.backward()
                clip_gradient(optimizer, opt.grad_clip)
                optimizer.step()

        return loss_val / len(data), reward_val / len(data)

    checkpoint_dir = os.path.join(opt.checkpoint, train_mode)
    if not os.path.exists(checkpoint_dir):
        os.makedirs(checkpoint_dir)
    result_dir = os.path.join(opt.result, train_mode)
    if not os.path.exists(result_dir):
        os.makedirs(result_dir)
    previous_loss = None
    for epoch in range(opt.max_epochs):
        print('--------------------epoch: %d' % epoch)
        ss_prob = 0.0
        if epoch > opt.scheduled_sampling_start >= 0:
            frac = (epoch - opt.scheduled_sampling_start
                    ) // opt.scheduled_sampling_increase_every
            ss_prob = min(opt.scheduled_sampling_increase_prob * frac,
                          opt.scheduled_sampling_max_prob)
        train_loss, train_reward = forward(train_data, ss_prob=ss_prob)
        with torch.no_grad():
            val_loss, _ = forward(val_data, training=False)

        if train_mode == 'xe' and previous_loss is not None and val_loss >= previous_loss:
            lr = lr * 0.5
            for param_group in optimizer.param_groups:
                param_group['lr'] = lr
        previous_loss = val_loss

        if epoch in [0, 5, 10, 15, 20, 25, 29, 30, 35, 39, 40, 45, 49]:
            # test
            results = []
            for fns, fc_feats, _, _ in tqdm.tqdm(test_data, ncols=100):
                fc_feats = fc_feats.to(opt.device)
                for i, fn in enumerate(fns):
                    fc_feat = fc_feats[i]
                    with torch.no_grad():
                        rest, _ = decoder.sample(fc_feat,
                                                 beam_size=opt.beam_size,
                                                 max_seq_len=opt.max_seq_len)
                    results.append({'image_id': fn, 'caption': rest[0]})
            json.dump(
                results,
                open(os.path.join(result_dir, 'result_%d.json' % epoch), 'w'))

            chkpoint = {
                'epoch': epoch,
                'model': decoder.state_dict(),
                'optimizer': optimizer.state_dict(),
                'settings': opt.settings,
                'idx2word': idx2word,
                'train_mode': train_mode,
            }
            checkpoint_path = os.path.join(
                checkpoint_dir, 'model_%d_%.4f_%s.pth' %
                (epoch, val_loss, time.strftime('%m%d-%H%M')))
            torch.save(chkpoint, checkpoint_path)

        print('train_loss: %.4f, train_reward: %.4f, val_loss: %.4f' %
              (train_loss, train_reward, val_loss))
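
# The scheduled-sampling probability above follows a simple staircase
# schedule. Pulled out as a standalone function for clarity (default
# hyperparameter values are placeholders, not the project's):
def scheduled_sampling_prob(epoch, start=0, increase_every=5,
                            increase_prob=0.05, max_prob=0.25):
    if start < 0 or epoch <= start:
        return 0.0
    frac = (epoch - start) // increase_every
    return min(increase_prob * frac, max_prob)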
Example #23
import torch
from torch.utils.data import Dataset
import pickle
import json
import os
import opts
import numpy as np

opt = opts.parse_opt()
opt = vars(opt)

class NCEDataset(Dataset):

	def __init__(self, opt, mode):
		super(NCEDataset, self).__init__()
		self.mode = mode  # which split this instance serves ('train'/'val'/'test')
		self.video_feats = pickle.load(open(opt["s3d_feat_path"],"rb"))

		self.cap_feats = pickle.load(open(opt["bert_feats_path"],"rb"))

		info = json.load(open(opt["info_json"]))
		self.ix_to_word = info['ix_to_word']
		self.word_to_ix = info['word_to_ix']
		self.splits = info['videos']

		self.splits['train'] = [x for x in self.splits['train'] if x in self.video_feats]
		self.splits['val'] = [x for x in self.splits['val'] if x in self.video_feats]
		self.splits['test'] = [x for x in self.splits['test'] if x in self.video_feats]

		self.train = self.splits['train']
		self.test = self.splits['test']
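
	# The scraped class ends before __len__/__getitem__. A hypothetical
	# completion, assuming both feature dicts are keyed by video id and
	# hold numpy arrays (real shapes and any negative sampling unknown):
	def __len__(self):
		return len(self.splits[self.mode])

	def __getitem__(self, ix):
		vid = self.splits[self.mode][ix]
		video_feat = torch.from_numpy(np.asarray(self.video_feats[vid])).float()
		cap_feat = torch.from_numpy(np.asarray(self.cap_feats[vid])).float()
		return video_feat, cap_feat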

def main(args):
    cfg = setup(args)

    if args.eval_only:
        model = Trainer.build_model(cfg)
        DetectionCheckpointer(model, save_dir=cfg.OUTPUT_DIR).resume_or_load(
            cfg.MODEL.WEIGHTS, resume=args.resume)
        res = Trainer.test(cfg, model)
        if comm.is_main_process():
            verify_results(cfg, res)
        return res

    trainer = Trainer(cfg)
    trainer.resume_or_load(resume=args.resume)
    return trainer.train()


if __name__ == "__main__":
    args = parse_opt().parse_args()
    print("Command Line Args:", args)
    launch(
        main,
        args.num_gpus,
        num_machines=args.num_machines,
        machine_rank=args.machine_rank,
        dist_url=args.dist_url,
        args=(args, ),
    )