Example #1
0
def main(cfg):
    if cfg['model'] == 'mlp':
        net = MLP(300, 768, cfg['class_num'])
    elif cfg['model'] == 'cnn':
        net = CNN(300, 768, cfg['class_num'])
    elif cfg['model'] == 'lstm':
        net = LSTM(300, cfg['class_num'], cfg['device'])
    elif cfg['model'] == 'gru':
        net = GRU(300, cfg['class_num'], cfg['device'])
    else:
        raise Exception(f'model {cfg["model"]} not available')

    if cfg['device'] == 'cuda':
        if len(cfg['gpu_ids']) == 1:
            torch.cuda.set_device(cfg['gpu_ids'][0])
            net = net.cuda()
        else:
            net = net.cuda()
            net = nn.DataParallel(net, device_ids=cfg['gpu_ids'])

    # Let cuDNN benchmark and cache the fastest kernels for the fixed-size inputs.
    torch.backends.cudnn.benchmark = True

    if cfg['mode'] == 'train':
        train(cfg, net)
    elif cfg['mode'] == 'predict':
        predict(cfg, net, 'checkpoints/{}.pth'.format(cfg['model']))
Example #2
0
def get_model(input_size, embed_size, output_size, model_type, dropout=DROPOUT):
    model_type = model_type.lower()

    if model_type == 'lstm':
        return LSTM(input_size, embed_size, output_size, dropout)

    if model_type == 'cnn':
        return CNN(input_size, embed_size, output_size, dropout)

    if model_type == 'gru':
        return GRU(input_size, embed_size, output_size, dropout)

    return None
Example #3
0
    def __call__(self,
                 number_of_iterations=2,
                 learning_rate=0.005,
                 embedding_size=300,
                 hidden_size=100,
                 batch_size=100):
        print("Starting 'Image Retrieval' in 'GRU' mode with '" +
              self.difficulty + "' data")

        self.model_full_path = "{}/{}_{}_{}_{}.pty".format(
            self.model_path, self.model_name, self.timestamp, learning_rate,
            embedding_size)
        self.output_file_name = "{}/{}_{}_{}_{}.csv".format(
            self.output_path, self.model_name, self.timestamp, learning_rate,
            embedding_size)

        self.number_of_iterations = number_of_iterations
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.model = GRU(self.nwords, self.embedding_size,
                         self.image_feature_size, self.output_vector_size,
                         self.hidden_size, self.batch_size)
        self.criterion = nn.CrossEntropyLoss()

        self.evaluate = Evaluate(self.model, self.img_features, self.minibatch,
                                 self.preprocess, self.image_feature_size,
                                 self.output_vector_size)
        print(self.model)

        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.learning_rate)

        self.train_loss_values = []

        self.magic()

        self.save_model()

        self.save_data()
Example #4
0
def test_func(test_y_file, word_file, pos1_file, pos2_file, ans_file,
              res_file):

    model_path = './model/ATT-MODEL'

    word_embedding = np.load('./data/vec.npy')
    wordpos1 = np.load('./data/wordpos1.npy')
    wordpos2 = np.load('./data/wordpos2.npy')

    #test_y = np.load('./data/test_y.npy')
    #test_sentence_word = np.load('./data/test_sentence_word.npy')
    #test_sentence_pos1 = np.load('./data/test_sentence_pos1.npy')
    #test_sentence_pos2 = np.load('./data/test_sentence_pos2.npy')
    #all_ans = np.load('./data/all_ans.npy')

    test_y = np.load('./data/' + test_y_file)
    test_sentence_word = np.load('./data/' + word_file)
    test_sentence_pos1 = np.load('./data/' + pos1_file)
    test_sentence_pos2 = np.load('./data/' + pos2_file)
    all_ans = np.load('./data/' + ans_file)

    all_alpha = np.load("./data/all_alpha.npy")

    settings = Setting()
    batch_size = settings.batch_size

    with tf.Graph().as_default():
        sess = tf.Session()

        with sess.as_default():
            print "model testing begin"
            with tf.variable_scope("model"):
                m = GRU(is_training=False,
                        word_embeddings=word_embedding,
                        word_pos1=wordpos1,
                        word_pos2=wordpos2,
                        settings=settings)

            saver = tf.train.Saver()

            i_length = int(
                len(test_sentence_word) / float(settings.batch_size))
            test_length = i_length * settings.batch_size

            ClassEst = np.zeros((test_length, settings.num_classes - 1))
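            # ClassEst will accumulate alpha-weighted class scores from each
            # restored checkpoint in the loop below (a small checkpoint ensemble).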

            for epoch in range(settings.num_epochs - 25,
                               settings.num_epochs - 15):
                alpha = all_alpha[epoch]
                saver.restore(sess, model_path + str(epoch))
                all_prob = []

                for i in range(i_length):
                    print '%d/%d,test one batch' % (i, i_length)
                    temp_sentence_word = []
                    temp_sentence_pos1 = []
                    temp_sentence_pos2 = []
                    temp_sentence_y = []
                    temp_input = test_sentence_word[i *
                                                    settings.batch_size:(i +
                                                                         1) *
                                                    settings.batch_size]

                    temp_sentence_word = test_sentence_word[
                        i * settings.batch_size:(i + 1) * settings.batch_size]
                    temp_sentence_pos1 = test_sentence_pos1[
                        i * settings.batch_size:(i + 1) * settings.batch_size]
                    temp_sentence_pos2 = test_sentence_pos2[
                        i * settings.batch_size:(i + 1) * settings.batch_size]
                    temp_sentence_y = test_y[i * settings.batch_size:(i + 1) *
                                             settings.batch_size]

                    batch_sentence_word = np.array(temp_sentence_word)
                    batch_sentence_pos1 = np.array(temp_sentence_pos1)
                    batch_sentence_pos2 = np.array(temp_sentence_pos2)
                    batch_y = np.array(temp_sentence_y)

                    feed_dict = {}
                    feed_dict[m.input_word] = batch_sentence_word
                    feed_dict[m.input_pos1] = batch_sentence_pos1
                    feed_dict[m.input_pos2] = batch_sentence_pos2
                    feed_dict[m.input_y] = batch_y

                    loss_cur, acc, prob = sess.run(
                        [m.loss, m.accuracy, m.probility], feed_dict=feed_dict)
                    #_, step = sess.run([train_op, global_step], feed_dict=feed_dict)
                    for single_prob in prob:
                        #all_prob.append(single_prob)
                        #all_prob.append(single_prob[1:])
                        all_prob.append(single_prob[1:])

                ClassEst += np.multiply(all_prob, alpha)

            all_prob2 = []
            for index in range(test_length):
                all_prob2.append(np.argmax(softmax(ClassEst[index])))

            all_prob2 = np.reshape(np.array(all_prob2), -1)
            all_prob_length = len(all_prob2)
            #np.save('./output/all_prob.npy', all_prob2)
            np.save('./output/' + res_file, all_prob2)
            #print 'avg_pre_score'
            #print metrics.average_precision_score(all_ans[:all_prob_length], all_prob2)

            y_true = all_ans[:all_prob_length]
            y_pred = all_prob2

            #print 'PR curve area:' + str(average_precision)
            #print(classification_report(all_ans[:all_prob_length], all_prob2))

            order = np.argsort(-all_prob2)

            # Precision@k over the k highest-scoring predictions
            for k in [100, 200, 300, 400, 500, 1000]:
                correct_num = 0.0
                for i in order[:k]:
                    if all_ans[i] == 1:
                        correct_num += 1.0
                print 'P@' + str(k) + '\n' + str(correct_num / float(k))

            print '~~~Ending'
Example #5
0
def train_model(exp_name, train_tfrecord, val_tfrecord, dictionary_file,
                n_hidden, learn_rate, batch_size, decouple_split=200,
                patience=10, max_epochs=200, sample_length=16, resume=False):
    """
    Train a GRU on some text data

    :param exp_name: experiment name (saved to ~/experiments/story-gen/exp_name)
    :param train_tfrecord: path to tfrecord of training set
    :param val_tfrecord: path to tfrecord of validation set
    :param dictionary_file: path to dictionary json file
    :param n_hidden: number of hidden units in GRU
    :param learn_rate: learning rate
    :param batch_size: batch size
    :param decouple_split: subsequence length between decoupled neural interfaces,
                           or None to disable decoupled neural interfaces
    :param patience: early stopping limit
    :param max_epochs: maximum number of epochs to run
    :param sample_length: length of sample to generate after each epoch
    :param resume: resume from previous run
    :return:
    """

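    # Minimal usage sketch; the paths and hyper-parameter values below are
    # illustrative placeholders, not files shipped with this code:
    #   train_model('toy_run', 'data/train.tfrecord', 'data/val.tfrecord',
    #               'data/dictionary.json', n_hidden=256, learn_rate=1e-3,
    #               batch_size=32)
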
    exp_dir = os.path.join(os.path.expanduser('~/experiments/story-gen/'),
                           exp_name)
    if not os.path.isdir(exp_dir):
        os.makedirs(exp_dir)

    with open(dictionary_file,'r') as f:
        reverse_dict = json.load(f)  # word -> int
    reverse_dict = {v+1:k for k,v in reverse_dict.items()}  # int -> word
    # note: sequences are padded with zero, add to dict_size (for embedding)
    reverse_dict[0] = '_END_'  # this should be removed from sampled output
    dict_size = max(reverse_dict.keys())+1

    if not resume:
        max_sequence = 20000 if decouple_split is not None else 100
        pipeline = Vector_Pipeline(train_tfrecord, val_tfrecord, batch_size,
                                   max_sequence=max_sequence)
        init_train, init_val = pipeline.init_train, pipeline.init_val

        model_input = tf.placeholder_with_default(pipeline.output[:,:-1],
                                                  [None, None], 'input')

        # Embedding
        embedding = orthogonal([dict_size, n_hidden], 'embedding')
        embedded_input = tf.nn.embedding_lookup(embedding, model_input)
        int_label = pipeline.output[:,1:]

        # Decoupled neural interface (optional)
        decoupled = decouple_split is not None
        if decoupled:
            # Split subsequences, reshape to [slow_time, batch, fast_time, feat]
            seq_len = tf.shape(embedded_input)[1]
            # pad so sequence length is divisible by subsequence length
            pad_len = decouple_split-tf.mod(seq_len,tf.constant(decouple_split))
            embedded_input = tf.pad(embedded_input, [[0,0], [0,pad_len], [0,0]],
                                    mode='CONSTANT', constant_values=0)
            int_label = tf.pad(int_label, [[0,0], [0,pad_len]])
            # batch x features x time
            dni_input = tf.transpose(embedded_input, [0,2,1])
            # batch x features x slow_time x fast_time
            dni_input = tf.reshape(
                dni_input,
                [-1, n_hidden,
                 (seq_len+pad_len)//decouple_split, decouple_split])
            # fast_time x features x batch x slow_time
            dni_input = tf.transpose(dni_input, [3,1,0,2])
            # fast_time x features x (batch x slow_time)
            dni_input = tf.reshape(dni_input, [decouple_split, n_hidden, -1])
            # (batch x slow_time) x fast_time x features
            dni_input = tf.transpose(dni_input, [2,0,1])
            # (batch x slow_time) x (fast_time x features)
            dni_input = tf.reshape(dni_input, [tf.shape(dni_input)[0],-1])

            # Decoupled neural interface: simplify to single dense layer
            dni = Dense(dni_input, n_hidden, tf.nn.relu, name='dni',
                        init='uniform', n_in=n_hidden*decouple_split)

            # Reshape DNI out & embedded_input to new_batch x fast_time for GRU
            gru_hidden = tf.reshape(dni.output, [-1, n_hidden])
            embedded_input = tf.reshape(embedded_input,
                                        [-1, decouple_split, n_hidden])
            int_label = tf.reshape(int_label, [-1, decouple_split])
        else:
            gru_hidden = None

        # model part2: GRU
        # transpose: tf.scan needs time x batch x features
        embedded_input = tf.transpose(embedded_input, [1,0,2])
        training_toggle = tf.placeholder(tf.int32, name='training_toggle')
        gru = GRU(embedded_input, n_hidden, training_toggle, h0=gru_hidden,
                  name='gru')
        gru_h0 = gru.h0
        gru_output = gru.output
        # model part3: dropout and dense layer
        dropout_rate = tf.placeholder(tf.float32, name='dropout_rate')
        dropped = tf.nn.dropout(gru_output, 1-dropout_rate)
        dense = Dense(dropped, dict_size)
        model_output = tf.identity(dense.output, 'output')

        # cross-entropy loss
        # note: sequences padded with -1, mask these entries
        mask = tf.not_equal(int_label, -1)
        # swap -1's to avoid error in loss fcn, even though we're ignoring these
        int_label = tf.where(mask, int_label, tf.zeros_like(int_label))
        # mean over entries with mask==1
        mask = tf.cast(mask, dtype=tf.float32)
        loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=int_label, logits=model_output)
        loss = tf.reduce_sum(mask*loss)/tf.reduce_sum(mask)

        if decoupled:
            # decoupled neural interface loss
            dni_label = tf.stop_gradient(gru.output)
            dni_loss = tf.reduce_mean(tf.square(dni_label-dni.output),
                                      name='dni_loss')
        else:
            dni_loss =  tf.constant(0., dtype=tf.float32)

        train_step = tf.train.AdamOptimizer(learn_rate).minimize(
            loss+dni_loss,name='train_step')
    else:
        (model_input, training_toggle, dropout_rate, train_step, init_train,
         init_val, loss, dni_loss, gru_output, gru_h0, model_output
            ) = reload_graph(exp_dir)
    n_examples = tf.shape(model_input)[0]

    sampled_out = tf.multinomial(model_output[0,:1,:],num_samples=1)
    def epoch_callback(sess):
        # TODO: not sure how to initialize this since it's usually from the DNI
        h0 = np.random.rand(1, n_hidden)
        sampled_text = [np.random.randint(0,dict_size,size=(1,1))]
        for i in range(sample_length+1):
            out,h0 = sess.run([sampled_out, gru_output],
                              feed_dict={gru_h0:h0,
                                         model_input:sampled_text[i],
                                         dropout_rate:0,
                                         training_toggle:0})
            h0 = h0[0]
            sampled_text.append(out)
        sampled_text = sampled_text[1:]
        # temp bugfix: screwed up the reverse dictionary, missing keys
        if any([int(o) not in reverse_dict.keys() for o in sampled_text]):
            sampled_text = [
                o if int(o) in reverse_dict.keys()
                else int(np.random.choice(list(reverse_dict.keys())))
                for o in sampled_text]
        print(' '.join([reverse_dict[int(o)] for o in sampled_text]))
        print('')

    fit(training_toggle, dropout_rate, train_step, init_train, init_val, loss,
        dni_loss, n_examples, patience, max_epochs, exp_dir, epoch_callback,
        resume)
Example #6
0
        save_path = 'data/bmli_none.pth'

    data_loader = PairedComparison(4,
                                   ranking=ranking,
                                   direction=direction,
                                   dichotomized=False)

    gini_coefficients = torch.zeros(len(file_names), args.num_experiments,
                                    args.num_episodes, args.sequence_length)
    map_performance = torch.zeros(len(file_names), args.num_experiments,
                                  args.num_episodes, args.sequence_length)
    avg_performance = torch.zeros(len(file_names), args.num_experiments,
                                  args.num_episodes, args.sequence_length)
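    # Each tensor is indexed as [model, experiment, episode, time step].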

    for m, file_name in enumerate(file_names):
        model = GRU(data_loader.num_inputs, data_loader.num_targets, 128)

        params, _ = torch.load(file_name, map_location='cpu')
        model.load_state_dict(params)

        for k in tqdm(range(args.num_experiments)):
            for i in range(args.num_episodes):
                inputs, targets, _, _ = data_loader.get_batch(
                    1, args.sequence_length)
                predictive_distribution, weights, variances = model(
                    inputs, targets)

                map_performance[m, k, i] = (
                    (predictive_distribution.probs >
                     0.5).float() == targets).squeeze().detach()
                avg_performance[m, k,
Example #7
0
def main(trial_num):
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_type = "lstm"

    # Hyper-parameters
    sequence_length = 28
    input_size = 28
    num_layers = 1
    hidden_size = 128
    num_classes = 10
    batch_size = 100
    num_epochs = 20
    learning_rate = 0.01
    num_trials = 100
    a_range = [1.0, 3.0]
    # a_s = [1.5, 2.0, 2.2, 2.5, 2.7, 3.0]

    # just for testing
    # num_trials = 1
    # num_epochs = 20
    # a_s = [1.0]

    # for a in a_s:
    trials = {}
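    # For every trial, draw the hyper-parameter `a` uniformly from a_range and
    # train a fresh model from scratch.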
    for num_trial in range(num_trials):
        a = random.random() * (a_range[1] - a_range[0]) + a_range[0]
        print('trial Num: ', trial_num,  "a: ", a, "num_trial: ", num_trial)
        trial = {}
        trial['a'] = a
        # define model
        if model_type == 'lstm':
            model = LSTM(input_size, hidden_size, num_layers, num_classes, a, device).to(device)
        elif model_type == 'gru':
            model = GRU(input_size, hidden_size, num_layers, num_classes, a, device).to(device)

        # Loss and optimizer
        criterion = nn.CrossEntropyLoss()
        optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
        train_dataloader = MNIST_dataloader(batch_size, train=True)
        test_dataloader = MNIST_dataloader(batch_size, train=False)
        # Train the model
        total_step = len(train_dataloader.dataloader)

        total = 0
        total_loss = 0
        for epoch in range(num_epochs):
            model.train()
            for i, (images, labels) in enumerate(train_dataloader.dataloader):
                images = images.reshape(-1, sequence_length, input_size).to(device)
                labels = labels.to(device)

                # Forward pass
                outputs, hts = model(images)
                loss = criterion(outputs, labels)
                total_loss += loss * labels.size(0)
                total += labels.size(0)
                # print(LEs, rvals)

                # Backward and optimize
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

                # if (i + 1) % 300 == 0:
                #     print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                #           .format(epoch + 1, num_epochs, i + 1, total_step, total_loss / total))

            # for i, (name, param) in enumerate(model.named_parameters()):
            #     if i == 3:
            #         print(name, param)
            # Test the model
            model.eval()
            with torch.no_grad():
                correct = 0
                total = 0
                total_loss = 0
                for i, (images, labels) in enumerate(test_dataloader.dataloader):
                    images = images.reshape(-1, sequence_length, input_size).to(device)
                    labels = labels.to(device)
                    outputs, _ = model(images)

                    # h = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                    # c = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                    # params = (images, (h, c))
                    # if i == 0:
                    #     LEs, rvals = calc_LEs_an(*params, model=model)

                    loss = criterion(outputs, labels)

                    _, predicted = torch.max(outputs.data, 1)
                    total += labels.size(0)
                    correct += (predicted == labels).sum().item()
                    total_loss += loss * labels.size(0)
                if epoch == (num_epochs - 1):
                    print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(epoch + 1, num_epochs, total_loss / total, 100 * correct / total))
            saved_model = copy.deepcopy(model)
            trial[epoch] = {"model": saved_model, "accuracy": 100 * correct / total, "loss": total_loss / total}
            del saved_model
        trials[num_trial] = trial
        pickle.dump(trials, open('trials/{}/models/{}_{}_trials_{}.pickle'.format(model_type, model_type, hidden_size, trial_num), 'wb'))
Example #8
0
def main(_):
    print 'reading word-embedding'
    wordembedding = np.load('./data/vec.npy')
    wordpos1 = np.load('./data/wordpos1.npy')
    wordpos2 = np.load('./data/wordpos2.npy')

    print 'reading training data'
    train_y = np.load('./data/train_y.npy')
    train_sentence_word = np.load('./data/train_sentence_word.npy')
    train_sentence_pos1 = np.load('./data/train_sentence_pos1.npy')
    train_sentence_pos2 = np.load('./data/train_sentence_pos2.npy')

    precious_record = open('./precious_record.txt', 'w')

    save_path = './model/ATT-MODEL'

    settings = Setting()
    settings.num_classes = len(train_y[0])
    settings.voc_size = len(wordembedding)

    batch_size = settings.batch_size

    with tf.Graph().as_default():
        config = tf.ConfigProto()
        config.gpu_options.allow_growth = True
        sess = tf.Session(config=config)
        with sess.as_default():
            initializer = tf.contrib.layers.xavier_initializer()
            print "model training begin"

            with tf.variable_scope("model",
                                   reuse=None,
                                   initializer=initializer):
                m = GRU(is_training=True,
                        word_embeddings=wordembedding,
                        word_pos1=wordpos1,
                        word_pos2=wordpos2,
                        settings=settings)

            merged = tf.summary.merge_all()
            writer = tf.summary.FileWriter("logs/",
                                           sess.graph)  # tensorflow >=0.12

            global_step = tf.Variable(0, name="global_step", trainable=False)
            optimizer = tf.train.AdamOptimizer(settings.learning_rate)

            all_alpha = []

            i_length = int(
                len(train_sentence_word) / float(settings.batch_size))
            train_length = i_length * settings.batch_size
            global ada_length
            ada_length = i_length
            global ada_weight
            ada_weight = np.array(np.ones(ada_length) / ada_length)
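            # One boosting weight per mini-batch, initialised uniformly and
            # re-weighted after every epoch further below.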
            global i
            i = 0

            ggClassEst = np.zeros((train_length, 1))
            params = tf.trainable_variables()
            g = tf.get_default_graph()
            with g.gradient_override_map({"Ada": "AdaGrad"}):
                m.loss = tf.identity(m.loss, name="Ada")
            train_op = optimizer.minimize(m.loss, global_step=global_step)

            sess.run(tf.global_variables_initializer())
            saver = tf.train.Saver(max_to_keep=None)
            print 'Starting Learning'

            feed_dict_list = []

            for epoch in range(settings.num_epochs):
                rand_order = range(i_length)
                #np.random.shuffle(rand_order)
                ii = 0
                all_prob = []
                all_err = []
                all_comp = []
                batch_err = []
                batch_comp = []
                for index in range(10):
                    print ada_weight[index]

                for i in rand_order:
                    print('epoch-%d, %d/%d (%d):run one batch' %
                          (epoch, ii, i_length, i))
                    ii += 1
                    temp_sentence_word = []
                    temp_sentence_pos1 = []
                    temp_sentence_pos2 = []
                    temp_sentence_y = []

                    temp_sentence_word = train_sentence_word[
                        i * settings.batch_size:(i + 1) * settings.batch_size]
                    temp_sentence_pos1 = train_sentence_pos1[
                        i * settings.batch_size:(i + 1) * settings.batch_size]
                    temp_sentence_pos2 = train_sentence_pos2[
                        i * settings.batch_size:(i + 1) * settings.batch_size]
                    temp_sentence_y = train_y[i * settings.batch_size:(i + 1) *
                                              settings.batch_size]

                    batch_sentence_word = np.array(temp_sentence_word)
                    batch_sentence_pos1 = np.array(temp_sentence_pos1)
                    batch_sentence_pos2 = np.array(temp_sentence_pos2)
                    batch_y = np.array(temp_sentence_y)

                    feed_dict = {}
                    feed_dict[m.input_word] = batch_sentence_word
                    feed_dict[m.input_pos1] = batch_sentence_pos1
                    feed_dict[m.input_pos2] = batch_sentence_pos2
                    feed_dict[m.input_y] = batch_y

                    #temp, step, loss_cur, acc = sess.run([train_op, global_step, m.loss, m.accuracy], feed_dict=feed_dict)
                    temp, step, loss_cur, acc, prob, comp, err = sess.run(
                        [
                            train_op, global_step, m.loss, m.accuracy,
                            m.probility, m.comparison, m.difference
                        ],
                        feed_dict=feed_dict)

                    time_str = datetime.datetime.now().isoformat()

                    for single_prob in prob:
                        all_prob.append(single_prob)
                    for single_err in err:
                        all_err.append(single_err)
                    for single_comp in comp:
                        all_comp.append(single_comp)
                    batch_err.append(np.sum(err) / settings.batch_size)
                    batch_comp.append(np.sum(comp) / settings.batch_size)

                    if step % 10 == 0:
                        print("  {}: step {}, loss {:g}, acc {:g}".format(
                            time_str, step, loss_cur, acc))
                        rs = sess.run(merged, feed_dict=feed_dict)
                        writer.add_summary(rs, step)

                #all_prob = np.reshape(np.array(all_prob), -1)
                #all_prob_length = len(all_prob)
                #average_precision = average_precision_score(all_ans[:all_prob_length], all_prob)
                #print 'PR curve area:' + str(average_precision)

                #errorMisClassified = sum([np.exp(all_err[index]) for index in range(train_length) if not all_comp[index]])

                #errorWellClassified = sum([np.exp(all_err[index]) for index in range(train_length) if all_comp[index]])
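                # AdaBoost-style bookkeeping: compare the exponential error of
                # poorly classified batches (comp < 0.5) with that of well
                # classified ones, derive this epoch's alpha, then rescale the
                # per-batch weights accordingly.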
                errorMisClassified = sum([
                    np.exp(batch_err[index]) for index in range(ada_length)
                    if batch_comp[index] < 0.5
                ])
                errorWellClassified = sum([
                    np.exp(batch_err[index]) for index in range(ada_length)
                    if batch_comp[index] >= 0.5
                ])

                alpha = 0.5 * np.log(errorWellClassified / errorMisClassified)
                print 'alpha:', alpha
                all_alpha.append(alpha)
                for index in range(ada_length):
                    if batch_comp[index] >= 0.5:
                        ada_weight[index] *= np.exp(-alpha + batch_err[index])
                    else:
                        ada_weight[index] *= np.exp(alpha + batch_err[index])
                max_ada_weight = max(ada_weight)
                global delta
                delta = 1.0 / max_ada_weight
                print 'delta:', delta
                ada_weight = np.dot(ada_weight, 1. / sum(ada_weight))
                precious_record.writelines("epoch: " + str(epoch) +
                                           " alpha: " + str(alpha) + "\n")
                saver.save(sess, save_path + str(epoch))

            np.save('./data/all_alpha.npy', all_alpha)
            saver.save(sess, save_path + "_final")

            print 'Training finished, model being saved'
            #current_step = tf.train.global_step(sess, global_step)
            #path = saver.save(sess, save_path+'ATT_Model', global_step=current_step)
            #saver.save(sess, save_path)
    print '-------end------'
Example #9
0
def main(datasets, U, n_epochs=20, batch_size=20, max_l=100, hidden_size=100, \
         word_embedding_size=100, session_hidden_size=50, session_input_size=50, \
         model_name='SMN_last.bin', learning_rate=0.001, r_seed=3435, \
        val_frequency=100):
    hiddensize = hidden_size
    U = U.astype(dtype=theano.config.floatX)
    rng = np.random.RandomState(r_seed)
    lsize, rsize = max_l, max_l
    sessionmask = T.matrix()
    lx = []  #tokens from previous turns
    lxmask = []  #masks from previous turns
    for i in range(max_turn):
        lx.append(T.matrix())
        lxmask.append(T.matrix())

    index = T.lscalar()
    rx = T.matrix('rx')  #tokens from response
    rxmask = T.matrix()  #masks from response
    y = T.ivector('y')
    Words = theano.shared(value=U, name="Words")
    llayer0_input = []
    for i in range(max_turn):
        llayer0_input.append(Words[T.cast(lx[i].flatten(), dtype="int32")] \
            .reshape((lx[i].shape[0], lx[i].shape[1], Words.shape[1])))

    # input: word embeddings of the mini batch
    rlayer0_input = Words[T.cast(rx.flatten(), dtype="int32")].\
                    reshape((rx.shape[0], rx.shape[1], Words.shape[1]))

    train_set, dev_set, test_set = datasets[0], datasets[1], datasets[2]

    train_set_lx = []
    train_set_lx_mask = []
    q_embedding = []
    q_embedding_Cat = []
    q_embedding_Cat_mask = []
    q_embedding_self_att = []
    q_embedding_self_att_rnn = []
    q_embedding_hiddenequal = []

    offset = 2 * lsize
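    # Each turn occupies 2*lsize columns of the data matrix (lsize token ids
    # followed by lsize mask values); the response and its mask come after the
    # max_turn turns, then the session mask and finally the label column.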
    for i in range(max_turn):
        train_set_lx.append(theano.shared(
            np.asarray(a=train_set[:, offset*i:offset*i+lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
        train_set_lx_mask.append(theano.shared(
            np.asarray(a=train_set[:, offset*i + lsize:offset*i + 2*lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
    train_set_rx = theano.shared(
        np.asarray(a=train_set[:, offset*max_turn:offset*max_turn + lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    train_set_rx_mask = theano.shared(
        np.asarray(a=train_set[:, offset*max_turn+lsize:offset*max_turn + 2*lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    train_set_session_mask = theano.shared(
        np.asarray(a=train_set[:, -max_turn-1:-1], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    train_set_y = theano.shared(np.asarray(train_set[:, -1], dtype="int32"), \
                               borrow=True)

    val_set_lx = []
    val_set_lx_mask = []
    for i in range(max_turn):
        val_set_lx.append(theano.shared(
            np.asarray(a=dev_set[:, offset*i:offset*i + lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
        val_set_lx_mask.append(theano.shared(
            np.asarray(a=dev_set[:, offset*i + lsize:offset*i + 2*lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
    val_set_rx = theano.shared(
        np.asarray(a=dev_set[:, offset*max_turn:offset*max_turn + lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    val_set_rx_mask = theano.shared(
        np.asarray(a=dev_set[:, offset*max_turn + lsize:offset*max_turn + 2*lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    val_set_session_mask = theano.shared(np.asarray(a=dev_set[:, -max_turn-1:-1], \
                                                    dtype=theano.config.floatX), \
                                         borrow=True)
    val_set_y = theano.shared(np.asarray(dev_set[:, -1], dtype="int32"),
                              borrow=True)

    test_set_lx = []
    test_set_lx_mask = []
    for i in range(max_turn):
        test_set_lx.append(theano.shared(
            np.asarray(a=test_set[:, offset*i:offset*i + lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
        test_set_lx_mask.append(theano.shared(
            np.asarray(a=test_set[:, offset*i + lsize:offset*i + 2*lsize], \
                       dtype=theano.config.floatX), \
                       borrow=True))
    test_set_rx = theano.shared(
        np.asarray(a=test_set[:, offset*max_turn:offset*max_turn + lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    test_set_rx_mask = theano.shared(
        np.asarray(a=test_set[:, offset*max_turn + lsize:offset*max_turn + 2*lsize], \
                   dtype=theano.config.floatX), \
                   borrow=True)
    test_set_session_mask = theano.shared(np.asarray(a=test_set[:, -max_turn-1:-1], \
                                                    dtype=theano.config.floatX), \
                                         borrow=True)
    test_set_y = theano.shared(np.asarray(test_set[:, -1], dtype="int32"), \
                               borrow=True)

    dic = {}
    for i in range(max_turn):
        dic[lx[i]] = train_set_lx[i][index * batch_size:(index + 1) *
                                     batch_size]
        dic[lxmask[i]] = train_set_lx_mask[i][index * batch_size:(index + 1) *
                                              batch_size]
    dic[rx] = train_set_rx[index * batch_size:(index + 1) * batch_size]
    dic[sessionmask] = train_set_session_mask[index * batch_size:(index + 1) *
                                              batch_size]
    dic[rxmask] = train_set_rx_mask[index * batch_size:(index + 1) *
                                    batch_size]
    dic[y] = train_set_y[index * batch_size:(index + 1) * batch_size]

    val_dic = {}
    for i in range(max_turn):
        val_dic[lx[i]] = val_set_lx[i][index * batch_size:(index + 1) *
                                       batch_size]
        val_dic[lxmask[i]] = val_set_lx_mask[i][index *
                                                batch_size:(index + 1) *
                                                batch_size]
    val_dic[rx] = val_set_rx[index * batch_size:(index + 1) * batch_size]
    val_dic[sessionmask] = val_set_session_mask[index *
                                                batch_size:(index + 1) *
                                                batch_size]
    val_dic[rxmask] = val_set_rx_mask[index * batch_size:(index + 1) *
                                      batch_size]
    val_dic[y] = val_set_y[index * batch_size:(index + 1) * batch_size]

    test_dic = {}
    for i in range(max_turn):
        test_dic[lx[i]] = test_set_lx[i][index * batch_size:(index + 1) *
                                         batch_size]
        test_dic[lxmask[i]] = test_set_lx_mask[i][index *
                                                  batch_size:(index + 1) *
                                                  batch_size]
    test_dic[rx] = test_set_rx[index * batch_size:(index + 1) * batch_size]
    test_dic[sessionmask] = test_set_session_mask[index *
                                                  batch_size:(index + 1) *
                                                  batch_size]
    test_dic[rxmask] = test_set_rx_mask[index * batch_size:(index + 1) *
                                        batch_size]
    test_dic[y] = test_set_y[index * batch_size:(index + 1) * batch_size]

    # This is the first RNN.
    sentence2vec = GRU(n_in=word_embedding_size, n_hidden=hiddensize, \
                       n_out=hiddensize, batch_size=batch_size)
    for i in range(max_turn):
        q_embedding.append(sentence2vec(llayer0_input[i], lxmask[i], True))
    r_embedding = sentence2vec(rlayer0_input, rxmask, True)

    # After the first RNN, concatenate the last turn's embedding onto each
    # earlier turn (and onto the response) along the feature axis.
    for i in range(max_turn):
        q_embedding_Cat.append(T.concatenate([q_embedding[i], \
                                              q_embedding[-1]], \
                               axis=2))
        q_embedding_Cat_mask.append(lxmask[i])
    r_embedding_Cat = T.concatenate([r_embedding, q_embedding[-1]], axis=2)
    r_embedding_Cat_mask = rxmask
    # This is the self_attention step
    sa = self_attention(n_in=hiddensize * 2)
    for i in range(max_turn):
        q_embedding_self_att.append(T.concatenate([q_embedding_Cat[i], \
                                                   sa(q_embedding_Cat[i], \
                                                       q_embedding_Cat_mask[i])], \
                                                  axis=2))
    r_embedding_self_att = T.concatenate([r_embedding_Cat, \
                                          sa(r_embedding_Cat, \
                                              r_embedding_Cat_mask)], \
                                         axis=2)
    # This is the SRNN
    vec2svec = SGRU(n_in=hiddensize*2, n_hidden=hiddensize, \
                    n_out=hiddensize, batch_size=batch_size)

    for i in range(max_turn):
        q_embedding_self_att_rnn.append(vec2svec(q_embedding_self_att[i], \
                                                 q_embedding_Cat_mask[i], \
                                                 True))
    r_embedding_self_att_rnn = vec2svec(r_embedding_self_att, \
                                        r_embedding_Cat_mask, \
                                        True)

    # This is the CNN with pooling and a fully connected layer
    pooling_layer = ConvSim(rng=rng, n_in=max_l, n_out=session_input_size, \
                            hidden_size=hiddensize, session_size=session_hidden_size, \
                            batch_size=batch_size)
    poolingoutput = []
    for i in range(max_turn):
        poolingoutput.append(pooling_layer(llayer0_input[i], \
                                           rlayer0_input, \
                                           q_embedding_self_att_rnn[i], \
                                           r_embedding_self_att_rnn))

    # This is the second RNN
    session2vec = GRU(n_in=session_input_size, n_hidden=session_hidden_size, \
                      n_out=session_hidden_size, batch_size=batch_size)
    res = session2vec(T.stack(poolingoutput, 1), sessionmask, True)

    # Final attention over the turns; its output feeds the classifier
    W = theano.shared(ortho_weight(session_hidden_size), borrow=True)
    W2 = theano.shared(glorot_uniform((hiddensize, session_hidden_size)),
                       borrow=True)
    b = theano.shared(value=np.zeros((session_hidden_size, ), dtype='float32'),
                      borrow=True)
    U_s = theano.shared(glorot_uniform((session_hidden_size, 1)), borrow=True)

    final = T.dot(T.tanh(T.dot(res, W) + \
                         T.dot(T.stack(q_embedding_self_att_rnn, 1)[:, :, -1, :], W2) \
                         + b), U_s)
    weight = T.exp(T.max(final, 2)) * sessionmask
    weight2 = weight / T.sum(weight, 1)[:, None]
    final2 = T.sum(res * weight2[:, :, None], 1) + 1e-6

    # This is the classifier
    classifier = LogisticRegression(final2, session_hidden_size, 2, rng)

    # Compute the cost and update the params with gradients
    cost = classifier.negative_log_likelihood(y)
    error = classifier.errors(y)
    predict = classifier.predict_prob
    opt = Adam()

    # Make params
    params = classifier.params
    params += sentence2vec.params
    params += session2vec.params
    params += pooling_layer.params
    params += [Words, W, b, W2, U_s]
    params += vec2svec.params
    params += sa.params

    # Make updater
    grad_updates = opt.Adam(cost=cost, params=params, lr=learning_rate)

    # The training step
    train_model = theano.function([index], cost, updates=grad_updates, \
                                  givens=dic, on_unused_input='ignore')
    val_model = theano.function([index], [cost, error], givens=val_dic, \
                                on_unused_input='ignore')
    best_dev = 1.
    n_train_batches = datasets[0].shape[0] / batch_size
    for i in xrange(n_epochs):
        cost_all = 0
        total = 0.
        for minibatch_index in np.random.permutation(range(n_train_batches)):
            batch_cost = train_model(minibatch_index)
            total = total + 1
            cost_all = cost_all + batch_cost
            if total % val_frequency == 0:
                sf.write('epoch %d, num %d, train_loss %f' %
                         (i, total, cost_all / total))
                sf.write('\n')
                sf.flush()
                cost_dev = 0
                errors_dev = 0
                j = 0
                for minibatch_index in xrange(datasets[1].shape[0] /
                                              batch_size):
                    tcost, terr = val_model(minibatch_index)
                    cost_dev += tcost
                    errors_dev += terr
                    j = j + 1
                cost_dev = cost_dev / j
                errors_dev = errors_dev / j
                if cost_dev < best_dev:
                    best_dev = cost_dev
                    save_params(params, model_name + 'dev')
                sf.write("epoch %d, num %d, dev_loss %f" %
                         (i, total, cost_dev))
                sf.write('\n')
                sf.write("epoch %d, num %d, dev_accuracy %f" %
                         (i, total, 1 - errors_dev))
                sf.write('\n')
                sf.flush()
        cost_all = cost_all / n_train_batches
        sf.write("epoch %d loss %f" % (i, cost_all))
        sf.write('\n')
        sf.flush()
Example #10
0
            'trained_models/none_1_0.pth',
            'trained_models/none_2_0.pth',
            'trained_models/none_3_0.pth',
            'trained_models/none_4_0.pth',
            'trained_models/none_5_0.pth',
            'trained_models/none_6_0.pth',
            'trained_models/none_pretrained_0.pth'
            ]

    logprobs = torch.zeros(inputs_a.shape[0], len(model_paths))
    # for each participant
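    # (logprobs[participant, m] will hold the summed Bernoulli log-likelihood
    #  of that participant's choices under model m, with the predictive
    #  probabilities averaged over several stochastic forward passes)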
    with torch.no_grad():
        for participant in tqdm(range(inputs_a.shape[0])):
            # for each model
            for m, model_path in enumerate(model_paths):
                model = GRU(4, 1, 128)

                params, _ = torch.load(model_path, map_location='cpu')
                model.load_state_dict(params)

                participant_inputs = inputs_a[participant] - inputs_b[participant]
                participant_targets = targets[participant]

                avg_probs = 0
                for sample in range(args.samples):
                    predictive_distribution, _, _ = model(participant_inputs, participant_targets)
                    avg_probs += predictive_distribution.probs

                avg_predictive_distribution = Bernoulli(avg_probs / args.samples)
                logprobs[participant, m] = avg_predictive_distribution.log_prob(predictions[participant]).sum()
Example #11
0
class Train():
    def __init__(self, difficulty):
        self.data_path = "../data"
        self.model_path = "../models"
        self.output_path = "../outputs"
        self.difficulty = difficulty
        self.timestamp = str(int(time.time()))
        self.model_name = "gru_" + self.difficulty
        self.data = Data(difficulty=self.difficulty, data_path=self.data_path)
        (self.img_features, self.w2i, self.i2w, self.nwords, self.UNK,
         self.PAD) = self.data()
        self.train = list(self.data.get_train_data())
        self.dev = list(self.data.get_validation_data())
        self.test = list(self.data.get_test_data())
        self.image_feature_size = 2048
        self.output_vector_size = 10

    def __call__(self,
                 number_of_iterations=2,
                 learning_rate=0.005,
                 embedding_size=300,
                 hidden_size=100,
                 batch_size=100):
        print("Starting 'Image Retrieval' in 'GRU' mode with '" +
              self.difficulty + "' data")

        self.model_full_path = "{}/{}_{}_{}_{}.pty".format(
            self.model_path, self.model_name, self.timestamp, learning_rate,
            embedding_size)
        self.output_file_name = "{}/{}_{}_{}_{}.csv".format(
            self.output_path, self.model_name, self.timestamp, learning_rate,
            embedding_size)

        self.number_of_iterations = number_of_iterations
        self.learning_rate = learning_rate
        self.embedding_size = embedding_size
        self.hidden_size = hidden_size
        self.batch_size = batch_size
        self.model = GRU(self.nwords, self.embedding_size,
                         self.image_feature_size, self.output_vector_size,
                         self.hidden_size, self.batch_size)
        self.criterion = nn.CrossEntropyLoss()

        self.evaluate = Evaluate(self.model, self.img_features, self.minibatch,
                                 self.preprocess, self.image_feature_size,
                                 self.output_vector_size)
        print(self.model)

        self.optimizer = optim.Adam(self.model.parameters(),
                                    lr=self.learning_rate)

        self.train_loss_values = []

        self.magic()

        self.save_model()

        self.save_data()

    def minibatch(self, data, batch_size=50):
        for i in range(0, len(data), batch_size):
            yield data[i:i + batch_size]

    def preprocess(self, batch):
        """Pad a batch to its longest dialog and split it into model inputs."""
        correct_indexes = [observation[2] for observation in batch]
        img_ids = [observation[1] for observation in batch]
        text_features = [observation[0] for observation in batch]
        last_words = [len(dialog) for dialog in text_features]

        #Add Padding to max len of sentence in batch
        max_length = max(map(len, text_features))
        text_features = [
            txt + [self.PAD] * (max_length - len(txt)) for txt in text_features
        ]

        #return in "stacked" format, added last_words for excluding padding effects on GRU
        return text_features, img_ids, correct_indexes, last_words

    def magic(self):
        for ITER in range(self.number_of_iterations):

            random.shuffle(self.train)
            train_loss = 0.0
            start = time.time()
            iteration = 0

            for batch in self.minibatch(self.train, self.batch_size):
                self.model.zero_grad()
                self.optimizer.zero_grad()
                self.model.hidden = self.model.init_hidden()

                #Load data for model
                text_features, h5_ids, correct_index, last_words = self.preprocess(
                    batch)
                lookup_text_tensor = Variable(torch.LongTensor([text_features
                                                                ])).squeeze()

                full_img_batch = np.empty([
                    len(batch), self.output_vector_size,
                    self.image_feature_size
                ])

                for obs, img_ids in enumerate(h5_ids):
                    for index, h5_id in enumerate(img_ids):
                        full_img_batch[obs, index] = self.img_features[h5_id]

                full_img_batch = Variable(
                    torch.from_numpy(full_img_batch).type(torch.FloatTensor))

                #Target
                target = Variable(torch.LongTensor([correct_index])).squeeze()
                #Vector for excluding padding effects
                last_words = Variable(torch.LongTensor(last_words))

                #Run model and calculate loss
                prediction = self.model(lookup_text_tensor, full_img_batch,
                                        last_words)
                loss = self.criterion(prediction, target)
                train_loss += loss.data[0]

                iteration += self.batch_size
                print(iteration)

                loss.backward()
                self.optimizer.step()

            print(
                "ITERATION %r: train loss/sent=%.4f, time=%.2fs" %
                (ITER + 1, train_loss / len(self.train), time.time() - start))
            self.train_loss_values.append(train_loss / len(self.train))

    def save_model(self):
        #Save model
        torch.save(self.model, self.model_full_path)
        print("Saved model has test score",
              self.evaluate(self.test, self.batch_size))

    def plot(self):
        plt.plot(self.train_loss_values, label="Train loss")
        plt.legend(loc='best')
        plt.xlabel("Epochs")
        plt.ylabel("Loss")
        plt.title(self.model_name +
                  " - loss with lr = %.4f, embedding size = %r" %
                  (self.learning_rate, self.embedding_size))
        plt.show()

    def save_data(self):
        with open(self.output_file_name, "w") as out_file:
            out_file.write(", ".join(map(str, self.train_loss_values)))
            out_file.write("\n")
            out_file.write(str(self.evaluate(self.test, self.batch_size)))
            out_file.write("\n")
Example #12
0
def main():
    # Device configuration
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model_type = "lstm"
    # Hyper-parameters
    sequence_length = 28
    input_size = 28

    num_layers = 1
    num_classes = 10
    batch_size = 100
    num_epochs = 10
    learning_rate = 0.01
    num_trials = 100
    a_s = [2]
    trials = {}

    # just for testing
    num_trials = 1
    num_epochs = 5
    a_s = np.random.uniform(0.1, 2, [2])
    for a in a_s:
        for num_trial in range(num_trials):
            print("a: ", a, "num_trial: ", num_trial)
            hidden_size = 8
            trial = {}
            if model_type == 'lstm':
                model = LSTM(input_size, hidden_size, num_layers, num_classes, a, device).to(device)
            elif model_type == 'gru':
                model = GRU(input_size, hidden_size, num_layers, num_classes, a, device).to(device)
            # Loss and optimizer
            criterion = nn.CrossEntropyLoss()
            optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)
            train_dataloader = MNIST_dataloader(batch_size, train=True)
            test_dataloader = MNIST_dataloader(batch_size, train=False)
            # Train the model
            total_step = len(train_dataloader.dataloader)

            for epoch in range(num_epochs):
                model.train()
                for i, (images, labels) in enumerate(train_dataloader.dataloader):
                    images = images.reshape(-1, sequence_length, input_size).to(device)
                    labels = labels.to(device)

                    # Forward pass
                    outputs, hts = model(images)
                    loss = criterion(outputs, labels)
                    # print(LEs, rvals)

                    # Backward and optimize
                    optimizer.zero_grad()
                    loss.backward()
                    optimizer.step()

                    if (i + 1) % 300 == 0:
                        print('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                              .format(epoch + 1, num_epochs, i + 1, total_step, loss.item()))


                # Test the model
                model.eval()
                with torch.no_grad():
                    correct = 0
                    total = 0
                    for i, (images, labels) in enumerate(test_dataloader.dataloader):
                        images = images.reshape(-1, sequence_length, input_size).to(device)
                        labels = labels.to(device)
                        outputs, _ = model(images)

                        # calculate LEs
                        # h = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                        # c = torch.zeros(model.num_layers, images.size(0), model.hidden_size).to(model.device)
                        # params = (images, (h, c))
                        # if i == 0:
                        #     LEs, rvals = calc_LEs_an(*params, model=model)

                        loss = criterion(outputs, labels)
                        _, predicted = torch.max(outputs.data, 1)
                        total += labels.size(0)
                        correct += (predicted == labels).sum().item()
                    if epoch == (num_epochs - 1):
                        print('Epoch [{}/{}] Loss: {}, Test Accuracy: {} %'.format(epoch + 1, num_epochs, loss, 100 * correct / total))
                trial[epoch] = {'model': model, 'accuracy': 100 * correct / total,
                                'loss': loss}
            trials[num_trial] = trial
        saved_path = f'../../../dataset/trials/{model_type}/models/'
        pickle.dump(trials, open(f'{saved_path}/lstm_{hidden_size}_trials_0.pickle', 'wb'))
Example #13
0
    def build_model(self):
        self.c2d = C2D().cuda()
        self.gru = GRU(self.c2d).cuda()
Example #14
0
class Trainer(object):
    def __init__(self, config, h_loader, r_loader, test_loader):
        self.config = config
        self.h_loader = h_loader
        self.r_loader = r_loader
        self.test_loader = test_loader

        self.lr = config.lr
        self.beta1 = config.beta1
        self.beta2 = config.beta2
        self.weight_decay = config.weight_decay

        self.n_epochs = config.n_epochs
        self.n_steps = config.n_steps
        self.log_interval = int(config.log_interval)  # in case
        self.checkpoint_step = int(config.checkpoint_step)

        self.use_cuda = config.cuda
        self.outf = config.outf
        self.build_model()
        self.vis = vis_tool.Visualizer()

    def build_model(self):
        self.c2d = C2D().cuda()
        self.gru = GRU(self.c2d).cuda()

    def train(self):
        cfig = get_config()
        opt = optim.Adam(filter(lambda p: p.requires_grad,
                                self.gru.parameters()),
                         lr=self.lr,
                         betas=(self.beta1, self.beta2),
                         weight_decay=self.weight_decay)

        start_time = time.time()
        criterion = nn.BCELoss()

        max_acc = 0.

        for epoch in range(self.n_epochs):
            self.gru.train()
            epoch_loss = []
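            # Each step draws one highlight clip (target 1) and one raw clip
            # (target 0) and updates the GRU scorer on both.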
            for step, (h, r) in enumerate(zip(self.h_loader, self.r_loader)):
                h_video = h[0]
                r_video = r[0]

                h_video = Variable(h_video).cuda()
                r_video = Variable(r_video).cuda()

                self.gru.zero_grad()

                predicted = self.gru(h_video)
                target = torch.ones(len(predicted), dtype=torch.float32).cuda()

                h_loss = criterion(predicted, target)  # compute loss
                h_loss.backward()
                opt.step()

                self.gru.zero_grad()
                predicted = self.gru(r_video)  # predicted snippet's score
                target = torch.zeros(len(predicted),
                                     dtype=torch.float32).cuda()

                r_loss = criterion(predicted, target)  # compute loss
                r_loss.backward()
                opt.step()

                step_end_time = time.time()

                total_loss = r_loss + h_loss
                epoch_loss.append((total_loss.data).cpu().numpy())

                print(
                    '[%d/%d][%d/%d] - time: %.2f, h_loss: %.3f, r_loss: %.3f, total_loss: %.3f'
                    % (epoch + 1, self.n_epochs, step + 1, self.n_steps,
                       step_end_time - start_time, h_loss, r_loss, total_loss))

                self.vis.plot(
                    'H_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (h_loss.data).cpu().numpy())

                self.vis.plot(
                    'R_LOSS with lr:%.4f, b1:%.1f, b2:%.3f, wd:%.5f' %
                    (cfig.lr, cfig.beta1, cfig.beta2, cfig.weight_decay),
                    (r_loss.data).cpu().numpy())

            self.vis.plot("Avg loss plot", np.mean(epoch_loss))

            # Test accuracy
            # self.gru.eval()
            # test_avg_acc = 0.
            # test_cnt = 0
            # for idx, (video, label, filename) in enumerate(self.test_loader):
            #     video = Variable(video).cuda()
            #     predicted = self.gru(video)  # [num_frames, 1]
            #
            #     predicted = predicted.view(1, -1)
            #     predicted = predicted.cpu().detach().numpy()
            #
            #     predicted = predicted[0]
            #     label = label.cpu().numpy()
            #
            #     # print(type(predicted), type(label))
            #
            #     gt_label_predicted_score = predicted * label
            #     gt_label_predicted_score = list(gt_label_predicted_score)
            #
            #     # gt_label_predicted_score = gt_label_predicted_score.cpu().numpy()
            #     # print("Highlight frame predicted score:", gt_label_predicted_score)
            #
            #     # print(gt_label_predicted_score)
            #     # print(gt_label_predicted_score.shape)
            #
            #     # print(gt_label_predicted_score)
            #
            #     for sc in gt_label_predicted_score[0]:
            #         if sc != 0.:
            #             print("%.3f" % sc, end=' ')
            #
            #     for i in range(len(predicted)):
            #         if predicted[i] >= 0.45:
            #             predicted[i] = 1.
            #         else:
            #             predicted[i] = 0.
            #
            #     # print("After threshold predicted:", predicted)
            #     # print("Actual label:", label)
            #
            #     acc = (predicted == label).sum().item() / float(len(predicted))
            #     print("filename: %s accuracy: %.4f" % (filename, acc))
            #     test_avg_acc += acc
            #     test_cnt += 1
            #
            #     print()
            #
            # test_avg_acc = test_avg_acc / test_cnt
            # print("Epoch %d Test accuracy: %.5f" % (epoch+1, test_avg_acc))
            # self.vis.plot("Test Accuracy plot", test_avg_acc)

            # print("Epoch %d predicted output list" % (epoch+1), output_list)

            # save max test accuracy checkpoint
            # if test_avg_acc >= max_acc:
            #     max_acc = test_avg_acc
            #     torch.save(self.gru.state_dict(), 'max_test_acc_chkpoint' + str(epoch + 1) + '.pth')
            #     print("checkpoint saved")

            if epoch % self.checkpoint_step == 0:
                accuracy, savelist = self.test(self.test_loader)

                if accuracy > max_acc:
                    max_acc = accuracy
                    torch.save(
                        self.gru.state_dict(),
                        './samples/lr_%.4f_chkpoint' % cfig.lr +
                        str(epoch + 1) + '.pth')
                    for f in savelist:
                        np.save("./samples/" + f[0][0] + ".npy", f[1])
                    print(np.load("./samples/testRV04(198,360).mp4.npy"))
                    print("checkpoint saved")

    def test(self, t_loader):
        # self.gru.eval()
        # accuracy = 0.
        #
        # savelist = []
        #
        # total_len = len(t_loader)
        #
        # for step, (tv, label, filename) in enumerate(t_loader):
        #     filename = filename[0].split(".")[0]
        #
        #     label = label.squeeze()
        #
        #     start = 0
        #     end = 24
        #
        #     correct = 0
        #     count = 0
        #
        #     npy = np.zeros(tv.shape[1])
        #
        #     while end < tv.shape[1]:
        #
        #         t_video = Variable(tv[:, start:end, :, :, :]).cuda()
        #         predicted = self.gru(t_video)
        #
        #         gt_label = label[start:end]
        #
        #         if len(gt_label[gt_label == 1.]) > 12:
        #             gt_label = torch.ones(predicted.shape, dtype=torch.float32).cuda()
        #
        #         else:
        #             gt_label = torch.zeros(predicted.shape, dtype=torch.float32).cuda()
        #
        #         if predicted < 0.5:
        #             npy[start:end] = 1.
        #
        #         predicted[predicted < 0.5] = 1.
        #         predicted[predicted >= 0.5] = 0.
        #
        #         correct += (predicted == gt_label).item()
        #
        #         start += 24
        #         end += 24
        #         count += 1
        #
        #     accuracy += (correct / count) / total_len
        #
        #     savelist.append([filename, npy])

        # Test accuracy
        self.gru.eval()
        test_avg_acc = 0.
        test_cnt = 0
        savelist = []

        for idx, (video, label, filename) in enumerate(t_loader):
            video = Variable(video).cuda()
            predicted = self.gru(video)  # [num_frames, 1]

            predicted = predicted.view(1, -1)
            predicted = predicted.cpu().detach().numpy()

            predicted = predicted[0]
            label = label.cpu().numpy()

            # print(type(predicted), type(label))

            gt_label_predicted_score = predicted * label
            gt_label_predicted_score = list(gt_label_predicted_score)

            # gt_label_predicted_score = gt_label_predicted_score.cpu().numpy()
            # print("Highlight frame predicted score:", gt_label_predicted_score)

            # print(gt_label_predicted_score)
            # print(gt_label_predicted_score.shape)

            # print(gt_label_predicted_score)

            for sc in gt_label_predicted_score[0]:
                if sc != 0.:
                    print("%.3f" % sc, end=' ')

            for i in range(len(predicted)):
                if predicted[i] >= 0.45:
                    predicted[i] = 1.
                else:
                    predicted[i] = 0.

            # print("After threshold predicted:", predicted)
            # print("Actual label:", label)

            acc = (predicted == label).sum().item() / float(len(predicted))
            print("filename: %s accuracy: %.4f" % (filename, acc))
            test_avg_acc += acc
            test_cnt += 1

            savelist.append([filename, predicted])

            print()

        test_avg_acc = test_avg_acc / test_cnt

        print("Accuracy:", round(test_avg_acc, 4))
        self.vis.plot("Accuracy with lr:%.3f" % self.lr, test_avg_acc)

        return test_avg_acc, savelist
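The Trainer above is driven entirely by a config object plus three data loaders; the following is a hypothetical wiring sketch, not part of the original example (build_loaders is an assumed helper, while get_config and Trainer are taken from the code above).

config = get_config()
# build_loaders is an assumed helper returning the two training loaders (h/r) and the test loader.
h_loader, r_loader, test_loader = build_loaders(config)

trainer = Trainer(config, h_loader, r_loader, test_loader)
trainer.train()  # trains the GRU and checkpoints the best test accuracy under ./samples/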
Example #15
import numpy as np
import matplotlib.pyplot as plt
import torch.nn as nn
import torch.optim as opt

# GRU, generate_data, train and predict come from the example's own modules.

batch_size = 256
hidden_size = 128
num_layers = 1
dropout = 0
testnum = 500
# input_length / output_length are not defined in this snippet; the values
# below are assumptions for illustration.
input_length = 50
output_length = 10
# interval is the sample interval between the last input and the first output.
interval = 0

epoch = 100
device = 'cuda'

# Generate sin dataset for training and testing.
dataset = np.sin([i / 50 * 2 * np.pi for i in range(2000)])
x_train, y_train, x_test, y_test, normalizer = generate_data(
    dataset, 'minmax', input_length, output_length, testnum, interval)

# Build, train and predict.
model = GRU(1, hidden_size, num_layers, 1, dropout)
optimizer = opt.Adam(model.parameters())
loss = nn.MSELoss()
batch_train_loss, batch_val_loss = train(model, x_train, y_train, epoch,
                                         batch_size, optimizer, loss, device)
y_predict, y_real, _ = predict(model, x_test, y_test, loss, device, normalizer,
                               batch_size)

# Draw result
plt.plot(y_predict, label='prediction')
plt.plot(y_real, label='real')
plt.legend()
plt.show()
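The GRU class used above is defined elsewhere; below is a minimal sketch of a definition consistent with the call GRU(1, hidden_size, num_layers, 1, dropout). It is an assumption for illustration, not the example's actual implementation.

import torch.nn as nn

class GRU(nn.Module):
    """Minimal sequence-to-one GRU regressor; an assumed stand-in for the
    GRU class imported by the example above."""
    def __init__(self, input_size, hidden_size, num_layers, output_size, dropout):
        super().__init__()
        self.gru = nn.GRU(input_size, hidden_size, num_layers,
                          batch_first=True, dropout=dropout)
        self.fc = nn.Linear(hidden_size, output_size)

    def forward(self, x):
        # x: (batch, seq_len, input_size) -> prediction from the last time step
        out, _ = self.gru(x)
        return self.fc(out[:, -1, :])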
Example #16
def main():
    # Training settings
    parser = argparse.ArgumentParser(description='From PyTorch MNIST Example')
    parser.add_argument('--batch-size',
                        type=int,
                        default=32,
                        metavar='N',
                        help='input batch size for training')

    parser.add_argument('--epochs',
                        type=int,
                        default=10000,
                        metavar='E',
                        help='number of epochs to train')
    parser.add_argument('--warmup-epochs',
                        type=int,
                        default=5000,
                        metavar='WE',
                        help='number of epochs to warmup')
    parser.add_argument('--num-steps',
                        type=int,
                        default=100,
                        metavar='N',
                        help='number of batches in one epochs')

    parser.add_argument('--num-points',
                        type=int,
                        default=10,
                        metavar='NS',
                        help='number of query points')

    parser.add_argument('--num-hidden',
                        type=int,
                        default=128,
                        metavar='NE',
                        help='number of hidden units')
    parser.add_argument('--lr',
                        type=float,
                        default=0.0003,
                        metavar='LR',
                        help='learning rate')
    parser.add_argument('--alpha',
                        type=float,
                        default=0,
                        metavar='A',
                        help='kl factor')
    parser.add_argument('--sampling',
                        action='store_true',
                        default=False,
                        help='uses sampling')

    parser.add_argument('--direction',
                        action='store_true',
                        default=False,
                        help='uses directed data-sets')
    parser.add_argument('--ranking',
                        action='store_true',
                        default=False,
                        help='sort data-set according to importance')

    parser.add_argument('--num-runs',
                        type=int,
                        default=1,
                        metavar='NR',
                        help='number of runs')
    parser.add_argument('--save-path',
                        default='trained_models/random_',
                        help='directory to save results')
    parser.add_argument('--load-path',
                        default='trained_models/default_model_0.pth',
                        help='path to load model')

    parser.add_argument('--no-cuda',
                        action='store_true',
                        default=False,
                        help='disables CUDA training')

    args = parser.parse_args()
    use_cuda = not args.no_cuda and torch.cuda.is_available()
    device = torch.device("cuda" if use_cuda else "cpu")

    for i in range(args.num_runs):
        writer = SummaryWriter()
        performance = torch.zeros(args.epochs)
        accuracy_test = 0

        data_loader = PairedComparison(4,
                                       direction=args.direction,
                                       ranking=args.ranking)
        model = GRU(data_loader.num_inputs, data_loader.num_targets,
                    args.num_hidden).to(device)
        if args.alpha > 0:
            print('Loading pretrained network...')
            params, _ = torch.load(args.load_path)
            model.load_state_dict(params)
            model.reset_log_sigma()
            max_alpha = args.alpha
        optimizer = optim.Adam(model.parameters(), lr=args.lr, amsgrad=True)

        with trange(args.epochs) as t:
            for j in t:
                loss_train = 0
                for k in range(args.num_steps):
                    inputs, targets, _, _ = data_loader.get_batch(
                        args.batch_size, args.num_points, device=device)
                    predictive_distribution, _, _ = model(
                        inputs, targets, args.sampling)

                    loss = -predictive_distribution.log_prob(targets).mean()
                    writer.add_scalar('NLL', loss.item(),
                                      j * args.num_steps + k)

                    if args.alpha > 0:
                        alpha = min(j / args.warmup_epochs, 1.0) * max_alpha
                        kld = model.regularization(alpha)
                        loss = loss + kld
                        writer.add_scalar('KLD', kld.item(),
                                          j * args.num_steps + k)

                    loss_train += loss.item()

                    optimizer.zero_grad()
                    loss.backward()
                    torch.nn.utils.clip_grad_norm_(model.parameters(), 40.0)
                    optimizer.step()

                t.set_description('Loss (train): {:5.4f}'.format(
                    loss_train / args.num_steps))
                performance[j] = loss_train / args.num_steps

        torch.save([model.state_dict(), performance],
                   args.save_path + str(i) + '.pth')
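Checkpoints saved by this loop bundle the state dict with the per-epoch loss curve; the sketch below shows one way to reload such a checkpoint. The path assumes the default --save-path prefix and run index 0, and the commented-out model construction mirrors the sizes used in training.

import torch

# Minimal reload sketch; 'trained_models/random_0.pth' is an assumed path.
state_dict, performance = torch.load('trained_models/random_0.pth',
                                     map_location='cpu')

# The model must be rebuilt with the same sizes before loading the weights, e.g.:
# model = GRU(data_loader.num_inputs, data_loader.num_targets, args.num_hidden)
# model.load_state_dict(state_dict)

print('final training loss: {:.4f}'.format(performance[-1].item()))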