Example #1
    def __next__(self):
        train_perm = cuda.LongTensor(next(self.training_batches))
        test_perm = cuda.LongTensor(next(self.test_batches))
        return (DataStreamer(X=self.data_streamer.X[:, train_perm],
                             Y=self.data_streamer.Y[:, train_perm]),
                DataStreamer(X=self.data_streamer.X[:, test_perm],
                             Y=self.data_streamer.Y[:, test_perm]))
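
Both tensors above are sliced along the sample axis with the same LongTensor of indices. A minimal standalone sketch of that pattern (shapes hypothetical; assumes a CUDA device is available):

import torch

X = torch.randn(8, 100, device='cuda')            # (features, samples)
Y = torch.randint(0, 2, (1, 100), device='cuda')  # labels share the sample axis
perm = torch.randperm(100, device='cuda')[:32]    # a LongTensor of 32 column indices
x_batch, y_batch = X[:, perm], Y[:, perm]         # the same columns from both tensors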
Example #2
def train():
    PYRNG = Random(0)
    ttv_proportions = dict(test=0.001, train=.96, validation=0.039)
    # Whiten, add flips, mask regions outside circle, train/test/val split
    DATA = (get_training_data().to_gpu().normalize().enrich().mask_circle().
            test_train_validation(PYRNG, **ttv_proportions))
    VALDATA = DATA.validation.get_examples(50, PYRNG)
    VALIMGS = Variable(T.from_numpy(VALDATA.images).type(TP.FloatTensor))
    VALCLASSES = Variable(TP.LongTensor(VALDATA.is_iceberg))
    BETA = 1e1
    BETA_FACTOR = .9999
    BATCH_SIZE = 32
    model = BentesModel()
    if T.cuda.is_available():
        model = model.cuda()
    optimizer = optim.Adam(model.parameters())
    scheduler = optim.lr_scheduler.ExponentialLR(optimizer, gamma=0.9999)
    for i in range(1_000_000_000):
        scheduler.step()
        optimizer.zero_grad()
        batch = DATA.train.get_examples(BATCH_SIZE, PYRNG).rotate(PYRNG)
        imgvar = Variable(T.from_numpy(batch.images).type(TP.FloatTensor))
        result = model(imgvar)
        classvar = Variable(TP.LongTensor(batch.is_iceberg))
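        # note: despite its name, 'accuracy' here is the cross-entropy loss (printed below as "CE")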
        accuracy = F.cross_entropy(result.activations, classvar)
        kl = T.mean(result.kl)
        loss = accuracy + BETA * kl
        loss.backward()
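        # note: this validation pass is not wrapped in torch.no_grad() (volatile on this API version), so it needlessly tracks gradients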
        valresult = model(VALIMGS)
        valaccuracy = F.cross_entropy(valresult.activations, VALCLASSES)
        optimizer.step()
        gf = lambda t: f'{t.data[0]:12.3f}'  # noqa: E731
        print(f'Step: {i:6d} CE: {gf(accuracy)} KL: {gf(kl)} loss: {gf(loss)} '
              f'val: {gf(valaccuracy)}')
        scores = (F.log_softmax(
            result.activations,
            dim=1).data.cpu().numpy()[list(range(BATCH_SIZE)),
                                      batch.is_iceberg])
        print(
            np.array(
                list(zip(*(s.astype(float) for s in np.histogram(scores))))).T)
        probs = F.softmax(result.activations, dim=1).data.cpu().numpy().tolist()
        pprint(list(zip(batch.is_iceberg, probs)))
        BETA *= BETA_FACTOR
        print('first layer parameters/gradients for first kernel')
        print('convolution')
        print(model.layers[1].layer.weight[0])
        print(model.layers[1].layer.weight.grad[0])
        print('noise')
        print(model.layers[1].noise.weight[0])
        print(model.layers[1].noise.weight.grad[0])
        print('prior mean')
        print(model.layers[1].prior.mean[0])
        print(model.layers[1].prior.mean.grad[0])
        print('prior alpha')
        print(model.layers[1].prior.alpha[0])
        print(model.layers[1].prior.alpha.grad[0])
Example #3
    def forward(self, batch_item_index, place_correlation):
        """
        The forward pass of the autoencoder.
        :param batch_item_index: a list of arrays; each array stores the place ids a user has been to
        :param place_correlation: the pairwise POI relation matrix
        :return: the predicted ratings
        """
        item_vector = self.linear1.weight[:,
                                          T.LongTensor(batch_item_index[0].
                                                       astype(np.int32))]
        # Compute the neighbor inner products
        inner_product = item_vector.t().mm(self.linear4.weight.t())
        item_corr = Variable(
            torch.from_numpy(
                place_correlation[batch_item_index[0]].toarray()).type(
                    T.FloatTensor))
        inner_product = inner_product * item_corr
        neighbor_product = inner_product.sum(dim=0).unsqueeze(0)

        # Compute the self attention score
        score = F.tanh(self.attention_matrix1.mm(item_vector))
        score = F.softmax(score, dim=1)
        embedding_matrix = score.mm(item_vector.t())
        linear_z = self.self_attention(embedding_matrix.t()).t()

        # print score
        for i in range(1, len(batch_item_index)):
            item_vector = self.linear1.weight[:,
                                              T.LongTensor(batch_item_index[i].
                                                           astype(np.int32))]
            # Compute the neighbor inner products
            inner_product = item_vector.t().mm(self.linear4.weight.t())
            item_corr = Variable(
                torch.from_numpy(
                    place_correlation[batch_item_index[i]].toarray()).type(
                        T.FloatTensor))
            inner_product = inner_product * item_corr
            inner_product = inner_product.sum(dim=0).unsqueeze(0)
            neighbor_product = torch.cat((neighbor_product, inner_product), 0)

            # Compute the self attention score
            score = F.tanh(self.attention_matrix1.mm(item_vector))
            score = F.softmax(score, dim=1)
            embedding_matrix = score.mm(item_vector.t())
            tmp_z = self.self_attention(embedding_matrix.t()).t()
            linear_z = torch.cat((linear_z, tmp_z), 0)

        z = F.tanh(linear_z)
        z = F.dropout(z, training=self.training, p=self.dropout_rate)
        z = F.tanh(self.linear2(z))
        z = F.dropout(z, training=self.training, p=self.dropout_rate)
        d_z = F.tanh(self.linear3(z))
        d_z = F.dropout(d_z, training=self.training, p=self.dropout_rate)
        y_pred = F.sigmoid(self.linear4(d_z) + neighbor_product)

        return y_pred
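
The loop above repeatedly slices columns of a weight matrix with a LongTensor of item ids. The core indexing pattern in isolation (shapes and names hypothetical):

import torch

W = torch.randn(64, 1000)            # (embedding_dim, n_items), e.g. linear1.weight
idx = torch.LongTensor([3, 17, 42])  # item ids visited by one user
item_vector = W[:, idx]              # (64, 3): one column per selected item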
Example #4
    def neg_log_likelihood(self, sentences, tags):
        total_loss = Variable(cuda.FloatTensor([0]))
        for sentence, tag in zip(sentences, tags):
            sentence = sentence[1:-1]
            tag = tag[1:-1]
            sent_var = Variable(cuda.LongTensor(sentence))
            tag_var = Variable(cuda.LongTensor(tag))

            feats = self._get_lstm_features(sent_var)
            forward_score = self._forward_alg(feats)
            gold_score = self._score_sentence(feats, tag_var)
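            # CRF negative log-likelihood: the log-partition (forward score) minus the gold path score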
            total_loss += forward_score - gold_score

        return total_loss
Example #5
    def forward(self, input_user, input_item):
        regression_result, review_softmax, context = self.encoder.forward(
            input_user, input_item)
        output_tip_probs = Variable(
            device.LongTensor(self.empty_output * len(input_user)))
        output, hidden = self.decoder.forward(output_tip_probs, context)
        return regression_result, review_softmax, output
Example #6
    def init_start_input(self, batch_size):
        # GO input for the decoder; re-initialize when the batch size changes
        if self.init_input is None or self.init_input.size(0) != batch_size:
            self.init_input = Variable(
                device.LongTensor([[self.vocab.SOS_token_id] * batch_size
                                   ])).view(batch_size, -1)
        return self.init_input
Example #7
    def teacher_forcing(self, feed_x, is_train=False):
        loss_fn = nn.NLLLoss()
        if type(feed_x) is np.ndarray:
            feed_x = tcg.LongTensor(feed_x)
        batch_size, seq_len = feed_x.size()
        x_t = train.Variable(
            tc.LongTensor(
                np.array([self.start_token] * batch_size, dtype=np.int32)))
        log_g_prediction = train.Variable(
            tc.zeros(batch_size, seq_len, self.vocab_size))
        feed_x = feed_x.permute(1, 0)  # seq_len x batch_size
        h = self.__h0_getter(batch_size)
        c = self.__h0_getter(batch_size)

        if self.is_cuda:
            x_t = x_t.cuda()
        loss = 0
        for i in range(seq_len):
            log_pred, h, c = self.forward(x_t, h, c)
            x_t = feed_x[i]
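            # teacher forcing: the step-i prediction is scored against the gold token, which then becomes the next input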
            loss += loss_fn(log_pred, x_t)
            log_g_prediction[:, i, :] = log_pred
        loss /= self.sequence_length
        if is_train:
            self.g_opt.zero_grad()
            loss.backward()
            self.g_opt.step()
        return loss.detach().cpu().numpy(), log_g_prediction
Example #8
def train(data, model, optimizer, verbose=True):
    criterion = nn.NLLLoss()

    if model.use_cuda:
        criterion.cuda()

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0

    for sentence, actions in data:

        if len(sentence) <= 2:
            continue

        optimizer.zero_grad()
        model.refresh()

        outputs, _, actions_done = model(sentence, actions)

        if model.use_cuda:
            loss = ag.Variable(cuda.FloatTensor([0]))
            action_idxs = [
                ag.Variable(cuda.LongTensor([a])) for a in actions_done
            ]
        else:
            loss = ag.Variable(torch.FloatTensor([0]))
            action_idxs = [
                ag.Variable(torch.LongTensor([a])) for a in actions_done
            ]

        for output, act in zip(outputs, action_idxs):
            loss += criterion(output.view(-1, 3), act)

        tot_loss += utils.to_scalar(loss.data)
        instance_count += 1

        for gold, output in zip(actions_done, outputs):
            pred_act = utils.argmax(output.data)
            if pred_act == gold:
                correct_actions += 1
        total_actions += len(outputs)

        loss.backward()
        optimizer.step()

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print(
            "Number of instances: {}    Number of network actions: {}".format(
                instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(acc, loss))
Example #9
    def forward(self, minibatch):
        out_stack = []
        minibatch = list(minibatch)
        minibatch_lengths = [len(sent) for sent in minibatch]
        batch_of_words = list(chain.from_iterable(minibatch))

        self.init_state(len(batch_of_words))
        # a hack to get the indices of the sorted words, so we can unsort them after they are processed
        # print(batch_of_words)
        sent, ridx = self.len_sort(batch_of_words)
        padded, seq_lengths = self.pad(sent, 0)
        # print(padded)
        out = self.emb(Variable(cuda.LongTensor(padded)))
        # out is of size (all_words x max_len x char_emb_size)
        # print("out size: {0}".format(out.size()))
        out = rnn.pack_padded_sequence(out, seq_lengths, batch_first=True)
        out, hidden_state = self.rnn(out, self.hidden_state)
        # hidden_state[0] is of size: (num_dir x batch_size x lstm_hidden_dim)
        # print("hidden state size: {0}".format(hidden_state[0].size()))

        # TODO verify
        # unsorting is IMPORTANT: we initially sorted the sequence of chars to pass it to the RNN
        hidden_state = torch.index_select(hidden_state[0],
                                          dim=1,
                                          index=Variable(
                                              cuda.LongTensor(ridx)))

        # TODO verify that this is indeed the last outputs of both forward rnn and backward rnn

        out = cat([hidden_state[0], hidden_state[1]], dim=1)
        # print("cat out size: {0}".format(out.size()))
        cfg.ver_print("Hidden state concat", out)
        out = self.linear(out)
        out = self.tanh(out)
        # print("before split and pad function {0}".format(out.size()))
        # this will split 1d tensor of word embeddings, into 2d array of word embeddings based on lengths
        final_out = self.split_and_pad(out, minibatch_lengths)
        # final_out is of size (batch_size x max_seq_len x emb_size)
        # print(final_out.size())
        return final_out
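
The index_select with ridx above undoes the length-sort that pack_padded_sequence requires. A minimal sketch of that sort/unsort bookkeeping (values hypothetical):

import torch

lengths = torch.tensor([3, 5, 2])
order = torch.argsort(lengths, descending=True)   # sort longest-first for packing
ridx = torch.argsort(order)                       # inverse permutation: undoes the sort
assert torch.equal(order[ridx], torch.arange(3))  # original positions restored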
Example #10
    def forward(self, sentences):  # don't confuse this with _forward_alg above.
        # Get the emission scores from the BiLSTM
        tag_seq = []
        for sentence in sentences:
            sentence = sentence[1:-1]
            sent_var = Variable(cuda.LongTensor(sentence))
            lstm_feats = self._get_lstm_features(sent_var)

            # Find the best path, given the features.
            tag_seq.append([self.tag_idx[cfg.SENT_START]] +
                           self._viterbi_decode(lstm_feats) +
                           [self.tag_idx[cfg.SENT_END]])

        return tag_seq
Example #11
def evaluate(data, model, verbose=False):

    correct_actions = 0
    total_actions = 0
    tot_loss = 0.
    instance_count = 0
    criterion = nn.NLLLoss()

    if model.use_cuda:
        criterion.cuda()

    for sentence, actions in data:

        if len(sentence) > 1:
            outputs, _, actions_done = model(sentence, actions)

            if model.use_cuda:
                loss = ag.Variable(cuda.FloatTensor([0]))
                action_idxs = [
                    ag.Variable(cuda.LongTensor([a])) for a in actions_done
                ]
            else:
                loss = ag.Variable(torch.FloatTensor([0]))
                action_idxs = [
                    ag.Variable(torch.LongTensor([a])) for a in actions_done
                ]

            for output, act in zip(outputs, action_idxs):
                loss += criterion(output.view((-1, 3)), act)

            tot_loss += utils.to_scalar(loss.data)
            instance_count += 1

            for gold, output in zip(actions_done, outputs):
                pred_act = utils.argmax(output.data)
                if pred_act == gold:
                    correct_actions += 1

            total_actions += len(outputs)

    acc = float(correct_actions) / total_actions
    loss = float(tot_loss) / instance_count
    if verbose:
        print(
            "Number of instances: {}    Number of network actions: {}".format(
                instance_count, total_actions))
        print("Acc: {}  Loss: {}".format(
            float(correct_actions) / total_actions, tot_loss / instance_count))
    return acc, loss
Example #12
def to_variables(X, C, POS, Y):
    if cfg.BATCH_TYPE == "multi":
        x_var = X
        c_var = C
        pos_var = POS
        y_var = list(chain.from_iterable(list(Y)))

        lm_X = [[
            cfg.LM_MAX_VOCAB_SIZE - 1 if (x >= cfg.LM_MAX_VOCAB_SIZE) else x
            for x in x1d
        ] for x1d in X]

    else:
        x_var = Variable(cuda.LongTensor([X]))
        c_var = C
        # f_var = Variable(torch.from_numpy(f)).float().unsqueeze(dim=0).cuda()
        pos_var = Variable(torch.from_numpy(POS).cuda()).unsqueeze(dim=0)
        lm_X = [
            cfg.LM_MAX_VOCAB_SIZE - 1 if (x >= cfg.LM_MAX_VOCAB_SIZE) else x
            for x in X
        ]
        y_var = Variable(cuda.LongTensor(Y))

    return x_var, c_var, pos_var, y_var, lm_X
Example #13
def main():
    target_params = pickle.load(open('save/target_params_py3.pkl', 'rb'))
    target_lstm = TARGET_LSTM(VOCAB_SIZE, BATCH_SIZE, 32, 32, SEQ_LENGTH,
                              START_TOKEN, target_params)  # The oracle model
    train_data = target_lstm.generate(batch_size=10000)
    generator = Generator(VOCAB_SIZE,
                          BATCH_SIZE,
                          32,
                          32,
                          SEQ_LENGTH,
                          START_TOKEN,
                          learning_rate=1e-3)
    mediator = Generator(VOCAB_SIZE,
                         BATCH_SIZE,
                         64,
                         64,
                         SEQ_LENGTH,
                         START_TOKEN,
                         learning_rate=1e-3)
    data_loader = tcdata.DataLoader(tcdata.TensorDataset(
        tcg.LongTensor(train_data)),
                                    batch_size=32,
                                    shuffle=True)
    log_cot = open("save/cot.log", "w")
    for epoch in range(20000):
        for i, (x, ) in enumerate(data_loader):
            m_loss, _ = mediator.teacher_forcing(tc.cat(
                (generator.generate(32, keep_torch=True), x), dim=0),
                                                 is_train=True)
            gen_x = generator.generate(64)
            _, log_pred = mediator.teacher_forcing(gen_x)
            generator.cooperative_training(gen_x, log_pred)
            if i % 20 == 0:
                print("mediator loss at iteration #%d-%d" % (epoch, i), m_loss)
        print("oracle loss at epoch #%d" % epoch,
              target_lstm.calc_nll(generator.generate(64)))
        print("test loss at epoch #%d" % epoch,
              generator.teacher_forcing(target_lstm.generate(64))[0])
        print("oracle loss at epoch #%d" % epoch,
              target_lstm.calc_nll(generator.generate(64)),
              file=log_cot)
        print("test loss at epoch #%d" % epoch,
              generator.teacher_forcing(target_lstm.generate(64))[0],
              file=log_cot)
    log_cot.close()
Example #14
    def forward(self, chars):
        out_stack = []

        for word in chars:

            out = self.emb(Variable(cuda.LongTensor(word)))
            out = unsqueeze(out, dim=0)

            out, hidden_state = self.rnn(out, self.hidden_state)

            # TODO verify that this is indeed the last outputs of both forward rnn and backward rnn
            # and that we are concatenating correctly
            out = cat([hidden_state[0][0], hidden_state[0][1]], dim=1)
            cfg.ver_print("Hidden state concat", out)
            out = self.linear(out)
            out = self.tanh(out)
            out_stack.append(out)

        final_out = stack(out_stack, dim=1)
        return final_out
Example #15
    def __getitem__(self, index):

        if not hasattr(self, 'hdf5_dataset'):
            self.open_hdf5()

        idx_eFTrack_Eta = tcuda.LongTensor([self.eFTrack_Eta[index]],
                                           device=self.rank)
        idx_eFTrack_Phi = tcuda.LongTensor([self.eFTrack_Phi[index]],
                                           device=self.rank)
        val_eFTrack_PT = tcuda.FloatTensor(self.eFTrack_PT[index],
                                           device=self.rank)

        idx_eFPhoton_Eta = tcuda.LongTensor([self.eFPhoton_Eta[index]],
                                            device=self.rank)
        idx_eFPhoton_Phi = tcuda.LongTensor([self.eFPhoton_Phi[index]],
                                            device=self.rank)
        val_eFPhoton_ET = tcuda.FloatTensor(self.eFPhoton_ET[index],
                                            device=self.rank)

        idx_eFNHadron_Eta = tcuda.LongTensor([self.eFNHadron_Eta[index]],
                                             device=self.rank)
        idx_eFNHadron_Phi = tcuda.LongTensor([self.eFNHadron_Phi[index]],
                                             device=self.rank)
        val_eFNHadron_ET = tcuda.FloatTensor(self.eFNHadron_ET[index],
                                             device=self.rank)

        calorimeter, scaler = self.process_images(idx_eFTrack_Eta,
                                                  idx_eFPhoton_Eta,
                                                  idx_eFNHadron_Eta,
                                                  idx_eFTrack_Phi,
                                                  idx_eFPhoton_Phi,
                                                  idx_eFNHadron_Phi,
                                                  val_eFTrack_PT,
                                                  val_eFPhoton_ET,
                                                  val_eFNHadron_ET)

        # Set labels
        labels_raw = tcuda.FloatTensor(self.labels[index], device=self.rank)
        labels_processed = self.process_labels(labels_raw, scaler)

        if self.return_baseline:
            base_raw = tcuda.FloatTensor(self.base[index], device=self.rank)
            base_processed = self.process_baseline(base_raw)
            return calorimeter, labels_processed, base_processed, scaler

        return calorimeter, labels_processed
Example #16
def variableFromSentence(lang, sentence, target=False):
    indexes, length = indexesFromSentence(lang, sentence)
    if target is True:
        indexes.insert(0, SOS_token) # start with SOS token
    return (Variable(cuda.LongTensor(indexes).view(-1, 1), requires_grad=False), length)
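
The .view(-1, 1) above reshapes a flat index tensor into the (seq_len, 1) column that many decoder APIs expect. In isolation (values hypothetical):

import torch

idx = torch.LongTensor([5, 9, 2])  # token ids
col = idx.view(-1, 1)              # shape (3, 1): seq_len x batch-of-one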
Example #17
def train_autoencoder(train_matrix, test_set):
    num_users, num_items = train_matrix.shape
    weight_matrix = log_surplus_confidence_matrix(train_matrix,
                                                  alpha=args.alpha,
                                                  epsilon=args.epsilon)
    train_matrix[train_matrix > 0] = 1.0
    place_correlation = scipy.sparse.load_npz(
        './data/Foursquare/place_correlation_gamma60.npz')

    assert num_items == place_correlation.shape[0]
    print(train_matrix.shape)

    # Construct the model by instantiating the class defined in model.py
    model = AutoEncoder(num_items,
                        args.inner_layers,
                        num_items,
                        da=args.num_attention,
                        dropout_rate=args.dropout_rate)
    if torch.cuda.is_available():
        model.cuda()

    criterion = torch.nn.MSELoss(size_average=False, reduce=False)
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=args.learning_rate,
                                 weight_decay=args.weight_decay)

    batch_size = args.batch_size
    user_indexes = np.arange(num_users)

    model.train()
    for t in range(args.epoch):
        print("epoch:{}".format(t))
        np.random.shuffle(user_indexes)
        avg_cost = 0.
        for batchID in range(int(num_users / batch_size)):
            start = batchID * batch_size
            end = start + batch_size

            batch_user_index = user_indexes[start:end]

            batch_x, batch_x_weight, batch_item_index = get_mini_batch(
                train_matrix, weight_matrix, batch_user_index)
            batch_x_weight += 1
            batch_x = Variable(torch.from_numpy(batch_x).type(T.FloatTensor),
                               requires_grad=False)

            y_pred = model(batch_item_index, place_correlation)

            # Compute and print loss
            batch_x_weight = Variable(torch.from_numpy(batch_x_weight).type(
                T.FloatTensor),
                                      requires_grad=False)
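            # confidence-weighted reconstruction: element-wise MSE scaled by the weight matrix, then averaged over the batch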
            loss = (batch_x_weight *
                    criterion(y_pred, batch_x)).sum() / batch_size

            print(batchID, loss.data)

            # Zero gradients, perform a backward pass, and update the weights.
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            avg_cost += loss / num_users * batch_size

        print("Avg loss:{}".format(avg_cost))

        # print the prediction score for the user 0
        print(
            model([train_matrix.getrow(0).indices], place_correlation)
            [:,
             T.LongTensor(train_matrix.getrow(0).indices.astype(np.int32))])
        print(model([train_matrix.getrow(0).indices], place_correlation))

    # Evaluation
    model.eval()
    topk = 20
    recommended_list = []
    for user_id in range(num_users):
        user_rating_vector = train_matrix.getrow(user_id).toarray()
        pred_rating_vector = model([train_matrix.getrow(user_id).indices],
                                   place_correlation)
        pred_rating_vector = pred_rating_vector.cpu().data.numpy()
        user_rating_vector = user_rating_vector[0]
        pred_rating_vector = pred_rating_vector[0]
        pred_rating_vector[user_rating_vector > 0] = 0

        item_recommended_dict = dict()
        for item_inner_id, score in enumerate(pred_rating_vector):
            item_recommended_dict[item_inner_id] = score

        sorted_item = heapq.nlargest(topk,
                                     item_recommended_dict,
                                     key=item_recommended_dict.get)
        recommended_list.append(sorted_item)

        print(test_set[user_id], sorted_item[:topk])
        print(pred_rating_vector[sorted_item[0]],
              pred_rating_vector[sorted_item[1]],
              pred_rating_vector[sorted_item[2]],
              pred_rating_vector[sorted_item[3]],
              pred_rating_vector[sorted_item[4]])
        print("user:%d, precision@5:%f, precision@10:%f" %
              (user_id,
               eval_metrics.precision_at_k_per_sample(test_set[user_id],
                                                      sorted_item[:5], 5),
               eval_metrics.precision_at_k_per_sample(
                   test_set[user_id], sorted_item[:topk], topk)))

    precision, recall, MAP = [], [], []
    for k in [5, 10, 15, 20]:
        precision.append(
            eval_metrics.precision_at_k(test_set, recommended_list, k))
        recall.append(eval_metrics.recall_at_k(test_set, recommended_list, k))
        MAP.append(eval_metrics.mapk(test_set, recommended_list, k))

    print(precision)
    print(recall)
    print(MAP)
Example #18
noises = t.tensor(noises).cuda()
totalMisclassifications = 0
num_adversarial_logs = 0
ff = open("../data/true_adversarial_logs.txt", "w")

with open("../data/log_adversarials.txt", "w") as f:
    for log, label_true in tqdm(zip(
            logs, label_trues)):  # enumeration of the dataset
        if label_true == 1:

            #  Wrap log as a variable
            log = log.float()
            label_true = t.tensor([label_true.long()]).cuda()
            log = Variable(torch.FloatTensor(log.reshape(1, 200)),
                           requires_grad=True)
            label_true = Variable(torch.LongTensor(label_true),
                                  requires_grad=False)

            #  Classification before Adv
            _, label_pred = t.max(net(log).data,
                                  1)  # find the index of the biggest value

            #  Forward pass
            # print(log.size())
            outputs = net(log)
            loss = SoftmaxWithXent(outputs, label_true)
            loss.backward()  # obtain gradients on x

            #  Add perturbation
            log_adversarial = []
            epsilon = 0.004
Example #19
def variableFromPersona(lang, persona):
    if lang.persona2count[persona] > 20:
        indexes = [lang.persona2index[persona]]
    else:
        indexes = [lang.persona2index['UNK']]
    return Variable(cuda.LongTensor(indexes).view(-1, 1), requires_grad=False)
Example #20
def train_a_epoch(name, data, tag_idx, is_oov, model, optimizer, seq_criterion,
                  lm_f_criterion, lm_b_criterion, att_loss, gamma):
    evaluator = Evaluator(name, [0, 1],
                          main_label_name=cfg.POSITIVE_LABEL,
                          label2id=tag_idx,
                          conll_eval=True)
    t = tqdm(data, total=len(data))

    if is_oov[0] == 1:
        print("Yes, UNKNOWN token is out of vocab")
    else:
        print("No, UNKNOWN token is not out of vocab")

    for SENT, X, C, POS, Y, P in t:
        batch_size = len(SENT)
        # zero the parameter gradients
        optimizer.zero_grad()
        model.zero_grad()
        model.init_state(len(X))

        x_var, c_var, pos_var, y_var, lm_X = to_variables(X=X,
                                                          C=C,
                                                          POS=POS,
                                                          Y=Y)

        np.set_printoptions(threshold=np.nan)

        if cfg.CHAR_LEVEL == "Attention":
            lm_f_out, lm_b_out, seq_out, seq_lengths, emb, char_emb = model(
                x_var, c_var)
            unrolled_x_var = list(chain.from_iterable(x_var))

            not_oov_seq = [-1 if is_oov[idx] else 1 for idx in unrolled_x_var]
            char_att_loss = att_loss(
                emb.detach(), char_emb,
                Variable(torch.cuda.LongTensor(not_oov_seq))) / batch_size

        else:
            lm_f_out, lm_b_out, seq_out, seq_lengths = model(x_var, c_var)

        logger.debug("lm_f_out : {0}".format(lm_f_out))
        logger.debug("lm_b_out : {0}".format(lm_b_out))
        logger.debug("seq_out : {0}".format(seq_out))

        logger.debug("tensor X variable: {0}".format(x_var))

        # remove start and stop tags
        pred = argmax(seq_out)

        logger.debug("Predicted output {0}".format(pred))
        seq_loss = seq_criterion(
            seq_out,
            Variable(torch.LongTensor(y_var)).cuda()) / batch_size

        # limit the vocab size of the sample sentence (a trick used to improve the LM model)
        # TODO make sure that start and end symbol of sentence gets through this filtering.
        logger.debug("Sample input {0}".format(lm_X))
        if gamma != 0:
            lm_X_f = [x1d[1:] for x1d in lm_X]
            lm_X_b = [x1d[:-1] for x1d in lm_X]
            lm_X_f = list(chain.from_iterable(lm_X_f))
            lm_X_b = list(chain.from_iterable(lm_X_b))
            lm_f_loss = lm_f_criterion(
                lm_f_out.squeeze(),
                Variable(cuda.LongTensor(lm_X_f)).squeeze()) / batch_size
            lm_b_loss = lm_b_criterion(
                lm_b_out.squeeze(),
                Variable(cuda.LongTensor(lm_X_b)).squeeze()) / batch_size

            if cfg.CHAR_LEVEL == "Attention":
                total_loss = seq_loss + Variable(cuda.FloatTensor(
                    [gamma])) * (lm_f_loss + lm_b_loss) + char_att_loss
            else:
                total_loss = seq_loss + Variable(cuda.FloatTensor(
                    [gamma])) * (lm_f_loss + lm_b_loss)

        else:
            if cfg.CHAR_LEVEL == "Attention":
                total_loss = seq_loss + char_att_loss
            else:
                total_loss = seq_loss

        desc = "total_loss: {0:.4f} = seq_loss: {1:.4f}".format(
            to_scalar(total_loss), to_scalar(seq_loss))
        if gamma != 0:
            desc += " + gamma: {0} * (lm_f_loss: {1:.4f} + lm_b_loss: {2:.4f})".format(
                gamma, to_scalar(lm_f_loss), to_scalar(lm_b_loss))

        if cfg.CHAR_LEVEL == "Attention":
            desc += " + char_att_loss: {0:.4f}".format(
                to_scalar(char_att_loss))

        t.set_description(desc)

        preds = roll(pred, seq_lengths)
        for pred, x, y in zip(preds, X, Y):
            evaluator.append_data(to_scalar(total_loss), pred, x, y)

        total_loss.backward()
        if cfg.CLIP is not None:
            clip_grad_norm(model.parameters(), cfg.CLIP)

        optimizer.step()

    evaluator.classification_report()

    return evaluator, model
Example #21
    def __next__(self):
        next_batch_idx = next(self.batch_iterator)
        perm = cuda.LongTensor(next_batch_idx)
        return (self.X[:, perm], self.Y[:, perm])
Example #22
def get_candidatelist(epoch,
                      test_dist,
                      dataset,
                      model,
                      config,
                      R,
                      flag='test'):
    # R = config['R']
    test_geo = defaultdict()
    location = dataset.location
    candidatelist = defaultdict(dict)
    all_venues = range(dataset.item_nums)
    if flag == 'test':
        test = dataset.test
        user = test['test_user']
    else:
        test = dataset.valid
        user = test['valid_user']

    for i, uid in enumerate(tqdm.tqdm(user, desc="test")):
        train_checks = dataset.data[uid]['item'][:-1]
        if flag == 'test':
            target = test['test_target_item'][i]
            target_time = test['test_target_time'][i]
            history = test['test_history'][i]
            seq = test['test_seq_item'][i]
            seq_time = test['test_seq_time'][i]
            seq_dist = test['test_seq_dist'][i]
            # seq_dist = [1]
            delatime = test['test_delatime'][i]
        else:
            target = test['valid_target_item'][i]
            history = test['valid_history'][i]
            seq = test['valid_seq_item'][i]
        if target in train_checks:
            continue
        if config['recommend_new']:
            recommend_list = np.setdiff1d(np.array(all_venues),
                                          np.array(train_checks))

            current_location = list(
                location.loc[location.vid == target].values[0])[1:]
            x, y = current_location
            lat_max = R / 111 + x
            lat_min = x - R / 111
            lon_max = R / (111 * np.cos(x * math.pi / 180.0)) + y
            lon_min = y - R / (111 * np.cos(x * math.pi / 180.0))
            near_location = location[(location["lon"] > lon_min) &
                                     (location["lon"] < lon_max) &
                                     (location["lat"] > lat_min) &
                                     (location["lat"] < lat_max)]
            neighbors = list(
                np.intersect1d(near_location.vid.values, recommend_list))
            # geo_dist = []
            if epoch == 0:
                #geo_dist = dataset.place_correlation[neighbors][:, history].toarray()
                geo_dist = []
                # test_geo[uid] = geo_dist
            else:
                # geo_dist = test_geo[uid]
                geo_dist = []
            overall_scores = model(T.LongTensor([uid] * len(neighbors)),
                                   T.LongTensor(seq),
                                   T.LongTensor(seq_time),
                                   T.LongTensor(history),
                                   seq_dist,
                                   geo_dist,
                                   T.LongTensor(neighbors),
                                   T.LongTensor([target_time] *
                                                len(neighbors)),
                                   T.LongTensor(delatime),
                                   flag='test').cpu().detach().numpy()
            predict_scores = zip(neighbors, list(overall_scores))
            predict_scores = sorted(predict_scores,
                                    key=lambda x: x[1],
                                    reverse=True)[0:100]
            candidatelist[uid][target] = [x[0] for x in predict_scores]
        else:
            current_location = list(
                location.loc[location.vid == target].values[0])[1:]
            x, y = current_location
            lat_max = R / 111 + x
            lat_min = x - R / 111
            lon_max = R / (111 * np.cos(x * math.pi / 180.0)) + y
            lon_min = y - R / (111 * np.cos(x * math.pi / 180.0))
            near_location = location[(location["lon"] > lon_min) &
                                     (location["lon"] < lon_max) &
                                     (location["lat"] > lat_min) &
                                     (location["lat"] < lat_max)]
            neighbors = list(near_location.vid.values)
            overall_scores = model(T.LongTensor([uid] * len(neighbors)),
                                   T.LongTensor(seq),
                                   T.LongTensor(history),
                                   T.LongTensor(neighbors),
                                   flag='test').cpu().detach().numpy()
            predict_scores = zip(neighbors, list(overall_scores))
            predict_scores = sorted(predict_scores,
                                    key=lambda x: x[1],
                                    reverse=True)[0:100]
            candidatelist[uid][target] = [x[0] for x in predict_scores]
    return candidatelist, test_geo
Example #23
    # prioritized experience replay

    if DEVICE == torch.device(type='cpu'):
        # use latest episode for training
        agent.learn(
            (FloatTensor(states), LongTensor(actions), FloatTensor(rewards),
             FloatTensor(next_states), FloatTensor(dones)),
            (1. - (1. / action_size)))
        # use enhanced training data
        agent.learn(
            (FloatTensor(total_states), LongTensor(total_actions),
             FloatTensor(total_rewards), FloatTensor(total_next_states),
             FloatTensor(total_dones)), (1. - (1. / action_size)))
    else:
        # use latest episode for training
        agent.learn((cuda.FloatTensor(states), cuda.LongTensor(actions),
                     cuda.FloatTensor(rewards), cuda.FloatTensor(next_states),
                     cuda.FloatTensor(dones)), (1. - (1. / action_size)))
        # use enhanced training data
        agent.learn(
            (cuda.FloatTensor(total_states), cuda.LongTensor(total_actions),
             cuda.FloatTensor(total_rewards),
             cuda.FloatTensor(total_next_states),
             cuda.FloatTensor(total_dones)), (1. - (1. / action_size)))
    total_scores.append(score)
    if len(total_scores) > 100:
        total_scores = total_scores[(len(total_scores) - 100):]
    avg_score = float(sum(total_scores)) / float(len(total_scores))
    with open('data.csv', 'a+') as f:
        f.write("{},{},{},{}\n".format(i, score, eps, avg_score))
Example #24
    def forward(self, batch_pairs, train=True):

        N = len(batch_pairs)

        # pair = tuple of (question, answer)
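        # NOTE: 'pair' below is undefined in this snippet; presumably a (question, answer) entry from batch_pairs is intended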
        if self.persona is True:
            (persona1, input_variable, input_length, persona2, target_variable,
             target_length) = utils.variablesFromPairPersona(self.lang, pair)
            p1 = self.persona_embedding(persona1).view(1, -1)
            p2 = self.persona_embedding(persona2).view(1, -1)
        else:
            input_batch = Variable(cuda.LongTensor(N, self.max_length).zero_(),
                                   requires_grad=False)
            target_batch = Variable(
                cuda.LongTensor(N, self.max_length + 1).zero_(),
                requires_grad=False)  # start with SOS token
            input_batch_len = []
            target_batch_len = []
            for i in range(N):
                (input_variable, input_length, target_variable,
                 target_length) = utils.variablesFromPair(
                     self.lang, batch_pairs[i])
                input_batch[i] = input_variable
                target_batch[i] = target_variable
                input_batch_len.append(input_length)
                target_batch_len.append(target_length)
            input_batch_len = cuda.LongTensor(input_batch_len)
            target_batch_len = cuda.LongTensor(target_batch_len)
            p1 = None
            p2 = None

        if train is False:
            print(input_variable)

        encoder_hidden = self.encoder.initHidden(N)
        decoder_hidden = self.decoder.initHidden(N)

        self.encoder_optimizer.zero_grad()
        self.decoder_optimizer.zero_grad()

        # input_length = input_variable.size()[0]
        # target_length = target_variable.size()[0]

        loss = 0
        if self.attention is True:
            encoder_states = Variable(
                cuda.FloatTensor(input_length,
                                 self.encoder.hidden_size).zero_())

        # Encode the sentence
        # for ei in range(input_length):
        #     encoder_output, encoder_hidden = self.encoder(input_variable[ei], encoder_hidden)
        # if self.attention is True:
        #     encoder_states[ei] = encoder_output[0][0] # First element in batch, only hidden state and not cell state
        # print encoder_hidden[0].size(), input_batch.size()

        encoder_output, encoder_hidden = self.encoder(input_batch,
                                                      encoder_hidden)

        encoder_hidden_states = Variable(
            cuda.FloatTensor(N, self.encoder.hidden_size).zero_())
        for i in range(N):
            encoder_hidden_states[i] = encoder_output[i, input_batch_len[i] -
                                                      1, :]

        # if self.attention is True:
        #     self.wf = torch.t(self.wf_layer(encoder_states)) # D x f

        # print torch.mean(encoder_output)
        del input_variable

        # Decode with start symbol as SOS
        response = []
        if train is True:
            decoder_output, decoder_hidden = self.decoder(
                target_batch, decoder_hidden, encoder_hidden_states, p1, p2)

            assert False
            # for di in xrange(self.max_length):
            #     if self.attention is True:
            #         decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_states, self.wf, p1, p2)
            #     else:
            #         decoder_output, decoder_hidden = self.decoder(decoder_input, decoder_hidden, encoder_output[0][0], p1, p2)
            #     # TODO change the loss to batch loss considering pad symbols
            #     if di == target_length:
            #         break
            #     loss += self.criterion(decoder_output[0], target_variable[di])
            #     decoder_input = target_variable[di] # Teacher forcing
            #     ind = target_variable[di][0]
        else:
            # greedy decode
            response = []
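            # NOTE: decoder_input is not initialized in this snippet; presumably it should start as an SOS token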
            for di in range(self.max_length):
                if self.attention is True:
                    decoder_output, decoder_hidden = self.decoder(
                        decoder_input, decoder_hidden, encoder_states, self.wf,
                        p1, p2)
                else:
                    decoder_output, decoder_hidden = self.decoder(
                        decoder_input, decoder_hidden, encoder_output[0][0],
                        p1, p2)
                topv, topi = decoder_output.data.topk(1)
                ind = topi[0][0]
                if ind == utils.EOS_token:
                    break
                decoder_input = Variable(cuda.LongTensor([[ind]]),
                                         requires_grad=False)
                response.append(self.lang.index2word[ind])

            # This implementation of beam search is wrong, we need to predict and follow the pointers back.
            # beam_size = 5
            # di = 0
            # while di < self.max_length:

        # tf.summary.scalar('loss', loss)

        # Step back
        if train is True:
            loss.backward()
            self.encoder_optimizer.step()
            self.decoder_optimizer.step()

        del encoder_hidden
        del decoder_hidden
        del decoder_output
        del target_variable
        response = ' '.join(response)
        return response, loss
Example #25
def sequence_to_variable(sequence, to_ix, use_cuda=False):
    if use_cuda:
        return ag.Variable(cuda.LongTensor([to_ix[t] for t in sequence]))
    else:
        return ag.Variable(torch.LongTensor([to_ix[t] for t in sequence]))
Example #26
def prepare_sequence(seq, to_ix):
    idxs = [to_ix[w] for w in seq]
    tensor = cuda.LongTensor(idxs)
    return Variable(tensor)
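
All of the snippets above use the pre-0.4 PyTorch constructors (cuda.LongTensor and friends). On current releases the same tensor is usually built with torch.tensor and an explicit dtype and device; a minimal equivalent of the call above:

import torch

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
idxs = [4, 8, 15]
tensor = torch.tensor(idxs, dtype=torch.long, device=device)  # replaces cuda.LongTensor(idxs)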