Пример #1
0
    def load(self, fdata, use_char=False, n_context=1, max_len=10):
        """Turn the corpus behind ``fdata`` into a padded ``TensorDataset``.

        Args:
            fdata: Passed unchanged to ``self.preprocess``, which must yield
                ``(word sequence, tag sequence)`` pairs.
            use_char: When true, the character-index tensor is included in
                the returned dataset.
            n_context: Context window size; values above 1 delegate the word
                features to ``self.get_context``.
            max_len: Number of character slots kept per word.

        Returns:
            ``TensorDataset(x, y, char_x, lens)`` if ``use_char`` is set,
            otherwise ``TensorDataset(x, y, lens)``.
        """
        word_feats, tag_feats, char_feats, lengths = [], [], [], []

        for words, tags in self.preprocess(fdata):
            word_ids = [self.wdict.get(word, self.unk_wi) for word in words]
            tag_ids = [self.tdict[tag] for tag in tags]

            # Word features: a context window per word, or the raw indices.
            if n_context > 1:
                word_feats.append(self.get_context(word_ids, n_context))
            else:
                word_feats.append(torch.tensor(word_ids, dtype=torch.long))
            tag_feats.append(torch.tensor(tag_ids, dtype=torch.long))

            # Character features: cut every word to max_len characters and
            # fill the remaining slots with 0.
            rows = []
            for word in words:
                char_ids = [self.cdict.get(ch, self.unk_ci)
                            for ch in word[:max_len]]
                rows.append(char_ids + [0] * (max_len - len(word)))
            char_feats.append(torch.tensor(rows))
            lengths.append(len(tag_ids))

        x = pad_sequence(word_feats, batch_first=True)
        y = pad_sequence(tag_feats, batch_first=True)
        # Built unconditionally, exactly as before, even if use_char is off.
        char_x = pad_sequence(char_feats, batch_first=True)
        lens = torch.tensor(lengths)

        tensors = (x, y, char_x, lens) if use_char else (x, y, lens)
        return TensorDataset(*tensors)
Пример #2
0
    def postprocess_sequence(self, X):
        """Embed a batch of (possibly variable-length) sequences.

        Parameters
        ----------
        X : list
            List of input sequences.

        Returns
        -------
        fX : numpy array
            Output of ``self.model`` for the batch, re-ordered so that its
            rows line up with the order of ``X``.
        """

        # Order the sequences from longest to shortest (the order packing
        # requires by default) and keep the permutation that undoes it.
        seq_lengths = torch.tensor([len(seq) for seq in X])
        lengths_desc, order = torch.sort(seq_lengths, descending=True)
        restore = torch.sort(order)[1]

        batch = []
        for idx in order:
            batch.append(torch.tensor(X[idx],
                                      dtype=torch.float32,
                                      device=self.device))

        packed = pack_padded_sequence(
            pad_sequence(batch, batch_first=True, padding_value=0),
            lengths_desc,
            batch_first=True)

        embeddings = self.model(packed).detach().to(torch.device('cpu'))
        return embeddings.numpy()[restore]
Пример #3
0
 def _process(self, index):
     """Fetch and transform one dataset item, or a list of them.

     For an ``int`` index the transformed item's ``"spectrogram"``,
     ``"target_attr"`` and ``"silent_mask"`` entries are returned as a tuple.
     For a ``list`` index the transformed items are ordered by decreasing
     ``"num_frames"``; the spectrograms are packed and the other two fields
     are padded batch-first.

     Raises:
         ValueError: if ``index`` is neither exactly ``int`` nor ``list``.
     """
     index_type = type(index)

     if index_type is int:
         source, target = self.dataset[index]
         item = self._transform(source, target)
         return (item["spectrogram"],
                 item["target_attr"],
                 item["silent_mask"])

     if index_type is not list:
         raise ValueError("Unsupported index type({})".format(type(index)))

     # Longest first, as pack_sequence expects by default.
     items = [self._transform(s, t) for s, t in self.dataset[index]]
     items.sort(key=lambda item: item["num_frames"], reverse=True)

     spectrogram = pack_sequence([item["spectrogram"] for item in items])
     target_attr = pad_sequence([item["target_attr"] for item in items],
                                batch_first=True)
     silent_mask = pad_sequence([item["silent_mask"] for item in items],
                                batch_first=True)
     return spectrogram, target_attr, silent_mask
Пример #4
0
    def test_rnn_init_predict_split(self):
        """Check how exported LSTM ops are divided between init and predict nets."""
        lstm = nn.LSTM(RNN_INPUT_SIZE, RNN_HIDDEN_SIZE, 3, bidirectional=True)
        lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=7)
        lengths = sorted((int(n) for n in lengths), reverse=True)
        sequences = [Variable(torch.randn(n, RNN_INPUT_SIZE)) for n in lengths]
        padded = rnn_utils.pad_sequence(sequences)

        exported = do_export(lstm, padded, export_params=self.embed_params)[0]
        prepared = c2.prepare(onnx.ModelProto.FromString(exported),
                              device='CPU')

        # Embedding the parameters is expected to move ops into the init
        # net, so the expected op counts differ between the two modes.
        if self.embed_params:
            expected_init, expected_predict = 1038, 101
        else:
            expected_init, expected_predict = 27, 1112
        assert len(prepared.init_net.op) == expected_init
        assert len(prepared.predict_net.op) == expected_predict
Пример #5
0
        def make_input(batch_size):
            """Build random RNN input for ``batch_size`` sequences.

            Returns the padded sequence tensor alone, or a tuple that also
            carries the initial hidden state and/or the sequence lengths,
            depending on the enclosing ``initial_state`` and
            ``packed_sequence`` settings.
            """
            lengths = np.random.randint(1, RNN_SEQUENCE_LENGTH + 1, size=batch_size)
            lengths = sorted((int(n) for n in lengths), reverse=True)
            sequences = [Variable(torch.randn(n, RNN_INPUT_SIZE)) for n in lengths]
            padded = rnn_utils.pad_sequence(sequences)
            if packed_sequence == 2:
                # Swap the first two axes of the time-major padded tensor.
                padded = padded.transpose(0, 1)
            args = [padded]

            num_directions = 2 if bidirectional else 1

            if initial_state:
                args.append(Variable(torch.randn(num_directions * layers,
                                                 batch_size,
                                                 RNN_HIDDEN_SIZE)))
            if packed_sequence != 0:
                args.append(Variable(torch.IntTensor(lengths)))

            # A lone tensor is returned as-is; several inputs become a tuple.
            return args[0] if len(args) == 1 else tuple(args)
Пример #6
0
    def forward(self, x, char_x, lens):
        """Score every word from its word embedding plus a character encoding.

        Args:
            x: 2-D tensor of word indices; index 0 is treated as padding.
            char_x: Character indices per word, indexed by the same
                (sentence, word) positions as ``x``.
            lens: Tensor with the number of real words in each sentence.

        Returns:
            Output of ``self.out`` applied to the word-level LSTM states.
        """
        batch_size, seq_len = x.shape  # unpacking requires a 2-D input
        # Positions with a non-zero word index are real words.
        word_mask = x.gt(0)
        # Word embeddings.
        word_repr = self.embed(x)

        # Character representations: encode only the real words, then
        # regroup them per sentence and pad back to a rectangular batch.
        char_out = self.char_lstm(char_x[word_mask])
        char_repr = pad_sequence(torch.split(char_out, lens.tolist()),
                                 batch_first=True)

        # Concatenate the word and character representations.
        combined = self.drop(torch.cat((word_repr, char_repr), dim=-1))

        packed = pack_padded_sequence(combined, lens, batch_first=True)
        packed_out, _ = self.word_lstm(packed)
        unpacked, _ = pad_packed_sequence(packed_out, batch_first=True)

        return self.out(self.drop(unpacked))
Пример #7
0
                                         policy.parameters()),
                                  lr=0.01)

    for i in range(num_batch):
        policy_optimizer.zero_grad()
        actor_batch = []
        critic_batch = []
        reward_batch = []
        regex = []
        for j in range(batch_size):
            actions, reg = policy.sample_regex(max_len_regex)
            reward_batch.append(evaluator.evaluate(reg))
            actor_outs, critic_outs, _, _ = policy.evaluate_solution(actions)
            actor_batch.append(actor_outs)
            critic_batch.append(critic_outs)
            regex.append(reg)

        actor_batch = pad_sequence(actor_batch, True)
        critic_batch = pad_sequence(critic_batch, True)
        reward_batch = (torch.FloatTensor(reward_batch)
                        if not torch.cuda.is_available() else
                        torch.FloatTensor(reward_batch).cuda())
        reward_batch = reward_batch.unsqueeze(1)

        print("max_reward :", reward_batch.max())
        print("regex max :", regex[int(reward_batch.argmax())])
        loss = -1.0 * (actor_batch * reward_batch).sum()
        print("loss :", loss)
        loss.backward()
        policy_optimizer.step()
Пример #8
0
    def batch_loss(self, batch, model, device, writer=None, **kwargs):
        """Compute the mean triplet loss of ``model`` on one batch.

        Parameters
        ----------
        batch : dict
            Provides the input sequences under ``'X'`` and, after
            ``self.aggregate``, the labels under ``'y'``. It is modified in
            place: ``'X'`` is replaced by the tensor (or packed sequence)
            fed to the model and ``'fX'`` receives the model output.
        model : callable
            Applied to the batched input. It receives a padded float tensor
            when all sequences share one length and a ``PackedSequence``
            otherwise.
        device : torch.device
            Device on which the input tensors are created.
        writer : optional
            When not None, embedding norms, pairwise distances and triplet
            deltas are appended to the ``self.log_*_`` lists.
        **kwargs
            Accepted and ignored.

        Returns
        -------
        loss : torch.Tensor
            Mean of the per-triplet losses.
        """

        lengths = torch.tensor([len(x) for x in batch['X']])
        # Compare plain ints: 0-d tensors hash by identity, so a set built
        # from the tensor itself never merges equal lengths and the
        # fixed-length branch would be skipped for multi-sequence batches.
        variable_lengths = len(set(lengths.tolist())) > 1

        if variable_lengths:

            # Packing expects sequences ordered from longest to shortest;
            # `unsort` is the permutation that restores the original order.
            sorted_lengths, sort = torch.sort(lengths, descending=True)
            _, unsort = torch.sort(sort)

            sequences = [torch.tensor(batch['X'][i],
                                      dtype=torch.float32,
                                      device=device) for i in sort]
            padded = pad_sequence(sequences, batch_first=True, padding_value=0)
            packed = pack_padded_sequence(padded, sorted_lengths,
                                          batch_first=True)
            batch['X'] = packed
        else:
            batch['X'] = torch.tensor(np.stack(batch['X']),
                                      dtype=torch.float32,
                                      device=device)

        # forward pass
        fX = model(batch['X'])

        if variable_lengths:
            # put the embeddings back in the order of the incoming batch
            fX = fX[unsort]

        # log embedding norms
        if writer is not None:
            norm_npy = np.linalg.norm(self.to_numpy(fX), axis=1)
            self.log_norm_.append(norm_npy)

        batch['fX'] = fX
        batch = self.aggregate(batch)

        fX = batch['fX']
        y = batch['y']

        # pre-compute pairwise distances
        distances = self.pdist(fX)

        # sample triplets with the strategy named by `self.sampling`
        triplets = getattr(self, 'batch_{0}'.format(self.sampling))
        anchors, positives, negatives = triplets(y, distances)

        # compute loss for each triplet
        losses, deltas, _, _ = self.triplet_loss(
            distances, anchors, positives, negatives,
            return_delta=True)

        if writer is not None:
            pdist_npy = self.to_numpy(distances)
            delta_npy = self.to_numpy(deltas)
            same_speaker = pdist(y.reshape((-1, 1)), metric='equal')
            self.log_positive_.append(pdist_npy[np.where(same_speaker)])
            self.log_negative_.append(pdist_npy[np.where(~same_speaker)])
            self.log_delta_.append(delta_npy)

        # average over all triplets
        return torch.mean(losses)
Пример #9
0
def pad(batch, device=torch.device('cpu')):
    """Pad a batch of sequences to a common length.

    Args:
        batch: Sequence of tensors whose first dimension is the sequence
            length.
        device: Device that receives both returned tensors.

    Returns:
        A ``(padded, lengths)`` tuple: ``padded`` is batch-first and filled
        with the value 1 beyond each sequence's end; ``lengths`` is a long
        tensor holding the original length of every sequence.
    """
    # A plain comprehension avoids depending on a helper bound to the
    # easily-clobbered throwaway name `_` for a trivial length lookup.
    batch_lengths = torch.tensor([len(seq) for seq in batch],
                                 dtype=torch.long,
                                 device=device)
    padded = pad_sequence(batch, batch_first=True, padding_value=1)
    return padded.to(device), batch_lengths
    print("weights: ", weights)
# Top-level training script: runs the epoch loop and accumulates the running
# loss and the number of correct predictions over the training loader.
print("===================")
# Per-epoch metric histories (none of them is appended to in the visible part).
test_acc=[]
train_acc=[]
weighted_acc = []
test_loss=[]
train_loss=[]
for epoch in range(100):  # 100 epochs; far more than usual, acceptable only because this is toy data
    print("===================================" + str(epoch+1) + "==============================================")
    losses = 0
    correct=0
    model.train()
    for j, (input_lstm,input, target,seq_length) in enumerate(train_loader):
        # NOTE(review): `weight` is first assigned further down in this loop;
        # it is only read here from the 20th batch on, so this relies on it
        # being set by an earlier iteration (or an earlier epoch).
        if (j+1)%20==0: print("=================================Train Batch"+ str(j+1)+ str(weight)+"===================================================")
        model.zero_grad()
        # Pad the variable-length LSTM inputs into one batch-first tensor.
        input_lstm = pad_sequence(sequences=input_lstm,batch_first=True)
        losses_batch,correct_batch= model(input_lstm,input, target,seq_length)
        loss = torch.mean(losses_batch,dim=0)
        correct_batch=torch.sum(correct_batch,dim=0)
        # Weight the mean batch loss by the batch size; normalised after the loop.
        losses += loss.item() * batch_size
        loss.backward()
        weight=model.module.state_dict()["weight"]
        # NOTE(review): `.item()` binds to the denominator only, so the result
        # is a tensor divided by a Python float; if a scalar sigmoid(10 * weight)
        # was intended the parentheses need adjusting - confirm.
        weight=torch.exp(10*weight)/(1+torch.exp(10*weight)).item()
        optimizer.step()
        correct += correct_batch.item()
    # Averages assume every batch holds exactly `batch_size` samples.
    accuracy=correct*1.0/((j+1)*batch_size)
    losses=losses / ((j+1)*batch_size)

    # Accumulators for the evaluation pass (not visible in this excerpt).
    losses_test = 0
    correct_test = 0
    losses_test_ce=0
Пример #11
0
def stroke_embed(batch,
                 initials,
                 embedder,
                 bezier_degree,
                 bezier_degree_low,
                 variational=False,
                 inf_loss=False):
    """Embed every stroke of every sketch in ``batch`` with ``embedder``.

    Args:
        batch: Iterable of ``(sketch, _)`` pairs; the second element is
            ignored. Each sketch is a sequence of strokes that support 2-D
            slicing (presumably numpy arrays of points - TODO confirm).
        initials: ``(h_initial, c_initial)`` pair forwarded to ``embedder``.
        embedder: Callable stroke encoder exposing a boolean ``rational``
            attribute.
        bezier_degree: Together with ``bezier_degree_low`` selects which
            entry of the embedder output is kept when ``inf_loss`` is false.
        bezier_degree_low: See ``bezier_degree``.
        variational: Not used in this function.
        inf_loss: When true the embedder is also asked for a degree loss and
            its control-point output is kept unindexed and unpadded.

    Returns:
        * ``embedder.rational``: ``(ctrlpt, ratw, st_starts, stopbits,
          n_strokes)``.
        * otherwise, ``inf_loss`` false: ``(ctrlpt, st_starts, stopbits,
          n_strokes)``.
        * otherwise, ``inf_loss`` true: ``((ctrlpt_list, deg_losses),
          st_starts, stopbits, n_strokes)`` where ``ctrlpt_list`` is a plain
          Python list with one entry per sketch.

        ``st_starts``, ``stopbits`` and (when padded) ``ctrlpt``/``ratw`` are
        padded batch-first over sketches; ``n_strokes`` holds the stroke
        count of each sketch.
    """
    h_initial, c_initial = initials
    # Redundant, but that's fine
    device = torch.device(
        'cuda') if torch.cuda.is_available() else torch.device('cpu')

    # accumulate all info into these empty lists
    sketches_ctrlpt, sketches_ratw, sketches_st_starts, sketches_stopbits = [], [], [], []
    deg_losses = []
    n_strokes = []

    for sk, _ in batch:
        # for each sketch in the batch
        # First two columns of each stroke's first row: the stroke's start.
        st_starts = torch.tensor([st[0, :2] for st in sk], device=device)
        # Drop the last column of every stroke and shift it by its start.
        sk = [
            torch.tensor(st[:, :-1], device=device) - st_start
            for st, st_start in zip(sk, st_starts)
        ]
        # Row count of every stroke, needed to pack the padded batch.
        ls = [st.shape[0] for st in sk]
        sk = pad_sequence(sk, batch_first=True)
        sk = pack_padded_sequence(sk,
                                  ls,
                                  batch_first=True,
                                  enforce_sorted=False)

        if embedder.rational:
            # NOTE(review): `inf_loss` is not forwarded on this path and
            # `deg_loss` is never assigned, so rational + inf_loss=True
            # fails at the `deg_losses.append(deg_loss)` below.
            emb_ctrlpt, emb_ratw = embedder(sk, h_initial, c_initial)
        else:
            if not inf_loss:
                emb_ctrlpt = embedder(sk, h_initial, c_initial, inf_loss=False)
            else:
                emb_ctrlpt, deg_loss = embedder(sk,
                                                h_initial,
                                                c_initial,
                                                inf_loss=True)
                # breakpoint()

        if not inf_loss:
            # Keep the entry for the requested degree and flatten it to one
            # row per stroke.
            emb_ctrlpt = emb_ctrlpt[bezier_degree - bezier_degree_low]
            sketches_ctrlpt.append(emb_ctrlpt.view(len(ls), -1))
        else:
            sketches_ctrlpt.append(emb_ctrlpt)
            deg_losses.append(deg_loss)
        # breakpoint()

        if embedder.rational:
            sketches_ratw.append(emb_ratw)
        sketches_st_starts.append(st_starts)
        # create stopbits: 0 for every stroke except the last one of the sketch
        stopbit = torch.zeros(len(ls), 1, device=device)
        stopbit[-1, 0] = 1.
        sketches_stopbits.append(stopbit)
        n_strokes.append(len(ls))

    n_strokes = torch.tensor(n_strokes, device=device)
    if not inf_loss:
        sketches_ctrlpt = pad_sequence(sketches_ctrlpt, batch_first=True)

    if embedder.rational:
        sketches_ratw = pad_sequence(sketches_ratw, batch_first=True)
    sketches_st_starts = pad_sequence(sketches_st_starts, batch_first=True)
    # Padded stroke slots are filled with 1.0, the same value as the stop bit.
    sketches_stopbits = pad_sequence(sketches_stopbits,
                                     batch_first=True,
                                     padding_value=1.0)

    # For every sketch in a batch:
    #   For every stroke in the sketch:
    #     1. (Control Point, Rational Weights) pair
    #     2. Start location of the stroke with respect to a global reference (of the sketch)
    if embedder.rational:
        return sketches_ctrlpt, sketches_ratw, sketches_st_starts, sketches_stopbits, n_strokes
    else:
        if not inf_loss:
            return sketches_ctrlpt, sketches_st_starts, sketches_stopbits, n_strokes
        else:
            return (
                sketches_ctrlpt,
                deg_losses), sketches_st_starts, sketches_stopbits, n_strokes
Пример #12
0
    def fit(self, X, y=None, y_for_verification=None, plot=False):
        """Train the embedding model on ``X`` and cluster after every epoch.

        Parameters
        ----------
        X : array-like or list of tensors
            Training data. Padded batch-first with 0 when ``self.is_tokens``
            is set, then converted to a ``torch.Tensor`` if it is not one.
        y : array-like, optional
            Labels for semi-supervised training; entries equal to ``-1`` are
            excluded when counting classes (presumably "unlabelled" -
            TODO confirm). Semi-supervised mode is switched off when ``y``
            is None.
        y_for_verification : array-like, optional
            Ground-truth labels used only to report NMI / accuracy.
        plot : bool
            When true and ``self.n_components`` is 2 or 3, the embedding is
            plotted after every epoch.

        Notes
        -----
        The visible body does not return a value. It resets the best-so-far
        attributes, and sets ``self.device``, ``self.optimizer``, and (in
        semi-supervised mode) ``self.semisupervised_model``.
        """
        # assert not self.semisupervised, "semisupervised not supported yet"

        # Reset the state left over from a previous fit.
        self.best_delta_mi = -1
        self.best_full_net = None
        self.best_embedding_net = None
        # self.final_model = None
        # self.final_model_trained = False
        self.best_n_clusters = 1
        self.zero_cutoff = self.initial_zero_cutoff
        self.exp_dist = 0

        if self.random_seed is not None:
            np.random.seed(self.random_seed)

        use_y_to_verify_performance = y_for_verification is not None
        # Semi-supervised training needs labels.
        self.semisupervised = self.semisupervised and y is not None

        # Default weight: the fraction of entries in y that are not -1.
        if self.semisupervised and self.semisupervised_weight is None:
            self.semisupervised_weight = np.sum(y != -1) / y.shape[0]

        if self.semisupervised:
            n_classes = np.unique(y[y != -1]).shape[0]  # because of the -1

        if use_y_to_verify_performance:
            verify_n_classes = np.unique(y_for_verification).shape[0]
            self._print_with_verbosity(
                f"number of classes in verification set: {verify_n_classes}",
                3)

        if self.model == "auto":
            self.model = self._select_model(X)

        if self.is_tokens:
            X = pad_sequence(X, padding_value=0, batch_first=True)

        if type(X) is not torch.Tensor:
            X = torch.Tensor(X)

        # Use the GPU only when one is available and allowed by `use_gpu`.
        self.device = torch.device("cuda") if (
            torch.cuda.is_available()
            and self.use_gpu) else torch.device("cpu")
        if self.device.type == "cpu":
            self._print_with_verbosity("WARNING: using CPU, may be very slow",
                                       0,
                                       strict=True)

        self._print_with_verbosity(f"using torch device {self.device}", 1)

        self._print_with_verbosity("building dataset", 1)

        dataset = self._build_dataset(
            X,
            y=y if self.semisupervised else None,
        )

        data_loader = DataLoader(dataset,
                                 shuffle=True,
                                 batch_size=self.batch_size)

        self.model = self.model.to(self.device)

        if self.optimizer_override is None:
            self.optimizer = optim.Adam(self.model.parameters(),
                                        lr=self.learning_rate)
        else:
            self.optimizer = self.optimizer_override(self.model.parameters(),
                                                     lr=self.learning_rate)

        if self.semisupervised:
            label_subnet = ClusterNet(self.n_components,
                                      n_classes).to(self.device)
            self.semisupervised_model = FullNet(self.model,
                                                label_subnet).to(self.device)
            # NOTE(review): this replaces the optimizer chosen above, so
            # `optimizer_override` has no effect in semi-supervised mode.
            self.optimizer = optim.Adam(self.semisupervised_model.parameters(),
                                        lr=self.learning_rate)

        self._print_with_verbosity("training", 1)

        for i in self._progressbar_with_verbosity(range(self.epochs),
                                                  0,
                                                  strict=True):
            # One training pass, then embed all of X in eval mode.
            self.model.train()
            self._print_with_verbosity(f"this is epoch {i}", 1)
            self._train_siamese_one_epoch(data_loader)
            self.model.eval()
            transformed = self.transform(X, model=self.model)

            self._get_exp_dist(data_loader)
            self._print_with_verbosity(
                f"found expected distance between related points as {self.exp_dist}",
                3)
            cluster_assignments = self._cluster(transformed)
            self._print_with_verbosity(f"found {self.n_clusters} clusters", 1)

            preds = self._build_cluster_subnet(X, transformed,
                                               cluster_assignments)

            # Optional quality report against the verification labels.
            if use_y_to_verify_performance:
                nmi_score = normalized_mutual_info_score(
                    cluster_assignments, y_for_verification, 'geometric')
                self._print_with_verbosity(
                    f"NMI of cluster labels with y: {nmi_score}", 2)

                nmi_score = normalized_mutual_info_score(
                    preds, y_for_verification, 'geometric')
                self._print_with_verbosity(
                    f"NMI of network predictions with y: {nmi_score}", 1)

                # Accuracy is only computed when the class counts match.
                if self.n_clusters == verify_n_classes:
                    acc_score = get_accuracy(cluster_assignments,
                                             y_for_verification)
                    self._print_with_verbosity(
                        f"accuracy of cluster labels: {acc_score}", 2)

                if np.unique(preds).shape[0] == verify_n_classes:
                    acc_score = get_accuracy(preds, y_for_verification)
                    self._print_with_verbosity(
                        f"accuracy of network predictions: {acc_score}", 1)
                else:
                    self._print_with_verbosity(
                        f"number of predicted classes did not match number of clusters so not computing accuracy, correct {verify_n_classes} vs {self.n_clusters}",
                        2)

            # Optional per-epoch plots of the 2-D or 3-D embedding.
            if plot:
                if self.n_components == 2:
                    plot_2d(transformed,
                            cluster_assignments,
                            show=False,
                            no_legend=True)

                    if use_y_to_verify_performance:
                        plot_2d(transformed,
                                y_for_verification,
                                show=False,
                                no_legend=True)

                    plt.show()

                elif self.n_components == 3:
                    plot_3d(transformed, cluster_assignments, show=False)

                    if use_y_to_verify_performance:
                        plot_3d(transformed, y_for_verification, show=False)

                    plt.show()
Пример #13
0
        sequences_1 = [sequence[0] for sequence in input_variables]
        sequences_2 = [sequence[1] for sequence in input_variables]
        batch_size = len(sequences_1)

        # Make a tensor for the similarity scores

        sim_scores_2d = torch.zeros([batch_size, 2])
        for j in range(batch_size):
            if similarity_scores[j] == 0:
                sim_scores_2d[j] = fake_label
            else:
                sim_scores_2d[j] = real_label

        sim_scores_2d = sim_scores_2d.cuda()

        temp = rnn.pad_sequence(sequences_1 + sequences_2)
        sequences_1 = temp[:, :batch_size]
        sequences_2 = temp[:, batch_size:]

        model_optimizer.zero_grad()
        loss_s = 0.0

        optimizerG.zero_grad()
        loss_g = 0.0

        optimizerD.zero_grad()
        loss_d = 0.0

        loss_f = 0.0

        # Initialise the hidden state and pass through the maLSTM
Пример #14
0
 def to_torch(batch, **kwargs):
     """Convert ``batch`` into one zero-padded long tensor.

     Every element of ``batch`` becomes a ``torch.long`` tensor; the result
     is padded with ``batch_first=False``, so the sequence axis comes first.
     Extra keyword arguments are accepted and ignored.
     """
     tensors = []
     for item in batch:
         tensors.append(torch.tensor(item, dtype=torch.long))
     return pad_sequence(tensors, batch_first=False)
Пример #15
0
def collate_batch(batch):
    """Collate a whole batch of utterances.

    Each element of ``batch`` is itself a collection of utterance tensors;
    all utterances are gathered into one flat list (order preserved) and
    zero-padded into a single batch-first tensor.
    """
    utterances = []
    for sample in batch:
        utterances.extend(sample)
    return pad_sequence(utterances, batch_first=True, padding_value=0)
Пример #16
0
        stocks = stocks.set_index('symbol', drop=True)
        train_df = stocks.drop(test_symbols, axis=0)
        test_df = stocks.drop(stocks.index.difference(test_symbols), axis=0)

        train_symbols = train_df.index.unique().tolist()

        train_tensors = []
        train_seq_lens = []
        for sym in train_symbols:
            stock_data, stock_data_len = prepare_stock_data(sym)
            stock_data = normalize_stock_data(stock_data)
            stock_tensor = torch.Tensor(stock_data)
            train_seq_lens.append(stock_data_len)
            train_tensors.append(stock_tensor)
        X = pad_sequence(train_tensors).T.unsqueeze(-1)
        y = torch.Tensor(train_seq_lens)
        train_dataset = TensorDataset(X, y)

        test_seq_lens = []
        test_tensors = []
        for sym in test_symbols:
            stock_data, stock_data_len = prepare_stock_data(sym)
            stock_data = normalize_stock_data(stock_data)
            test_seq_lens.append(stock_data_len)
            stock_tensor = torch.Tensor(stock_data)
            test_tensors.append(stock_tensor)
        X = pad_sequence(test_tensors).T.unsqueeze(-1)
        y = torch.Tensor(test_seq_lens)
        test_dataset = TensorDataset(X, y)
Пример #17
0
        hidden_state = torch.cat([ht_final[i] for i in range(ht_final.size(0))], dim=1)

        # apply attention
        hidden_state = hidden_state.unsqueeze(2) # (B, hidden * 2, 1)
        attention_scores = torch.bmm(out, hidden_state).squeeze(2)
        soft_attention_weights = F.softmax(attention_scores, 1).unsqueeze(2)   # (B, L, 1)
        attention_out = torch.bmm(out.permute(0, 2, 1), soft_attention_weights).squeeze(2)

        features = torch.cat([hidden_state.squeeze(2), attention_out], dim=1)
        features = self.dropout_1(features)
        features = self.fc1(features)
        features = self.bn1(features)
        features = F.relu(features)
        features = self.dropout_2(features)
        logits = self.fc2(features)
        return logits

if __name__ == '__main__':
    import torch
    from torch.nn.utils.rnn import pad_sequence

    # Smoke test: push three toy sentences of decreasing length through the
    # classifier.
    sentences = [torch.LongTensor(ids)
                 for ids in ([2, 3, 4, 5], [6, 7, 8], [9, 10])]
    x = pad_sequence(sentences, batch_first=True, padding_value=0)
    # 1.0 where a real token is present, 0.0 on padding.
    masks = x.ne(0).type(torch.FloatTensor)

    len_x = [4, 3, 2]
    model = TopicClassLSTM(vocab_size=11,
                           emb_size=10,
                           embedding_tensor=None,
                           freeze=False,
                           dropout=0.2,
                           lstm_hidden=200,
                           num_classes=16)
    out = model(x, len_x)
Пример #18
0
    def train_model(cls, online_net, target_net, optimizer, batch, batch_size,
                    sequence_length, gamma, use_deeper_net):
        """Run one optimisation step of ``online_net`` on a batch of sequences.

        The per-step target is ``reward + mask * gamma * max(next_pred)``,
        with ``next_pred`` coming from ``target_net``. The loss is the mean
        of the squared difference between the prediction for the taken
        action and that target, with padded time steps zeroed out.

        Args:
            online_net: Network being trained. Called as
                ``online_net(states, (h, c), inference=False,
                max_length=..., lengths=...)`` and expected to return a
                ``(prediction, _)`` pair.
            target_net: Network with the same call signature, used for the
                next-state predictions.
            optimizer: Optimizer holding the parameters of ``online_net``.
            batch: Object with list attributes ``state``, ``next_state``,
                ``action``, ``reward``, ``mask`` and ``rnn_state`` (one
                tensor per sequence).
            batch_size: Number of sequences in ``batch``.
            sequence_length: Nominal sequence length. Kept for interface
                compatibility; the loss mask is sized from the longest
                sequence actually present in the batch.
            gamma: Discount factor applied to the next-state value.
            use_deeper_net: When true the networks receive padded tensors,
                otherwise packed sequences.

        Returns:
            The scalar loss tensor of this step.
        """
        # batch.state is a list of tensors of shape (seq_length, input_dim)
        # so seq.size()[0] = the length of the sequence
        lengths = np.array([seq.size()[0] for seq in batch.state])
        max_length = int(np.max(lengths))

        # ===== compute loss mask =====

        # for example, if max_length == 3, then lower_triangular_matrix =
        # 1 0 0
        # 1 1 0
        # 1 1 1
        # suppose lengths == np.array([2, 3, 1]), then lengths - 1 == np.array([1, 2, 0]) and
        # the loss_mask computed from lower_triangular_matrix[lengths-1] is
        # 1 1 0
        # 1 1 1
        # 1 0 0
        # which corresponds to lengths correctly
        #
        # The matrix is sized by max_length rather than sequence_length:
        # every padded tensor below has max_length time steps, and
        # max_length can be smaller than sequence_length, in which case a
        # sequence_length-wide mask would not match the prediction shape.
        lower_triangular_matrix = np.tril(np.ones((max_length, max_length)))
        loss_mask = lower_triangular_matrix[
            lengths - 1]  # first convert from 1-based to 0-based indexing
        loss_mask = torch.tensor(loss_mask)  # has shape (bs, max_length)

        if use_deeper_net:

            states = pad_sequence(batch.state, batch_first=True)
            next_states = pad_sequence(batch.next_state, batch_first=True)

        else:

            states = pack_padded_sequence(pad_sequence(batch.state,
                                                       batch_first=True),
                                          lengths=lengths,
                                          batch_first=True,
                                          enforce_sorted=False)

            next_states = pack_padded_sequence(pad_sequence(batch.next_state,
                                                            batch_first=True),
                                               lengths=lengths,
                                               batch_first=True,
                                               enforce_sorted=False)

        # max_length == sequence_length most of the times, but not always
        actions = pad_sequence(batch.action, batch_first=True).view(
            batch_size, max_length, -1).long()  # has shape (bs, max_length, 1)
        rewards = pad_sequence(batch.reward, batch_first=True).view(
            batch_size, max_length, -1)  # has shape (bs, max_length, 1)
        masks = pad_sequence(batch.mask, batch_first=True).view(
            batch_size, max_length, -1)  # has shape (bs, max_length, 1)

        # Recurrent state stored with each sequence: index [0] feeds the
        # online net, index [1] feeds the target net; the second index
        # selects the h (0) or c (1) part.
        h0 = torch.stack([
            rnn_state[0, 0, :] for rnn_state in batch.rnn_state
        ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)
        c0 = torch.stack([
            rnn_state[0, 1, :] for rnn_state in batch.rnn_state
        ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)

        h1 = torch.stack([
            rnn_state[1, 0, :] for rnn_state in batch.rnn_state
        ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)
        c1 = torch.stack([
            rnn_state[1, 1, :] for rnn_state in batch.rnn_state
        ]).unsqueeze(0).detach()  # has shape (1, bs, hidden_size)

        pred, _ = online_net(states, (h0, c0),
                             inference=False,
                             max_length=max_length,
                             lengths=lengths)
        next_pred, _ = target_net(next_states, (h1, c1),
                                  inference=False,
                                  max_length=max_length,
                                  lengths=lengths)

        # Prediction for the action actually taken at every step.
        pred = pred.gather(2, actions).squeeze()  # has shape (bs, max_length)

        target = rewards + masks * gamma * next_pred.max(2, keepdim=True)[0]
        target = target.squeeze()  # has shape (bs, max_length)

        # Padded steps contribute zero to the (still batch-wide) mean.
        loss = torch.mean(((pred - target.detach())**2) * loss_mask.float())
        optimizer.zero_grad()
        loss.backward()
        torch.nn.utils.clip_grad_norm_(online_net.parameters(), 1.0)
        optimizer.step()

        return loss
Пример #19
0
def padded_collate(batch):
    """Collate a list of samples into a single ``PaddedBatch``.

    Each sample is unpacked as ``(graph_id, targets, char_indices,
    *index_groups)``. The batch is sorted in place (longest first) by the
    length of the first index group, so the caller's list is reordered too.

    Returns a ``PaddedBatch`` built from: the graph ids, one padded square
    target tensor (or ``None``) per target kind, the boolean un-padding mask,
    a ``CharContainer`` describing the batch-local word vocabulary, the
    sequence lengths, and every index group padded with ``pad_sequence``.
    """
    # Longest sequence first (in-place sort).
    batch.sort(key=lambda sample: len(sample[3]), reverse=True)

    graph_ids, targetss, char_indices, *index_groups = zip(*batch)

    # Number of words in each sequence; the first entry is the maximum
    # because of the sort above.
    seq_lengths = torch.LongTensor([len(ids) for ids in index_groups[0]])
    max_word_count = seq_lengths[0]
    sample_count = len(seq_lengths)

    padded_targetss = []
    unpadding_mask = None
    # Regroup the targets so that each iteration sees one target kind
    # across the whole batch.
    for targets in zip(*targetss):
        if targets[0] is None:
            # NOTE: a missing target kind also discards any mask built so far.
            padded_targetss.append(None)
            unpadding_mask = None
            continue

        padded_targets = torch.zeros(
            sample_count, max_word_count, max_word_count, dtype=torch.long)
        if unpadding_mask is None:
            unpadding_mask = torch.zeros_like(padded_targets, dtype=torch.bool)
        for row, (target, length) in enumerate(zip(targets, seq_lengths)):
            padded_targets[row, :length, :length] = target
            unpadding_mask[row, :length, :length] = 1
        padded_targetss.append(padded_targets)

    # Batch-specific word vocabulary: every distinct word (expressed by its
    # character indices) plus the padding word, longest word first.
    vocabulary = [*{word for sentence in char_indices for word in sentence},
                  PAD_WORD]
    vocabulary.sort(key=len, reverse=True)
    voc_lengths = torch.LongTensor([len(word) for word in vocabulary])
    voc_lookup = {word: position for position, word in enumerate(vocabulary)}
    batch_voc = pad_sequence(
        [torch.LongTensor(word) for word in vocabulary], batch_first=True)

    # Every slot starts out pointing at the padding word ...
    index_mapping = torch.full(
        size=(len(batch), max_word_count),
        fill_value=voc_lookup[PAD_WORD],
        dtype=torch.long)
    # ... and real words are then mapped to their vocabulary index.
    for row, sentence in enumerate(char_indices):
        for col, word in enumerate(sentence):
            index_mapping[row, col] = voc_lookup[word]

    char_container = CharContainer(index_mapping, batch_voc, voc_lengths)
    padded_groups = [
        pad_sequence(group, batch_first=True) for group in index_groups
    ]
    return PaddedBatch(graph_ids, padded_targetss, unpadding_mask,
                       char_container, seq_lengths, padded_groups)
Пример #20
0
    def test_case4(self):
        """Compute the GPU CTC loss for a three-example minibatch.

        The three examples are padded into one activation tensor, handed to
        ``ctc.ComputeCtcLossGpu`` through DLPack views, and the per-example
        costs are compared against fixed reference values. Requires a CUDA
        device (``device_id`` comes from the enclosing module).
        """
        device = torch.device('cuda', device_id)
        # combine case1 to case3 to a minibatch
        # the first example (a): input_length: 1, label_length: 1
        # the second example (c, c): input_length: 3, label_length: 2
        # the third example (b, c): input_length: 3, label_length: 2
        label_lengths_tensor = torch.tensor([1, 2, 2], dtype=torch.int32)
        input_lengths_tensor = torch.tensor([1, 3, 3], dtype=torch.int32)

        alphabet_size = 5
        minibatch = 3
        info = ctc.CtcOptions()
        info.loc = ctc.CtcComputeLocation.CTC_GPU
        info.blank_label = 0

        # Length vectors stay on the CPU and are shared via DLPack.
        label_lengths = kaldi.IntSubVectorFromDLPack(
            to_dlpack(label_lengths_tensor))

        input_lengths = kaldi.IntSubVectorFromDLPack(
            to_dlpack(input_lengths_tensor))

        status, size_in_bytes = ctc.GetWorkspaceSize(
            label_lengths=label_lengths,
            input_lengths=input_lengths,
            alphabet_size=alphabet_size,
            minibatch=minibatch,
            info=info)
        self.assertEqual(status, ctc.CtcStatus.CTC_STATUS_SUCCESS)
        # The workspace is allocated as float32 (4 bytes each); the extra
        # element covers a byte count that is not a multiple of 4.
        num_floats = size_in_bytes // 4 + 1
        workspace_tensor = torch.empty(
            num_floats, dtype=torch.float32).contiguous().to(device)

        ex1 = torch.tensor([[0.2, 0.2, 0.2, 0.2, 0.2]], dtype=torch.float32)

        ex2 = torch.tensor(
            [[1, 2, 3, 4, 5], [6, 7, 8, 9, 10], [11, 12, 13, 14, 15]],
            dtype=torch.float32)

        ex3 = torch.tensor([[-5, -4, -3, -2, -1], [-10, -9, -8, -7, -6],
                            [-15, -14, -13, -12, -11]],
                           dtype=torch.float32)

        # Time-major padding (batch_first=False), then flattened to 1-D
        # before being moved to the GPU.
        activations_tensor = pad_sequence([ex1, ex2, ex3], batch_first=False)

        activations_tensor = activations_tensor.contiguous().view(-1).to(device)
        gradients_tensor = torch.empty_like(activations_tensor)

        # labels are: (a), (c, c) (b, c)
        # which are:  (1), (3, 3), (2, 3)
        flat_labels_tensor = torch.tensor([1, 3, 3, 2, 3], dtype=torch.int32)
        costs_tensor = torch.empty(minibatch, dtype=torch.float32)

        activations = kaldi.CuSubVectorFromDLPack(to_dlpack(activations_tensor))
        gradients = kaldi.CuSubVectorFromDLPack(to_dlpack(gradients_tensor))
        flat_labels = kaldi.IntSubVectorFromDLPack(
            to_dlpack(flat_labels_tensor))
        costs = kaldi.FloatSubVectorFromDLPack(to_dlpack(costs_tensor))
        workspace = kaldi.CuSubVectorFromDLPack(to_dlpack(workspace_tensor))

        # NOTE(review): the returned status is not asserted here, and the
        # gradients are computed but never checked.
        status = ctc.ComputeCtcLossGpu(activations=activations,
                                       gradients=gradients,
                                       flat_labels=flat_labels,
                                       label_lengths=label_lengths,
                                       input_lengths=input_lengths,
                                       alphabet_size=alphabet_size,
                                       minibatch=minibatch,
                                       costs=costs,
                                       workspace=workspace,
                                       options=info)

        # One cost per example, in minibatch order.
        self.assertAlmostEqual(costs[0], 1.6094379425049)
        self.assertAlmostEqual(costs[1], 7.355742931366)
        self.assertAlmostEqual(costs[2], 4.938850402832, places=6)
Пример #21
0
 def collate(batch):
     """Pad each field of the batch and cast it to ``torch.long``.

     ``batch`` is a sequence of samples, every sample being a tuple of
     tensors. The result is a list with one zero-padded, batch-first tensor
     per field.
     """
     padded_fields = []
     for field in zip(*batch):
         padded = pad_sequence(field, batch_first=True)
         padded_fields.append(padded.to(dtype=torch.long))
     return padded_fields
def pad_collate(batch):
    """Collate ``(sentence_tensor, label)`` pairs.

    Returns the sentences zero-padded to a common length (batch first) and
    the labels as a ``torch.LongTensor``.
    """
    sentences, labels = zip(*batch)
    padded_sentences = pad_sequence(
        sentences, batch_first=True, padding_value=0)
    label_tensor = torch.LongTensor(labels)
    return padded_sentences, label_tensor
Пример #23
0
 def collate(batch):
     """Collate ``(tokens, id, set)`` triples.

     The token tensors are zero-padded (batch first) and cast to
     ``torch.long``; the other two fields are returned as tuples, unchanged.
     """
     token_seqs, query_ids, relevant_sets = zip(*batch)
     token_matrix = pad_sequence(token_seqs, batch_first=True)
     return token_matrix.to(dtype=torch.long), query_ids, relevant_sets
Пример #24
0
 def _process_data_batch(self, data_batch):
     """Pad a list of sequences and move the result to the GPU.

     Each sequence must have shape ``(L, *)``. Returns the zero-padded,
     batch-first tensor with an extra axis inserted at position 1 (on the
     CUDA device) together with the list of original sequence lengths.
     """
     lengths = list(map(len, data_batch))
     padded = pad_sequence(data_batch, batch_first=True)
     on_gpu = padded.unsqueeze(1).cuda()
     return on_gpu, lengths
Пример #25
0
 def collate_fn(samples):
     """Pad ``(seq_len, channel)`` samples and return them channel-first.

     The samples are zero-padded to ``(batch_size, max_len, channel)`` and the
     last two axes are swapped, giving a contiguous
     ``(batch_size, channel, max_len)`` tensor.
     """
     batch = pad_sequence(samples, batch_first=True)
     channels_first = batch.transpose(-1, -2)
     return channels_first.contiguous()
Пример #26
0
    def forward(self, features, labels,
                records, logger, prefix, global_step, **kwargs):
        """
        This function will be used in both train/dev/test, you can use
        self.training (bool) to control the different behavior for
        training or evaluation (dev/test)

        Args:
            features:
                list of unpadded features [feat1, feat2, ...]
                each feat is in torch.FloatTensor and already
                put in the device assigned by command-line args

            labels:
                class ids, one per feature; converted to a LongTensor and
                moved to the same device as the (connected) features

            records:
                defaultdict(list), by dumping contents into records,
                these contents can be averaged and logged on Tensorboard
                later by self.log_records

                Note1. downstream/runner.py will call self.log_records
                    1. every log_step during training
                    2. once after evalute the whole dev/test dataloader

                Note2. log_step is defined in your downstream config

            logger:
                Tensorboard SummaryWriter, given here for logging/debugging convenience
                please use f'{prefix}your_content_name' as key name
                to log your customized contents

            prefix:
                used to indicate downstream and train/test on Tensorboard
                eg. 'phone/train-'

            global_step:
                global_step in runner, which is helpful for Tensorboard logging

        Return:
            loss:
                the loss to be optimized, should not be detached
                a single scalar in torch.FloatTensor
        """
        features_pad = pad_sequence(features, batch_first=True)

        # 1 for every real frame; pad_sequence fills the padding with 0.
        attention_mask = [torch.ones((feature.shape[0])) for feature in features]
        attention_mask_pad = pad_sequence(attention_mask, batch_first=True)
        # Turn it into an additive mask: 0 on real frames, -100000 on padding.
        attention_mask_pad = (1.0 - attention_mask_pad) * -100000.0

        features_pad = self.connector(features_pad)
        # Follow the features' device instead of hard-coding ``.cuda()`` so
        # the mask, the labels and the features always live together (CPU
        # runs and non-default GPUs included).
        device = features_pad.device
        predicted = self.model(features_pad, attention_mask_pad.to(device))

        labels = torch.LongTensor(labels).to(device)
        loss = self.objective(predicted, labels)

        predicted_classid = predicted.max(dim=-1).indices
        records['acc'] += (predicted_classid == labels).view(-1).cpu().float().tolist()

        if not self.training:
            # some evaluation-only processing, eg. decoding
            pass

        return loss
Пример #27
0
def collect(sequences):
    """Sort tensors by shape (largest first), then pad them into one batch.

    Returns the zero-padded, batch-first tensor and the list of first-axis
    lengths in the sorted order. The input iterable itself is not modified.
    """
    ordered = sorted(sequences, key=lambda seq: seq.size(), reverse=True)
    lengths = [seq.size(0) for seq in ordered]
    padded = pad_sequence(ordered, batch_first=True)
    return padded, lengths
def collate_fn(batch):
    """Collate ``(token_tensor, label)`` pairs.

    Returns the token tensors zero-padded into one batch-first tensor and the
    labels gathered into a single tensor.
    """
    token_seqs, raw_labels = zip(*batch)
    label_tensor = torch.tensor(raw_labels)
    padded_tokens = pad_sequence(token_seqs, batch_first=True)
    return padded_tokens, label_tensor
Пример #29
0
    def forward(self, mode, features, utter_idx, labels, records, **kwargs):
        """
        Args:
            mode:
                'train' computes and returns the training loss;
                'dev' / 'test' score pairs of utterances and return a
                dummy zero tensor

            features:
                the features extracted by upstream
                put in the device assigned by command-line args

            utter_idx:
                utterance names; only used in dev/test, where they are split
                into pairs with self.separate_data to build the pair names

            labels:
                the speaker labels

            records:
                defaultdict(list), by appending scalars into records,
                these scalars will be averaged and logged on Tensorboard

            **kwargs:
                extra arguments supplied by the caller; not used here

        Return:
            loss:
                the loss to be optimized, should not be detached
                (``torch.tensor(0)`` in dev/test mode)
        """

        features_pad = pad_sequence(features, batch_first=True)

        if self.modelrc['module'] == "XVector":
            # TDNN layers in XVector will decrease the total sequence length by fixed 14
            attention_mask = [
                torch.ones((feature.shape[0] - 14)) for feature in features
            ]
        else:
            attention_mask = [
                torch.ones((feature.shape[0])) for feature in features
            ]

        attention_mask_pad = pad_sequence(attention_mask, batch_first=True)
        # Additive mask: 0 on real frames, -100000 on padding.
        attention_mask_pad = (1.0 - attention_mask_pad) * -100000.0

        features_pad = self.connector(features_pad)
        # Keep the mask on the features' device instead of hard-coding
        # ``.cuda()``, so CPU runs and non-default GPUs work as well.
        attention_mask_pad = attention_mask_pad.to(features_pad.device)

        if mode == 'train':
            agg_vec = self.model(features_pad, attention_mask_pad)
            labels = torch.LongTensor(labels).to(features_pad.device)
            loss = self.objective(agg_vec, labels)
            records['loss'].append(loss.item())
            return loss

        elif mode in ['dev', 'test']:
            agg_vec = self.model.inference(features_pad, attention_mask_pad)
            agg_vec = torch.nn.functional.normalize(agg_vec, dim=-1)

            # separate batched data to pair data.
            vec1, vec2 = self.separate_data(agg_vec)
            names1, names2 = self.separate_data(utter_idx)

            scores = self.score_fn(vec1, vec2).cpu().detach().tolist()
            records['scores'].extend(scores)
            records['labels'].extend(labels)
            records['pair_names'].extend(
                [f"{name1}_{name2}" for name1, name2 in zip(names1, names2)])

            return torch.tensor(0)
Пример #30
0
 def collect_fn_quadkey(batch,
                        data_source,
                        sampler,
                        region_processer,
                        loc2quadkey=None,
                        k=5,
                        with_trg_quadkey=True):
     """Collate ``(source, target)`` sequence pairs, sampling negatives.

     Args:
         batch: iterable of ``(src, trg)`` pairs. Every ``src`` event is a
             5-tuple ``(user, loc, time, region, _)``; for ``trg`` events
             only index 0 (user), index 1 (location) and the last element
             (a flag that is negated into ``trg_nov_``) are read.
         data_source: unused; kept for interface compatibility.
         sampler: callable ``sampler(seq, k, user=...)`` returning
             ``(negatives, probs)`` tensors for one target sequence.
         region_processer: object whose ``numericalize(list)`` turns a list
             of region keys into a tensor.
         loc2quadkey: indexable mapping from a location id to its region
             key; required when ``with_trg_quadkey`` is true.
         k: number of negative samples requested from ``sampler``.
         with_trg_quadkey: also build the numericalized region keys of every
             target candidate.

     Returns:
         ``(user, loc, time, region, trg, [trg_regions,] trg_nov, trg_probs,
         data_size)`` where ``trg_regions`` is present only when
         ``with_trg_quadkey`` is true. ``user``/``loc``/``time`` are
         ``(T, N)``, ``region`` is time-major, ``trg`` is ``((1+k)*T, N)``.
     """
     src, trg = zip(*batch)

     users, locs, times, regions = [], [], [], []
     data_size = []
     for events in src:
         u_, l_, t_, r_, _ = zip(*events)
         data_size.append(len(u_))
         users.append(torch.tensor(u_))
         locs.append(torch.tensor(l_))
         times.append(torch.tensor(t_))
         # (L, LEN_QUADKEY)
         regions.append(region_processer.numericalize(list(r_)))

     # (N, T) here and below; transposed to (T, N) with .t() on return.
     user_ = pad_sequence(users, batch_first=True)
     loc_ = pad_sequence(locs, batch_first=True)
     time_ = pad_sequence(times, batch_first=True)
     # (T, N, LEN_QUADKEY)
     region_ = pad_sequence(regions, batch_first=False)

     trg_ = []
     trg_probs_ = []
     batch_trg_regs = []
     for seq in trg:
         pos = torch.tensor([[e[1]] for e in seq])
         neg, probs = sampler(seq, k, user=seq[0][0])
         # (L, k+1): the positive location followed by the k negatives.
         trg_seq = torch.cat([pos, neg], dim=-1)
         trg_.append(trg_seq)
         trg_probs_.append(probs)
         if with_trg_quadkey:
             # Region keys of every candidate, one row per target step.
             batch_trg_regs.append(torch.stack([
                 region_processer.numericalize(
                     [loc2quadkey[candidate] for candidate in row])
                 for row in trg_seq
             ]))

     trg_padded = pad_sequence(trg_, batch_first=True)  # (N, T, k+1)
     trg_probs_ = pad_sequence(trg_probs_,
                               batch_first=True,
                               padding_value=1.0)
     # ((1+k) * T, N)
     trg_ = trg_padded.permute(2, 1, 0).contiguous().view(
         -1, trg_padded.size(0))
     trg_nov_ = [[not e[-1] for e in seq] for seq in trg]

     if not with_trg_quadkey:
         return (user_.t(), loc_.t(), time_.t(), region_, trg_, trg_nov_,
                 trg_probs_, data_size)

     # (N, T, k+1, LEN_QUADKEY)
     regs_padded = pad_sequence(batch_trg_regs, batch_first=True)
     # ((1+k) * T, N, LEN_QUADKEY)
     batch_trg_regs = regs_padded.permute(2, 1, 0, 3).contiguous().view(
         -1, regs_padded.size(0), regs_padded.size(3))
     return (user_.t(), loc_.t(), time_.t(), region_, trg_, batch_trg_regs,
             trg_nov_, trg_probs_, data_size)
Пример #31
0
    def forward(self, sentences, doc_lens=[], batch_sent_lens=[], log=False):
        """Score every sentence of every document in the batch.

        Args:
            sentences: token ids of all sentences of all documents, one row
                per sentence; fed to ``self.embedding``.
            doc_lens: number of sentences in each document, in batch order.
                The rows of ``sentences`` are split into documents by these
                counts.
            batch_sent_lens: number of tokens in each sentence (one entry per
                row of ``sentences``).
            log: when true, print the individual score components for every
                sentence.

        Returns:
            A ``(num_docs, max_doc_len)`` tensor of per-sentence
            probabilities, zero-padded past each document's length.

        Note: the list defaults are never mutated, so sharing them between
        calls is harmless.
        """
        word_embeddings = self.embedding(sentences)
        batch_len, word_len, embedding_dim = word_embeddings.shape

        # ---- word level: encode the tokens, attend, pool per sentence ----
        packed_word_embeddings = pack_padded_sequence(
            word_embeddings,
            torch.LongTensor(batch_sent_lens),
            batch_first=True,
            enforce_sorted=False,
        )
        packed_word_encoder_hidden_states = self.word_encoder(packed_word_embeddings)
        word_encoder_hidden_states, _ = pad_packed_sequence(
            packed_word_encoder_hidden_states, batch_first=True
        )
        word_attention_weights = self.word_attention(word_encoder_hidden_states)

        # Squeeze only the singleton axis produced by the matmul. A bare
        # squeeze() would also drop the batch axis when there is a single
        # sentence, breaking the slicing below.
        # (assumes the attention weights are (num_sentences, num_words))
        sentence_vectors = torch.matmul(
            torch.unsqueeze(word_attention_weights, 1), word_encoder_hidden_states
        ).squeeze(1)

        # ---- regroup the flat list of sentences into documents ----
        n = 0
        batch_sentence_vectors = []
        for doc_len in doc_lens:
            batch_sentence_vectors.append(sentence_vectors[n:n+doc_len])
            n += doc_len
        padded_sentence_vectors = pad_sequence(batch_sentence_vectors, batch_first=True)
        packed_sentence_vectors = pack_padded_sequence(
            padded_sentence_vectors,
            torch.LongTensor(doc_lens),
            batch_first=True,
            enforce_sorted=False,
        )

        # ---- sentence level: encode, attend, pool per document ----
        packed_sentence_encoder_hidden_states = self.sentence_encoder(packed_sentence_vectors)
        sentence_encoder_hidden_states, _ = pad_packed_sequence(
            packed_sentence_encoder_hidden_states, batch_first=True
        )
        sentence_attention_weights = self.sentence_attention(
            sentence_encoder_hidden_states
        )

        # Same reasoning as above: keep the batch axis for a single document.
        document_vectors = torch.matmul(
            torch.unsqueeze(sentence_attention_weights, 1),
            sentence_encoder_hidden_states,
        ).squeeze(1)

        # ---- per-sentence probability ----
        batch_probs = []
        for doc_index, doc_len in enumerate(doc_lens):
            # Running sum of probability-weighted sentence vectors seen so
            # far in this document; feeds the novelty term.
            o = torch.zeros(
                2 * self.hparams.lstm_hidden_size,
                device=self.device,
            )
            document_vector = document_vectors[doc_index]
            probs = []
            for pos in range(doc_len):
                sentence_vector = padded_sentence_vectors[doc_index, pos, :]

                C = self.content(sentence_vector)
                M = self.salience(sentence_vector, document_vector)
                N = self.novelty(sentence_vector, torch.tanh(o))

                # Position counted from the start and from the end of the
                # document.
                pos_forward = self.pos_forward_embed(
                    torch.tensor([pos], dtype=torch.long, device=self.device)
                ).view(-1)
                pos_backward = self.pos_backward_embed(
                    torch.tensor(
                        [doc_len - pos - 1],
                        dtype=torch.long,
                        device=self.device,
                    )
                ).view(-1)
                positional_embedding = torch.cat((pos_forward, pos_backward))

                P = self.position(positional_embedding)

                prob = torch.sigmoid(C + M - N + P + self.bias)

                o = o + (prob * sentence_vector)

                if log:
                    print(
                        f"doc {doc_index+1}, sentence {pos+1}, C: {C.item():10.4f}, M: {M.item():10.4f}, N: {N.item():10.4f}, bias: {self.bias.item():10.4f}, prob: {prob.item():10.4f}, o: {o}"
                    )

                probs.append(prob)

            batch_probs.append(torch.cat(probs))
        return pad_sequence(batch_probs, batch_first=True)
Пример #32
0
 def collate(examples: List[torch.Tensor]):
     """Pad the examples into one batch-first tensor.

     The padding value is ``tokenizer.pad_token_id`` when the tokenizer from
     the enclosing scope defines a pad token; otherwise ``pad_sequence``'s
     default padding value is used.
     """
     pad_kwargs = {"batch_first": True}
     if tokenizer._pad_token is not None:
         pad_kwargs["padding_value"] = tokenizer.pad_token_id
     return pad_sequence(examples, **pad_kwargs)
Пример #33
0
 def _collate_fn(self, data):
     """Zero-pad the first and the last field of every sample.

     Returns ``(word, label)``: the first element of each sample and the last
     element of each sample, each padded into a batch-first tensor.
     """
     def pad_field(position):
         return pad_sequence([sample[position] for sample in data],
                             batch_first=True,
                             padding_value=0)

     return pad_field(0), pad_field(-1)
Пример #34
0
    # Training loop: one randomly sampled minibatch per iteration.
    stats_loss = []
    t0 = time.time()
    for i in range(max_iter):

        # Clear the gradients accumulated by the previous iteration.
        encoder_optimizer.zero_grad()
        decoder_optimizer.zero_grad()

        # data batch : TODO - Loader needed
        # Sample batch_size indices uniformly at random (with replacement).
        rnd_idx = np.random.randint(len(libri), size=batch_size)
        batches = [libri[r] for r in rnd_idx]
        xbs = [torch.tensor(b[0]) for b in batches]  # t,d
        # NOTE(review): xlen is only referenced by the commented-out encoder
        # call below within the visible part of the loop.
        xlen = torch.tensor([get_padded_len(_x, n_conv_layers)
                             for _x in xbs]).cuda()
        #xlen = torch.tensor([len(_x) for _x in xbs]).cuda()

        # pad_sequence defaults to batch_first=False, so the padded inputs
        # and targets below are time-major.
        x_batches = pad_sequence(xbs).cuda()

        # Decoder inputs drop the last label and are padded with EOS_TOKEN;
        # decoder targets drop the first label and are padded with PAD_TOKEN.
        ybs = [b[1][:-1] for b in batches]
        y_inputs = pad_sequence(ybs, padding_value=EOS_TOKEN).long().cuda()
        ybs = [b[1][1:] for b in batches]
        y_outputs = pad_sequence(ybs, padding_value=PAD_TOKEN).long().cuda()

        out_enc = encoder(x_batches)
        # All-zero tensor handed to the decoder as h_enc.
        h_enc = torch.zeros([batch_size, h_dim]).cuda()
        # model forward path
        #        out_enc, h_enc = encoder(x_batches, xlen)
        prediction, ce_loss = decoder(y_inputs, y_outputs, out_enc, h_enc, i)

        ce_loss.backward()

        # Clip the encoder gradients; the returned norm is discarded.
        _ = nn.utils.clip_grad_norm_(encoder.parameters(), grad_clip)
Пример #35
0
    def test_step(self, batch, batch_idx):
        """Greedily build an extractive summary for one test example.

        On every iteration the model scores all source sentences given the
        summary built so far; the best not-yet-chosen sentence is appended
        until either ``MAX_GEN_SUM_SENTS`` sentences were tried, all source
        sentences were tried, or the token budget ``MAX_GEN_SUM_TOK_CT``
        would be exceeded.

        The ``counts`` and ``masks`` dictionaries taken from ``batch`` are
        updated in place with the growing summary.

        Returns a ``pl.EvalResult`` carrying the chosen sentence order, the
        summary text, the reference and the gold rank of each pick.
        """
        sent_order = []
        sum_ids = []
        sum_sent_lens = []
        sum_sent_toks = []
        sum_len = 0
        source_ids_flat_pad, sum_ids_flat_pad, target_dist, counts, masks, metadata = batch
        # Source sentence indices ordered by decreasing target score.
        gold_sent_order = list(np.argsort(tens_to_np(-target_dist.squeeze())))
        # Only element 0 of each metadata field is read
        # (presumably the batch holds a single example - TODO confirm).
        num_sents = len(metadata['source_sents'][0])
        mrn = metadata['mrn'][0]
        rel_ranks = []
        account = metadata['account'][0]
        for _ in range(min(num_sents, MAX_GEN_SUM_SENTS)):
            # Move the current inputs to the model's device.
            i0 = source_ids_flat_pad.to(self.device_name)
            i1 = sum_ids_flat_pad.to(self.device_name)
            i2 = {}
            i3 = {}
            for k, v in counts.items():
                i2[k] = v.to(self.device_name)
            for k, v in masks.items():
                i3[k] = v.to(self.device_name)

            y_hat_scores = self(i0, i1, i2, i3)
            y_hat_scores = tens_to_np(y_hat_scores.squeeze(0))
            # Never pick the same sentence twice.
            if len(sent_order) > 0:
                y_hat_scores[sent_order] = float('-inf')

            max_idx = np.argmax(y_hat_scores)
            # Position of the pick in the gold ordering. This is recorded
            # before the budget check, so the last rank may belong to a
            # sentence that is not added to the summary.
            rel_ranks.append(gold_sent_order.index(max_idx))
            sent_sum_len = counts['source_sent_lens_flat'][max_idx]
            sum_len += sent_sum_len
            if sum_len > MAX_GEN_SUM_TOK_CT:
                break

            sent_order.append(max_idx)
            chosen_sent_toks = metadata['source_sents'][0][max_idx]
            sum_sent_toks.append(chosen_sent_toks)
            num_sum_sents = len(sent_order)
            chosen_sent_ids = list(
                tens_to_np(source_ids_flat_pad[max_idx][:sent_sum_len]))
            sum_ids.append(chosen_sent_ids)
            sum_sent_lens.append(sent_sum_len)
            # Rebuild the padded summary and its bookkeeping for the next
            # scoring pass.
            sum_ids_flat = list(map(torch.LongTensor, sum_ids))
            sum_ids_flat_pad = pad_sequence(sum_ids_flat,
                                            batch_first=True,
                                            padding_value=0)
            sum_att_mask = mask_2D([num_sum_sents])
            counts['sum_sent_lens_flat'] = torch.LongTensor(sum_sent_lens)
            counts['sum_lens'] = torch.LongTensor([len(sent_order)])
            masks['sum_att_mask'] = sum_att_mask
        result = pl.EvalResult()
        result.mrn = mrn
        result.account = account
        result.sent_order = ','.join([str(s) for s in sent_order])
        result.sum_sent_toks = ' <s> '.join(sum_sent_toks)
        result.reference = metadata['reference'][0]

        # NOTE(review): these reads raise IndexError when fewer than five
        # sentences were ranked (short source or early budget break).
        result.rel_r1 = rel_ranks[0]
        result.rel_r2 = rel_ranks[1]
        result.rel_r3 = rel_ranks[2]
        result.rel_r4 = rel_ranks[3]
        result.rel_r5 = rel_ranks[4]
        # NOTE(review): raises ZeroDivisionError when there are five or
        # fewer ranks, because the slice is then empty.
        result.rel_r5plus = sum(rel_ranks[5:]) / float(len(rel_ranks[5:]))

        return result
Пример #36
0
 def __call__(self, batch):
     """Collate a batch column by column.

     The batch is laid out as a ``DataFrame`` (one row per sample). Column 0
     goes through ``self.pad_collate``, columns 1 to 6 are padded with
     ``pad_sequence`` (batch first) and any later column is returned as a
     plain list.
     """
     frame = pd.DataFrame(batch)

     collated = []
     for col in frame:
         column = frame[col]
         if col == 0:
             collated.append(self.pad_collate(column))
         elif col < 7:
             collated.append(pad_sequence(column, True))
         else:
             collated.append(column.tolist())
     return collated
Пример #37
0
            embeddings.weight.data[idx] = (torch.FloatTensor(values))


model = BiLSTM_CRF(len(word_to_ix), tag_to_ix, EMBEDDING_DIM, HIDDEN_DIM,
                   BS).to(device)
load_fastext_embeeding(model.word_embeds, word_to_ix, "wiki-news-300d-1M.vec")
model.word_embeds.requires_grad = False

optimizer = optim.Adam(model.parameters(), lr=1e-3, weight_decay=1e-4)

best_f1 = -1
for epoch in range(epochs):
    for i, batch in enumerate(train_dataloader):
        model.zero_grad()
        sents, labs, lens = batch
        sents = pad_sequence(sents, batch_first=True).to(device)
        labs = pad_sequence(labs, batch_first=True).to(device)
        lens = torch.tensor(lens).to(device)
        lens, idx = torch.sort(lens, descending=True)
        sents = sents[idx]
        labs = labs[idx]
        loss = model.neg_log_likelihood(sents, labs, lens)
        loss.backward()
        optimizer.step()
        score, preds = model(sents, lens)
        true_labs = [id2lab(labs[i, :l]) for i, l in enumerate(lens)]
        pred_labs = [id2lab(preds[i, :l]) for i, l in enumerate(lens)]
        acc = accuracy_score(true_labs, pred_labs)
        f1 = f1_score(true_labs, pred_labs)
        print(
            "Epoch {}, batch {}, train loss {:.4f}, train acc {:.4f}, train f1 {:.4f} "