Example #1
 def init_hidden(self, cur_batch_size):
     return (to_cuda(
         torch.randn(self.num_layers * self.bidirectional_num,
                     cur_batch_size, self.hidden_size)),
             to_cuda(
                 torch.randn(self.num_layers * self.bidirectional_num,
                             cur_batch_size, self.hidden_size)))
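Note: every snippet on this page funnels tensors and modules through a `to_cuda` helper (several examples below import it from `common.problem_util`), but its body never appears here. A minimal sketch consistent with the call sites, assuming it should be a no-op on CPU-only machines:

import torch

def to_cuda(x):
    # hypothetical definition: move a tensor/module to the GPU when one
    # is available, otherwise return the input unchanged
    return x.cuda() if torch.cuda.is_available() else x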
Example #2
 def predict_file(self, vector: Sequence[Vind]) -> Iterable[TokenResult]:
     seq = to_cuda(torch.LongTensor([vector]))
     length = to_cuda(torch.LongTensor([len(vector)]))
     forward, backward = self.rnn_model(seq, length)
     forward = F.softmax(forward, dim=-1)
     backward = F.softmax(backward, dim=-1)
     # move results to the CPU before converting; CUDA tensors cannot
     # call .numpy() directly
     return [TokenResult(forward[0, s, :].cpu().numpy(),
                         backward[0, s, :].cpu().numpy())
             for s in range(forward.size()[1])]
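`TokenResult` and `Vind` belong to the surrounding project and are not defined on this page. A hypothetical shape consistent with this call site (the field names are assumptions):

from typing import NamedTuple
import numpy as np

Vind = int  # assumption: an index into the token vocabulary

class TokenResult(NamedTuple):
    forward_probs: np.ndarray   # forward LM distribution for one position
    backward_probs: np.ndarray  # backward LM distribution for one position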
Example #3
def evaluate(model, valid_dataset, batch_size,
             evaluate_object_list: typing.List[Evaluator], train_loss_function,
             desc, label_preprocess_fn):
    model.eval()
    for o in evaluate_object_list:
        o.clear_result()
    train_total_loss = to_cuda(torch.Tensor([0]))
    steps = to_cuda(torch.Tensor([0]))
    with tqdm(total=math.ceil(len(valid_dataset) / batch_size),
              leave=False) as pbar:
        for batch_data in data_loader(valid_dataset,
                                      batch_size=batch_size,
                                      is_shuffle=False,
                                      drop_last=False):
            model.zero_grad()
            predict_logit = model.forward(batch_data)
            target = label_preprocess_fn(batch_data)
            train_loss = train_loss_function(predict_logit, target)
            for evaluator in evaluate_object_list:
                evaluator.add_result(predict_logit,
                                     target,
                                     batch_data=batch_data)
            train_total_loss += train_loss.data
            steps += 1
            pbar.update(1)
    return evaluate_object_list, train_total_loss / steps
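The evaluators passed into this loop only need `clear_result` and `add_result`; the concrete classes are not shown. An interface sketch inferred from the calls above, not the project's actual base class:

class Evaluator:
    def clear_result(self):
        # reset accumulated statistics before a new pass over the data
        raise NotImplementedError

    def add_result(self, predict_logit, target, batch_data=None):
        # fold one batch into the running statistics; implementations on
        # this page may return a human-readable summary string
        raise NotImplementedError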
Example #4
 def parse_target_batch_data(batch_data):
     is_copy = to_cuda(
         torch.FloatTensor(
             PaddedList(batch_data['is_copy'], fill_value=ignore_token)))
     target = to_cuda(
         torch.LongTensor(list(more_itertools.flatten(
             batch_data['target']))))
     return is_copy, target
Example #5
    def _preprocess(self, batch_data):
        from common.util import PaddedList
        s1 = batch_data["s1"]
        s2 = batch_data['s2']

        return to_cuda(torch.LongTensor(PaddedList(s1, fill_value=self._pad_idx))), \
               to_cuda(torch.LongTensor(PaddedList(batch_data['s1_char'], fill_value=self._character_pad_idx))), \
               to_cuda(torch.LongTensor(PaddedList(s2, fill_value=self._pad_idx))), \
               to_cuda(torch.LongTensor(PaddedList(batch_data['s2_char'],
                                  fill_value=self._character_pad_idx)))
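`PaddedList` (imported from `common.util` above) pads ragged lists to a rectangular shape so `torch.LongTensor` can consume them. A minimal single-level sketch of the behaviour these examples rely on; the real class evidently also handles nested lists and an explicit `shape` argument (see Example #23):

def padded_list(seqs, fill_value=0):
    # pad each variable-length list to the length of the longest one
    max_len = max(len(s) for s in seqs)
    return [list(s) + [fill_value] * (max_len - len(s)) for s in seqs]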
Example #6
 def parse_target_batch_data(batch_data, ):
     forward_target_seq = to_cuda(
         torch.LongTensor(
             PaddedList(batch_data['forward_target'],
                        fill_value=ignore_id)))
     backward_target_seq = to_cuda(
         torch.LongTensor(
             PaddedList(batch_data['backward_target'],
                        fill_value=ignore_id)))
     return forward_target_seq, backward_target_seq
Example #7
 def parse_input_tensor(batch_data, do_sample=False):
     input_seq = to_cuda(
         torch.LongTensor(PaddedList(batch_data['input_seq'])))
     inp_seq_len = to_cuda(torch.LongTensor(batch_data['input_seq_len']))
     target_seq = to_cuda(
         torch.LongTensor(PaddedList(batch_data['target_seq'])))
     target_seq_len = to_cuda(torch.LongTensor(
         batch_data['target_seq_len']))
     return input_seq, inp_seq_len, None, target_seq, target_seq_len, None, batch_data[
         'masked_positions']
Example #8
    def _preprocess(self, batch_data):
        from common.util import PaddedList
        s1 = batch_data["s1"]
        s2 = batch_data['s2']

        batch_size = len(s1)
        size = max(len(t1)+len(t2)+1 for t1, t2 in zip(s1, s2))
        if self._summary_node:
            size += 2
        # print("size:{}".format(size))

        if not self._summary_node:
            sentences = to_cuda(torch.LongTensor(
                PaddedList([t1 + [self._pad_idx] + t2 for t1, t2 in zip(s1, s2)], fill_value=self._pad_idx,)))

            sentences_char = to_cuda(torch.LongTensor(
                PaddedList([t1 + [[self._character_pad_idx]] + t2 for t1, t2 in zip(batch_data['s1_char'], batch_data['s2_char'])],
                           fill_value=self._character_pad_idx)))
        else:
            sentences = to_cuda(torch.LongTensor(
                PaddedList([t1 + [self._pad_idx] + t2 + [self._pad_idx, self._pad_idx] for t1, t2 in zip(s1, s2)],
                           fill_value=self._pad_idx, )))

            sentences_char = to_cuda(torch.LongTensor(
                PaddedList(
                    [t1 + [[self._character_pad_idx]] + t2 + [[self._character_pad_idx], [self._character_pad_idx]] for
                     t1, t2 in
                     zip(batch_data['s1_char'], batch_data['s2_char'])],
                    fill_value=self._character_pad_idx)))

        distance_matrix = np.ones((batch_size, size, size)) * float('-inf')
        for i, (t1, t2) in enumerate(zip(s1, s2)):
            s1_matrix = util.create_distance_node_matrix(len(t1))
            s2_matrix = util.create_distance_node_matrix(len(t2))
            distance_matrix[i, :len(t1), :len(t1)] = s1_matrix
            distance_matrix[i, len(t1)+1:len(t1)+len(t2)+1, len(t1)+1:len(t1)+len(t2)+1] = s2_matrix
            if self._summary_node:
                distance_matrix[i, :len(t1), -2] = 0
                distance_matrix[i, len(t1)+1:len(t1)+len(t2)+1, -1] = 0

        distance_matrix = to_cuda(torch.FloatTensor(np.stack(distance_matrix, axis=0)))

        # sentence_same_token_link_matrix = []
        # for t1, t2 in zip(s1, s2):
        #     idx, idy, data = util.create_sentence_pair_same_node_matrix(t1, 0, t2, len(t1)+1)
        #     sentence_same_token_link_matrix.append(
        #         sparse.coo_matrix(
        #             (data, (idx, idy)),
        #             shape=(size, size), dtype=np.float
        #         ).toarray()
        #     )
        # sentence_same_token_link_matrix = to_cuda(torch.FloatTensor(np.stack(sentence_same_token_link_matrix, axis=0)))

        return sentences, sentences_char, distance_matrix,
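The distance matrix starts at -inf and is zeroed only on permitted links, the standard additive attention mask: once added to raw attention scores, softmax drives the -inf entries to zero weight. A sketch of how such a bias is typically consumed (the consuming model is not shown on this page):

import torch.nn.functional as F

def masked_attention_weights(scores, additive_bias):
    # scores, additive_bias: [batch, seq, seq]; entries with a -inf bias
    # receive zero attention weight after the softmax
    return F.softmax(scores + additive_bias, dim=-1)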
Example #9
 def parse_target(batch_data):
     if 'error_line' not in batch_data.keys() or no_target:
         return None
     target_error_position = to_cuda(
         torch.LongTensor(PaddedList(batch_data['error_line'])))
     target_seq = to_cuda(
         torch.LongTensor(
             PaddedList(batch_data['target_line_ids'],
                        fill_value=ignore_id)))
     target_seq = target_seq[:, 1:]
     return target_error_position, target_seq
Example #10
 def parse_input(batch_data, do_sample=False):
     inputs = to_cuda(torch.LongTensor(PaddedList(batch_data['input_seq'])))
     input_length = to_cuda(
         torch.LongTensor(PaddedList(batch_data['input_length'])))
     if not do_sample:
         targets = to_cuda(
             torch.LongTensor(PaddedList(batch_data['target_seq'])))
         targets_length = to_cuda(
             torch.LongTensor(PaddedList(batch_data['target_length'])))
     else:
         targets = None
         targets_length = None
     return inputs, input_length, targets, targets_length
Example #11
 def _forward_pre_process(self, batch_data):
     input_seq = to_cuda(
         torch.LongTensor(PaddedList(batch_data['input_seq'])))
     input_length = to_cuda(torch.LongTensor(batch_data['input_length']))
     decoder_input = to_cuda(
         torch.LongTensor(PaddedList(batch_data['decoder_input'])))
     grammar_index = list(
         more_itertools.flatten(batch_data['grammar_index']))
     grammar_index_length = to_cuda(
         torch.LongTensor([len(t) for t in grammar_index]))
     grammar_index = to_cuda(torch.LongTensor(PaddedList(grammar_index)))
     target_index = batch_data['target_index']
     return input_seq, input_length, decoder_input, grammar_index, grammar_index_length, target_index
Example #12
    def decoder(self, encoder_output, endocer_hidden, encoder_mask, **kwargs):
        batch_size = encoder_output.shape[0]
        continue_mask = to_cuda(
            torch.ByteTensor([1 for i in range(batch_size)]))
        outputs = to_cuda(
            torch.LongTensor([[self.start_label] for i in range(batch_size)]))
        decoder_output_list = []
        outputs_list = []
        hidden = endocer_hidden
        error_list = [0 for i in range(batch_size)]

        for i in range(self.max_length):
            one_step_decoder_output, hidden, error_ids = self.decoder_fn(
                outputs,
                continue_mask,
                start_index=i,
                hidden=hidden,
                encoder_output=encoder_output,
                encoder_mask=encoder_mask,
                **kwargs)
            if (error_ids is not None) and len(error_ids) != 0:
                error_ids_list = [0 for i in range(batch_size)]
                for err in error_ids:
                    # print('error index: {}'.format(err))
                    error_list[err] = 1
                    error_ids_list[err] = 1
                error_ids_tensor = to_cuda(torch.ByteTensor(error_ids_list))
                continue_mask = continue_mask & ~error_ids_tensor
                # continue_mask[err] = 0
            decoder_output_list += [one_step_decoder_output]

            outputs = self.create_next_output_fn(one_step_decoder_output,
                                                 **kwargs)
            outputs_list += [outputs]
            step_continue = torch.ne(outputs, self.end_label).view(batch_size)
            continue_mask = continue_mask & step_continue

            if torch.sum(continue_mask) == 0:
                break
        return decoder_output_list, outputs_list, error_list
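The `ByteTensor` masks combined with `&` and `~` above still run, but current PyTorch prefers boolean masks for this. An equivalent sketch using only stock PyTorch:

import torch

batch_size = 4
continue_mask = torch.ones(batch_size, dtype=torch.bool)   # all still decoding
step_continue = torch.tensor([True, False, True, True])    # e.g. from torch.ne
continue_mask = continue_mask & step_continue              # element-wise AND
if not continue_mask.any():  # same role as torch.sum(continue_mask) == 0
    print("every sequence has emitted the end label")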
Example #13
    def parse_input(batch_data, do_sample=False):
        input_seq = to_cuda(
            torch.LongTensor(
                PaddedList(batch_data['error_token_ids'], fill_value=0)))
        input_line_length = to_cuda(
            torch.LongTensor(PaddedList(batch_data['error_line_length'])))
        input_line_token_length = to_cuda(
            torch.LongTensor(PaddedList(
                batch_data['error_line_token_length'])))

        input_length = to_cuda(
            torch.LongTensor(PaddedList(batch_data['error_token_length'])))
        if not use_ast:
            adj_matrix = to_cuda(torch.LongTensor(batch_data['adj']))
        else:
            adjacent_tuple = [[[i] + tt for tt in t]
                              for i, t in enumerate(batch_data['adj'])]
            adjacent_tuple = [
                list(t) for t in unzip(more_itertools.flatten(adjacent_tuple))
            ]
            size = max(batch_data['error_token_length'])
            # print("max length in this batch:{}".format(size))
            adjacent_tuple = torch.LongTensor(adjacent_tuple)
            adjacent_values = torch.ones(adjacent_tuple.shape[1]).long()
            adjacent_size = torch.Size(
                [len(batch_data['error_token_length']), size, size])
            info('batch_data input_length: ' +
                 str(batch_data['error_token_length']))
            info('size: ' + str(size))
            info('adjacent_tuple: ' + str(adjacent_tuple.shape))
            info('adjacent_size: ' + str(adjacent_size))
            adj_matrix = to_cuda(
                torch.sparse.LongTensor(
                    adjacent_tuple,
                    adjacent_values,
                    adjacent_size,
                ).float().to_dense())

        if not do_sample:
            target_error_position = to_cuda(
                torch.LongTensor(PaddedList(batch_data['error_line'])))
            target_seq = to_cuda(
                torch.LongTensor(
                    PaddedList(batch_data['target_line_ids'],
                               fill_value=ignore_id)))
            target_length = to_cuda(
                torch.LongTensor(PaddedList(batch_data['target_line_length'])))
        else:
            target_error_position = None
            target_seq = None
            target_length = None

        return input_seq, input_line_length, input_line_token_length, input_length, adj_matrix, target_error_position, target_seq, target_length
Example #14
def parse_graph_input_from_mask_lm_output(input_seq,
                                          input_length,
                                          adj,
                                          use_ast=True):
    from common.problem_util import to_cuda
    from common.util import PaddedList

    def to_long(x):
        return to_cuda(torch.LongTensor(x))

    if not use_ast:
        adjacent_matrix = to_long(adj)
    else:
        adjacent_tuple = [[[i] + tt for tt in t] for i, t in enumerate(adj)]
        adjacent_tuple = [
            list(t) for t in unzip(more_itertools.flatten(adjacent_tuple))
        ]
        size = max(input_length)
        # print("max length in this batch:{}".format(size))
        adjacent_tuple = torch.LongTensor(adjacent_tuple)
        adjacent_values = torch.ones(adjacent_tuple.shape[1]).long()
        adjacent_size = torch.Size([len(input_length), size, size])
        # info('batch_data input_length: ' + str(batch_data['input_length']))
        # info('size: ' + str(size))
        # info('adjacent_tuple: ' + str(adjacent_tuple.shape))
        # info('adjacent_size: ' + str(adjacent_size))
        adjacent_matrix = to_cuda(
            torch.sparse.LongTensor(
                adjacent_tuple,
                adjacent_values,
                adjacent_size,
            ).float().to_dense())
    input_seq = to_long(PaddedList(input_seq))
    input_length = to_long(input_length)
    return adjacent_matrix, input_seq, input_length
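Both adjacency builders above use the legacy `torch.sparse.LongTensor` constructor. The same dense batched adjacency can be built with the current `torch.sparse_coo_tensor` API; a self-contained sketch over a toy edge list:

import torch

# (batch, row, col) indices for three edges across a batch of two graphs
indices = torch.tensor([[0, 0, 1],   # graph index
                        [0, 1, 0],   # source node
                        [1, 2, 0]])  # target node
values = torch.ones(indices.shape[1])
adj = torch.sparse_coo_tensor(indices, values, size=(2, 4, 4)).to_dense()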
Example #15
def reverse_tensor(x, x_length):
    x_list = torch.unbind(x, dim=0)
    reverse_list = []
    for one, l in zip(x_list, x_length):
        idx = to_cuda(torch.arange(l.item()-1, -1, -1).long())
        r_one = one.index_select(dim=0, index=idx)
        reverse_list += [torch.cat([r_one, one[l:]], dim=0)]
    o = torch.stack(reverse_list, dim=0)
    return o
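A usage sketch for `reverse_tensor`, assuming the no-op `to_cuda` sketch near the top of this page: only the first `l` positions of each row are flipped, while the padding tail stays in place.

import torch

x = torch.tensor([[1, 2, 3, 0, 0],
                  [4, 5, 6, 7, 0]])
lengths = torch.tensor([3, 4])
print(reverse_tensor(x, lengths))
# tensor([[3, 2, 1, 0, 0],
#         [7, 6, 5, 4, 0]])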
Example #16
    def add_result(self,
                   output,
                   model_output,
                   model_target,
                   model_input,
                   ignore_token=None,
                   batch_data=None):
        model_output = [t.data for t in model_output]
        if ignore_token is None:
            ignore_token = self.ignore_token
        is_copy = (torch.sigmoid(model_output[2]) > 0.5).float()
        is_copy_target = model_target[2]
        is_copy_accuracy = self.is_copy_accuracy.add_result(
            is_copy, is_copy_target)
        p0 = torch.topk(F.softmax(model_output[0], dim=-1), dim=-1, k=1)[1]
        p1 = torch.topk(F.softmax(model_output[1], dim=-1), dim=-1, k=1)[1]
        position = torch.cat([p0, p1], dim=1)
        position_target = torch.stack([model_target[0], model_target[1]],
                                      dim=1)
        position_correct = self.position_correct.add_result(
            position, position_target)

        all_output, sample_output_ids = output
        target_output = to_cuda(
            torch.LongTensor(
                PaddedList(batch_data['target'], fill_value=ignore_token)))
        sample_output_ids, target_output = expand_tensor_sequence_to_same(
            sample_output_ids, target_output[:, 1:])
        output_accuracy = self.output_accuracy.add_result(
            sample_output_ids, target_output)

        full_output_target = to_cuda(
            torch.LongTensor(
                PaddedList(batch_data['full_output_target'],
                           fill_value=ignore_token)))
        all_output, full_output_target = expand_tensor_sequence_to_same(
            all_output, full_output_target, fill_value=ignore_token)
        all_correct = self.all_correct.add_result(all_output,
                                                  full_output_target)
        return "is_copy_accuracy evaluate:{}, position_correct evaluate:{}, output_accuracy evaluate:{}, " \
               "all_correct evaluate: {}".format(is_copy_accuracy, position_correct, output_accuracy, all_correct)
def expand_tensor_sequence_len(t, max_len, fill_value=0, dim=1):
    t_len = t.shape[dim]
    if max_len == t_len:
        return t
    expand_shape = list(t.shape)
    expand_shape[dim] = 1
    one_t = to_cuda(torch.ones(*expand_shape).float()) * fill_value
    expand_t = one_t.expand(*[-1 for i in range(dim)], max_len - t_len, *[-1 for i in range(len(t.shape) - 1 - dim)])
    if t.data.type() == 'torch.cuda.LongTensor' or t.data.type() == 'torch.LongTensor':
        expand_t = expand_t.long()
    elif t.data.type() == 'torch.cuda.ByteTensor' or t.data.type() == 'torch.ByteTensor':
        expand_t = expand_t.byte()
    res_t = torch.cat([t, expand_t], dim=dim)
    return res_t
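`expand_tensor_sequence_to_same`, called in Example #16 above, is not shown, but it presumably pads two tensors to a common length using this helper. A usage sketch of `expand_tensor_sequence_len` itself:

import torch

t = torch.ones(2, 3).long()                 # [batch=2, len=3]
padded = expand_tensor_sequence_len(t, 5, fill_value=9)
# padded has shape [2, 5]; the two appended columns hold 9, and the
# dtype check above keeps the result a LongTensor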
Example #18
def train(model, dataset, batch_size, loss_function, optimizer, clip_norm,
          epoch_ratio, parse_input_batch_data_fn, parse_target_batch_data_fn,
          create_output_ids_fn, evaluate_obj_list):
    total_loss = to_cuda(torch.Tensor([0]))
    steps = 0
    for o in evaluate_obj_list:
        o.clear_result()
    model.train()

    with tqdm(total=(len(dataset) * epoch_ratio)) as pbar:
        for batch_data in data_loader(dataset,
                                      batch_size=batch_size,
                                      is_shuffle=True,
                                      drop_last=True,
                                      epoch_ratio=epoch_ratio):
            model.zero_grad()

            model_input = parse_input_batch_data_fn(batch_data,
                                                    do_sample=False)
            model_output = model.forward(*model_input)

            model_target = parse_target_batch_data_fn(batch_data)
            loss = loss_function(*model_output, *model_target)

            loss.backward()
            # the original accepted clip_norm but never applied it; clipping
            # before the step matches the sibling train loop in Example #24
            if clip_norm is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)
            optimizer.step()

            output_ids = create_output_ids_fn(model_output, model_input, False)
            for evaluator in evaluate_obj_list:
                evaluator.add_result(output_ids,
                                     model_output,
                                     model_target,
                                     model_input,
                                     batch_data=batch_data)

            total_loss += loss.data

            step_output = 'in train step {}  loss: {}'.format(
                steps, loss.data.item())
            # print(step_output)
            info(step_output)

            steps += 1
            pbar.update(batch_size)

    return evaluate_obj_list, (total_loss / steps).item()
Example #19
    def _validate_args(self, inputs, encoder_hidden, encoder_outputs, function,
                       teacher_forcing_ratio):
        if self.use_attention:
            if encoder_outputs is None:
                raise ValueError(
                    "Argument encoder_outputs cannot be None when attention is used."
                )

        # inference batch size
        if inputs is None and encoder_hidden is None:
            batch_size = 1
        else:
            if inputs is not None:
                batch_size = inputs.size(0)
            else:
                if self.rnn_cell is nn.LSTM:
                    batch_size = encoder_hidden[0].size(1)
                elif self.rnn_cell is nn.GRU:
                    batch_size = encoder_hidden.size(1)

        # set default input and max decoding length
        if inputs is None:
            if teacher_forcing_ratio > 0:
                raise ValueError(
                    "Teacher forcing has to be disabled (set 0) when no inputs is provided."
                )
            inputs = torch.LongTensor([self.sos_id] * batch_size).view(
                batch_size, 1)
            if torch.cuda.is_available():
                from common.problem_util import to_cuda
                inputs = to_cuda(inputs)
            max_length = self.max_length
        else:
            max_length = inputs.size(
                1) - 1  # minus the start of sequence symbol

        return inputs, batch_size, max_length
Example #20
    def decoder(self, encoder_output, endocer_hidden, encoder_mask, **kwargs):
        batch_size = encoder_output.shape[0]
        continue_mask_stack = to_cuda(torch.ByteTensor([[1 for _ in range(self.beam_size)] for i in range(batch_size)]))
        beam_outputs = to_cuda(torch.LongTensor([[[self.start_label] for _ in range(self.beam_size)] for i in range(batch_size)]))
        # outputs_stack = [to_cuda(torch.LongTensor([[self.start_label] for i in range(batch_size)])) for _ in range(self.beam_size)]
        probability_stack = to_cuda(torch.FloatTensor([[0.0 for _ in range(self.beam_size)] for _ in range(batch_size)]))
        decoder_output_list = []
        outputs_list = []
        hidden_stack = [endocer_hidden for _ in range(self.beam_size)]
        error_stack = to_cuda(torch.ByteTensor([[0 for _ in range(self.beam_size)] for i in range(batch_size)]))

        for i in range(self.max_length):
            # beam * (output * [batch, 1, ...])
            # beam_one_step_decoder_output = []
            beam_outputs_list = []
            beam_log_probs_list = []
            beam_decoder_output_list = []
            # beam * [batch, hidden]
            beam_hidden_list = []
            # beam * [batch, error_count]
            beam_error_ids = []
            for b in range(self.beam_size):
                outputs = beam_outputs[:, b]
                continue_mask = continue_mask_stack[:, b]
                hidden = hidden_stack[b]
                one_step_decoder_output, hidden, error_ids = self.decoder_fn(outputs, continue_mask, start_index=i,
                                                                             hidden=hidden,
                                                                             encoder_output=encoder_output,
                                                                             encoder_mask=encoder_mask,
                                                                             **kwargs)

                # if (error_ids is not None) and len(error_ids) != 0:
                error_ids_list = [0 for i in range(batch_size)]
                for err in error_ids:
                    error_ids_list[err] = 1
                error_ids_tensor = to_cuda(torch.ByteTensor(error_ids_list))
                beam_error_ids += [error_ids_tensor]

                beam_hidden_list += [hidden]

                # one_beam_outputs: [batch, beam, seq]
                # beam_probs: [batch, beam]
                # one_beam_decoder_output: tuple of [batch, beam, seq]
                one_beam_outputs, one_beam_log_probs, one_beam_decoder_output = self.create_beam_next_output_fn(one_step_decoder_output, continue_mask=continue_mask, beam_size=self.beam_size, **kwargs)
                beam_outputs_list += [one_beam_outputs]
                beam_log_probs_list += [one_beam_log_probs]
                beam_decoder_output_list += [one_beam_decoder_output]

                if i == 0:
                    break

            # beam_step_log_probs: [batch, outer_beam, inner_beam]
            beam_step_log_probs = torch.stack(beam_log_probs_list, dim=1)
            if i != 0:
                beam_total_log_probs = torch.unsqueeze(probability_stack, dim=-1) + torch.squeeze(beam_step_log_probs, dim=-1)
            else:
                beam_total_log_probs = torch.squeeze(beam_step_log_probs, dim=-1)
            probability_stack, sort_index = torch.topk(
                beam_total_log_probs.view(batch_size, beam_total_log_probs.shape[1] * beam_total_log_probs.shape[2]),
                k=self.beam_size, dim=-1)
            # floor division recovers the outer-beam index; plain `/` on
            # Long tensors is true division in current PyTorch
            stack_sort_index = sort_index // self.beam_size

            # beam_outputs: [batch, outer_beam * inner_beam, seq]
            beam_outputs = beam_stack_and_reshape(beam_outputs_list)
            beam_outputs = batch_index_select(beam_outputs, sort_index, batch_size)
            outputs_list = [batch_index_select(outputs, stack_sort_index, batch_size)
                            for outputs in outputs_list]
            outputs_list += [beam_outputs]

            beam_decoder_output = [beam_stack_and_reshape(one_output_list)
                                   for one_output_list in zip(*beam_decoder_output_list)]
            beam_decoder_output = [batch_index_select(one_output, sort_index, batch_size)
                                   for one_output in beam_decoder_output]

            decoder_output_list = [[batch_index_select(one_output, stack_sort_index, batch_size)
                                    for one_output in decoder_output]
                                   for decoder_output in decoder_output_list]
            decoder_output_list += [beam_decoder_output]

            # beam_error = beam_stack_and_reshape(beam_error_ids)
            beam_error = torch.stack(beam_error_ids, dim=1)
            beam_error = batch_index_select(beam_error, stack_sort_index, batch_size=batch_size)
            beam_continue = torch.ne(beam_outputs, self.end_label).view(batch_size, self.beam_size)
            beam_continue = beam_continue & ~beam_error
            continue_mask_stack = batch_index_select(continue_mask_stack, stack_sort_index, batch_size) & beam_continue
            error_stack = batch_index_select(error_stack, stack_sort_index, batch_size) | beam_error

            if isinstance(beam_hidden_list[0], list):
                one_hidden_list = zip(*beam_hidden_list)
                hidden_stack = list(zip(*[deal_beam_hidden(one_hidden_beam_list, stack_sort_index, batch_size)
                                          for one_hidden_beam_list in one_hidden_list]))
            else:
                hidden_stack = deal_beam_hidden(beam_hidden_list, stack_sort_index, batch_size)

            if torch.sum(continue_mask_stack) == 0:
                break
        return decoder_output_list, outputs_list, error_stack
Example #21
 def parse_output(batch_data):
     target_seq = [t[1:] for t in batch_data['target_seq']]
     targets = to_cuda(
         torch.LongTensor(PaddedList(target_seq, fill_value=ignore_id)))
     return [targets]
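Example #20 leans on several helpers that never appear on this page (`beam_stack_and_reshape`, `deal_beam_hidden`, `batch_index_select`). As a hedged illustration of the one used most, `batch_index_select` plausibly gathers the selected beam entries per batch element:

import torch

def batch_index_select(t, index, batch_size):
    # hypothetical implementation consistent with the call sites above:
    # for each batch element b, keep t[b, index[b]] (index: [batch, k])
    return torch.stack([t[b].index_select(0, index[b])
                        for b in range(batch_size)], dim=0)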
Example #22
    is_debug = args.debug
    just_evaluate = args.just_evaluate

    p_config = parameter_config.__dict__.get(args.config_name)(is_debug,
                                                               args.output_log)
    epoches = p_config.get("epcohes", 20)
    lr = p_config.get("lr", 20)
    batch_size = p_config.get("batch_size", 32)
    train_loss_fn = p_config.get("train_loss", nn.CrossEntropyLoss)()
    clip_norm = p_config.get("clip_norm", 10)
    optimizer = p_config.get("optimizer", optim.SGD)
    optimizer_dict = p_config.get("optimizer_dict", dict())
    epoch_ratio = p_config.get("epoch_ratio", 0.5)
    evaluate_object_list = p_config.get("evaluate_object_list")
    label_preprocess_fn = p_config.get(
        "label_preprocess", lambda x: to_cuda(torch.LongTensor(x['label'])))
    scheduler_fn = p_config.get(
        "scheduler_fn", lambda x: torch.optim.lr_scheduler.ReduceLROnPlateau(
            x, 'min', patience=3, verbose=True))
    save_root_path = os.path.join(config.DATA_PATH, p_config.get("name"))
    util.make_dir(save_root_path)
    need_pad = p_config.get("need_pad", False)
    print("save_root_path:{}".format(save_root_path))
    model_path = os.path.join(save_root_path, "model.pkl")
    model = get_model(p_config['model_fn'],
                      p_config['model_dict'],
                      p_config['pre_process_module_fn'],
                      p_config['pre_process_module_dict'],
                      model_path,
                      load_previous=load_previous,
                      parallel=problem_util.Parallel,
Example #23
def sequence_transform_data_config3(is_debug, output_log=None):
    from model.encoder_decoder_graph import SEDWithInitialStatePreproceser
    import numpy as np
    from read_data.sequencec_transform_data.load_data import load_generated_random_target_data
    train, valid, test = load_generated_random_target_data(is_debug)
    valid.train = False
    test.train = False
    max_index = 10
    def new_id():
        nonlocal max_index
        max_index += 1
        return max_index
    max_length = 20
    begin_index = new_id()
    end_index = new_id()
    delimiter_index = new_id()
    pad_index = new_id()
    decoder_init_idx = new_id()
    for t in [train, valid, test]:
        t.end = [end_index]
    train_size = len(train)
    itr_num = 80
    batch_size = 14
    from model.transformer_lm import dotdict
    from model.encoder_decoder_graph import SEDWithInitialState
    return {
        "model_fn": SEDWithInitialState,
        "model_dict": {
            "cfg": dotdict({
                'n_embd': 768,
                'n_head': 12,
                'n_layer': 12,
                'embd_pdrop': 0.1,
                'attn_pdrop': 0.1,
                'resid_pdrop': 0.1,
                'afn': 'gelu',
                'clf_pdrop': 0.1}),
            "vocab": max_index + 1 + max_length * 2 + 4,
            "n_source_ctx": max_length + 2,
            "n_ctx": max_length * 2 + 4,
            "decoder_init_idx": decoder_init_idx,
        },
        "pre_process_module_fn": SEDWithInitialStatePreproceser,
        "pre_process_module_dict": {
            "begin_idx":  begin_index,
            "delimeter_idx": delimiter_index,
            "summary_idx": decoder_init_idx,
            "pad_idx": pad_index,
            "source_ctx": max_length+2,
            "position_embedding_base": max_index+1,
        },
        "data": [train, valid, test],
        "label_preprocess": lambda x: to_cuda(torch.LongTensor([PaddedList(t, fill_value=pad_index, shape=[max_length+1]) for t in x['y']])),
        "batch_size": batch_size,
        "train_loss": lambda: NCE_train_loss(ignore_index=pad_index),
        "clip_norm": 1,
        "name": "SEDWithInitialState",
        "optimizer": OpenAIAdam,
        "need_pad": True,
        "optimizer_dict": {
                           "schedule": 'warmup_linear',
                           "warmup": 0.002,
                           "t_total": (train_size//batch_size)*itr_num,
                           "b1": 0.9,
                           "b2": 0.999,
                           "e": 1e-8,
                           "l2": 0.01,
                           "vector_l2": 'store_true',
                           "max_grad_norm": 1},
        "epcohes": itr_num,
        "lr": 6.25e-5,
        "evaluate_object_list": [SequenceExactMatch(gpu_index=get_gpu_index(), ignore_token=pad_index),
                                 SequenceOutputIDToWord(vocab=None, file_path=output_log, ignore_token=pad_index)],
        "epoch_ratio": 1,
        "scheduler_fn": None
    }
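`dotdict` is imported from `model.transformer_lm` and not shown; the usual recipe for such a class, assumed here because the config above is presumably read via attribute access (`cfg.n_embd`):

class dotdict(dict):
    # a dict whose keys are also readable and writable as attributes
    __getattr__ = dict.get
    __setattr__ = dict.__setitem__
    __delattr__ = dict.__delitem__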
Example #24
def train(model, dataset, batch_size, loss_function, optimizer, clip_norm,
          epoch_ratio, evaluate_object_list, desc, label_preprocess_fn):
    total_loss = to_cuda(torch.Tensor([0]))
    steps = to_cuda(torch.Tensor([0]))
    for o in evaluate_object_list:
        o.clear_result()
    model.train()
    with tqdm(total=len(dataset) // batch_size, desc=desc,
              leave=False) as pbar:
        for batch_data in data_loader(dataset,
                                      batch_size=batch_size,
                                      is_shuffle=True,
                                      drop_last=True,
                                      epoch_ratio=epoch_ratio):
            model.zero_grad()
            log_probs = model.forward(batch_data)
            # log_probs.register_hook(create_hook_fn("log_probs"))

            # print("log_probs sizze:{}".format(log_probs.size()))
            label = label_preprocess_fn(batch_data)
            loss = loss_function(log_probs, label)

            # loss.register_hook(create_hook_fn("loss"))
            loss.backward()

            if clip_norm is not None:
                torch.nn.utils.clip_grad_norm_(model.parameters(), clip_norm)


            optimizer.step()

            # print("loss:{}".format(loss.data))
            total_loss += loss.data
            steps += 1
            for evaluator in evaluate_object_list:
                evaluator.add_result(log_probs, label, batch_data=batch_data)
            pbar.update(1)
    return evaluate_object_list, total_loss / steps
Example #25
 def to_long(x):
     return to_cuda(torch.LongTensor(x))
Example #26
 def parse_target_tensor(batch_data):
     masked_target_seq = to_cuda(
         torch.LongTensor(
             PaddedList(batch_data['target_seq'], fill_value=ignore_id)))
     return [masked_target_seq]
Example #27
        # (class head truncated in the source) the surviving tail of
        # __init__ builds per-channel phase offsets: 0 on even channels,
        # pi/2 on odd ones, so sin() yields interleaved sin/cos signals
        self.phases = nn.Parameter(torch.Tensor([
            0 if i % 2 == 0 else np.pi / 2 for i in range(self.d)
        ]).unsqueeze(1), requires_grad=False)

    def forward(self, x):

        l = x.shape[-1]

        # computing signal
        pos = torch.arange(l).repeat(self.d, 1).to(x.device)
        tmp = pos * self.freqs + self.phases
        pos_enc = torch.sin(tmp)
        pos_enc = Variable(pos_enc)
        x = x + pos_enc

        return x


if __name__ == "__main__":

    mdl = to_cuda(PositionEncoding())

    batch_size = 8
    n_channels = 128
    n_items = 60

    input = Variable(torch.ones(batch_size, n_channels, n_items))

    input = to_cuda(input)

    out = mdl(input)
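Example #27 above lost its class head in extraction. Purely as a hedged reconstruction, not the original code: an `__init__` compatible with the surviving `forward` could look like the following, where `base` and the frequency formula are assumptions borrowed from standard sinusoidal encodings and `d` defaults to the 128 channels the `__main__` block feeds in.

import numpy as np
import torch
import torch.nn as nn

class PositionEncoding(nn.Module):
    def __init__(self, d=128, base=10000.0):
        super().__init__()
        self.d = d
        # one frequency per channel, shared within each sin/cos pair
        self.freqs = nn.Parameter(torch.Tensor([
            1.0 / base ** (2 * (i // 2) / d) for i in range(d)
        ]).unsqueeze(1), requires_grad=False)
        # a pi/2 phase on odd channels turns sin into cos
        self.phases = nn.Parameter(torch.Tensor([
            0 if i % 2 == 0 else np.pi / 2 for i in range(d)
        ]).unsqueeze(1), requires_grad=False)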
Example #28
def evaluate(model,
             dataset,
             batch_size,
             loss_function,
             parse_input_batch_data_fn,
             parse_target_batch_data_fn,
             do_sample=False,
             print_output=False,
             create_output_ids_fn=None,
             evaluate_obj_list=[],
             expand_output_and_target_fn=None):
    total_loss = to_cuda(torch.Tensor([0]))
    total_batch = to_cuda(torch.Tensor([0]))
    steps = 0
    for o in evaluate_obj_list:
        o.clear_result()
    model.eval()

    with tqdm(total=len(dataset)) as pbar:
        with torch.no_grad():
            for batch_data in data_loader(dataset,
                                          batch_size=batch_size,
                                          drop_last=True):
                model.zero_grad()

                # model_input = parse_input_batch_data(batch_data)
                model_input = parse_input_batch_data_fn(batch_data,
                                                        do_sample=do_sample)
                # model_output = model.forward(*model_input, test=do_sample)
                if do_sample:
                    model_output = model.forward(*model_input, do_sample=True)

                    model_target = parse_target_batch_data_fn(batch_data)

                    model_output, model_target = expand_output_and_target_fn(
                        model_output, model_target)
                else:
                    model_output = model.forward(*model_input)
                    model_target = parse_target_batch_data_fn(batch_data)

                loss = loss_function(*model_output, *model_target)

                output_ids = create_output_ids_fn(model_output, model_input,
                                                  do_sample)
                total_loss += loss.data
                total_batch += batch_size

                step_output = 'in evaluate step {}  loss: {}, '.format(
                    steps, loss.data.item())
                for evaluator in evaluate_obj_list:
                    res = evaluator.add_result(output_ids,
                                               model_output,
                                               model_target,
                                               model_input,
                                               batch_data=batch_data)
                    step_output += res
                # print(step_output)
                info(step_output)

                if print_output and steps % 10 == 0:
                    pass

                steps += 1
                pbar.update(batch_size)

    return evaluate_obj_list, (total_loss / steps).item()
Example #29
def multi_step_evaluate(model,
                        dataset,
                        batch_size,
                        parse_input_batch_data_fn,
                        parse_target_batch_data_fn,
                        do_sample=False,
                        print_output=False,
                        create_output_ids_fn=None,
                        evaluate_obj_list=[],
                        expand_output_and_target_fn=None,
                        max_step_times=0,
                        vocabulary=None,
                        file_path='',
                        create_multi_step_next_input_batch_fn=None,
                        extract_includes_fn=lambda x: x['includes'],
                        print_output_fn=None,
                        do_beam_search=False,
                        target_file_path='main.out',
                        log_file_path='main.log',
                        do_save_data=False,
                        max_save_distance=None,
                        save_records_to_database=False,
                        db_path='',
                        table_name='',
                        change_output_records_to_batch_fn=None,
                        create_save_database_records_fn=None,
                        error_stop_type='normal'):
    total_loss = to_cuda(torch.Tensor([0]))
    total_batch = to_cuda(torch.Tensor([0]))
    steps = 0
    compile_evaluator = CompileResultEvaluate()
    compile_evaluator.clear_result()
    for o in evaluate_obj_list:
        o.clear_result()

    model.eval()

    from common.pycparser_util import tokenize_by_clex_fn
    tokenize_fn = tokenize_by_clex_fn()
    save_data_dict = {}
    save_records_list = []

    # file_path = add_pid_to_file_path(file_path)
    # target_file_path = add_pid_to_file_path(target_file_path)

    with tqdm(total=len(dataset)) as pbar:
        with torch.no_grad():
            for batch_data in data_loader(dataset,
                                          batch_size=batch_size,
                                          drop_last=False):
                model.zero_grad()

                input_data = batch_data.copy()
                final_output_list = []
                output_records_list = []
                continue_list = [True for _ in range(batch_size)]
                result_list = [False for _ in range(batch_size)]
                result_records_list = []
                sample_steps = [-1 for _ in range(batch_size)]
                error_count_list = batch_data['error_count']

                for i in range(max_step_times):
                    model_input = parse_input_batch_data_fn(input_data,
                                                            do_sample=True)

                    model_output = model.forward(*model_input,
                                                 do_sample=True,
                                                 do_beam_search=do_beam_search)

                    input_data, final_output, output_records, final_output_name_list, continue_list = create_multi_step_next_input_batch_fn(
                        input_data, model_input, model_output, continue_list,
                        do_beam_search)
                    final_output_list += [final_output]
                    output_records_list += [output_records]

                    continue_list, result_list, cur_error_count_list = compile_code_ids_list(
                        final_output_name_list,
                        continue_list,
                        result_list,
                        vocabulary=vocabulary,
                        includes_list=extract_includes_fn(input_data),
                        file_path=file_path,
                        target_file_path=target_file_path,
                        log_file_path=log_file_path,
                        do_compile_pool=True,
                        need_transform=False)

                    if error_stop_type == 'oracle':
                        reject_list = [
                            True if c and n > o else False
                            for c, o, n in zip(continue_list, error_count_list,
                                               cur_error_count_list)
                        ]
                    elif error_stop_type == 'normal':
                        reject_list = [False for _ in range(batch_size)]
                    error_count_list = [
                        n if n < o and n >= 0 else o
                        for o, n in zip(error_count_list, cur_error_count_list)
                    ]
                    for i_f, rej in enumerate(reject_list):
                        if rej:
                            # use last output
                            final_output_name_list[i_f] = input_data[
                                'last_input_seq_name'][i_f]
                            continue_list[i_f] = False

                    sample_steps = [
                        i + 1 if s == -1 and not c and not r else s for s, c, r
                        in zip(sample_steps, continue_list, reject_list)
                    ]
                    sample_steps = [
                        i if s == -1 and not c and r else s for s, c, r in zip(
                            sample_steps, continue_list, reject_list)
                    ]

                    result_records_list += [result_list]
                    if sum(continue_list) == 0:
                        break
                sample_steps = [
                    max_step_times if s == -1 else s for s in sample_steps
                ]

                if do_save_data:
                    batch_data['input_seq_name'] = batch_data[
                        'final_output_name']
                    save_res_dict = save_addition_data(
                        original_states=batch_data,
                        states=input_data,
                        tokenize_fn=tokenize_fn,
                        batch_size=batch_size,
                        file_path=file_path,
                        target_file_path=target_file_path,
                        vocabulary=vocabulary,
                        max_distande=max_save_distance,
                        only_error=True)
                    for k, v in save_res_dict.items():
                        save_data_dict[k] = save_data_dict.get(k, []) + v

                if save_records_to_database:
                    batch_output_records = change_output_records_to_batch_fn(
                        output_records_list, sample_steps)
                    records_list = create_save_database_records_fn(
                        batch_data, sample_steps, final_output_name_list,
                        result_list, batch_output_records, input_data)
                    save_records_list += records_list

                step_output = 'in evaluate step {}: '.format(steps)
                res = compile_evaluator.add_result(result_list)
                step_output += res
                for evaluator in evaluate_obj_list:
                    # customer evaluator interface
                    res = evaluator.add_result(result_list,
                                               batch_data=batch_data)
                    step_output += res
                # print(step_output)
                info(step_output)

                if print_output and steps % 1 == 0:
                    print_output_fn(output_records=output_records_list,
                                    final_output=final_output_list,
                                    batch_data=batch_data,
                                    step_i=steps,
                                    vocabulary=vocabulary,
                                    compile_result_list=result_records_list)

                steps += 1
                pbar.update(batch_size)
    evaluate_obj_list = [compile_evaluator] + evaluate_obj_list

    if save_records_to_database:
        create_table(db_path,
                     DATA_RECORDS_DEEPFIX,
                     replace_table_name=table_name)
        run_sql_statment(db_path,
                         DATA_RECORDS_DEEPFIX,
                         'insert_ignore',
                         save_records_list,
                         replace_table_name=table_name)

    if steps == 0:
        t_loss = 0
    else:
        t_loss = (total_loss / steps).item()
    return evaluate_obj_list, t_loss, save_data_dict
Example #30
def sample_and_save(model,
                    dataset,
                    batch_size,
                    loss_function,
                    parse_input_batch_data_fn,
                    parse_target_batch_data_fn,
                    do_sample=False,
                    print_output=False,
                    create_output_ids_fn=None,
                    evaluate_obj_list=[],
                    expand_output_and_target_fn=None,
                    add_data_record_fn=None,
                    db_path='',
                    table_name=''):
    # total_loss = to_cuda(torch.Tensor([0]))
    total_batch = to_cuda(torch.Tensor([0]))
    saved_count = 0
    steps = 1
    for o in evaluate_obj_list:
        o.clear_result()
    model.eval()

    total_saved_list = []

    with tqdm(total=len(dataset)) as pbar:
        with torch.no_grad():
            for batch_data in data_loader(dataset,
                                          batch_size=batch_size,
                                          drop_last=True):
                model.zero_grad()

                # model_input = parse_input_batch_data(batch_data)
                model_input = parse_input_batch_data_fn(batch_data,
                                                        do_sample=do_sample)
                # model_output = model.forward(*model_input, test=do_sample)
                if do_sample:
                    model_output = model.forward(*model_input, do_sample=True)

                    model_target = parse_target_batch_data_fn(batch_data)

                    model_output, model_target = expand_output_and_target_fn(
                        model_output, model_target)
                else:
                    model_output = model.forward(*model_input)
                    model_target = parse_target_batch_data_fn(batch_data)

                # loss = loss_function(*model_output, *model_target)

                output_ids = create_output_ids_fn(model_output, model_input)
                # total_loss += loss.data
                total_batch += batch_size

                # step_output = 'in evaluate step {}  loss: {}, '.format(steps, loss.data.item())
                step_output = 'in evaluate step {} '.format(steps)
                for evaluator in evaluate_obj_list:
                    res = evaluator.add_result(output_ids,
                                               model_output,
                                               model_target,
                                               model_input,
                                               batch_data=batch_data)
                    step_output += res
                # print(step_output)
                info(step_output)

                saved_list = add_data_record_fn(output_ids, model_output,
                                                batch_data)
                total_saved_list += saved_list

                if steps % 100 == 0:
                    create_table(db_path, table_name)
                    insert_items(db_path, table_name, total_saved_list)
                    saved_count += len(total_saved_list)
                    print('saved {} record in total {}. '.format(
                        saved_count, total_batch.item()))
                    total_saved_list = []

                if print_output and steps % 100 == 0:
                    pass

                steps += 1
                pbar.update(batch_size)

    create_table(db_path, table_name)
    insert_items(db_path, table_name, total_saved_list)
    saved_count += len(total_saved_list)
    print('saved {} record in total {}. '.format(saved_count,
                                                 total_batch.item()))

    return evaluate_obj_list