Example #1
def _predict_batch_by_batch(predict_fn, batched_inputs, non_batched_inputs=None, batch_size=BATCH_SIZE, num_outputs=1):
    """
    Splits prediction for a big dataset into batches in order to save GPU memory.
    Equivalent to predict_fn(*batched_inputs + non_batched_inputs).

    predict_fn: Compiled theano function.
    batched_inputs:
        Inputs that we split into batches. On each iteration, we only pass one batch of this data into the theano function.
    non_batched_inputs:
        Inputs that we do not split into batches. These inputs are the same for each call of predict_fn.
    batch_size: int
        Size of each batch that we split our batched_inputs into.
    num_outputs: int, default=1
        Number of outputs returned by the theano function.
    """
    if non_batched_inputs is None:
        non_batched_inputs = []

    results = [[] for _ in range(num_outputs)]

    for inputs_batch in get_training_batch(batched_inputs, batch_size):
        args = list(inputs_batch) + non_batched_inputs
        cur_result = predict_fn(*args)
        if num_outputs > 1:
            for i in range(num_outputs):
                results[i].append(cur_result[i])
        else:
            results[0].append(cur_result)

    if num_outputs > 1:
        return tuple(np.concatenate(results[i]) for i in range(num_outputs))
    else:
        return np.concatenate(results[0])
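
Every example on this page calls a get_training_batch helper whose implementation is not shown here. As a rough mental model only (a hypothetical sketch, not the project's actual code), it can be thought of as a generator that slices a list of aligned arrays into consecutive batches, optionally shuffling the sample order first:

import numpy as np

def get_training_batch(inputs, batch_size, random_permute=False, random_seed=None):
    # Hypothetical sketch: yield aligned batches from a list of equally long arrays,
    # optionally shuffling the sample order before slicing.
    inputs = list(inputs)
    num_samples = inputs[0].shape[0]

    indices = np.arange(num_samples)
    if random_permute:
        np.random.RandomState(random_seed).shuffle(indices)

    for start in range(0, num_samples, batch_size):
        batch_indices = indices[start:start + batch_size]
        yield tuple(data[batch_indices] for data in inputs)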
Example #2
def _predict_batch_by_batch(predict_fn, batched_inputs, non_batched_inputs=None, batch_size=BATCH_SIZE, num_outputs=1):
    """
    Splits prediction for a big dataset into batches in order to save GPU memory.
    Equivalent to predict_fn(*batched_inputs + non_batched_inputs).

    predict_fn: Compiled theano function.
    batched_inputs:
        Inputs that we split into batches. On each iteration, we only pass one batch of this data into the theano function.
    non_batched_inputs:
        Inputs that we do not split into batches. These inputs are the same for each call of predict_fn.
    batch_size: int
        Size of each batch that we split our batched_inputs into.
    num_outputs: int, default=1
        Number of outputs returned by the theano function.
    """
    if non_batched_inputs is None:
        non_batched_inputs = []

    results = [[] for _ in xrange(num_outputs)]

    for inputs_batch in get_training_batch(batched_inputs, batch_size):
        args = list(inputs_batch) + non_batched_inputs
        cur_result = predict_fn(*args)
        if num_outputs > 1:
            for i in xrange(num_outputs):
                results[i].append(cur_result[i])
        else:
            results[0].append(cur_result)

    if num_outputs > 1:
        return tuple(np.concatenate(results[i]) for i in xrange(num_outputs))
    else:
        return np.concatenate(results[0])
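
For illustration, a hypothetical call of _predict_batch_by_batch might look like the snippet below (fake_predict_fn stands in for a compiled theano function, and get_training_batch is assumed to behave like the sketch shown after Example #1). Per the docstring, the result matches one full-size call, but peak GPU memory is bounded by the batch size:

import numpy as np

def fake_predict_fn(x, y, scale):
    # Stand-in for a compiled theano function with two outputs.
    return x + y, (x - y) * scale

x_val = np.random.rand(1000, 8)
y_val = np.random.rand(1000, 8)

sums, diffs = _predict_batch_by_batch(fake_predict_fn,
                                       batched_inputs=[x_val, y_val],
                                       non_batched_inputs=[0.5],  # passed unchanged to every batch call
                                       batch_size=256,
                                       num_outputs=2)

# Same values as a single full-size call, just computed batch by batch:
assert np.allclose(sums, x_val + y_val)
assert np.allclose(diffs, (x_val - y_val) * 0.5)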
Example #3
def train_model(nn_model):
    _logger.info('\nDefault model save path:\n{}\n'.format(
        nn_model.model_save_path))

    datasets_collection = _get_datasets(nn_model, nn_model.is_reverse_model)
    _logger.info('Finished preprocessing! Start training')

    batch_id = 0
    best_val_perplexities = (float('inf'), float('inf'))
    cur_val_metrics = None

    batches_num = (datasets_collection.train.x.shape[0] + BATCH_SIZE -
                   1) // BATCH_SIZE
    # Adding (BATCH_SIZE - 1) gives a ceiling division, so the last batch,
    # which may be smaller than BATCH_SIZE, is still counted

    cur_loss = 0
    total_training_time = 0
    start_time = time.time()

    for epoch_id in range(EPOCHS_NUM):
        _logger.info('Starting epoch #{}'.format(epoch_id))

        for train_batch in get_training_batch(
                datasets_collection.train,
                BATCH_SIZE,
                random_permute=SHUFFLE_TRAINING_BATCHES):
            train_stats = TrainStats(
                cur_batch_id=batch_id,
                batches_num=batches_num,
                start_time=start_time,
                total_training_time=total_training_time,
                cur_loss=cur_loss,
                best_val_perplexities=best_val_perplexities,
                cur_val_metrics=cur_val_metrics)

            best_val_perplexities, cur_val_metrics = \
                _analyse_model_performance_and_dump_results(nn_model, datasets_collection, train_stats)

            best_val_perplexities = \
                _update_saved_nn_model(nn_model, cur_val_metrics, best_val_perplexities, train_stats)

            prev_time = time.time()

            loss = nn_model.train(*train_batch)
            cur_loss = _get_decayed_avg_loss(cur_loss,
                                             loss) if batch_id else loss

            total_training_time += time.time() - prev_time
            batch_id += 1
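
Example #3 delegates the running loss to a _get_decayed_avg_loss helper that is not shown here. Judging from Examples #6 and #7 below, which inline the same computation with a LOG_LOSS_DECAY constant, it is presumably an exponential moving average; a minimal sketch under that assumption:

LOG_LOSS_DECAY = 0.99  # assumed value; the real constant comes from the project's config

def _get_decayed_avg_loss(avg_loss, new_loss, decay=LOG_LOSS_DECAY):
    # Exponential moving average of the per-batch loss, mirroring the inline
    # formula used in Examples #6 and #7.
    return decay * avg_loss + (1 - decay) * new_loss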
Example #4
    def _get_training_batch_generator(self):
        # set unique random seed for different workers to correctly process batches in multi-gpu training
        horovod_seed = self._horovod.rank() if self._horovod else 0
        epoch_id = 0

        while True:  # infinite batch generator
            epoch_id += 1

            for train_batch in get_training_batch(
                    self._training_data,
                    self._params.train_batch_size,
                    random_permute=SHUFFLE_TRAINING_BATCHES,
                    random_seed=RANDOM_SEED * epoch_id + horovod_seed):

                context_tokens_ids, response_tokens_ids, condition_id = train_batch
                # response tokens are wrapped with _start_ and _end_ tokens
                # output shape == (batch_size, seq_len)

                # get input response ids by removing last sequence token (_end_)
                input_response_tokens_ids = response_tokens_ids[:, :-1]
                # output shape == (batch_size, seq_len - 1)

                # get target response ids by removing the first (_start_) token of the sequence
                target_response_tokens_ids = response_tokens_ids[:, 1:]
                # output shape == (batch_size, seq_len - 1)

                # workaround for using sparse_categorical_crossentropy loss
                # see https://github.com/tensorflow/tensorflow/issues/17150#issuecomment-399776510
                target_response_tokens_ids = np.expand_dims(
                    target_response_tokens_ids, axis=-1)
                # output shape == (batch_size, seq_len - 1, 1)

                init_dec_hs = np.zeros(shape=(context_tokens_ids.shape[0],
                                              self._decoder_depth,
                                              self._params.hidden_layer_dim),
                                       dtype=K.floatx())

                yield [
                    context_tokens_ids, input_response_tokens_ids,
                    condition_id, init_dec_hs
                ], target_response_tokens_ids
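
The slicing and expand_dims steps above are easier to follow on a tiny concrete batch; the token ids below are made up purely for illustration:

import numpy as np

# One batch of 2 responses, each wrapped as [_start_, t1, t2, _end_] token ids.
response_tokens_ids = np.array([[1, 7, 8, 2],
                                [1, 5, 6, 2]])

input_response_tokens_ids = response_tokens_ids[:, :-1]   # drop _end_   -> shape (2, 3)
target_response_tokens_ids = response_tokens_ids[:, 1:]   # drop _start_ -> shape (2, 3)

# Trailing axis so sparse_categorical_crossentropy sees shape (batch_size, seq_len - 1, 1)
target_response_tokens_ids = np.expand_dims(target_response_tokens_ids, axis=-1)
print(target_response_tokens_ids.shape)  # (2, 3, 1)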
Example #5
    def _get_batch_generator(self, input_data, batch_size):
        RANDOM_SEED = 7
        DECODER_DEPTH = 2
        UTT_HIDDEN_DIM = 600
        epoch_id = 0

        while True:  # infinite batch generator
            epoch_id += 1

            for train_batch in get_training_batch(input_data,
                                                  batch_size,
                                                  random_permute=False,
                                                  random_seed=RANDOM_SEED *
                                                  epoch_id):

                context_tokens_ids, response_tokens_ids = train_batch

                # response tokens are wrapped with _start_ and _end_ tokens
                # output shape == (batch_size, seq_len)

                # get input response ids by removing last sequence token (_end_)
                input_response_tokens_ids = response_tokens_ids[:, :-1]
                # output shape == (batch_size, seq_len - 1)

                # get target response ids by removing the first (_start_) token of the sequence
                target_response_tokens_ids = response_tokens_ids[:, 1:]
                # output shape == (batch_size, seq_len - 1)

                # workaround for using sparse_categorical_crossentropy loss
                # see https://github.com/tensorflow/tensorflow/issues/17150#issuecomment-399776510
                target_response_tokens_ids = np.expand_dims(
                    target_response_tokens_ids, axis=-1)
                # output shape == (batch_size, seq_len - 1, 1)

                init_dec_hs = np.zeros(shape=(context_tokens_ids.shape[0],
                                              DECODER_DEPTH, UTT_HIDDEN_DIM),
                                       dtype=K.floatx())

                yield [
                    context_tokens_ids, input_response_tokens_ids, init_dec_hs
                ], target_response_tokens_ids
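
Generators like the two above yield ([inputs...], targets) pairs in the format Keras expects, so they would typically be fed to fit_generator (or fit in newer Keras versions). A hypothetical call from inside the same class, where self._model, input_data, batch_size and epochs_num are illustrative names and steps_per_epoch uses the same ceiling division as batches_num in Example #3:

# Hypothetical usage; self._model, input_data, batch_size and epochs_num are illustrative.
steps_per_epoch = (input_data[0].shape[0] + batch_size - 1) // batch_size

self._model.fit_generator(self._get_batch_generator(input_data, batch_size),
                          steps_per_epoch=steps_per_epoch,
                          epochs=epochs_num)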
Example #6
def train_model(nn_model, is_reverse_model=False):
    """
    Main function for training. Refactoring anticipated.
    """
    validation_prediction_mode = PREDICTION_MODES.sampling if is_reverse_model else PREDICTION_MODE_FOR_TESTS

    train = load_conditioned_train_set(nn_model.token_to_index,
                                       nn_model.condition_to_index)

    context_free_val = load_context_free_val(nn_model.token_to_index)

    context_sensitive_val = load_context_sensitive_val(
        nn_model.token_to_index, nn_model.condition_to_index)
    if is_reverse_model:
        service_tokens = ServiceTokensIDs(nn_model.token_to_index)
        train = reverse_nn_input(train, service_tokens)
        context_free_val = reverse_nn_input(context_free_val, service_tokens)
        context_sensitive_val = reverse_nn_input(context_sensitive_val,
                                                 service_tokens)

    # Train subset of same size as a context-free val for metrics calculation
    train_subset = generate_subset(train, VAL_SUBSET_SIZE)

    # Context-sensitive val subset of same size as a context-free val for metrics calculation
    context_sensitive_val_subset = generate_subset(context_sensitive_val,
                                                   VAL_SUBSET_SIZE)

    _logger.info('Finished preprocessing! Start training')

    batch_id = 0
    avg_loss = 0
    total_training_time = 0
    best_val_perplexities = (float('inf'), float('inf'))
    batches_num = (train.x.shape[0] - 1) / BATCH_SIZE + 1
    start_time = time.time()
    cur_val_metrics = None

    try:
        for epoches_counter in xrange(1, EPOCHES_NUM + 1):
            _logger.info(
                'Starting epoch #%d; time = %0.2f s (training of it = %0.2f s)'
                % (epoches_counter, time.time() - start_time,
                   total_training_time))

            for train_batch in get_training_batch(
                [train.x, train.y, train.condition_ids],
                    BATCH_SIZE,
                    random_permute=SHUFFLE_TRAINING_BATCHES):
                x_train_batch, y_train_batch, condition_ids_train_batch = train_batch

                batch_id += 1
                prev_time = time.time()
                loss = nn_model.train(x_train_batch, y_train_batch,
                                      condition_ids_train_batch)

                cur_time = time.time()
                total_training_time += cur_time - prev_time
                total_time = cur_time - start_time
                avg_loss = LOG_LOSS_DECAY * avg_loss + (
                    1 - LOG_LOSS_DECAY) * loss if batch_id > 1 else loss

                progress = 100 * float(batch_id) / batches_num
                avr_time_per_sample = total_time / batch_id
                expected_time_per_epoch = avr_time_per_sample * batches_num

                # use print here for better readability
                _logger.info('batch %s / %s (%d%%) \t'
                             'loss: %.2f \t '
                             'time: epoch %.1f h | '
                             'total %0.1f h | '
                             'train %0.1f h (%.1f%%)' %
                             (batch_id, batches_num, progress, avg_loss,
                              expected_time_per_epoch / 3600,
                              total_time / 3600, total_training_time / 3600,
                              100 * total_training_time / total_time))

                if batch_id % SCREEN_LOG_FREQUENCY_PER_BATCHES == 0:
                    _log_sample_answers(
                        context_free_val.x[:SCREEN_LOG_NUM_TEST_LINES],
                        nn_model, validation_prediction_mode, is_reverse_model)

                if batch_id % LOG_FREQUENCY_PER_BATCHES == 0:
                    _calc_and_save_train_metrics(nn_model, train_subset,
                                                 avg_loss)

                    val_metrics = _calc_and_save_val_metrics(
                        nn_model,
                        context_sensitive_val_subset,
                        context_free_val,
                        prediction_mode=validation_prediction_mode)
                    _save_val_results(
                        nn_model,
                        context_free_val.x,
                        context_sensitive_val_subset.x,
                        val_metrics,
                        train_info=(start_time, batch_id, batches_num),
                        prediction_mode=validation_prediction_mode)
                    cur_val_metrics = val_metrics

                    best_val_perplexities = \
                        _update_saved_nn_model(nn_model,
                                               (val_metrics['context_free_perplexity'],
                                                val_metrics['context_sensitive_perplexity']),
                                               best_val_perplexities,
                                               is_reverse_model=is_reverse_model)

    except (KeyboardInterrupt, SystemExit):
        _logger.info('Training cycle is stopped manually')
        _save_model(nn_model, get_model_full_path(is_reverse_model) + '_final')
        _save_val_results(nn_model,
                          context_free_val.x,
                          context_sensitive_val_subset.x,
                          cur_val_metrics,
                          train_info=(start_time, batch_id, batches_num),
                          suffix='_final',
                          prediction_mode=validation_prediction_mode)
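
Example #6 passes the current pair of validation perplexities to _update_saved_nn_model together with the best pair seen so far (initialized to infinity). The helper itself is not shown; a minimal sketch, assuming it simply keeps the model whose validation perplexities are lowest so far and reuses _save_model and get_model_full_path from the except block above:

def _update_saved_nn_model(nn_model, cur_perplexities, best_perplexities, is_reverse_model=False):
    # Hypothetical sketch: persist the model only when the summed context-free and
    # context-sensitive validation perplexities improve on the best seen so far.
    if sum(cur_perplexities) < sum(best_perplexities):
        _save_model(nn_model, get_model_full_path(is_reverse_model))
        return cur_perplexities
    return best_perplexities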
Example #7
def train_model(nn_model, is_reverse_model=False):
    """
    Main function for training. Refactoring anticipated.
    """
    validation_prediction_mode = PREDICTION_MODES.sampling if is_reverse_model else PREDICTION_MODE_FOR_TESTS

    train = load_conditioned_train_set(nn_model.token_to_index, nn_model.condition_to_index)

    context_free_val = load_context_free_val(nn_model.token_to_index)

    context_sensitive_val = load_context_sensitive_val(nn_model.token_to_index, nn_model.condition_to_index)
    if is_reverse_model:
        service_tokens = ServiceTokensIDs(nn_model.token_to_index)
        train = reverse_nn_input(train, service_tokens)
        context_free_val = reverse_nn_input(context_free_val, service_tokens)
        context_sensitive_val = reverse_nn_input(context_sensitive_val, service_tokens)

    # Train subset of same size as a context-free val for metrics calculation
    train_subset = generate_subset(train, VAL_SUBSET_SIZE)

    # Context-sensitive val subset of same size as a context-free val for metrics calculation
    context_sensitive_val_subset = generate_subset(context_sensitive_val, VAL_SUBSET_SIZE)

    _logger.info('Finished preprocessing! Start training')

    batch_id = 0
    avg_loss = 0
    total_training_time = 0
    best_val_perplexities = (float('inf'), float('inf'))
    batches_num = (train.x.shape[0] - 1) / BATCH_SIZE + 1
    start_time = time.time()
    cur_val_metrics = None

    try:
        for epoches_counter in xrange(1, EPOCHES_NUM + 1):
            _logger.info('Starting epoch #%d; time = %0.2f s (training of it = %0.2f s)' %
                         (epoches_counter, time.time() - start_time, total_training_time))

            for train_batch in get_training_batch(
                [train.x, train.y, train.condition_ids], BATCH_SIZE, random_permute=SHUFFLE_TRAINING_BATCHES):
                x_train_batch, y_train_batch, condition_ids_train_batch = train_batch

                batch_id += 1
                prev_time = time.time()
                loss = nn_model.train(x_train_batch, y_train_batch, condition_ids_train_batch)

                cur_time = time.time()
                total_training_time += cur_time - prev_time
                total_time = cur_time - start_time
                avg_loss = LOG_LOSS_DECAY * avg_loss + (1 - LOG_LOSS_DECAY) * loss if batch_id > 1 else loss

                progress = 100 * float(batch_id) / batches_num
                avr_time_per_sample = total_time / batch_id
                expected_time_per_epoch = avr_time_per_sample * batches_num

                # use print here for better readability
                _logger.info('batch %s / %s (%d%%) \t'
                             'loss: %.2f \t '
                             'time: epoch %.1f h | '
                             'total %0.1f h | '
                             'train %0.1f h (%.1f%%)' %
                             (batch_id, batches_num, progress, avg_loss, expected_time_per_epoch / 3600,
                              total_time / 3600, total_training_time / 3600, 100 * total_training_time / total_time))

                if batch_id % SCREEN_LOG_FREQUENCY_PER_BATCHES == 0:
                    _log_sample_answers(context_free_val.x[:SCREEN_LOG_NUM_TEST_LINES], nn_model,
                                        validation_prediction_mode, is_reverse_model)

                if batch_id % LOG_FREQUENCY_PER_BATCHES == 0:
                    _calc_and_save_train_metrics(nn_model, train_subset, avg_loss)

                    val_metrics = _calc_and_save_val_metrics(
                        nn_model,
                        context_sensitive_val_subset,
                        context_free_val,
                        prediction_mode=validation_prediction_mode)
                    _save_val_results(
                        nn_model,
                        context_free_val.x,
                        context_sensitive_val_subset.x,
                        val_metrics,
                        train_info=(start_time, batch_id, batches_num),
                        prediction_mode=validation_prediction_mode)
                    cur_val_metrics = val_metrics

                    best_val_perplexities = \
                        _update_saved_nn_model(nn_model,
                                               (val_metrics['context_free_perplexity'],
                                                val_metrics['context_sensitive_perplexity']),
                                               best_val_perplexities,
                                               is_reverse_model=is_reverse_model)

    except (KeyboardInterrupt, SystemExit):
        _logger.info('Training cycle is stopped manually')
        _save_model(nn_model, get_model_full_path(is_reverse_model) + '_final')
        _save_val_results(
            nn_model,
            context_free_val.x,
            context_sensitive_val_subset.x,
            cur_val_metrics,
            train_info=(start_time, batch_id, batches_num),
            suffix='_final',
            prediction_mode=validation_prediction_mode)