# Imports assumed by the snippets below (from the surrounding codebase):
import numpy as np
from scipy.special import logsumexp  # older SciPy exposed this as scipy.misc.logsumexp
# Also assumed from the surrounding package: config, instance, iterators,
# progress, vectorizers, listener, and the helpers sample(),
# strip_invalid_tokens(), and beam_search_step() (sketched below).


def predict_and_score(self, eval_instances, random=False, verbosity=0):
    predictions = []
    scores = []
    batches = iterators.iter_batches(eval_instances, self.options.listener_eval_batch_size)
    num_batches = (len(eval_instances) - 1) // self.options.listener_eval_batch_size + 1

    if self.options.verbosity + verbosity >= 2:
        print('Testing')
    progress.start_task('Eval batch', num_batches)
    for batch_num, batch in enumerate(batches):
        progress.progress(batch_num)
        batch = list(batch)

        xs, (y,) = self._data_to_arrays(batch, test=True)

        probs = self.model.predict(xs)
        if random:
            indices = sample(probs)
            predictions.extend(indices)
        else:
            predictions.extend(probs.argmax(axis=1))
        scores_arr = np.log(probs[np.arange(len(batch)), y])
        scores.extend(scores_arr.tolist())
    progress.end_task()
    if self.options.verbosity >= 9:
        # The format arguments belong inside the print call; applying % to
        # print's return value was a bug.
        print('%s %ss:' % (self.id, 'sample' if random else 'prediction'))
        for inst, prediction in zip(eval_instances, predictions):
            print('%s -> %s' % (repr(inst.input), repr(prediction)))

    return predictions, scores
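# The sample() helper used throughout these functions is defined elsewhere in
# the codebase. A minimal sketch of the behavior the call sites assume: draw
# one index from a 1-D probability vector, or one index per distribution along
# the last axis of a higher-dimensional array. Illustrative only; the actual
# helper may differ.
def sample(probs):
    probs = np.asarray(probs, dtype=float)
    if probs.ndim == 1:
        # Single distribution: return one sampled index.
        return np.random.choice(len(probs), p=probs / probs.sum())
    # One distribution along the last axis for every leading index:
    # inverse-CDF sampling, vectorized over all leading dimensions.
    cumulative = np.cumsum(probs, axis=-1)
    thresholds = np.random.rand(*(probs.shape[:-1] + (1,))) * cumulative[..., -1:]
    return np.argmax(cumulative >= thresholds, axis=-1)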
def predict(self, eval_instances, random=False, verbosity=0):
    result = []
    batches = iterators.iter_batches(eval_instances, self.options.speaker_eval_batch_size)
    num_batches = (len(eval_instances) - 1) // self.options.speaker_eval_batch_size + 1
    eos_index = self.seq_vec.vectorize(['</s>'])[0]

    if self.options.verbosity + verbosity >= 2:
        print('Predicting')
    if self.options.verbosity + verbosity >= 1:
        progress.start_task('Predict batch', num_batches)
    for batch_num, batch in enumerate(batches):
        if self.options.verbosity + verbosity >= 1:
            progress.progress(batch_num)
        batch = list(batch)

        (c, _p, mask), (_y,) = self._data_to_arrays(batch, test=True)
        assert mask.all()  # We shouldn't be masking anything in prediction

        beam_size = 1 if random else self.options.speaker_beam_size
        done = np.zeros((len(batch), beam_size), dtype=bool)  # np.bool was removed in newer NumPy
        beam = np.zeros((len(batch), beam_size, self.seq_vec.max_len), dtype=np.int32)
        beam[:, :, 0] = self.seq_vec.vectorize(['<s>'])[0]
        # All hypotheses start at log prob -inf except the first in each beam.
        # (np.full avoids the divide-by-zero warning from np.log(np.zeros(...)).)
        beam_scores = np.full((len(batch), beam_size), -np.inf)
        beam_scores[:, 0] = 0.0
        c = np.repeat(c, beam_size, axis=0)
        mask = np.repeat(mask, beam_size, axis=0)

        for length in range(1, self.seq_vec.max_len):
            if done.all():
                break
            p = beam.reshape((beam.shape[0] * beam.shape[1], beam.shape[2]))[:, :-1]
            probs = self.model.predict([c, p, mask])
            if random:
                indices = sample(probs[:, length - 1, :])
                beam[:, 0, length] = indices
                # [:, np.newaxis] keeps shapes aligned: done is (batch, 1) here,
                # indices is (batch,); without it the logical_or broadcasts to
                # (batch, batch).
                done = np.logical_or(done, (indices == eos_index)[:, np.newaxis])
            else:
                assert probs.shape[1] == p.shape[1], (probs.shape[1], p.shape[1])
                assert probs.shape[2] == len(self.seq_vec.tokens), \
                    (probs.shape[2], len(self.seq_vec.tokens))
                scores = np.log(probs)[:, length - 1, :].reshape(
                    (beam.shape[0], beam.shape[1], probs.shape[2]))
                beam_search_step(scores, length, beam, beam_scores, done, eos_index)
        outputs = self.seq_vec.unvectorize_all(beam[:, 0, :])
        result.extend([' '.join(strip_invalid_tokens(o)) for o in outputs])
    if self.options.verbosity + verbosity >= 1:
        progress.end_task()
    return result
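# beam_search_step() is defined elsewhere in the codebase. Below is a plausible
# NumPy sketch of one step of batched beam search, consistent with the shapes
# used above (scores: (batch, beam, vocab) next-token log probs; beam,
# beam_scores, and done are updated in place). Illustrative only; the repo's
# actual implementation may differ.
def beam_search_step(scores, length, beam, beam_scores, done, eos_index):
    batch_size, beam_size, vocab_size = scores.shape
    # Finished hypotheses must not grow or change score: force them to
    # "continue" with </s> at log prob 0 and every other token at -inf.
    scores = scores.copy()
    scores[done, :] = -np.inf
    scores[done, eos_index] = 0.0
    # Total score of every (hypothesis, next token) pair, flattened per example.
    cand = (beam_scores[:, :, np.newaxis] + scores).reshape(batch_size, -1)
    best = np.argsort(-cand, axis=1)[:, :beam_size]  # top beam_size continuations
    src = best // vocab_size   # which hypothesis each continuation extends
    tok = best % vocab_size    # which token it appends
    rows = np.arange(batch_size)[:, np.newaxis]
    beam[:] = beam[rows, src]  # reorder hypotheses to match the survivors
    beam[:, :, length] = tok
    beam_scores[:] = cand[rows, best]
    done[:] = np.logical_or(done[rows, src], tok == eos_index)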
def sample(self, num_samples=1):
    indices = np.array([[sample(self.counts.get_value() * 1.0 / self.total.get_value())
                         for _t in range(self.vec.max_len)]
                        for _s in range(num_samples)], dtype=np.int32)
    return [instance.Instance(' '.join(strip_invalid_tokens(s)))
            for s in self.vec.unvectorize_all(indices)]
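# strip_invalid_tokens() is also defined elsewhere. A minimal sketch of the
# behavior the call sites assume: drop the start-of-sequence marker and
# truncate at the first end-of-sequence token, so that sampled padding past
# </s> is discarded. Illustrative; the actual helper may filter more symbols.
def strip_invalid_tokens(tokens):
    result = []
    for token in tokens:
        if token == '</s>':
            break  # everything after the first </s> is padding
        if token != '<s>':
            result.append(token)
    return result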
def predict_and_score(self, eval_instances, random=False, verbosity=0):
    options = config.options()
    predictions = []
    scores = []

    all_utts = self.base.seq_vec.tokens
    sym_vec = vectorizers.SymbolVectorizer()
    sym_vec.add_all(all_utts)
    prior_scores = self.prior_scores(all_utts)

    base_is_listener = (type(self.base) in listener.LISTENERS.values())

    # Integer division (each instance expands to len(all_utts) grid entries);
    # the max(..., 1) guard matches the L(S(L)) version below.
    true_batch_size = max(options.listener_eval_batch_size // len(all_utts), 1)
    batches = iterators.iter_batches(eval_instances, true_batch_size)
    num_batches = (len(eval_instances) - 1) // true_batch_size + 1

    if options.verbosity + verbosity >= 2:
        print('Testing')
    progress.start_task('Eval batch', num_batches)
    for batch_num, batch in enumerate(batches):
        progress.progress(batch_num)
        batch = list(batch)
        context = len(batch[0].alt_inputs) if batch[0].alt_inputs is not None else 0
        if context:
            output_grid = [(instance.Instance(utt, color)
                            if base_is_listener
                            else instance.Instance(color, utt))
                           for inst in batch
                           for color in inst.alt_inputs
                           for utt in sym_vec.tokens]
            assert len(output_grid) == context * len(batch) * len(all_utts), \
                'Context must be the same number of colors for all examples'
            true_indices = np.array([inst.input for inst in batch])
        else:
            output_grid = [(instance.Instance(utt, inst.input)
                            if base_is_listener
                            else instance.Instance(inst.input, utt))
                           for inst in batch
                           for utt in sym_vec.tokens]
            true_indices = sym_vec.vectorize_all([inst.input for inst in batch])
            if len(true_indices.shape) == 2:
                # Sequence vectorizer; we're only using single tokens for now.
                true_indices = true_indices[:, 0]
        # Use a separate name here: rebinding `scores` would clobber the
        # accumulator initialized above and corrupt the returned scores.
        grid_scores = self.base.score(output_grid, verbosity=verbosity)
        if context:
            log_probs = np.array(grid_scores).reshape((len(batch), context, len(all_utts)))
            orig_log_probs = log_probs[np.arange(len(batch)), true_indices, :]
            # Renormalize over only the context colors, and extract the score of
            # the true color.
            log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis, :]
            log_probs = log_probs[np.arange(len(batch)), true_indices, :]
        else:
            log_probs = np.array(grid_scores).reshape((len(batch), len(all_utts)))
            orig_log_probs = log_probs
        assert log_probs.shape == (len(batch), len(all_utts))
        # Add in the prior scores, if used (S1 \propto L0 * P)
        if prior_scores is not None:
            log_probs = log_probs + 0.5 * prior_scores
        if options.exhaustive_base_weight:
            w = options.exhaustive_base_weight
            log_probs = w * orig_log_probs + (1.0 - w) * log_probs
        # Normalize across utterances. Note that the listener returns probability
        # densities over colors.
        log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis]
        if random:
            pred_indices = sample(np.exp(log_probs))
        else:
            pred_indices = np.argmax(log_probs, axis=1)
        predictions.extend(sym_vec.unvectorize_all(pred_indices))
        scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
    progress.end_task()

    return predictions, scores
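# A tiny numeric check of the context renormalization used above, assuming
# logsumexp comes from scipy.special (an assumption; older code imported it
# from scipy.misc). One example, 2 context colors, 3 utterances: subtracting
# logsumexp over axis 1 turns each utterance column into a proper distribution
# over the context colors.
def _demo_context_renormalization():
    log_probs = np.log(np.array([[[0.2, 0.5, 0.1],
                                  [0.1, 0.3, 0.3]]]))  # (batch=1, context=2, utts=3)
    log_probs -= logsumexp(log_probs, axis=1)[:, np.newaxis, :]
    # Each utterance column now sums to 1 over the context colors.
    assert np.allclose(np.exp(log_probs).sum(axis=1), 1.0)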
def predict_and_score(self, eval_instances, random=False, verbosity=0):
    options = self.get_options()
    predictions = []
    scores = []

    if options.verbosity + verbosity >= 2:
        print('Building alternative utterance list')
    sym_vec = vectorizers.SymbolVectorizer()
    sym_vec.add_all([inst.input for inst in self.get_dataset(self.base)])

    assert eval_instances[0].alt_outputs, \
        'Context required for L(S(L)): %s' % eval_instances[0].__dict__
    context_len = len(eval_instances[0].alt_outputs)
    if options.exhaustive_num_samples > 0:
        num_alt_utts = options.exhaustive_num_samples * context_len + 1
        num_sample_sets = options.exhaustive_num_sample_sets
    else:
        num_alt_utts = len(sym_vec.tokens) + 1
        num_sample_sets = 1
    # Integer division: each instance expands to a full grid of entries.
    true_batch_size = max(options.listener_eval_batch_size //
                          (num_alt_utts * num_sample_sets * context_len), 1)
    batches = iterators.iter_batches(eval_instances, true_batch_size)
    num_batches = (len(eval_instances) - 1) // true_batch_size + 1

    if options.exhaustive_output_speaker_samples:
        self.truncate_utterances_files('s1_samples.%s.jsons', num_sample_sets)
    if options.exhaustive_output_speaker_predictions:
        self.truncate_utterances_files('s1_predictions.%s.jsons', num_sample_sets)
    if options.exhaustive_output_all_grids:
        self.truncate_utterances_files('grids.%s.jsons.gz', 1)

    if options.verbosity + verbosity >= 2:
        print('Testing')
    progress.start_task('Eval batch', num_batches)
    for batch_num, batch in enumerate(batches):
        progress.progress(batch_num)
        batch = list(batch)

        output_grid = self.build_grid(batch, sym_vec.tokens)
        assert len(output_grid) == len(batch) * num_sample_sets * context_len * num_alt_utts, \
            'Context must be the same number of colors for all examples %s' % \
            ((len(output_grid), len(batch), num_sample_sets, context_len, num_alt_utts),)
        true_indices = np.array([inst.output for inst in batch])

        grid_scores = self.base.score(output_grid, verbosity=verbosity)
        l0_log_probs = np.array(grid_scores).reshape(
            (len(batch), num_sample_sets, context_len, num_alt_utts))
        # Renormalize over only the context colors, and extract the score of
        # the true color according to the base model.
        l0_log_probs -= logsumexp(l0_log_probs, axis=2)[:, :, np.newaxis, :]
        assert l0_log_probs.shape == (len(batch), num_sample_sets,
                                      context_len, num_alt_utts), l0_log_probs.shape
        orig_log_probs = l0_log_probs[np.arange(len(batch)), 0, :, 0]
        assert orig_log_probs.shape == (len(batch), context_len), orig_log_probs.shape
        # Apply temperature parameter before speaker.
        utilities = options.exhaustive_inv_temperature * l0_log_probs
        # Normalize across utterances. Note that the listener returns probability
        # densities over colors.
        s1_log_probs = utilities - logsumexp(utilities, axis=3)[:, :, :, np.newaxis]
        assert s1_log_probs.shape == (len(batch), num_sample_sets,
                                      context_len, num_alt_utts), s1_log_probs.shape

        if options.exhaustive_output_speaker_samples or \
                options.exhaustive_output_speaker_predictions:
            speaker_dist = s1_log_probs[np.arange(len(batch)), :, true_indices, 1:]
            if options.exhaustive_output_speaker_samples:
                speaker_sample_indices = sample(np.exp(speaker_dist))
                self.write_speaker_utterances('s1_samples.%s.jsons', output_grid,
                                              speaker_sample_indices, l0_log_probs.shape)
            if options.exhaustive_output_speaker_predictions:
                speaker_pred_indices = np.argmax(speaker_dist, axis=2)
                self.write_speaker_utterances('s1_predictions.%s.jsons', output_grid,
                                              speaker_pred_indices, l0_log_probs.shape)

        # Normalize again across context colors.
        l2_log_probs = s1_log_probs - logsumexp(s1_log_probs, axis=2)[:, :, np.newaxis, :]
        assert l2_log_probs.shape == (len(batch), num_sample_sets,
                                      context_len, num_alt_utts), l2_log_probs.shape
        # Extract the score of each color for the input utterance according to
        # the L2 model.
        log_probs = l2_log_probs[np.arange(len(batch)), :, :, 0]
        assert log_probs.shape == (len(batch), num_sample_sets, context_len), log_probs.shape
        # Blend L0 and L2 (if enabled) to produce final score.
        if options.exhaustive_base_weight:
            w = options.exhaustive_base_weight
            # Clip log probs at -52.0 (prob ~= 3e-23): zero probabilities used to
            # give a harmless -inf, but with w < 0 they would produce NaNs.
            log_probs = (w * np.maximum(orig_log_probs[:, np.newaxis, :], -52.0) +
                         (1.0 - w) * np.maximum(log_probs, -52.0))
            # Normalize across context one more time to prevent cheating when
            # blending.
            log_probs -= logsumexp(log_probs, axis=2)[:, :, np.newaxis]
        # Average (in probability space) over sample sets.
        log_probs = logsumexp(log_probs, axis=1) - np.log(log_probs.shape[1])

        if options.exhaustive_output_all_grids:
            self.write_grids(output_grid, l0_log_probs, s1_log_probs,
                             l2_log_probs, log_probs)

        if random:
            pred_indices = sample(np.exp(log_probs))
        else:
            pred_indices = np.argmax(log_probs, axis=1)
        predictions.extend(pred_indices)
        # Extract the score of the true color according to the combined model.
        scores.extend(log_probs[np.arange(len(batch)), true_indices].tolist())
    progress.end_task()

    return predictions, scores
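# The "average in probability space over sample sets" step above is a stable
# computation of log(mean(exp(log_probs), axis=1)). A tiny check of that
# equivalence, again assuming scipy.special.logsumexp:
def _demo_sample_set_average():
    # (batch=4, sets=2, context=3) normalized distributions over context.
    log_probs = np.log(np.random.dirichlet(np.ones(3), size=(4, 2)))
    averaged = logsumexp(log_probs, axis=1) - np.log(log_probs.shape[1])
    assert np.allclose(np.exp(averaged), np.exp(log_probs).mean(axis=1))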