Пример #1
0
    def _compute_metrics(cls, editor, examples, num_evaluate_examples, noiser, batch_size=256, edit_dropout=False, draw_samples=False):
        """Compute the average loss and average BLEU of an editor on a sample.

        Args:
            editor: object providing ``loss(batch, draw_samples)`` and
                ``edit(examples)``.
            examples: pool of examples the evaluation sample is drawn from.
            num_evaluate_examples: size argument passed to ``sample_if_large``.
            noiser: callable applied to the sample when ``edit_dropout`` is set.
            batch_size: chunk size used when computing the loss.
            edit_dropout: if true, evaluate on ``noiser(sample)`` instead of
                the raw sample.
            draw_samples: forwarded unchanged to ``editor.loss``.

        Returns:
            tuple: ``(loss, avg_bleu, edit_traces)`` where ``loss`` is the
                batch-size-weighted mean of the per-batch losses, ``avg_bleu``
                is the mean BLEU over the evaluated examples, and
                ``edit_traces`` is the second value returned by ``editor.edit``.
        """
        # Draw the sample under random_seed(0) (presumably so that the same
        # subset is picked on every call -- confirm against random_seed).
        with random_seed(0):
            sample = sample_if_large(examples, num_evaluate_examples, replace=False)
        eval_examples = noiser(sample) if edit_dropout else sample

        # Loss is computed batch by batch, in case the whole sample is too
        # large to fit in GPU memory at once.
        batch_sizes = []
        batch_losses = []
        for batch in chunks(eval_examples, batch_size):
            batch_sizes.append(len(batch))
            loss_var, _, _ = editor.loss(batch, draw_samples)
            batch_losses.append(loss_var.data[0])
        batch_losses = np.array(batch_losses)
        batch_sizes = np.array(batch_sizes)
        # Weight each batch by its size so a short final batch does not skew the mean.
        loss = np.sum(batch_losses * batch_sizes) / np.sum(batch_sizes)

        # BLEU of the first candidate for each example against its target.
        # outputs is a list(over batches)[ list(over beams) [ list(over tokens) [ unicode ] ] ] object.
        outputs, edit_traces = editor.edit(eval_examples)
        bleus = [bleu(ex.target_words, beams[0])
                 for ex, beams in izip(eval_examples, outputs)]
        avg_bleu = np.mean(bleus)
        return loss, avg_bleu, edit_traces
Пример #2
0
    def __init__(self,
                 filenames,
                 filename_to_examples,
                 relative_path=True,
                 shuffle=True):
        """Load the examples found in one or more files.

        Args:
            filenames (unicode or list[unicode]): a single file name, or a
                list of file names
            filename_to_examples: a callable that takes a filename
                and yields Examples
            relative_path: if true, each filename is joined onto
                DataDirectory.root before being read
            shuffle: if true, the loaded examples are shuffled in place
                inside random_seed(42)
        """
        self._examples = []
        # A lone name is treated as a one-element list.
        if isinstance(filenames, basestring):
            filenames = [filenames]

        for name in filenames:
            path = os.path.join(DataDirectory.root, name) if relative_path else name
            self._examples.extend(filename_to_examples(path))

        if shuffle:
            with random_seed(42):
                random.shuffle(self._examples)

        shuffle_note = 'shuffled' if shuffle else 'not shuffled'
        # The log reports the names as given, not the resolved paths.
        logging.info('Read {} examples ({}) from {}'.format(
            len(self._examples), shuffle_note, filenames))
Пример #3
0
def emulate_distribution(shape, target_samples, seed=None):
    """Draw normal samples whose mean and std are taken from ``target_samples``.

    Args:
        shape: ``size`` argument passed to ``np.random.normal``.
        target_samples: values whose mean and standard deviation
            parameterize the normal distribution.
        seed: value handed to ``random_seed`` for the duration of the draw.

    Returns:
        The drawn samples, cast to ``np.float32``.
    """
    loc, scale = np.mean(target_samples), np.std(target_samples)

    # Only the draw itself happens inside the seeded context.
    with random_seed(seed):
        drawn = np.random.normal(loc=loc, scale=scale, size=shape)

    return drawn.astype(np.float32)
Пример #4
0
def emulate_distribution(shape, target_samples, seed=None):
    """Draw normal samples whose mean and std are taken from ``target_samples``.

    Args:
        shape: ``size`` argument passed to ``np.random.normal``.
        target_samples: values whose mean and standard deviation
            parameterize the normal distribution.
        seed: value handed to ``random_seed`` for the duration of the draw.

    Returns:
        The drawn samples, exactly as returned by ``np.random.normal``.
    """
    loc = np.mean(target_samples)
    scale = np.std(target_samples)

    # Only the draw itself happens inside the seeded context.
    with random_seed(seed):
        return np.random.normal(loc=loc, scale=scale, size=shape)
Пример #5
0
    def _initialize_train_state(cls, config):
        """Build the initial TrainState used to begin training.

        Args:
            config: configuration object; this method reads ``seed``,
                ``editor``, ``num_iter``, ``eps``, ``momentum`` and
                ``optim.learning_rate`` from it.

        Returns:
            TrainState: holds a freshly built editor, an Adam optimizer over
                its parameters, a new RandomState, and zeroed ``train_steps``
                and ``max_grad_norm``.
        """
        # Editor, optimizer and RandomState are all created inside the seeded
        # context, in this order.
        with random_seed(config.seed):
            editor = cls._build_editor(
                config.editor, config.num_iter, config.eps, config.momentum)
            optimizer = optim.Adam(
                editor.parameters(), lr=config.optim.learning_rate)
            random_state = RandomState()

        return TrainState(
            editor=editor,
            optimizer=optimizer,
            train_steps=0,
            random_state=random_state,
            max_grad_norm=0,
        )
Пример #6
0
    def evaluate(self, step):
        """Evaluate on samples of the train and valid examples and log the results.

        Draws a sample of ``self.config.num_evaluate_examples`` from each of
        ``self.train_examples`` and ``self.valid_examples`` (each inside its
        own ``random_seed(0)`` context), evaluates both with
        ``evaluate_on_examples``, and writes JSON and TensorBoard summaries.

        Args:
            step: the current step; used in the printed message and passed to
                the evaluation and summary calls.
        """
        # Parenthesized single-argument form: prints the same text under the
        # Python 2 print statement and is also valid Python 3 syntax.
        print('Evaluate at step {}'.format(step))
        num_examples = self.config.num_evaluate_examples

        # Each sample is drawn under its own random_seed(0) context.
        with random_seed(0):
            train_sample = sample_if_large(self.train_examples,
                                           num_examples,
                                           replace=False)
        with random_seed(0):
            valid_sample = sample_if_large(self.valid_examples,
                                           num_examples,
                                           replace=False)

        train_eval = self.evaluate_on_examples(step, train_sample,
                                               self.train_visualizer)
        valid_eval = self.evaluate_on_examples(step, valid_sample,
                                               self.valid_visualizer)

        # Write JSON and TensorBoard summaries, train first, then valid.
        for evaluation in (train_eval, valid_eval):
            evaluation.json_summarize(self.workspace.codalab, step)
            evaluation.tboard_summarize(self.tb_logger, step)
Пример #7
0
        def evaluate_helper(examples, prefix):
            """Evaluate a sample of ``examples`` and record it under ``prefix``.

            Relies on ``num_samples``, ``step``, ``silent_visualizer`` and
            ``full_eval_path`` from the enclosing scope.

            Args:
                examples: pool of examples the sample is drawn from.
                prefix: prefix passed to every summary call.
            """
            with random_seed(0):
                sample = sample_if_large(examples, num_samples, replace=False)
            evaluation = self.evaluate_on_examples(
                step=step, examples=sample, visualizer=silent_visualizer)

            # wrap with BernoulliSequenceStat, for conf intervals
            for name, stat in evaluation.stats.items():
                if not name.startswith('denoAcc'):
                    continue
                evaluation.stats[name] = BernoulliSequenceStat(stat)

            # Append to the evaluation file rather than overwriting it.
            with open(full_eval_path, 'a') as out:
                evaluation.summarize(out, prefix=prefix)

            evaluation.tboard_summarize(self.tb_logger, step, prefix=prefix)
            evaluation.json_summarize(self.workspace.codalab, step, prefix=prefix)
Пример #8
0
    def __init__(self, filenames, filename_to_examples, relative_path=True, shuffle=True):
        """Build the dataset from the examples stored in the given files.

        Args:
            filenames (unicode or list[unicode]): one file name or a list of them
            filename_to_examples: a callable that takes a filename
                and yields Examples
            relative_path: whether to join each filename onto DataDirectory.root
            shuffle: whether to shuffle the loaded examples in place
                (done inside random_seed(42))
        """
        # Normalize a single name to a list so the loop below handles both forms.
        if isinstance(filenames, basestring):
            filenames = [filenames]

        self._examples = []
        for fname in filenames:
            resolved = fname
            if relative_path:
                resolved = os.path.join(DataDirectory.root, fname)
            self._examples.extend(filename_to_examples(resolved))

        if shuffle:
            with random_seed(42):
                random.shuffle(self._examples)

        # The message lists the names as passed in, not the resolved paths.
        status = 'shuffled' if shuffle else 'not shuffled'
        message = 'Read {} examples ({}) from {}'.format(len(self._examples), status, filenames)
        logging.info(message)
Пример #9
0
 def case_sample(examples):
     """Return a list of supervised ParseCases built from a sample of examples.

     The sample is drawn with ``sample_if_large(examples, 30)`` inside a
     ``random_seed(0)`` context.
     """
     with random_seed(0):
         sampled = sample_if_large(examples, 30)
     cases = examples_to_supervised_cases(sampled)
     return list(cases)