예제 #1
0
    def check_dev_disc(self):
        """Score the model on the development set; checkpoint on a new best.

        Trees returned by ``self.predict(self.dev_treebank)`` are written,
        one per line, to ``self.dev_pred_path`` and handed to ``evalb``
        together with ``self.dev_path``; the value ``evalb`` returns is
        treated as the dev F1. That score is logged to tensorboard under
        ``dev/f-score`` at ``self.current_epoch`` and stored in
        ``self.current_dev_fscore``. Only when it is strictly greater than
        ``self.best_dev_fscore`` are the best-epoch and best-score fields
        updated and ``self.save_checkpoint()`` called.
        """
        print('Evaluating F1 on development set...')

        # Dump the predictions to disk so that evalb can read them.
        predicted = self.predict(self.dev_treebank)
        with open(self.dev_pred_path, 'w') as pred_file:
            pred_file.write('\n'.join(predicted) + '\n')

        dev_fscore = evalb(
            self.evalb_dir,
            self.dev_pred_path,
            self.dev_path,
            self.dev_result_path,
            param_file=self.evalb_param_file,
        )
        print(f'Current dev F1 {dev_fscore}')

        # Record the score for this epoch.
        self.tensorboard_writer.add_scalar(
            'dev/f-score', dev_fscore, self.current_epoch)
        self.current_dev_fscore = dev_fscore

        # Nothing more to do unless this epoch beats the best score so far.
        if not (dev_fscore > self.best_dev_fscore):
            return

        print(
            f'Saving new best model to `{self.model_checkpoint_path}`...')
        self.best_dev_epoch = self.current_epoch
        self.best_dev_fscore = dev_fscore
        self.save_checkpoint()
예제 #2
0
파일: predict.py 프로젝트: daandouwe/thesis
def predict_perplexity_from_samples(args):
    """Estimate perplexity and F-score from stored proposal samples.

    Seeds numpy with ``args.numpy_seed``, loads the model at
    ``args.checkpoint``, wraps it in a ``GenerativeDecoder`` and calls
    ``predict_from_proposal_samples`` on ``args.proposal_samples``. The
    returned trees are written, one per line, next to ``args.outfile``
    (same base name, ``.trees`` extension) and passed to ``evalb`` along
    with ``args.infile`` and the matching ``.result`` path. Finally a
    two-line, tab-separated summary (header row, value row) is written to
    ``args.outfile``.

    Args:
        args: Object exposing ``checkpoint``, ``proposal_samples``,
            ``infile``, ``outfile``, ``numpy_seed``, ``num_samples``,
            ``alpha`` and ``evalb_dir`` attributes.
    """
    print('Predicting perplexity with Generative RNNG.')
    print(f'Loading model from `{args.checkpoint}`.')
    print(f'Loading proposal samples from `{args.proposal_samples}`.')
    print(f'Loading lines from directory `{args.infile}`.')
    print(f'Writing predictions to `{args.outfile}`.')

    np.random.seed(args.numpy_seed)

    decoder = GenerativeDecoder(
        model=load_model(args.checkpoint),
        num_samples=args.num_samples,
        alpha=args.alpha,
    )

    print('Computing perplexity...')
    decoded = decoder.predict_from_proposal_samples(args.proposal_samples)
    trees, perplexity = decoded

    # The tree and result files share the output file's base name.
    stem, _ = os.path.splitext(args.outfile)
    pred_path = stem + '.trees'
    result_path = stem + '.result'
    with open(pred_path, 'w') as tree_file:
        tree_file.write('\n'.join(trees) + '\n')
    fscore = evalb(args.evalb_dir, pred_path, args.infile, result_path)

    print(f'Results: {fscore} fscore, {perplexity} perplexity.')

    # (column name, value) pairs, in output order.
    summary = (
        ('proposals', args.proposal_samples),
        ('perplexity', perplexity),
        ('fscore', fscore),
        ('num-samples', args.num_samples),
        ('temp', args.alpha),
        ('seed', args.numpy_seed),
    )
    header, values = zip(*summary)
    with open(args.outfile, 'w') as out:
        print(*header, sep='\t', file=out)
        print(*values, sep='\t', file=out)
예제 #3
0
파일: predict.py 프로젝트: daandouwe/thesis
def predict_tree_file(args):
    """Parse the sentences of a tree file and score the predictions.

    Every non-empty line of ``args.infile`` is parsed with ``fromstring``
    and reduced to its ``.words()``. Those word sequences are fed to the
    model loaded from ``args.checkpoint``; the linearized predicted trees
    are written, one per line, to ``args.outfile`` and passed to ``evalb``
    together with ``args.infile`` and ``args.outfile + '.results'``.

    Args:
        args: Object exposing ``infile``, ``outfile``, ``model_type``,
            ``checkpoint`` and ``evalb_dir`` attributes.

    Raises:
        AssertionError: If ``args.infile`` does not exist.
        ValueError: If ``args.model_type`` is neither ``'disc'`` nor
            ``'gen'``. Previously this surfaced only later, as an unbound
            ``parser`` variable.
        SystemExit: If ``args.model_type`` is ``'gen'``, which is not
            supported yet.
    """
    import sys

    assert os.path.exists(args.infile), 'specifiy file to parse with --infile.'

    # Reject unknown model types before doing any work.
    if args.model_type not in ('disc', 'gen'):
        raise ValueError(
            f"unknown model type `{args.model_type}`: expected 'disc' or 'gen'.")

    print(f'Predicting trees for lines in `{args.infile}`.')

    if args.model_type == 'gen':
        # TODO: generative parsing is not wired up yet. The intended setup
        # builds a GenerativeDecoder, loads the model from args.checkpoint,
        # and optionally loads args.proposal_model / args.proposal_samples.
        sys.exit('Not yet...')

    with open(args.infile, 'r') as f:
        lines = [
            fromstring(line).words() for line in map(str.strip, f) if line
        ]

    print('Loading discriminative model...')
    parser = load_model(args.checkpoint)
    parser.eval()
    print('Done.')

    # parser.parse returns a pair; only the tree (first item) is used.
    trees = [
        tree.linearize() for tree, _ in map(parser.parse, tqdm(lines))
    ]

    pred_path = args.outfile
    result_path = args.outfile + '.results'
    # Save the predicted trees.
    with open(pred_path, 'w') as f:
        print('\n'.join(trees), file=f)
    # Score the trees.
    fscore = evalb(args.evalb_dir, pred_path, args.infile, result_path)
    print(
        f'Predictions saved in `{pred_path}`. Results saved in `{result_path}`.'
    )
    print(f'F-score {fscore:.2f}.')
예제 #4
0
    def check_dev_gen(self):
        """Score the generative model on the dev set; checkpoint on a new best.

        A ``GenerativeDecoder`` wrapping ``self.parser`` decodes from
        ``self.dev_proposal_samples`` and returns trees plus a perplexity.
        The trees are written, one per line, to ``self.dev_pred_path`` and
        passed to ``evalb``, whose return value is treated as the dev F1.
        Both numbers are logged to tensorboard (``dev/f-score`` and
        ``dev/perplexity``) at ``self.current_epoch`` and stored as the
        current dev scores.

        Model selection uses perplexity, not F1: a checkpoint is saved only
        when the new perplexity is strictly lower than
        ``self.best_dev_perplexity``.
        """
        print(
            f'Evaluating F1 and perplexity on development set using {self.num_dev_samples} samples...'
        )

        decoder = GenerativeDecoder(
            model=self.parser, num_samples=self.num_dev_samples)
        decoded = decoder.predict_from_proposal_samples(
            inpath=self.dev_proposal_samples, unlabeled=self.unlabeled)
        predicted, dev_perplexity = decoded

        # Dump the predictions to disk so that evalb can read them.
        with open(self.dev_pred_path, 'w') as pred_file:
            pred_file.write('\n'.join(predicted) + '\n')

        dev_fscore = evalb(
            self.evalb_dir,
            self.dev_pred_path,
            self.dev_path,
            self.dev_result_path,
            param_file=self.evalb_param_file,
        )

        print(
            f'Current dev F1 {dev_fscore}, current dev perplexity {dev_perplexity}'
        )

        # Record both metrics for this epoch.
        writer = self.tensorboard_writer
        writer.add_scalar('dev/f-score', dev_fscore, self.current_epoch)
        writer.add_scalar('dev/perplexity', dev_perplexity, self.current_epoch)

        self.current_dev_fscore = dev_fscore
        self.current_dev_perplexity = dev_perplexity

        # Stop here unless perplexity improved on the best seen so far.
        if not (dev_perplexity < self.best_dev_perplexity):
            return

        print(
            f'Saving new best model to `{self.model_checkpoint_path}`...')
        self.best_dev_epoch = self.current_epoch
        self.best_dev_perplexity = dev_perplexity
        self.best_dev_fscore = dev_fscore
        self.save_checkpoint()
예제 #5
0
    def check_test_gen(self):
        """Score the checkpointed generative model on the test set.

        Calls ``self.load_checkpoint()`` and ``self.parser.eval()``, then
        decodes from ``self.test_proposal_samples`` with a
        ``GenerativeDecoder`` wrapping ``self.parser``. The returned trees
        are written, one per line, to ``self.test_pred_path`` and passed to
        ``evalb``, whose return value is treated as the test F1. F1 and
        perplexity are logged to tensorboard (``test/f-score`` and
        ``test/perplexity``) at ``self.current_epoch`` and stored in
        ``self.test_fscore`` and ``self.test_perplexity``.
        """
        print(
            f'Evaluating F1 and perplexity on test set using {self.num_test_samples} samples...'
        )

        loading_message = (
            f'Loading best saved model from `{self.model_checkpoint_path}` '
            f'(epoch {self.best_dev_epoch}, fscore {self.best_dev_fscore}, perplexity {self.best_dev_perplexity})...'
        )
        print(loading_message)
        self.load_checkpoint()
        self.parser.eval()

        decoder = GenerativeDecoder(
            model=self.parser, num_samples=self.num_test_samples)
        decoded = decoder.predict_from_proposal_samples(
            inpath=self.test_proposal_samples, unlabeled=self.unlabeled)
        predicted, test_perplexity = decoded

        # Dump the predictions to disk so that evalb can read them.
        with open(self.test_pred_path, 'w') as pred_file:
            pred_file.write('\n'.join(predicted) + '\n')

        test_fscore = evalb(
            self.evalb_dir,
            self.test_pred_path,
            self.test_path,
            self.test_result_path,
            param_file=self.evalb_param_file,
        )

        # Record both metrics at the current epoch.
        writer = self.tensorboard_writer
        writer.add_scalar('test/f-score', test_fscore, self.current_epoch)
        writer.add_scalar('test/perplexity', test_perplexity,
                          self.current_epoch)

        self.test_fscore = test_fscore
        self.test_perplexity = test_perplexity
예제 #6
0
    def check_test_disc(self):
        """Score the checkpointed discriminative model on the test set.

        Calls ``self.load_checkpoint()``, predicts trees for
        ``self.test_treebank`` and writes them, one per line, to
        ``self.test_pred_path``. The file is passed to ``evalb`` together
        with ``self.test_path``; the value ``evalb`` returns is treated as
        the test F1. That score is logged to tensorboard under
        ``test/f-score`` at ``self.current_epoch`` and stored in
        ``self.test_fscore``.
        """
        print('Evaluating F1 on test set...')

        loading_message = (
            f'Loading best saved model from `{self.model_checkpoint_path}` '
            f'(epoch {self.best_dev_epoch}, fscore {self.best_dev_fscore})...')
        print(loading_message)
        self.load_checkpoint()

        # Dump the predictions to disk so that evalb can read them.
        predicted = self.predict(self.test_treebank)
        with open(self.test_pred_path, 'w') as pred_file:
            pred_file.write('\n'.join(predicted) + '\n')

        test_fscore = evalb(
            self.evalb_dir,
            self.test_pred_path,
            self.test_path,
            self.test_result_path,
            param_file=self.evalb_param_file,
        )

        # Record the score at the current epoch.
        self.tensorboard_writer.add_scalar(
            'test/f-score', test_fscore, self.current_epoch)

        self.test_fscore = test_fscore