def run_pipeline(config_files, out_fn):
    # In pipeline mode we have two config files, one per stage:
    # first syntactic ordering, then morphological inflection
    # (surface realization of the word forms).

    # Stage 1
    syn_config_fn = config_files[0]
    syn_config_d = load_yaml_config(syn_config_fn)
    log_fn = '%s.prediction_log' % (os.path.abspath(out_fn))
    logger = set_logging(logging.DEBUG, log_fn)

    fix_seed(syn_config_d['random_seed'])

    logger.info('PIPELINE MODE')
    logger.info('STAGE 1: syntactic ordering')

    syn_config_d['output_fn'] = out_fn
    syn_engine = Engine(syn_config_d, mode='predict')
    syn_engine.setup()
    depgraphs = syn_engine.run()
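    # `depgraphs` maps a data split name (e.g. 'dev') to a list of dependency
    # graphs; the prediction and saving loops below rely on this layout.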

    # Stage 2
    logger.info('STAGE 2: morphological inflection')
    morph_config_fn = config_files[1]
    morph_config_d = load_yaml_config(morph_config_fn)
    morph_config_d['output_fn'] = out_fn
    morph_engine = Engine(morph_config_d, mode='predict')
    morph_engine.setup()

    # load the weights
    morph_model_fname = morph_engine.config["model_fn"]
    load_model(morph_engine.model_module, morph_model_fname)

    if use_cuda:
        morph_engine.model_module.cuda()

    assert isinstance(depgraphs, dict)
    logger.info('Predicting on Syn outputs ...')
    for data_split, digraphs in depgraphs.items():
        for dg in digraphs:  # can later examine digraphs, if needed
            for node_id in dg.graph['node_order']:
                form = morph_engine.nlgen.predict_from_dgnode(
                    morph_engine.model_module, morph_engine.data_module.vocab,
                    dg, node_id)

                dg.node[node_id]['PRED_FORM'] = form

    for data_split, dgs in depgraphs.items():
        fname = '%s.%s.final.txt' % (out_fn, data_split)
        logger.info('Saving Pipeline outputs (*%s*) to --> %s', data_split,
                    fname)
        targets, predicted_snts = save_final_predictions_from_dgs(dgs, fname)

        if data_split == 'dev':
            SynEvaluator.compute_final_scores(targets, predicted_snts)
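
# Usage sketch: a minimal way to invoke run_pipeline. The config paths and the
# output prefix below are hypothetical; the function only assumes a two-element
# list of YAML config files (stage 1, stage 2) and an output filename base.
if __name__ == '__main__':
    run_pipeline(['configs/syn_ordering.yaml',       # hypothetical stage-1 config
                  'configs/morph_inflection.yaml'],  # hypothetical stage-2 config
                 out_fn='predictions/pipeline_run')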
Example #2
    def start_prediction(self):
        stage_name = self.config['stage']
        model_fname = self.config["model_fn"]
        load_model(self.model_module, model_fname)

        output_fname_base = self.config.get('output_fn', None)
        if output_fname_base is None:
            output_fname_base = model_fname

        all_predictions = {}

        if use_cuda:
            self.model_module.cuda()

        if self.data_module.fnames.dev_fn is not None:
            predictions_fname_base = '%s.dev.%s' % (output_fname_base,
                                                    stage_name)
            predictions_fname = '%s.predictions' % predictions_fname_base
            reference_fname = '%s.reference' % predictions_fname_base
            errors_fname = '%s.errors' % predictions_fname_base

            dev_predictions = self.nlgen.predict_from_file(
                self.model_module, self.data_module.config['dev_data'],
                self.data_module.vocab)

            all_predictions['dev'] = dev_predictions

            # Saving predictions, references and evaluating the predictions
            self.nlgen.save_predictions(dev_predictions, predictions_fname)

        if self.data_module.fnames.test_fn is not None:
            predictions_fname_base = '%s.test.%s' % (output_fname_base,
                                                     stage_name)
            predictions_fname = '%s.predictions' % predictions_fname_base
            test_predictions = self.nlgen.predict_from_file(
                self.model_module, self.data_module.config['test_data'],
                self.data_module.vocab)

            self.nlgen.save_predictions(test_predictions, predictions_fname)
            all_predictions['test'] = test_predictions

        return all_predictions
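
    # Sketch of the config entries start_prediction reads (keys taken from the
    # code above; the example values are hypothetical):
    #   'stage'     -> e.g. 'syn'; used in the prediction filename suffix
    #   'model_fn'  -> checkpoint path passed to load_model
    #   'output_fn' -> optional output prefix; falls back to model_fn if absent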
Example #3
def run(config_dict):

    # Fetch all relevant modules.
    data_module = config_dict['data-module']
    model_module = config_dict['model-module']
    training_module = config_dict['training-module']
    evaluation_module = config_dict.get('evaluation-module', None)
    mode = config_dict['mode']

    # Load the modules
    DataClass = importlib.import_module(data_module).component
    ModelClass = importlib.import_module(model_module).component
    TrainingClass = importlib.import_module(training_module).component
    EvaluationClass = importlib.import_module(
        evaluation_module).component if evaluation_module else None

    model_dirname = make_model_dir(config_dict)
    logger = set_logger(config_dict["log_level"],
                        os.path.join(model_dirname, "log.txt"))

    # Setup the data
    data = DataClass(config_dict["data_params"])
    data.setup()

    # Setup the model
    fix_seed(config_dict['random_seed'])  # fix the random seed generators
    model = ModelClass(config_dict["model_params"])
    model.setup(data)  # some model parameters depend on the data, so pass it in

    if mode == "train":
        training_params = config_dict['training_params']
        trainer = TrainingClass(training_params)
        trainer.training_start(model, data)
        save_config(config_dict, os.path.join(model_dirname, 'config.json'))
    elif mode == "predict":
        assert evaluation_module is not None, "No evaluation module -- check config file!"
        evaluator = EvaluationClass(config_dict)
        model_fname = config_dict["model_fn"]
        load_model(model, model_fname)
        id2word = data.vocab.id2tok
        # predict on dev set
        if 'dev' in data.fnames:
            logger.info("Predicting on dev data")
            predicted_ids, attention_weights = evaluator.evaluate_model(
                model, data.dev[0])
            data_lexicalizations = data.lexicalizations['dev']
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)
            save_predictions_txt(predicted_snts,
                                 '%s.devset.predictions.txt' % model_fname)
        # predict on test set
        if 'test' in data.fnames:
            logger.info("Predicting on test data")
            predicted_ids, attention_weights = evaluator.evaluate_model(
                model, data.test[0])
            data_lexicalizations = data.lexicalizations['test']
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)
            save_predictions_txt(predicted_snts,
                                 '%s.testset.predictions.txt' % model_fname)
    else:
        logger.warning("Check the 'mode' field in the config file: %s", mode)

    logger.info('DONE')
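
# A hedged sketch of the config_dict that run() expects. The keys mirror the
# lookups in the function above; the module paths and file names are
# hypothetical, and each module is assumed to expose a `component` class.
example_config = {
    'data-module': 'components.data.my_dataset',
    'model-module': 'components.model.my_seq2seq',
    'training-module': 'components.trainer.my_trainer',
    'evaluation-module': 'components.evaluator.my_evaluator',  # required for 'predict'
    'mode': 'train',                     # 'train' or 'predict'
    'log_level': 'INFO',
    'random_seed': 1,
    'data_params': {},                   # passed to the data component
    'model_params': {},                  # passed to the model component
    'training_params': {},               # used only when mode == 'train'
    'model_fn': 'models/my_model.pt',    # used only when mode == 'predict'
}
# run(example_config)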
Example #4
def run(config_dict):

    # Fetch all relevant modules.
    data_module = config_dict['data-module']
    model_module = config_dict['model-module']
    training_module = config_dict['training-module']
    evaluation_module = config_dict.get('evaluation-module', None)
    mode = config_dict['mode']

    # Load the modules
    DataClass = importlib.import_module(data_module).component
    ModelClass = importlib.import_module(model_module).component
    TrainingClass = importlib.import_module(training_module).component
    EvaluationClass = importlib.import_module(
        evaluation_module).component if evaluation_module else None

    model_dirname = make_model_dir(config_dict)
    logger = set_logger(config_dict["log_level"],
                        os.path.join(model_dirname, "log.txt"))

    # Setup the data
    data = DataClass(config_dict["data_params"])
    data.setup()

    # Setup the model
    fix_seed(config_dict['random_seed'])  # fix the random seed generators
    model = ModelClass(config_dict["model_params"])
    print("build model done")
    model.setup(
        data)  # there are some data-specific params => pass data as arg
    print("setup data done")
    #print(len(data.lexicalizations['test']))
    if mode == "train":
        training_params = config_dict['training_params']
        trainer = TrainingClass(training_params)
        trainer.training_start(model, data)
        save_config(config_dict, os.path.join(model_dirname, 'config.json'))

    elif mode == "predict":
        assert evaluation_module is not None, "No evaluation module -- check config file!"
        evaluator = EvaluationClass(config_dict)
        model_fname = config_dict["model_fn"]
        load_model(model, model_fname)
        #print(model.state_dict())
        model = model.to('cuda')  # NOTE: assumes a CUDA device; see the device-guard sketch after this example
        id2word = data.vocab.id2tok
        beam_size = None  # e.g. 10 to enable beam search; None presumably selects greedy decoding
        alpha = 0.3  # decoding weight passed to evaluate_model; also used in the output filename suffix
        #"""
        if 'dev' in data.fnames:
            logger.info("Predicting on dev data")
            print(len(data.uni_mr['dev']), len(data.dev[0]),
                  len(data.lexicalizations['dev']))
            dec_snt_beam, fw_beam = [], []
            predicted_ids, fw_beam = evaluator.evaluate_model(
                model,
                data.dev[0],
                data.uni_mr['dev'],
                beam_size=beam_size,
                alpha=alpha)
            data_lexicalizations = data.lexicalizations['dev']
            #dec_snt_beam = []
            #for _ in range(beam_size):
            #    predicted_snt = evaluator.lexicalize_predictions(predicted_ids[_], data_lexicalizations, id2word)
            #    dec_snt_beam.append( predicted_snt )
            print(len(predicted_ids), len(data_lexicalizations))
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)
            #save_predictions_txt(predicted_snts, '%s.devset.predictions.txt_incre_0.7' % model_fname)
            save_predictions_txt(
                predicted_snts,
                '%s.devset.predictions.txt_incre_%.1f' % (model_fname, alpha))
            # for beam_idx in range(beam_size):
            #     predicted_ids, attention_weights = evaluator.evaluate_model(model, data.dev[0], data.uni_mr['dev'])
            #     data_lexicalizations = data.lexicalizations['dev']
            #     #print(len(predicted_ids), len(data_lexicalizations))
            #     predicted_snts = evaluator.lexicalize_predictions(predicted_ids,
            #                                                   data_lexicalizations,
            #                                                   id2word)
            #     fw_prob = [[x.item() for x in x_list ] for x_list in attention_weights]
            #     #print(len(fw_prob), len(fw_prob[0]))#, fw_prob)
            #     dec_snt_beam.append( predicted_snts  ), fw_beam.append( fw_prob  )
            #     save_predictions_txt(predicted_snts, '%s.devset.predictions.txt' % model_fname)
            #save_beam_fw(fw_beam, dec_snt_beam, beam_size, '%s.devset.recs.txt' % model_fname)
            exit()  # NOTE: stops after the dev split, so the test branch below is never reached
        #"""
        if 'test' in data.fnames:
            logger.info("Predicting on test data")
            print(len(data.test[0]))
            predicted_ids, attention_weights = evaluator.evaluate_model(
                model,
                data.test[0],
                data.uni_mr['test'],
                beam_size=beam_size,
                alpha=alpha)
            data_lexicalizations = data.lexicalizations['test']
            print(len(predicted_ids), len(data_lexicalizations))
            predicted_snts = evaluator.lexicalize_predictions(
                predicted_ids, data_lexicalizations, id2word)

            save_predictions_txt(
                predicted_snts,
                '%s.testset.predictions.txt_incre_%.1f' % (model_fname, alpha))

    else:
        logger.warning("Check the 'mode' field in the config file: %s", mode)

    logger.info('DONE')
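
# The predict branch above moves the model to 'cuda' unconditionally. A small
# guard like the sketch below (assuming the models are PyTorch nn.Modules, as
# the .cuda() calls elsewhere suggest) keeps the same code usable on CPU-only
# machines.
import torch

def to_available_device(model):
    # Prefer a GPU when one is present, otherwise stay on the CPU.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return model.to(device)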