Exemplo n.º 1
0
def _get_learn_job(lconf, rconf, subpack, paths, task):
    """Return the deferred learning job for `task`, or None if not needed.

    The learner configuration and the model output path are picked from
    `rconf` and `paths` according to `task`; for `Task.relate` the attach
    configuration is used when `rconf.relate` is falsy.

    Returns None when the selected configuration has the key 'oracle', and
    also when a file already exists at the output path (in which case a
    "reusing" notice is printed to stderr).  Otherwise returns the result
    of `delayed(ath_learn.learn)(...)`.

    Raises ValueError if `task` is neither `Task.attach` nor `Task.relate`.
    """
    if task == Task.attach:
        config, model_path = rconf.attach, paths.attach
    elif task == Task.relate:
        config, model_path = rconf.relate or rconf.attach, paths.relate
    else:
        raise ValueError('Unknown learning task: {}'.format(task))

    # oracle configurations have no model to learn
    if config.key == 'oracle':
        return None

    # a model file from an earlier run is kept as is
    if fp.exists(model_path):
        notice = "reusing {key} {task} model (already built): {path}"
        print(notice.format(key=config.key,
                            task=task.name,
                            path=fp.relpath(model_path, lconf.scratch_dir)),
              file=sys.stderr)
        return None

    learner_fn = ath_learn.learn
    team = Team(attach=rconf.attach,
                relate=rconf.relate or rconf.attach)
    # hand over the payload of each configuration, not the wrapper itself
    payloads = team.fmap(lambda member: member.payload)
    return delayed(learner_fn)(subpack, payloads, task, model_path,
                               quiet=False)
Exemplo n.º 2
0
 def test_postlabel_parser(self):
     """Check PostlabelPipeline for every learner team / decoder pairing.

     The teams are those in LEARNERS plus one extra team that pairs a
     structured perceptron attacher with a logistic regression labeller.
     """
     perceptron_team = Team(
         attach=StructuredPerceptron(MST_DECODER, LOCAL_PERC_ARGS),
         label=SklearnLabelClassifier(LogisticRegression()))
     teams = LEARNERS + [perceptron_team]
     for team, decoder in itr.product(teams, DECODERS):
         pipeline = PostlabelPipeline(learner_attach=team.attach,
                                      learner_label=team.label,
                                      decoder=decoder)
         self._test_parser(pipeline)
Exemplo n.º 3
0
 def test_intra_parsers(self):
     """Run the shared parser check on each intra/inter parser wrapper."""
     team = Team(attach=SklearnAttachClassifier(LogisticRegression()),
                 label=SklearnLabelClassifier(LogisticRegression()))
     # note: the choice of pipelines below is fairly arbitrary
     pipeline_args = dict(learner_attach=team.attach,
                          learner_label=team.label,
                          decoder=MST_DECODER)
     intra_pipeline = JointPipeline(**pipeline_args)
     inter_pipeline = PostlabelPipeline(**pipeline_args)

     # build every parser up front, each around its own IntraInterPair
     parsers = []
     for wrapper in [SentOnlyParser, SoftParser, HeadToHeadParser]:
         pair = IntraInterPair(intra=intra_pipeline, inter=inter_pipeline)
         parsers.append(wrapper(pair))

     for parser in parsers:
         self._test_parser(parser)
Exemplo n.º 4
0
    os.makedirs(TMP_OUTPUT)

# load the data: every input file shares PREFIX and differs only by suffix
mpack = load_multipack(*(PREFIX + suffix
                         for suffix in ('.edus',
                                        '.pairings',
                                        '.features.sparse',
                                        '.features.sparse.vocab')),
                       verbose=True)

# split the dataset into at most 10 folds (fewer if there are fewer packs)
num_folds = min(10, len(mpack))
fold_dict = make_n_fold(mpack, num_folds, mk_rng())

# pick a learner team and a decoder
learners = Team(attach=SklearnAttachClassifier(LogisticRegression()),
                label=SklearnLabelClassifier(LogisticRegression()))
decoder = MstDecoder(root_strategy=MstRootStrategy.fake_root)

# combine them into a parser
parser = JointPipeline(decoder=decoder,
                       learner_attach=learners.attach,
                       learner_label=learners.label)

# run cross-fold evaluation
scores = []
for fold in range(num_folds):
    print(">>> doing fold ", fold + 1, file=sys.stderr)
    print("training ... ", file=sys.stderr)
    # learn a model for the training data for this fold
    train_packs = select_training(mpack, fold_dict, fold).values()
    parser.fit(train_packs, [x.target for x in train_packs])