def _get_learn_job(lconf, rconf, subpack, paths, task):
    """Return a deferred learning job for `task`, or None if none is needed.

    The learner configuration and the model output path are picked
    according to `task`: `Task.attach` uses `rconf.attach` and
    `paths.attach`; `Task.relate` uses `rconf.relate` (falling back to
    `rconf.attach` when that is falsy) and `paths.relate`.

    Returns None when the selected configuration has the key 'oracle',
    or when the model file already exists (in which case a "reusing"
    notice is printed to stderr, with the path shown relative to
    `lconf.scratch_dir`).  Otherwise returns the result of
    `delayed(ath_learn.learn)(...)` for the given `subpack`.

    Raises ValueError if `task` is neither `Task.attach` nor
    `Task.relate`.
    """
    if task == Task.attach:
        task_conf, model_path = rconf.attach, paths.attach
    elif task == Task.relate:
        task_conf, model_path = (rconf.relate or rconf.attach), paths.relate
    else:
        raise ValueError('Unknown learning task: {}'.format(task))

    # oracle configurations have no model to learn
    if task_conf.key == 'oracle':
        return None

    # a model is already sitting at the output path: say so and stop
    if fp.exists(model_path):
        notice = "reusing {key} {task} model (already built): {path}".format(
            key=task_conf.key,
            task=task.name,
            path=fp.relpath(model_path, lconf.scratch_dir))
        print(notice, file=sys.stderr)
        return None

    learn = ath_learn.learn
    # the learner function is handed the payloads, not the
    # configuration wrappers around them
    team = Team(attach=rconf.attach,
                relate=rconf.relate or rconf.attach)
    payloads = team.fmap(lambda conf: conf.payload)
    return delayed(learn)(subpack, payloads, task, model_path,
                          quiet=False)
def test_postlabel_parser(self):
    """Check a PostlabelPipeline for every (learner team, decoder) pair.

    The teams are those in LEARNERS plus one extra team whose attachment
    learner is a StructuredPerceptron.
    """
    perceptron_team = Team(
        attach=StructuredPerceptron(MST_DECODER, LOCAL_PERC_ARGS),
        label=SklearnLabelClassifier(LogisticRegression()))
    teams = LEARNERS + [perceptron_team]

    for team, decoder in itr.product(teams, DECODERS):
        pipeline = PostlabelPipeline(learner_attach=team.attach,
                                     learner_label=team.label,
                                     decoder=decoder)
        self._test_parser(pipeline)
def test_intra_parsers(self):
    """Run the shared parser check on each intra/inter parser.

    One intra/inter pair of pipelines is wrapped in turn by
    SentOnlyParser, SoftParser and HeadToHeadParser; every resulting
    parser is passed to `self._test_parser`.
    """
    team = Team(attach=SklearnAttachClassifier(LogisticRegression()),
                label=SklearnLabelClassifier(LogisticRegression()))

    # note: these are chosen a bit randomly
    intra = JointPipeline(learner_attach=team.attach,
                          learner_label=team.label,
                          decoder=MST_DECODER)
    inter = PostlabelPipeline(learner_attach=team.attach,
                              learner_label=team.label,
                              decoder=MST_DECODER)

    # build every parser up front, then test them one by one
    candidates = []
    for make_parser in (SentOnlyParser, SoftParser, HeadToHeadParser):
        pair = IntraInterPair(intra=intra, inter=inter)
        candidates.append(make_parser(pair))

    for candidate in candidates:
        self._test_parser(candidate)
os.makedirs(TMP_OUTPUT) # load the data mpack = load_multipack(PREFIX + '.edus', PREFIX + '.pairings', PREFIX + '.features.sparse', PREFIX + '.features.sparse.vocab', verbose=True) # divide the dataset into folds num_folds = min((10, len(mpack))) fold_dict = make_n_fold(mpack, num_folds, mk_rng()) # select a decoder and a learner team decoder = MstDecoder(root_strategy=MstRootStrategy.fake_root) learners = Team(attach=SklearnAttachClassifier(LogisticRegression()), label=SklearnLabelClassifier(LogisticRegression())) # put them together as a parser parser = JointPipeline(learner_attach=learners.attach, learner_label=learners.label, decoder=decoder) # run cross-fold evaluation scores = [] for fold in range(num_folds): print(">>> doing fold ", fold + 1, file=sys.stderr) print("training ... ", file=sys.stderr) # learn a model for the training data for this fold train_packs = select_training(mpack, fold_dict, fold).values() parser.fit(train_packs, [x.target for x in train_packs])