def attach_learner_dp_perc():
    """Return a keyed instance of perceptron learner."""
    classifier = Perceptron(n_iter=LOCAL_N_ITER,
                            verbose=VERBOSE,
                            average=LOCAL_AVG,
                            use_prob=LOCAL_USE_PROB)
    return Keyed('dp-perc', SklearnAttachClassifier(classifier))
def attach_learner_dp_pa():
    """Return a keyed instance of passive aggressive learner."""
    classifier = PassiveAggressive(C=LOCAL_C,
                                   n_iter=LOCAL_N_ITER,
                                   verbose=VERBOSE,
                                   average=LOCAL_AVG,
                                   use_prob=LOCAL_USE_PROB)
    return Keyed('dp-pa', SklearnAttachClassifier(classifier))
def test_intra_parsers(self):
    """Test all intra/inter parsers on a dpack."""
    def _fresh_team():
        "build an independent attach/label learner team"
        return Team(
            attach=SklearnAttachClassifier(LogisticRegression()),
            label=SklearnLabelClassifier(LogisticRegression()))

    team_intra = _fresh_team()
    team_inter = _fresh_team()
    # note: these are chosen a bit randomly
    p_intra = JointPipeline(learner_attach=team_intra.attach,
                            learner_label=team_intra.label,
                            decoder=MST_DECODER)
    p_inter = PostlabelPipeline(learner_attach=team_inter.attach,
                                learner_label=team_inter.label,
                                decoder=MST_DECODER)
    # build every intra/inter combination parser, then exercise each
    parsers = []
    for mk_parser in (SentOnlyParser, SoftParser, HeadToHeadParser):
        pair = IntraInterPair(intra=p_intra, inter=p_inter)
        parsers.append(mk_parser(pair))
    for parser in parsers:
        self._test_parser(parser)
class TinyHarness(Harness):
    """Example harness that runs on the example data
    """
    # shared maxent (logistic regression) attach/label learners,
    # bundled into a single learner configuration
    _maxent_a = Keyed('maxent', SklearnAttachClassifier(LogisticRegression()))
    _maxent_l = Keyed('maxent', SklearnLabelClassifier(LogisticRegression()))
    _maxent = LearnerConfig(attach=_maxent_a, label=_maxent_l)

    # two decoding strategies to compare
    _decoder1 = MstDecoder(root_strategy=MstRootStrategy.fake_root)
    _decoder2 = LocallyGreedy()

    # joint pipeline with MST decoding vs postlabel pipeline with
    # locally greedy decoding, both on the same maxent learners
    _parser1 = Keyed("mst-j",
                     JointPipeline(_maxent.attach.payload,
                                   _maxent.label.payload,
                                   _decoder1))
    _parser2 = Keyed("greedy-p",
                     PostlabelPipeline(_maxent.attach.payload,
                                       _maxent.label.payload,
                                       _decoder2))

    # the evaluations this harness will run (one per parser above)
    _evaluations = [EvaluationConfig(key="maxent-mst-j",
                                     settings=Keyed('j', None),
                                     learner=_maxent,
                                     parser=_parser1),
                    EvaluationConfig(key="maxent-greedy-p",
                                     settings=Keyed('p', None),
                                     learner=_maxent,
                                     parser=_parser2)]

    def __init__(self):
        """Copy the example corpus into a fresh temporary data dir."""
        self._datadir = mkdtemp()
        for cpath in glob.glob('doc/example-corpus/*'):
            shutil.copy(cpath, self._datadir)
        super(TinyHarness, self).__init__('tiny', None)

    def run(self):
        """Run the evaluation
        """
        runcfg = RuntimeConfig.empty()
        eval_dir, scratch_dir = prepare_dirs(runcfg, self._datadir)
        self.load(runcfg, eval_dir, scratch_dir)
        evaluate_corpus(self)

    @property
    def evaluations(self):
        """Evaluation configurations this harness should run."""
        return self._evaluations

    @property
    def test_evaluation(self):
        """No held-out test evaluation in this example."""
        return None

    def create_folds(self, mpack):
        """Split the multipack into 2 folds (deterministic: no rng)."""
        return attelo.fold.make_n_fold(mpack, 2, None)

    def mpack_paths(self, _, stripped=False):
        """Return a dict of paths needed to read a datapack.

        The 2nd argument denoted by '_' is test_data, which is
        unused in this example.
        """
        core_path = fp.join(self._datadir, 'tiny')
        return {'edu_input': core_path + '.edus',
                'pairings': core_path + '.pairings',
                'features': core_path + '.features.sparse',
                'vocab': core_path + '.features.sparse.vocab'}

    def _model_basename(self, rconf, mtype, ext):
        "Basic filename for a model"
        # attach vs label models are keyed on the matching sub-config
        if 'attach' in mtype:
            rsubconf = rconf.attach
        else:
            rsubconf = rconf.label
        template = '{dataset}.{learner}.{task}.{ext}'
        return template.format(dataset=self.dataset,
                               learner=rsubconf.key,
                               task=mtype,
                               ext=ext)

    def model_paths(self, rconf, fold, parser):
        """Return attach/label model paths for a config and fold.

        With ``fold=None`` models go in the combined (whole corpus)
        directory; otherwise in the per-fold directory.
        """
        if fold is None:
            parent_dir = self.combined_dir_path()
        else:
            parent_dir = self.fold_dir_path(fold)

        def _eval_model_path(mtype):
            "Model for a given loop/eval config and fold"
            bname = self._model_basename(rconf, mtype, 'model')
            return fp.join(parent_dir, bname)

        # NOTE: the 'label' model is stored under the task name
        # "relate" (historical naming) — the dict key stays 'label'
        return {'attach': _eval_model_path("attach"),
                'label': _eval_model_path("relate")}
os.makedirs(TMP_OUTPUT) # load the data mpack = load_multipack(PREFIX + '.edus', PREFIX + '.pairings', PREFIX + '.features.sparse', PREFIX + '.features.sparse.vocab', verbose=True) # divide the dataset into folds num_folds = min((10, len(mpack))) fold_dict = make_n_fold(mpack, num_folds, mk_rng()) # select a decoder and a learner team decoder = MstDecoder(root_strategy=MstRootStrategy.fake_root) learners = Team(attach=SklearnAttachClassifier(LogisticRegression()), label=SklearnLabelClassifier(LogisticRegression())) # put them together as a parser parser = JointPipeline(learner_attach=learners.attach, learner_label=learners.label, decoder=decoder) # run cross-fold evaluation scores = [] for fold in range(num_folds): print(">>> doing fold ", fold + 1, file=sys.stderr) print("training ... ", file=sys.stderr) # learn a model for the training data for this fold train_packs = select_training(mpack, fold_dict, fold).values() parser.fit(train_packs, [x.target for x in train_packs])
def attach_learner_dp_pa():
    """Return a keyed instance of passive aggressive learner."""
    learner = SklearnAttachClassifier(PassiveAggressive(LOCAL_PA_ARGS))
    return Keyed('dp-pa', learner)
def attach_learner_dp_perc():
    """Return a keyed instance of perceptron learner."""
    learner = SklearnAttachClassifier(Perceptron(LOCAL_PERC_ARGS))
    return Keyed('dp-perc', learner)
def attach_learner_pa():
    """Return a keyed instance of passive aggressive learner."""
    return Keyed('pa',
                 SklearnAttachClassifier(
                     sk.PassiveAggressiveClassifier(
                         n_iter=LOCAL_PA_ARGS.iterations)))
def attach_learner_perc():
    """Return a keyed instance of perceptron learner."""
    return Keyed('perc',
                 SklearnAttachClassifier(
                     sk.Perceptron(n_iter=LOCAL_PERC_ARGS.iterations)))
def attach_learner_rndforest():
    """Return a keyed instance of random forest learner."""
    forest = RandomForestClassifier()
    return Keyed('rndforest', SklearnAttachClassifier(forest))
def attach_learner_dectree():
    """Return a keyed instance of decision tree learner."""
    tree = DecisionTreeClassifier()
    return Keyed('dectree', SklearnAttachClassifier(tree))
def attach_learner_maxent():
    """Return a keyed instance of maxent learner."""
    maxent = LogisticRegression()
    return Keyed('maxent', SklearnAttachClassifier(maxent))
def attach_learner_pa():
    """Return a keyed instance of passive aggressive learner."""
    return Keyed('pa',
                 SklearnAttachClassifier(
                     sk.PassiveAggressiveClassifier(
                         C=LOCAL_C,
                         n_iter=LOCAL_N_ITER,
                         class_weight=LOCAL_CLASS_WEIGHT)))
def attach_learner_perc():
    """Return a keyed instance of perceptron learner."""
    return Keyed('perc',
                 SklearnAttachClassifier(
                     sk.Perceptron(n_iter=LOCAL_N_ITER,
                                   class_weight=LOCAL_CLASS_WEIGHT)))
def attach_learner_rndforest():
    """Return a keyed instance of random forest learner."""
    forest = RandomForestClassifier(n_estimators=100, n_jobs=1)
    return Keyed('rndforest', SklearnAttachClassifier(forest))