def make_material_feats(part, force):
    spacy_dir = join(LOCAL_DIR, part, 'spacy')

    lempos_feats_dir = '_{}/Material/lempos_feats'.format(part)
    if force or not exists(lempos_feats_dir):
        generate_feats(spacy_dir, lempos_feats_dir, lemma_pos_feats, nlp=nlp)

    word_feats_dir = '_{}/Material/word_feats'.format(part)
    if force or not exists(word_feats_dir):
        generate_feats(spacy_dir, word_feats_dir,
                       lambda sent: word_feats(sent, context_size=1),
                       nlp=nlp)

    wn_feats_dir = '_{}/Material/wordnet_feats'.format(part)
    if force or not exists(wn_feats_dir):
        generate_feats(spacy_dir, wn_feats_dir,
                       lambda s: wordnet_feats(s, context_size=2),
                       nlp=nlp)

    dep_feats_dir = '_{}/Material/dep_feats'.format(part)
    if force or not exists(dep_feats_dir):
        generate_feats(spacy_dir, dep_feats_dir,
                       lambda s: dep_feats(s, context_size=2),
                       nlp=nlp)

    return [lempos_feats_dir, word_feats_dir, wn_feats_dir]
def make_task_feats(part, force):
    spacy_dir = join(LOCAL_DIR, part, 'spacy')

    lempos_feats_dir = '_{}/Task/lempos_feats'.format(part)
    if force or not exists(lempos_feats_dir):
        generate_feats(spacy_dir, lempos_feats_dir,
                       lambda s: lemma_pos_feats(s, context_size=1),
                       nlp=nlp)

    word_feats_dir = '_{}/Task/word_feats'.format(part)
    if force or not exists(word_feats_dir):
        generate_feats(spacy_dir, word_feats_dir,
                       lambda sent: word_feats(sent, context_size=0),
                       nlp=nlp)

    dep_feats_dir = '_{}/Task/dep_feats'.format(part)
    if force or not exists(dep_feats_dir):
        generate_feats(spacy_dir, dep_feats_dir,
                       lambda sent: dep_feats(sent, context_size=1),
                       nlp=nlp)

    return [lempos_feats_dir, word_feats_dir]
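# Illustrative usage of the two builders above (a standalone sketch, not part
# of the original script). The part name 'train' and force=False are
# assumptions; the module is expected to define `nlp` and the imported
# helpers, as the functions above already rely on them.
if __name__ == '__main__':
    material_feat_dirs = make_material_feats('train', force=False)
    task_feat_dirs = make_task_feats('train', force=False)
    print('Material feature dirs:', material_feat_dirs)
    print('Task feature dirs:', task_feat_dirs)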
        token_feats['{}:wnhypernym3'.format(j)] = wnhypernym3

        sent_feats.append(token_feats)

    return sent_feats


# Step 1: Generate features

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
feats_dir = join('_train', 'features2')

# If you want to save time by reusing feats from crf1-exp.py,
# comment out the line below:
generate_feats(spacy_dir, feats_dir, features2)

# Step 2: Collect data for running CRF classifier

true_iob_dir = join(LOCAL_DIR, 'train', 'iob')
data = collect_crf_data(true_iob_dir, feats_dir)

# Step 3: Create folds

# Create folds from complete texts only (i.e. instances of the same text
# are never in different folds).
# TODO How to set seed for random generator?
group_k_fold = GroupKFold(n_splits=5)

# use same split for all three entities
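# Standalone sketch (separate from the script above) showing how GroupKFold
# keeps all instances of one group, i.e. one source text, inside a single
# fold. The toy arrays are illustrative only.
import numpy as np
from sklearn.model_selection import GroupKFold

X_toy = np.arange(12).reshape(6, 2)
y_toy = np.array([0, 1, 0, 1, 0, 1])
groups_toy = np.array(['doc1', 'doc1', 'doc2', 'doc2', 'doc3', 'doc3'])

for train_idx, test_idx in GroupKFold(n_splits=3).split(X_toy, y_toy, groups_toy):
    # each test fold contains complete documents only
    print('test groups:', sorted(set(groups_toy[test_idx])))

# Regarding the TODO above: GroupKFold exposes no random_state in the
# scikit-learn versions contemporary with this code, so its split is
# deterministic; a seed only matters for shuffling splitters such as
# GroupShuffleSplit.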
        for synset in synsets:
            try:
                hypernym3 = synset.hypernyms()[0].hypernyms()[0].hypernyms()[0]
                token_feats['{}:{}'.format(j, hypernym3.name())] = 1
            except IndexError:
                # hypernym chain shorter than three levels
                pass

        #token_feats['{}:lemma'.format(j)] = lemma
        #token_feats['{}:pos'.format(j)] = pos

        sent_feats.append(token_feats)

    return sent_feats


generate_feats(spacy_dir, wn_feats_dir, wnfeats1)

# Step 2: Run experiments

crf = PruneCRF(c1=0.1, c2=0.1, all_possible_transitions=True)

feat_dirs = [base_feats_dir, wn_feats_dir, word_feats_dir]

preds = {}

for label in ENTITIES:
    preds[label] = run_exp_train_cv(crf, feat_dirs, label, n_folds=5)

# Step 3: Evaluate

eval_exp_train(preds)
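# Standalone sketch (separate from the script above) of the third-level
# hypernym lookup used in the feature function, based on NLTK's WordNet
# interface. The example word 'telescope' is illustrative; the 'wordnet'
# corpus must be installed (nltk.download('wordnet')).
from nltk.corpus import wordnet as wn

for synset in wn.synsets('telescope'):
    try:
        hypernym3 = synset.hypernyms()[0].hypernyms()[0].hypernyms()[0]
        print(synset.name(), '->', hypernym3.name())
    except IndexError:
        pass  # hypernym chain shorter than three levels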
from os.path import join

from sklearn_crfsuite import CRF

from sie import ENTITIES, LOCAL_DIR
from sie.feats import generate_feats, features1
from sie.exp import run_exp_test, eval_exp_train


# Step 1: Generate features

train_spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
train_base_feats_dir = join('_train', 'features1')

# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(train_spacy_dir, train_base_feats_dir, features1)

dev_spacy_dir = join(LOCAL_DIR, 'dev', 'spacy')
dev_base_feats_dir = join('_dev', 'features1')

# If you want to save time by reusing existing feats, comment out the line below:
generate_feats(dev_spacy_dir, dev_base_feats_dir, features1)

test_spacy_dir = join(LOCAL_DIR, 'test', 'spacy')
test_base_feats_dir = join('_test', 'features1')

generate_feats(test_spacy_dir, test_base_feats_dir, features1)

# Step 2: Run experiments

crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True)
from os.path import join

from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.crf import PruneCRF
from sie.exp import run_exp_train_cv, eval_exp_train
from sie.feats import generate_feats, wordnet_feats


# Step 1: Generate features

base_feats_dir = join(EXPS_DIR, 'crf1/_train/features1')
word_feats_dir = join(EXPS_DIR, 'wordfeats/_train/wordfeats1')

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
wn_feats_dir = join('_train', 'wnfeats1')

generate_feats(spacy_dir, wn_feats_dir, wordnet_feats)

# Step 2: Run experiments

crf = PruneCRF()  # c1=0.1, c2=0.1, min_freq=5, all_possible_transitions=True

feat_dirs = [base_feats_dir, wn_feats_dir, word_feats_dir]

preds = {}

for label in ENTITIES[:1]:
    preds[label] = run_exp_train_cv(crf, feat_dirs, label, n_folds=5, n_jobs=-1)

# Step 3: Evaluate
with word features
"""

from os.path import join

from sie.crf import PruneCRF
from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.feats import generate_feats, word_feats
from sie.exp import run_exp_train_cv, eval_exp_train


# Step 1: Generate features

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
word_feats_dir = join('_train', 'wordfeats1')

generate_feats(spacy_dir, word_feats_dir,
               lambda sent: word_feats(sent, context_size=1))

# Step 2: Run experiments

crf = PruneCRF()  # c1=0.1, c2=0.1, all_possible_transitions=True

base_feats_dir = join(EXPS_DIR, 'crf1/_train/features1')
feat_dirs = [base_feats_dir, word_feats_dir]

preds = {}

for label in ENTITIES:
    preds[label] = run_exp_train_cv(crf, feat_dirs, label)

# Step 3: Evaluate

eval_exp_train(preds)
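# Standalone sketch (separate from the script above) of context-window word
# features: for every token, the surface forms of the neighbouring tokens
# within `context_size` positions are added as features. Whether
# sie.feats.word_feats uses exactly these keys is an assumption.

def window_word_feats(tokens, context_size=1):
    sent_feats = []
    for j in range(len(tokens)):
        token_feats = {}
        for offset in range(-context_size, context_size + 1):
            i = j + offset
            if 0 <= i < len(tokens):
                token_feats['{}:word'.format(offset)] = tokens[i].lower()
        sent_feats.append(token_feats)
    return sent_feats

# Example: window_word_feats('The telescope mirror'.split(), context_size=1)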
from os.path import join

from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.crf import PruneCRF
from sie.exp import run_exp_train_cv, eval_exp_train
from sie.feats import generate_feats, brown_feats


# Step 1: Generate features

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
brown_feats_dir = join('_train', 'brown_feats')

base_feats_dir = join(EXPS_DIR, 'crf1/_train/features1')
word_feats_dir = join(EXPS_DIR, 'wordfeats/_train/wordfeats1')

generate_feats(spacy_dir, brown_feats_dir, brown_feats)

# Step 2: Run experiments

crf = PruneCRF()  # c1=0.1, c2=0.1, all_possible_transitions=True

feat_dirs = [base_feats_dir, brown_feats_dir, word_feats_dir]

preds = {}

for label in ENTITIES:
    preds[label] = run_exp_train_cv(crf, feat_dirs, label)

# Step 3: Evaluate

eval_exp_train(preds)
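# Standalone sketch (separate from the script above) of how Brown-cluster
# features are commonly encoded: each word maps to a bit-string cluster id and
# prefixes of that string serve as features at several granularities. The file
# format (cluster, word, count separated by tabs) follows the output of
# Liang's wcluster tool; whether sie.feats.brown_feats works this way is an
# assumption.

def load_brown_clusters(path):
    """Map each word to its Brown cluster bit string."""
    word2cluster = {}
    with open(path) as f:
        for line in f:
            cluster, word, _count = line.rstrip('\n').split('\t')
            word2cluster[word] = cluster
    return word2cluster


def brown_prefix_feats(word, word2cluster, prefixes=(4, 6, 10, 20)):
    """Return cluster bit-string prefix features for a single word."""
    feats = {}
    cluster = word2cluster.get(word)
    if cluster is not None:
        for p in prefixes:
            feats['brown_{}'.format(p)] = cluster[:p]
    return feats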
from os.path import join

from sie import ENTITIES, LOCAL_DIR, EXPS_DIR
from sie.crf import PruneCRF
from sie.exp import run_exp_train_cv, eval_exp_train
from sie.feats import generate_feats, dep_feats


# Step 1: Generate features

base_feats_dir = join(EXPS_DIR, 'best/_train/Material/lempos_feats')
word_feats_dir = join(EXPS_DIR, 'best/_train/Material/word_feats')
wordnet_feats_dir = join(EXPS_DIR, 'best/_train/Material/wordnet_feats')

spacy_dir = join(LOCAL_DIR, 'train', 'spacy')
dep_feats_dir = join('_train', 'dep_feats')

generate_feats(spacy_dir, dep_feats_dir,
               lambda sent: dep_feats(sent, context_size=1))

# Step 2: Run experiments

crf = PruneCRF()  # c1=0.1, c2=0.1, all_possible_transitions=True

feat_dirs = [
    #base_feats_dir,
    #word_feats_dir,
    #wordnet_feats_dir,
    dep_feats_dir
]

preds = {}

for label in ENTITIES:
""" from os.path import join from sklearn_crfsuite import CRF from sie import ENTITIES, LOCAL_DIR from sie.feats import generate_feats, features1 from sie.exp import run_exp_train, eval_exp_train # Step 1: Generate features spacy_dir = join(LOCAL_DIR, 'train', 'spacy') base_feats_dir = join('_train', 'features1') # If you want to save time by reusing existing feats, comment out the line below: generate_feats(spacy_dir, base_feats_dir, features1) # Step 2: Run experiments crf = CRF(c1=0.1, c2=0.1, all_possible_transitions=True) feat_dirs = [base_feats_dir] preds = {} for label in ENTITIES: preds[label] = run_exp_train(crf, feat_dirs, label) # Step 3: Evaluate eval_exp_train(preds)