def train_zero_saia_longlist(word_list=noun_list): with open('../indata/saia_zeroshot_nounslong_splits.json', 'r') as f: ttsplit = json.load(f) w2v = linwac.load_w2v() X = np.load('../indata/saiapr.npz') Xsaia = X['arr_0'] print "Xsaia", Xsaia.shape W = np.load('../indata/saiapr_wmat.npz') Wsaia = W['arr_0'] print "Wsaia", Wsaia.shape long_noun_list = [ l.strip() for l in open('noun_list_long.txt').readlines() ] long_noun_ind = [msim.word2ind[n] for n in long_noun_list] for x, spl in enumerate(ttsplit): print "SPLIT", x if x > 0: Xsaia_train = Xsaia[spl['train']] Xsaia_test = Xsaia[spl['test']] Wsaia_t = Wsaia.transpose() Wsaia_t.shape Wsaia_t_train = Wsaia_t[spl['train']] Wsaia_train = Wsaia_t_train.transpose() print "Train linwac" linwac.train_all_nouns(Wsaia_train, Xsaia_train, w2v, ssim="500n_zeroshot_split" + str(x), word_list=long_noun_list) print "Train transfer" linmap.train_mappings(msim.w2v_vecs, long_noun_ind, Wsaia_train, Xsaia_train, split="500n_zeroshot_split" + str(x)) print "Train logwac" logwac.train_saia_nosamp(Xsaia_train, Wsaia_train, word_list=noun_list, ssim="nouns_zeroshot_split" + str(x))
def train_zero_mixed_plural_saia_models(): with open('../indata/saia_zeroshot_mixedpluralsplit.json', 'r') as f: ttsplit = json.load(f) w2v = linwac.load_w2v() X = np.load('../indata/saiapr.npz') Xsaia = X['arr_0'] print "Xsaia", Xsaia.shape W = np.load('../indata/saiapr_wmat.npz') Wsaia = W['arr_0'] print "Wsaia", Wsaia.shape Xsaia_train = Xsaia[ttsplit['train']] Xsaia_test = Xsaia[ttsplit['test']] Wsaia_t = Wsaia.transpose() Wsaia_t.shape Wsaia_t_train = Wsaia_t[ttsplit['train']] Wsaia_train = Wsaia_t_train.transpose() print "Plurals", ttsplit['nouns'] print "Singulars", ttsplit['singulars'] print "Train linwac" this_wordlist = ttsplit['nouns'] + ttsplit['singulars'] word_ind = [msim.word2ind[n] for n in this_wordlist] print "Wordlist", len(this_wordlist) linwac.train_all_nouns(Wsaia_train, Xsaia_train, w2v, ssim="_zeroshot_mixedpluralsplit", word_list=this_wordlist) print "Train transfer" linmap.train_mappings(msim.w2v_vecs, word_ind, Wsaia_train, Xsaia_train, split="_zeroshot_mixedpluralsplit") print "Train logwac" logwac.train_saia_nosamp(Xsaia_train, Wsaia_train, word_list=this_wordlist, ssim="nouns_zeroshot_mixedpluralsplit")
def train_zero_hypern_saia_models(): with open('../indata/saia_zeroshot_hypernsplit.json', 'r') as f: ttsplit = json.load(f) w2v = linwac.load_w2v() X = np.load('../indata/saiapr.npz') Xsaia = X['arr_0'] print "Xsaia", Xsaia.shape W = np.load('../indata/saiapr_wmat.npz') Wsaia = W['arr_0'] print "Wsaia", Wsaia.shape Xsaia_train = Xsaia[ttsplit['train']] Xsaia_test = Xsaia[ttsplit['test']] Wsaia_t = Wsaia.transpose() Wsaia_t.shape Wsaia_t_train = Wsaia_t[ttsplit['train']] Wsaia_train = Wsaia_t_train.transpose() print ttsplit['nouns'] print "Train linwac" this_wordlist = noun_list + [ n for n in ttsplit['nouns'] if not n in noun_list ] print "Wordlist", len(this_wordlist) linwac.train_all_nouns(Wsaia_train, Xsaia_train, w2v, ssim="_zeroshot_hypernsplit", word_list=this_wordlist) print "Train transfer" linmap.train_mappings(msim.w2v_vecs, noun_ind, Wsaia_train, Xsaia_train, split="_zeroshot_hypernsplit") print "Train logwac" logwac.train_saia_nosamp(Xsaia_train, Wsaia_train, word_list=noun_list, ssim="nouns_zeroshot_hypernsplit")
def train_zero_saia_models(word_list=noun_list): with open('../indata/saia_zeroshot_nounsplits.json', 'r') as f: ttsplit = json.load(f) w2v = linwac.load_w2v() X = np.load('../indata/saiapr.npz') Xsaia = X['arr_0'] print "Xsaia", Xsaia.shape W = np.load('../indata/saiapr_wmat.npz') Wsaia = W['arr_0'] print "Wsaia", Wsaia.shape for x, spl in enumerate(ttsplit): print "SPLIT", x if x > 0: Xsaia_train = Xsaia[spl['train']] Xsaia_test = Xsaia[spl['test']] Wsaia_t = Wsaia.transpose() Wsaia_t.shape Wsaia_t_train = Wsaia_t[spl['train']] Wsaia_train = Wsaia_t_train.transpose() print "Train linwac" linwac.train_all_nouns(Wsaia_train, Xsaia_train, w2v, ssim="_zeroshot_split" + str(x)) print "Train transfer" linmap.train_mappings(msim.w2v_vecs, noun_ind, Wsaia_train, Xsaia_train, split="_zeroshot_split" + str(x)) print "Train logwac" logwac.train_saia_nosamp(Xsaia_train, Wsaia_train, word_list=noun_list, ssim="nouns_zeroshot_split" + str(x))
def train_standard_plural_saia_models(): with open('../indata/saia_standard_pluralsplit.json', 'r') as f: ttsplit = json.load(f) print "Nouns", len(ttsplit['nouns']) w2v = linwac.load_w2v() Xsaia_t, Wsaia_t = linwac.load_saia_train() Xsaia_train = Xsaia_t[ttsplit['train']] Wsaia_tt = Wsaia_t.transpose() print Wsaia_tt.shape Wsaia_t_train = Wsaia_tt[ttsplit['train']] Wsaia_train = Wsaia_t_train.transpose() print "Train linwac" this_wordlist = ttsplit['nouns'] word_ind = [msim.word2ind[n] for n in this_wordlist] print "Wordlist", len(this_wordlist) linwac.train_all_nouns(Wsaia_train, Xsaia_train, w2v, ssim="_standard_pluralsplit", word_list=this_wordlist) print "Train transfer" linmap.train_mappings(msim.w2v_vecs, word_ind, Wsaia_train, Xsaia_train, split="_standard_pluralsplit") print "Train logwac" logwac.train_saia_nosamp(Xsaia_train, Wsaia_train, word_list=this_wordlist, ssim="nouns_standard_pluralsplit")