def train_zero_saia_longlist(word_list=noun_list):

    with open('../indata/saia_zeroshot_nounslong_splits.json', 'r') as f:
        ttsplit = json.load(f)

    w2v = linwac.load_w2v()

    X = np.load('../indata/saiapr.npz')
    Xsaia = X['arr_0']
    print "Xsaia", Xsaia.shape

    W = np.load('../indata/saiapr_wmat.npz')
    Wsaia = W['arr_0']
    print "Wsaia", Wsaia.shape

    long_noun_list = [
        l.strip() for l in open('noun_list_long.txt').readlines()
    ]
    long_noun_ind = [msim.word2ind[n] for n in long_noun_list]

    for x, spl in enumerate(ttsplit):
        print "SPLIT", x

        if x > 0:

            Xsaia_train = Xsaia[spl['train']]
            Xsaia_test = Xsaia[spl['test']]

            Wsaia_t = Wsaia.transpose()
            Wsaia_t.shape

            Wsaia_t_train = Wsaia_t[spl['train']]
            Wsaia_train = Wsaia_t_train.transpose()

            print "Train linwac"
            linwac.train_all_nouns(Wsaia_train,
                                   Xsaia_train,
                                   w2v,
                                   ssim="500n_zeroshot_split" + str(x),
                                   word_list=long_noun_list)

            print "Train transfer"
            linmap.train_mappings(msim.w2v_vecs,
                                  long_noun_ind,
                                  Wsaia_train,
                                  Xsaia_train,
                                  split="500n_zeroshot_split" + str(x))

            print "Train logwac"
            logwac.train_saia_nosamp(Xsaia_train,
                                     Wsaia_train,
                                     word_list=noun_list,
                                     ssim="nouns_zeroshot_split" + str(x))
def train_zero_mixed_plural_saia_models():

    with open('../indata/saia_zeroshot_mixedpluralsplit.json', 'r') as f:
        ttsplit = json.load(f)

    w2v = linwac.load_w2v()

    X = np.load('../indata/saiapr.npz')
    Xsaia = X['arr_0']
    print "Xsaia", Xsaia.shape

    W = np.load('../indata/saiapr_wmat.npz')
    Wsaia = W['arr_0']
    print "Wsaia", Wsaia.shape

    Xsaia_train = Xsaia[ttsplit['train']]
    Xsaia_test = Xsaia[ttsplit['test']]

    Wsaia_t = Wsaia.transpose()
    Wsaia_t.shape
    Wsaia_t_train = Wsaia_t[ttsplit['train']]
    Wsaia_train = Wsaia_t_train.transpose()

    print "Plurals", ttsplit['nouns']
    print "Singulars", ttsplit['singulars']

    print "Train linwac"
    this_wordlist = ttsplit['nouns'] + ttsplit['singulars']
    word_ind = [msim.word2ind[n] for n in this_wordlist]

    print "Wordlist", len(this_wordlist)
    linwac.train_all_nouns(Wsaia_train,
                           Xsaia_train,
                           w2v,
                           ssim="_zeroshot_mixedpluralsplit",
                           word_list=this_wordlist)

    print "Train transfer"
    linmap.train_mappings(msim.w2v_vecs,
                          word_ind,
                          Wsaia_train,
                          Xsaia_train,
                          split="_zeroshot_mixedpluralsplit")

    print "Train logwac"
    logwac.train_saia_nosamp(Xsaia_train,
                             Wsaia_train,
                             word_list=this_wordlist,
                             ssim="nouns_zeroshot_mixedpluralsplit")
def train_zero_hypern_saia_models():

    with open('../indata/saia_zeroshot_hypernsplit.json', 'r') as f:
        ttsplit = json.load(f)

    w2v = linwac.load_w2v()

    X = np.load('../indata/saiapr.npz')
    Xsaia = X['arr_0']
    print "Xsaia", Xsaia.shape

    W = np.load('../indata/saiapr_wmat.npz')
    Wsaia = W['arr_0']
    print "Wsaia", Wsaia.shape

    Xsaia_train = Xsaia[ttsplit['train']]
    Xsaia_test = Xsaia[ttsplit['test']]

    Wsaia_t = Wsaia.transpose()
    Wsaia_t.shape
    Wsaia_t_train = Wsaia_t[ttsplit['train']]
    Wsaia_train = Wsaia_t_train.transpose()

    print ttsplit['nouns']

    print "Train linwac"
    this_wordlist = noun_list + [
        n for n in ttsplit['nouns'] if not n in noun_list
    ]
    print "Wordlist", len(this_wordlist)
    linwac.train_all_nouns(Wsaia_train,
                           Xsaia_train,
                           w2v,
                           ssim="_zeroshot_hypernsplit",
                           word_list=this_wordlist)

    print "Train transfer"
    linmap.train_mappings(msim.w2v_vecs,
                          noun_ind,
                          Wsaia_train,
                          Xsaia_train,
                          split="_zeroshot_hypernsplit")

    print "Train logwac"
    logwac.train_saia_nosamp(Xsaia_train,
                             Wsaia_train,
                             word_list=noun_list,
                             ssim="nouns_zeroshot_hypernsplit")
def train_zero_saia_models(word_list=noun_list):

    with open('../indata/saia_zeroshot_nounsplits.json', 'r') as f:
        ttsplit = json.load(f)

    w2v = linwac.load_w2v()

    X = np.load('../indata/saiapr.npz')
    Xsaia = X['arr_0']
    print "Xsaia", Xsaia.shape

    W = np.load('../indata/saiapr_wmat.npz')
    Wsaia = W['arr_0']
    print "Wsaia", Wsaia.shape

    for x, spl in enumerate(ttsplit):
        print "SPLIT", x

        if x > 0:

            Xsaia_train = Xsaia[spl['train']]
            Xsaia_test = Xsaia[spl['test']]

            Wsaia_t = Wsaia.transpose()
            Wsaia_t.shape

            Wsaia_t_train = Wsaia_t[spl['train']]
            Wsaia_train = Wsaia_t_train.transpose()

            print "Train linwac"
            linwac.train_all_nouns(Wsaia_train,
                                   Xsaia_train,
                                   w2v,
                                   ssim="_zeroshot_split" + str(x))

            print "Train transfer"
            linmap.train_mappings(msim.w2v_vecs,
                                  noun_ind,
                                  Wsaia_train,
                                  Xsaia_train,
                                  split="_zeroshot_split" + str(x))

            print "Train logwac"
            logwac.train_saia_nosamp(Xsaia_train,
                                     Wsaia_train,
                                     word_list=noun_list,
                                     ssim="nouns_zeroshot_split" + str(x))
def train_standard_plural_saia_models():

    with open('../indata/saia_standard_pluralsplit.json', 'r') as f:
        ttsplit = json.load(f)

    print "Nouns", len(ttsplit['nouns'])

    w2v = linwac.load_w2v()

    Xsaia_t, Wsaia_t = linwac.load_saia_train()

    Xsaia_train = Xsaia_t[ttsplit['train']]

    Wsaia_tt = Wsaia_t.transpose()
    print Wsaia_tt.shape
    Wsaia_t_train = Wsaia_tt[ttsplit['train']]
    Wsaia_train = Wsaia_t_train.transpose()

    print "Train linwac"
    this_wordlist = ttsplit['nouns']
    word_ind = [msim.word2ind[n] for n in this_wordlist]

    print "Wordlist", len(this_wordlist)
    linwac.train_all_nouns(Wsaia_train,
                           Xsaia_train,
                           w2v,
                           ssim="_standard_pluralsplit",
                           word_list=this_wordlist)

    print "Train transfer"
    linmap.train_mappings(msim.w2v_vecs,
                          word_ind,
                          Wsaia_train,
                          Xsaia_train,
                          split="_standard_pluralsplit")

    print "Train logwac"
    logwac.train_saia_nosamp(Xsaia_train,
                             Wsaia_train,
                             word_list=this_wordlist,
                             ssim="nouns_standard_pluralsplit")