Пример #1
0
def main():
    """Plot the averaged target-word features after PCA and t-SNE.

    Reads features and labels from ``FLAG.trans_file``, keeps the entries
    selected by ``FLAG.target_words``, averages them with
    ``PCA.average_over_words_num``, passes the averages through
    ``PCA.PCA_transform`` and ``PCA.TSNE_transform`` (``FLAG.tsne_dim``),
    and draws the result with ``PCA.plot_all_color`` before showing the
    figure.
    """
    # --- load the data and the look-up tables ---
    feats, labs = reader.read_csv_file(FLAG.trans_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    target_feats = PCA.extract_targets(feats, labs, targets)
    word_color_dict = reader.build_label_color_list(targets, color_list)

    # --- per-word averages and the flattened feature list ---
    # These results are not consumed by the plot below; the calls are kept
    # so the sequence of helper invocations is unchanged.
    word_means = PCA.average_over_words(target_feats)
    word_mean_list = [word_means[word] for word in word_means]
    flat_feats, flat_delta_labs = PCA.target_dic2list(target_feats)

    # --- averages that actually get projected ---
    ave_num_feats, ave_num_lab = PCA.average_over_words_num(
        target_feats, targets)

    # PCA first, then t-SNE on the PCA output.
    pca_feats, _ = PCA.PCA_transform(ave_num_feats)
    tsne_feats = PCA.TSNE_transform(pca_feats, FLAG.tsne_dim)

    # --- plotting ---
    figure = plt.figure()
    axes = figure.add_subplot(111)
    axes = PCA.plot_all_color(tsne_feats, ave_num_lab, rev_dic, axes,
                              word_color_dict)
    plt.show()
Пример #2
0
def main():
    """Project database features with a saved model, then plot or score them.

    Loads the query and database CSV files, the word dictionary, the target
    words and the model stored at ``FLAG.model_fn``.  The target-word
    features of the database are averaged per word and transformed with the
    loaded model.  Depending on ``FLAG.pca_dim`` the result is plotted in 2-D
    or 3-D; for any other value nothing is plotted and a MAP score is printed
    instead.

    Fixes relative to the previous version: the averaged-feature list and
    the plotting calls referenced names that were never defined in this
    function (``ave_test_feat_dic``, ``ave_test_trans``); they now use the
    ``ave_target_*`` values computed here.  The MAP branch referenced the
    undefined ``feats``/``labs``; it now uses the database features.
    """
    # NOTE(review): the query file is loaded but nothing below consumes it.
    q_feats, q_labs = reader.read_csv_file(FLAG.q_fn)
    db_feats, db_labs = reader.read_csv_file(FLAG.db_fn)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    # NOTE(review): build_targets is called without the word dictionary
    # here -- confirm this matches the helper's signature.
    targets = reader.build_targets(FLAG.target_words)
    word_color_dict = reader.build_label_color_list(targets, color_list)
    # joblib.load unpickles the file; only load model files you trust.
    model = joblib.load(FLAG.model_fn)

    proj_feat_dic = PCA.extract_targets(db_feats, db_labs, targets)

    ### get the target words average ###
    ave_target_feat_dic = PCA.average_over_words(proj_feat_dic)
    ave_target_feat_list = [ave_target_feat_dic[word]
                            for word in ave_target_feat_dic]
    # Result currently unused; call kept so helper invocations are unchanged.
    ave_num_target_feat_list = PCA.average_over_words_num(
        proj_feat_dic, targets)

    anno_list = list(ave_target_feat_dic)
    ave_target_trans = model.transform(ave_target_feat_list)

    all_feats, delta_lab_list = PCA.target_dic2list(proj_feat_dic)
    all_feats_trans = model.transform(all_feats)

    sampled_feats, sampled_delta_lab = MAP.sampling(all_feats_trans,
                                                    delta_lab_list)

    fig = plt.figure()
    if FLAG.pca_dim == 2:
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = PCA.plot_with_anno(ave_target_trans, anno_list, rev_dic, ax)
        ### plotting all word utterance ###
        ax = PCA.plot_all_color(sampled_feats, sampled_delta_lab, rev_dic, ax,
                                word_color_dict)

    elif FLAG.pca_dim == 3:
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = PCA.plot_with_anno_3d(ave_target_trans, anno_list, rev_dic, ax)
        ax = PCA.plot_all_color_3d(sampled_feats, sampled_delta_lab, rev_dic,
                                   ax, word_color_dict)

    else:
        print("no plotting but testing through MAP")

        # NOTE(review): this branch used undefined `feats`/`labs`; the
        # database features/labels are assumed to be what was meant --
        # confirm (the query set is the other candidate).
        feat_trans = model.transform(db_feats)
        print(len(feat_trans[0]))
        all_list = [(vec, db_labs[idx]) for idx, vec in enumerate(feat_trans)]

        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))

        return

    ax.legend(loc='upper right')
    plt.show()
    return
Пример #3
0
def main():
    """Fit a PCA model on averaged target-word features and plot or score it.

    Reads ``FLAG.train_file``, extracts the features of the words in
    ``FLAG.target_words``, fits the projection via ``PCA_transform`` on the
    output of ``average_over_words_num`` and applies the fitted model to the
    per-word averages and to all target features.  The model is saved to
    ``FLAG.model_fn`` when ``FLAG.save_model`` is set.  With
    ``FLAG.pca_dim`` equal to 2 or 3 the projections are plotted (plus the
    words in ``FLAG.other_words`` unless that flag is the string 'None');
    for any other value a MAP score is printed and nothing is plotted.
    """
    # --- load data and keep only the target words ---
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    target_feats = extract_targets(feats, labs, targets)
    word_color_dict = reader.build_label_color_list(targets, color_list)

    # --- fit the projection on the averaged target words ---
    word_means = average_over_words(target_feats)
    word_mean_list = [word_means[word] for word in word_means]

    ave_num_feat_lists, _labels = average_over_words_num(target_feats, targets)
    _projected, model = PCA_transform(ave_num_feat_lists)

    anno_list = list(word_means)
    word_means_proj = model.transform(word_mean_list)

    # --- project every target utterance, then sample from them ---
    all_feats, delta_lab_list = target_dic2list(target_feats)
    all_feats_proj = model.transform(all_feats)
    sampled_feats, sampled_delta_lab = sampling(all_feats_proj, delta_lab_list)

    if FLAG.save_model:
        joblib.dump(model, FLAG.model_fn)

    dim = FLAG.pca_dim

    # Guard clause: anything other than 2-D / 3-D means "score, don't plot".
    if dim not in (2, 3):
        print("no plotting but testing through MAP")

        feat_trans = model.transform(feats)
        all_list = [(feat_trans[idx], labs[idx]) for idx in range(len(feats))]

        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=dim))
        return

    two_d = dim == 2
    fig = plt.figure()
    if two_d:
        # averages first, then the sampled utterances on the same axes
        ax = fig.add_subplot(111)
        ax = plot_with_anno(word_means_proj, anno_list, rev_dic, ax)
        ax = plot_all_color(sampled_feats, sampled_delta_lab, rev_dic, ax,
                            word_color_dict)
    else:
        ax = fig.add_subplot(111, projection='3d')
        ax = plot_with_anno_3d(word_means_proj, anno_list, rev_dic, ax)
        ax = plot_all_color_3d(sampled_feats, sampled_delta_lab, rev_dic, ax,
                               word_color_dict)

    # --- optionally overlay words that were not used to fit the model ---
    if FLAG.other_words != 'None':
        other_target = reader.build_targets(FLAG.other_words, dic)
        other_feats = extract_targets(feats, labs, other_target)
        ave_feat, other_lb_list = extract_additional_words(other_feats,
                                                           other_target)
        ave_ftrans = model.transform(ave_feat)

        overlay = plot_additional_words if two_d else plot_additional_words_3d
        overlay(ave_ftrans, other_lb_list, rev_dic, ax)

    ax.legend(loc='upper right')
    plt.show()
Пример #4
0
def main():
    """Fit PCA on one data set and project two data sets' target words.

    Reads ``FLAG.train_file`` and ``FLAG.apply_file`` together with their
    dictionaries and target-word lists.  ``PCA_transform`` is fitted on all
    features of the first file; the resulting model transforms the target
    features and per-word averages of both files.  With ``FLAG.pca_dim``
    equal to 2 the two sets of averages are drawn with ``plot_with_anno``;
    with 3 the first file's target features are drawn with
    ``plot_all_color_3d``; for any other value a MAP score is printed and
    nothing is plotted.

    Fix relative to the previous version: the 3-D branch used
    ``word_color_dict`` although its definition had been commented out, so
    it raised ``NameError``.  The colour table is now built inside that
    branch.
    """
    ### preprocessing ###
    feats, labs = reader.read_csv_file(FLAG.train_file)
    dic, rev_dic = reader.build_dic(FLAG.word_dic)
    targets = reader.build_targets(FLAG.target_words, dic)
    test_feat_dic = extract_targets(feats, labs, targets)
    ave_dic = average_over_words(test_feat_dic)
    ave_feat_list = [ave_dic[word] for word in ave_dic]
    anno_list = list(ave_dic)

    # Report each key of the extracted targets and its number of entries.
    for key in test_feat_dic:
        print(key)
        print(len(test_feat_dic[key]))

    ### second data set ###
    another_feats, another_labs = reader.read_csv_file(FLAG.apply_file)
    another_dic, another_rev_dic = reader.build_dic(FLAG.apply_dic)
    another_tar = reader.build_targets(FLAG.another_target_words, another_dic)
    another_test_dic = extract_targets(another_feats, another_labs,
                                       another_tar)
    # Not consumed below (the per-utterance 2-D plot is disabled).
    another_color_dict = reader.build_label_color_list(another_tar,
                                                       another_color_list)
    test_feats2, test_delta_labs2 = target_dic2list(another_test_dic)
    ave_dic2 = average_over_words(another_test_dic)
    ave_feat_list2 = [ave_dic2[word] for word in ave_dic2]
    anno_list2 = list(ave_dic2)

    ### fit PCA on all features of the first data set ###
    feats_trans, model = PCA_transform(feats)
    test_feats, test_delta_labs = target_dic2list(test_feat_dic)
    print(len(test_feats))
    test_feats_trans = model.transform(test_feats)

    ### apply the model to the averages and to the second data set ###
    ave_feat_trans_list = model.transform(ave_feat_list)
    test_feats2_trans = model.transform(test_feats2)
    ave_feat_trans_list2 = model.transform(ave_feat_list2)

    if FLAG.pca_dim == 2:
        fig = plt.figure()
        ### start plotting the average results ###
        ax = fig.add_subplot(111)
        ax = plot_with_anno(ave_feat_trans_list, anno_list, rev_dic, ax, 'o',
                            'German')
        ax = plot_with_anno(ave_feat_trans_list2, anno_list2, another_rev_dic,
                            ax, 'x', 'French')

    elif FLAG.pca_dim == 3:
        # Built here because only this branch needs it; keeps the 2-D and
        # MAP paths exactly as they were.
        # NOTE(review): assumes a module-level `color_list` exists alongside
        # `another_color_list` -- confirm.
        word_color_dict = reader.build_label_color_list(targets, color_list)

        fig = plt.figure()
        #### start plotting the 3D projections ####
        ax = fig.add_subplot(111, projection='3d')
        ax = plot_all_color_3d(test_feats_trans, test_delta_labs, rev_dic, ax,
                               word_color_dict)

    else:
        print("no plotting but testing through MAP")

        feat_trans = model.transform(feats)
        all_list = [(feat_trans[idx], labs[idx]) for idx in range(len(feats))]

        train_list, test_list = MAP.split_train_test(all_list)
        print(MAP.MAP(test_list[:100], train_list, feat_dim=FLAG.pca_dim))

        return

    ax.legend(loc='upper right')
    plt.show()
    return