示例#1
0
文件: LIME.py 项目: yandachen/GI_2019
                            unigram_observe_ids=unigram_observe_ids)
                        results[analysis_key] = lime.lime()

                    pkl.dump(results, open(pkl_dir, 'wb'))
                    lime_results[(model_id, run_idx, fold_idx,
                                  class_idx)] = results

    pkl.dump(lime_results, open("../data/lime_results.pkl", 'wb'))


def get_lime_arguments(argv=None):
    """Parse the command-line options that size the LIME analysis.

    Args:
        argv: Optional list of argument strings to parse. When ``None``
            (the default) the arguments are taken from ``sys.argv[1:]``,
            which is the behaviour existing callers rely on.

    Returns:
        A dict with the integer entries ``num_folds`` (default 5),
        ``num_runs`` (default 5) and ``num_models`` (default 12).
    """
    parser = ArgumentParser()
    parser.add_argument('--num_folds', type=int, default=5)
    parser.add_argument('--num_runs', type=int, default=5)
    parser.add_argument('--num_models', type=int, default=12)
    # Forwarding argv lets the function be driven programmatically (tests,
    # notebooks) without mutating sys.argv; None keeps the CLI behaviour.
    args = parser.parse_args(argv)
    return vars(args)


if __name__ == '__main__':
    # Script entry point: read the run sizes from the command line, map a
    # fixed list of stopwords to their integer ids, and run the LIME analysis
    # on those ids.
    args = get_lime_arguments()
    data_loader = Data_loader(labeled_only=True, option='word')
    stopwords = ['a', 'on', 'da', 'into', 'of', 'that']

    # convert2int_arr is called once per stopword and only the first element
    # of its result is kept as that stopword's token id.
    tokens_analyzed = list(
        map(lambda word: data_loader.convert2int_arr(word)[0], stopwords))

    fold_count = args['num_folds']
    run_count = args['num_runs']
    model_count = args['num_models']
    lime_analysis(tokens_analyzed,
                  num_folds=fold_count,
                  num_runs=run_count,
                  num_models=model_count)
        pkl.dump(tweet_elmo_property,
                 open("%s%d.pkl" % (data_dir, tweet_id), 'wb'))


if __name__ == '__main__':
    # Script entry point: for every stopword below that the word-level data
    # loader maps to an id other than 1, build the domain-specific ELMo
    # representations of all tweets with that unigram masked, writing each
    # stopword's output to its own "DS_ELMo_rep_masked_<stopword>/" directory.
    #
    # Earlier one-off invocations, kept for reference:
    #create_adversarial_ELMo_representation(domain_specific=True, input_file="../data/insert_on_natural_sentence.pkl",
    #                                        output_dir="DS_ELMo_adversarial_insert_on/", parameter_dir="ELMo_weights/4-23-9pm")
    #create_ELMo_representation(domain_specific=False, output_dir="Non_DS_ELMo_rep/")
    #create_masked_unigram_ELMo_representation("DS_ELMo_rep_masked_a/", parameter_dir="ELMo_weights/4-23-9pm",
    #                                          masked_unigram_id=9)
    #create_masked_unigram_ELMo_representation("DS_ELMo_rep_masked_da/", parameter_dir="ELMo_weights/4-23-9pm",
    #                                          masked_unigram_id=24)
    #create_masked_unigram_ELMo_representation(domain_specific=False, output_dir="NonDS_ELMo_rep_masked_a/", masked_unigram_id=9)
    #create_masked_unigram_ELMo_representation(domain_specific=False, output_dir="NonDS_ELMo_rep_masked_on/", masked_unigram_id=13)
    #create_masked_unigram_ELMo_representation(domain_specific=False, output_dir="NonDS_ELMo_rep_masked_da/", masked_unigram_id=24)
    from data_loader import Data_loader
    dl_word = Data_loader(labeled_only=True, option='word')

    stopwords = [
        'o', '...lol', 'let', 'yeah', 'got', 'any', 'into', 'thats', 'who',
        'out', 'that', "'s", 'yo', 'as', 'we', 'be', 'of', 'u', 'do', 'in'
    ]
    for stopword in stopwords:
        unigram_id = dl_word.convert2int_arr(stopword)[0]
        if unigram_id == 1:
            # Id 1 is treated as "stopword not in the vocabulary": skip it.
            continue
        masked_output_dir = "DS_ELMo_rep_masked_%s/" % stopword
        create_all_tweets_ELMo_representation(
            domain_specific=True,
            output_dir=masked_output_dir,
            parameter_dir="ELMo_weights/4-23-9pm",
            masked_unigram_id=unigram_id)