('ps_calc_10math_mean_range', SimpleTransform()), ('ps_calc_11math_median_range', SimpleTransform()), ('ps_calc_11math_mean_range', SimpleTransform()), ('ps_calc_12math_median_range', SimpleTransform()), ('ps_calc_12math_mean_range', SimpleTransform()), ('ps_calc_13math_median_range', SimpleTransform()), ('ps_calc_13math_mean_range', SimpleTransform()), ('ps_calc_14math_median_range', SimpleTransform()), ('ps_calc_14math_mean_range', SimpleTransform()), ## both feats ('both_ps_car_13_x_ps_reg_03', SimpleTransform()), ## nan feats ('nan_sum', SimpleTransform()), ] ##loading data## dfTrain = pd.read_csv(config.original_train_data_path) dfTest = pd.read_csv(config.original_test_data_path) with open( "%s/stratifiedKFold.%s.pkl" % (config.data_folder, config.stratified_label), "rb") as f: skf = pickle.load(f) gen_info(feat_path_name="the1ow_1023") combine_feat(feat_names, feat_path_name="the1ow_1023", dfTrain=dfTrain, dfTest=dfTest, skf=skf)
# ('description_tfidf_common_vocabulary', SimpleTransform()), ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # 
('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ] gen_info(feat_path_name="svd100_and_bow_Jun23") combine_feat(feat_names, feat_path_name="svd100_and_bow_Jun23")
SimpleTransform()), # ('query_tfidf_common_vocabulary_common_svd150', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd150', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd150', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ] gen_info(feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14") combine_feat(feat_names, feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
# ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()), # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()), ######################### ## Cooccurrence TF-IDF ## ######################### # ('question1_unigram_question2_unigram', SimpleTransform()), # ('question1_unigram_question2_bigram', SimpleTransform()), # ('question1_bigram_question2_unigram', SimpleTransform()), # ('question1_bigram_question2_bigram', SimpleTransform()), ###################### ## word match share ## ###################### ('ratio_of_question1_question2_unigram_share', SimpleTransform()), ('ratio_of_question1_question2_unigram_share_tfidf', SimpleTransform()), ] gen_info(feat_path_name="LSA_and_stats_feat_May03_CV_random") # combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_May03_CV_random")
('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # 
('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ] gen_info(feat_path_name="svd100_and_bow_Jun23") combine_feat(feat_names, feat_path_name="svd100_and_bow_Jun23")
# ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # 
('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ] # 724 dimensions gen_info(feat_path_name="LSA_and_stats_feat_Jun09") combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Jun09")
# ('title_tfidf_common_vocabulary', SimpleTransform()), # ('description_tfidf_common_vocabulary', SimpleTransform()), ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_common_svd150', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd150', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd150', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), 
('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd150_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ] gen_info(feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14") combine_feat(feat_names, feat_path_name="LSA_svd150_and_Jaccard_coef_Jun14")
if __name__ == "__main__":
    # Name of the combined feature set; the same value is passed to both
    # gen_info and combine_feat below.
    feat_path_name = "HD_dist"

    # Names of the basic features to combine, in the order they are handed
    # to combine_feat.
    basic_feat_names = [
        ## jaccard coef
        'jaccard_coef_of_unigram_between_query_title',
        'jaccard_coef_of_unigram_between_query_description',
        'jaccard_coef_of_unigram_between_title_description',
        'jaccard_coef_of_bigram_between_query_title',
        'jaccard_coef_of_bigram_between_query_description',
        'jaccard_coef_of_bigram_between_title_description',
        'jaccard_coef_of_trigram_between_query_title',
        'jaccard_coef_of_trigram_between_query_description',
        'jaccard_coef_of_trigram_between_title_description',
        ## dice dist
        'dice_dist_of_unigram_between_query_title',
        'dice_dist_of_unigram_between_query_description',
        'dice_dist_of_unigram_between_title_description',
        'dice_dist_of_bigram_between_query_title',
        'dice_dist_of_bigram_between_query_description',
        'dice_dist_of_bigram_between_title_description',
        'dice_dist_of_trigram_between_query_title',
        'dice_dist_of_trigram_between_query_description',
        'dice_dist_of_trigram_between_title_description',
    ]

    # Pair every feature name with its own, separately constructed
    # SimpleTransform() instance: one (name, transformer) tuple per feature.
    feat_names = [
        (basic_feat_name, SimpleTransform())
        for basic_feat_name in basic_feat_names
    ]

    gen_info(feat_path_name=feat_path_name)
    combine_feat(feat_names, feat_path_name=feat_path_name)
# ('question2_bow_common_vocabulary_common_svd100', SimpleTransform()), # ('question1_bow_common_vocabulary_common_svd150', SimpleTransform()), # ('question2_bow_common_vocabulary_common_svd150', SimpleTransform()), # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()), # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()), ######################### ## Cooccurrence TF-IDF ## ######################### # ('question1_unigram_question2_unigram', SimpleTransform()), # ('question1_unigram_question2_bigram', SimpleTransform()), # ('question1_bigram_question2_unigram', SimpleTransform()), # ('question1_bigram_question2_bigram', SimpleTransform()), ] gen_info(feat_path_name="LSA_and_stats_feat_Apr29") combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Apr29")
# ('title_tfidf_common_vocabulary', SimpleTransform()), # ('description_tfidf_common_vocabulary', SimpleTransform()), ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('title_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_common_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), # ('query_tfidf_common_vocabulary_title_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('query_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('title_tfidf_common_vocabulary_description_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), ('query_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), ('title_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # 
('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('title_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ('description_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_relevance', SimpleTransform()), # ('description_tfidf_common_vocabulary_individual_svd100_cosine_sim_stats_feat_by_query_relevance', SimpleTransform()), ] gen_info(feat_path_name="LSA_and_stats_feat_Jun09") combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Jun09")
# ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()), # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()), ######################### ## Cooccurrence TF-IDF ## ######################### # ('question1_unigram_question2_unigram', SimpleTransform()), # ('question1_unigram_question2_bigram', SimpleTransform()), # ('question1_bigram_question2_unigram', SimpleTransform()), # ('question1_bigram_question2_bigram', SimpleTransform()), ###################### ## word match share ## ###################### ('ratio_of_question1_question2_unigram_share', SimpleTransform()), ('ratio_of_question1_question2_unigram_share_tfidf', SimpleTransform()), ] gen_info(feat_path_name="LSA_and_stats_feat_May03_CV_Time") combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_May03_CV_Time")
# ('question2_bow_common_vocabulary_common_svd100', SimpleTransform()), # ('question1_bow_common_vocabulary_common_svd150', SimpleTransform()), # ('question2_bow_common_vocabulary_common_svd150', SimpleTransform()), # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd100_cosine_sim', SimpleTransform()), # ('question1_tfidf_common_vocabulary_question2_tfidf_common_vocabulary_tfidf_common_svd150_cosine_sim', SimpleTransform()), # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd100_cosine_sim', SimpleTransform()), # ('question1_bow_common_vocabulary_question2_bow_common_vocabulary_bow_common_svd150_cosine_sim', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_tfidf_common_vocabulary_individual_svd150', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd100', SimpleTransform()), # ('question1_bow_common_vocabulary_individual_svd150', SimpleTransform()), # ('question2_bow_common_vocabulary_individual_svd150', SimpleTransform()), ######################### ## Cooccurrence TF-IDF ## ######################### # ('question1_unigram_question2_unigram', SimpleTransform()), # ('question1_unigram_question2_bigram', SimpleTransform()), # ('question1_bigram_question2_unigram', SimpleTransform()), # ('question1_bigram_question2_bigram', SimpleTransform()), ] gen_info(feat_path_name="LSA_and_stats_feat_Map02") combine_feat(feat_names, feat_path_name="LSA_and_stats_feat_Map02")