def convert_to_tfidf(filename):
    """Write the training rows to ``filename`` with six Tfidf scores appended.

    Each line of ``2lakh_training_data.txt`` is split on commas with
    ``csv_io.split``. The query, keyword, title and description ids are read
    from columns 7, 8, 9 and 10 of the row. For every one of the six id pairs
    (query/keyword, query/title, query/description, keyword/title,
    keyword/description, title/description) a ``Tfidf`` object built from the
    two matching ``*_ids.txt`` files is asked to ``classify`` the pair, and
    element ``[0][0]`` of the result is appended to the row formatted with two
    decimals. The extended row is written comma-joined, followed by a newline.

    Args:
        filename: Path of the output file. It is created, or truncated if it
            already exists.
    """
    # (column index of the id in a training row, file the Tfidf is built from)
    query = (7, "query_ids.txt")
    keyword = (8, "keyword_ids.txt")
    title = (9, "title_ids.txt")
    description = (10, "desc_ids.txt")

    # The order here fixes the order of the six appended columns.
    pairs = (
        (query, keyword),
        (query, title),
        (query, description),
        (keyword, title),
        (keyword, description),
        (title, description),
    )

    with open(filename, "w") as out:
        # One Tfidf per id pair, built once and reused for every row.
        models = [
            (left_col, right_col, Tfidf(left_file, right_file))
            for (left_col, left_file), (right_col, right_file) in pairs
        ]

        with open("2lakh_training_data.txt") as rows:
            for line in rows:
                sample = csv_io.split(line, [','])

                # Score every pair before touching the row, so the ids are
                # always read from the untouched input columns.
                scores = [
                    model.classify(sample[left_col], sample[right_col])
                    for left_col, right_col, model in models
                ]
                for score in scores:
                    sample.append('%.2f' % score[0][0])

                # NOTE(review): if csv_io.split leaves the line's trailing
                # newline on the last field, the appended scores end up after
                # that newline in the output -- confirm against csv_io.split.
                out.write(",".join(sample))
                out.write("\n")
def dummy():
    """Build a Tfidf from ``dummy.txt`` and ``dummy2.txt`` and classify ids '1' and '3'.

    Returns:
        Whatever ``Tfidf.classify`` returns for the id pair ``('1', '3')``.
    """
    return Tfidf("dummy.txt", "dummy2.txt").classify('1', '3')