from util import precision_at_k
import sys

# NOTE(review): KeyedVectors, word2weight, codecs and csv are used below but
# are not imported in the visible part of this file -- presumably they are
# imported above this section; confirm.

DIMENSION = 20

# Load the two embedding tables stored in word2vec text format (binary=False).
cs_vectors = KeyedVectors.load_word2vec_format(
    "./bi2vec_vectors/cs_vectors_11_20.txt", binary=False)
java_vectors = KeyedVectors.load_word2vec_format(
    "./bi2vec_vectors/java_vectors_11_20.txt", binary=False)

# Read both sentence files fully into memory, one list entry per line
# (readlines() keeps the trailing newline on each entry).
with open("./sentences/sentences_cs_11.txt", "r") as cs_f:
    cs_data = cs_f.readlines()

with open("./sentences/sentences_java_11.txt", "r") as java_f:
    java_data = java_f.readlines()

# Shallow copies of the raw line lists, so the *_data lists stay untouched
# if the *_sentences lists are modified later.
cs_sentences = list(cs_data)
java_sentences = list(java_data)

# Per-word weights computed by the project helper from each sentence list.
cs_word2weight = word2weight(cs_sentences)
java_word2weight = word2weight(java_sentences)

with codecs.open("./evaluation_data/keywords.csv", "r") as f_csv:
    reader = csv.reader(f_csv)
    java_keywords = []
    cs_keywords = []
    mapping_java = {}
    mapping_cs = {}
    for i, row in enumerate(reader):
        if i == 0:
            # The first row is skipped (presumably the CSV header -- confirm).
            continue
        # Columns 1 and 2 hold the two expressions; strip any surrounding
        # double quotes that survived CSV parsing.
        expr_cs = row[1].strip("\"")
        expr_java = row[2].strip("\"")
        java_keywords.append(expr_java)
java_vectors = KeyedVectors.load_word2vec_format( "./bi2vec_vectors/java_vectors_11_25.txt", binary=False) # cs_vectors = KeyedVectors.load_word2vec_format("./bi2vec_vectors/cs_vectors_10_25_include_functions.txt",binary=False) # java_vectors = KeyedVectors.load_word2vec_format("./bi2vec_vectors/java_vectors_10_25_include_functions.txt",binary=False) smallprojects = ["factual", "mongodb", "log4j", "datastax"] with open("./sentences/sentences_cs_11.txt", "r") as cs_f: cs_data = cs_f.readlines() with open("./sentences/sentences_java_11.txt", "r") as java_f: java_data = java_f.readlines() cs_sentences = [x for x in cs_data] java_sentences = [x for x in java_data] cs_word2weight = word2weight(cs_sentences) java_word2weight = word2weight(java_sentences) def get_random_range(project): if project == "fpml": return 527 if project == "antlr": return 2019 if project == "factual": return 95 if project == "lucene": return 7720 if project == "mongodb": return 65 if project == "spring":