예제 #1
0
from util import precision_at_k
import sys

# NOTE(review): presumably the embedding size, matching the "_20" suffix of
# the vector files loaded below -- confirm against the training script.
DIMENSION = 20

# Load the C# and Java embeddings, both stored in word2vec *text* format.
cs_vectors = KeyedVectors.load_word2vec_format(
	"./bi2vec_vectors/cs_vectors_11_20.txt",
	binary=False)
java_vectors = KeyedVectors.load_word2vec_format(
	"./bi2vec_vectors/java_vectors_11_20.txt",
	binary=False)

# Read each sentence corpus into a list with one entry per line
# (trailing newlines are retained by readlines()).
with open("./sentences/sentences_cs_11.txt", "r") as cs_f:
	cs_data = cs_f.readlines()
with open("./sentences/sentences_java_11.txt", "r") as java_f:
	java_data = java_f.readlines()

# Work on shallow copies so the raw line lists stay untouched.
cs_sentences = list(cs_data)
java_sentences = list(java_data)

# Per-language word weights; word2weight is defined outside this snippet.
cs_word2weight = word2weight(cs_sentences)
java_word2weight = word2weight(java_sentences)


# Walk the keyword CSV and collect the Java expression of every data row.
with codecs.open("./evaluation_data/keywords.csv", "r") as f_csv:
	java_keywords = []
	cs_keywords = []
	mapping_java = dict()
	mapping_cs = dict()
	reader = csv.reader(f_csv)
	for i, row in enumerate(reader):
		# The first row is skipped (presumably a header line -- confirm).
		if i == 0:
			continue
		# Columns 1 and 2 hold the C# and Java expressions (judging by the
		# variable names); surrounding double quotes are stripped.
		# NOTE(review): expr_cs is not used in the visible part of the loop.
		expr_cs = row[1].strip('"')
		expr_java = row[2].strip('"')
		java_keywords.append(expr_java)
예제 #2
0
# Java embeddings, stored in word2vec *text* format.
java_vectors = KeyedVectors.load_word2vec_format(
    "./bi2vec_vectors/java_vectors_11_25.txt",
    binary=False)

# Alternative vector files (kept for reference, currently disabled):
# cs_vectors = KeyedVectors.load_word2vec_format("./bi2vec_vectors/cs_vectors_10_25_include_functions.txt",binary=False)
# java_vectors = KeyedVectors.load_word2vec_format("./bi2vec_vectors/java_vectors_10_25_include_functions.txt",binary=False)

# Project names grouped under "small"; how the list is used is not visible here.
smallprojects = [
    "factual",
    "mongodb",
    "log4j",
    "datastax",
]

# Read each sentence corpus into a list with one entry per line
# (trailing newlines are retained by readlines()).
with open("./sentences/sentences_cs_11.txt", "r") as cs_f:
    cs_data = cs_f.readlines()
with open("./sentences/sentences_java_11.txt", "r") as java_f:
    java_data = java_f.readlines()

# Work on shallow copies so the raw line lists stay untouched.
cs_sentences = list(cs_data)
java_sentences = list(java_data)

# Per-language word weights; word2weight is defined outside this snippet.
cs_word2weight = word2weight(cs_sentences)
java_word2weight = word2weight(java_sentences)


def get_random_range(project):
    if project == "fpml":
        return 527
    if project == "antlr":
        return 2019
    if project == "factual":
        return 95
    if project == "lucene":
        return 7720
    if project == "mongodb":
        return 65
    if project == "spring":