Пример #1
0
# Command-line usage: <script> LINKS_JSON [NUM_COMPANIES]
# The optional second argument caps how many companies are processed;
# when it is absent the script falls back to 504.
try:
    num = int(sys.argv[2])
except IndexError:
    num = 504

# The JSON file maps each company to the data attached to its graph node.
with open(sys.argv[1]) as data_file:
    data = json.load(data_file)

print(len(data))

# Never ask for more companies than the file holds: the original sliced the
# key list (silently truncating) but then indexed it up to `num`, which
# raised IndexError whenever num > len(data).
num = max(0, min(num, len(data)))

# Snapshot the keys once. Rebuilding data.keys() for every lookup is wasted
# work on Python 2 and dict views are not indexable at all on Python 3.
companies = list(data)[:num]

g = Graph()
for company in companies:
    g.add_nodes(company, data[company])

g.link_all_nodes()

# matrix[i][j] holds the number of links the graph reports between
# company i and company j (0 when return_links yields None).
matrix = []
for i, source in enumerate(companies):
    print(str(i) + "\t" + source)
    row = []
    for target in companies:
        links = g.return_links(source, target)
        row.append(0 if links is None else len(links))
    matrix.append(row)

mat = np.array(matrix, dtype=float)
print(mat)

Пример #2
0
def get_linkage_matrix(nameOfFile, size, weight=0.6):
    """Build a weighted company-to-company link-score matrix from a JSON file.

    The JSON file maps each company to the links collected for it (per the
    original author's notes: the Wikipedia links coming out of that
    company's page). The first ``size`` companies are added to a ``Graph``,
    connected with ``Graph.link_all_nodes`` and, for every ordered pair, the
    number of links returned by ``Graph.return_links`` becomes the raw
    matrix entry (0 when ``None`` is returned).

    Each row is then divided by its own diagonal entry when that entry is
    non-zero, the diagonal is set to 1, the row is passed through ``norm``
    and finally the whole matrix is multiplied by ``weight``.

    Args:
        nameOfFile: Path of the JSON file to read.
        size: Maximum number of companies to include. Values larger than
            the number of companies in the file are clamped to that number
            (the original raised IndexError in that case).
        weight: Factor applied to the finished matrix. Defaults to 0.6,
            the value that used to be hard-coded.

    Returns:
        A float ``numpy`` array with one row and one column per company
        processed.
    """
    with open(nameOfFile) as data_file:
        data = json.load(data_file)

    # Snapshot the keys once: re-creating data.keys() for every lookup is
    # wasted work on Python 2 and dict views cannot be indexed on Python 3.
    num = max(0, min(size, len(data)))
    companies = list(data)[:num]

    # Create one node per company, then connect the nodes. According to the
    # original notes the connection is based on common links compared by
    # direct equivalence (no semantic / NLP matching).
    g = Graph()
    for company in companies:
        g.add_nodes(company, data[company])
    g.link_all_nodes()

    # Raw link counts for every ordered pair of companies.
    matrix = []
    for source in companies:
        row = []
        for target in companies:
            links = g.return_links(source, target)
            row.append(0 if links is None else len(links))
        matrix.append(row)
    mat = np.array(matrix, dtype=float)

    for i in range(num):
        row = mat[i]  # view into mat, so in-place edits update the matrix
        self_links = row[i]
        # Score each entry relative to the company's own link count; rows
        # whose diagonal is zero are left unscaled to avoid dividing by 0.
        if self_links != 0:
            row /= self_links
        row[i] = 1
        # NOTE(review): `norm` is not defined in this function. A nested,
        # commented-out version used to live here; it divided every
        # off-diagonal entry of the row by the sum of the off-diagonal
        # entries (leaving the row untouched when that sum was 0).
        # Presumably the same helper now lives at module level - confirm.
        mat[i] = norm(row, i, num)

    return np.multiply(mat, weight)