Пример #1
0
def get_industry_matrix(nameOfFile, size):
    """Build the weighted industry-link score matrix for the first companies.

    The JSON file is loaded, up to ``size`` of its top-level keys are added
    to a ``Graph`` as nodes, the nodes are linked with
    ``Graph.link_all_industry()``, and the number of links between every
    pair of companies is turned into a per-row score.

    Args:
        nameOfFile: Path of the JSON file whose top-level keys are the
            companies (the original notes call it ``industry.json``).
        size: Maximum number of companies to include. Values larger than
            the number of companies in the file are clamped, and negative
            values yield an empty result.

    Returns:
        A numpy float array holding the scores for the selected companies,
        multiplied by 0.4 (the share this matrix gets in the final matrix).
    """
    # Collect the data from the JSON file; it gives the industry details
    # of each company.
    with open(nameOfFile) as data_file:
        data = json.load(data_file)

    # Take the key list once. Rebuilding it for every matrix cell is wasted
    # work, and dict views cannot be indexed on Python 3. Clamping keeps
    # the loops below inside the companies that really exist.
    num = max(0, min(size, len(data)))
    companies = list(data)[:num]

    g = Graph()

    # Create the nodes.
    for company in companies:
        g.add_nodes(company, data[company])

    # Link the nodes. Per the original author's notes the links come from a
    # semantic (NLP / spacy) comparison of the companies' industry
    # descriptions -- that logic lives in Graph, not here.
    g.link_all_industry()

    # Create the matrix: cell (i, j) is the number of links between
    # company i and company j.
    matrix = []
    for i, source in enumerate(companies):
        print(str(i) + "\t" + source)
        row = []
        for target in companies:
            links = g.return_links(source, target)
            # return_links may hand back None; count that as "no links".
            row.append(0 if links is None else len(links))
        matrix.append(row)

    # Allot the scores according to the number of links of each company.
    mat = np.array(matrix, dtype=float)
    for i in range(num):
        row = mat[i]  # a view, so in-place changes land in mat
        diagonal = row[i]
        if diagonal != 0:
            # Scale the row by the company's own link count. A zero count
            # is skipped to avoid dividing by zero.
            row /= diagonal
        row[i] = 1
        # NOTE(review): norm is defined elsewhere in this module; it is
        # called with the row, the index of its diagonal entry and the
        # row length -- confirm its contract there.
        mat[i] = norm(row, i, num)

    # Give this matrix a 40% weight in the final matrix.
    return np.multiply(mat, 0.4)
Пример #2
0
    data = json.load(data_file)

print(len(data))
g = Graph()

# Take the key list once: rebuilding it for every matrix cell is wasted
# work, and dict views cannot be indexed on Python 3.
companies = list(data)

for company in companies[0:num]:
    g.add_nodes(company, data[company])

g.link_all_nodes()

# Cell (i, j) is the number of links between company i and company j.
for i in range(0, num):
    print(str(i) + "\t" + companies[i])
    row = []
    for j in range(0, num):
        links = g.return_links(companies[i], companies[j])
        # return_links may hand back None; count that as "no links".
        row.append(0 if links is None else len(links))
    matrix.append(row)

mat = np.array(matrix, dtype=float)
print(mat)

# Scale every off-diagonal entry by the company's own link count.
for i in range(0, num):
    diagonal = mat[i][i]
    # Skip rows with a zero count: dividing would fill the row with
    # inf/nan values.
    if diagonal != 0:
        for j in range(0, num):
            if i != j:
                mat[i][j] /= diagonal

#normalize
def norm(array, identity_index, arr_len):
Пример #3
0
def get_linkage_matrix(nameOfFile, size):
    """Build the weighted common-link score matrix for the first companies.

    The JSON file is loaded, up to ``size`` of its top-level keys are added
    to a ``Graph`` as nodes, the nodes are linked with
    ``Graph.link_all_nodes()``, and the number of links between every pair
    of companies is turned into a per-row score.

    Args:
        nameOfFile: Path of the JSON file whose top-level keys are the
            companies (the original notes call it ``links.json`` and say it
            holds the Wikipedia links leaving each company's page).
        size: Maximum number of companies to include. Values larger than
            the number of companies in the file are clamped, and negative
            values yield an empty result.

    Returns:
        A numpy float array holding the scores for the selected companies,
        multiplied by 0.6.
    """
    # Collect the data from the JSON file.
    with open(nameOfFile) as data_file:
        data = json.load(data_file)

    # Take the key list once. Rebuilding it for every matrix cell is wasted
    # work, and dict views cannot be indexed on Python 3. Clamping keeps
    # the loops below inside the companies that really exist.
    num = max(0, min(size, len(data)))
    companies = list(data)[:num]

    g = Graph()

    # Create the nodes.
    for company in companies:
        g.add_nodes(company, data[company])

    # Link the nodes by their common links. Per the original author's notes
    # this checks for direct equivalence, unlike the industry graph, which
    # relies on semantic (NLP) comparison -- that logic lives in Graph.
    g.link_all_nodes()

    # Cell (i, j) is the number of links between company i and company j.
    matrix = []
    for source in companies:
        row = []
        for target in companies:
            links = g.return_links(source, target)
            # return_links may hand back None; count that as "no links".
            row.append(0 if links is None else len(links))
        matrix.append(row)

    # Allot the scores according to the number of links of each company.
    mat = np.array(matrix, dtype=float)
    for i in range(num):
        row = mat[i]  # a view, so in-place changes land in mat
        diagonal = row[i]
        if diagonal != 0:
            # Scale the row by the company's own link count. A zero count
            # is skipped to avoid dividing by zero.
            row /= diagonal
        # Insert the 1 on the diagonal before normalising the row.
        row[i] = 1
        # NOTE(review): norm is defined elsewhere in this module. A
        # commented-out copy formerly kept here divided every entry except
        # the one at the identity index by the sum of those entries, and
        # returned the row untouched when that sum was zero -- confirm the
        # live definition still behaves that way.
        mat[i] = norm(row, i, num)

    # Weight this matrix by 0.6.
    return np.multiply(mat, 0.6)