Пример #1
0
def get_industry_matrix(nameOfFile, size):
    """Build the weighted industry-link matrix for the companies in a JSON file.

    The file is loaded as a mapping of company name -> industry details. Up to
    ``size`` companies are added to a ``Graph`` and linked through
    ``Graph.link_all_industry()``. Entry ``[i][j]`` of the matrix starts as the
    number of links ``Graph.return_links`` reports between companies ``i`` and
    ``j`` (0 when it returns ``None``). Each row is then divided by its own
    diagonal entry (when that entry is non-zero), its diagonal is set to 1, the
    row is passed through ``norm`` and finally the whole matrix is scaled by
    0.4.

    As a side effect, one line per row ("<index>\\t<company>") is printed while
    the link counts are collected.

    Args:
        nameOfFile: Path of the JSON file describing each company's industry.
        size: Maximum number of companies to use. A value larger than the
            number of companies in the file is clamped to that number instead
            of failing with an IndexError.

    Returns:
        A square float ``numpy`` array with one row/column per company used,
        already multiplied by 0.4.
    """
    # Collect the data from the industry JSON file, which gives the details
    # of each company's industry.
    with open(nameOfFile) as data_file:
        data = json.load(data_file)

    # Snapshot the keys once. Subscripting data.keys() directly only works on
    # Python 2 and rebuilds the whole key list on every access.
    companies = list(data.keys())
    # Never index past the companies that actually exist in the file.
    num = min(size, len(companies))

    g = Graph()

    # Create one node per company.
    for company in companies[:num]:
        g.add_nodes(company, data[company])

    # Link the nodes. Per the original author's note, the links come from
    # comparing the companies' industry descriptions with NLP (spacy); that
    # logic lives inside Graph and is not visible here.
    g.link_all_industry()

    # Create the matrix of link counts between every pair of companies.
    matrix = []
    for i in range(num):
        print(str(i) + "\t" + companies[i])
        row = []
        for j in range(num):
            links = g.return_links(companies[i], companies[j])
            row.append(0 if links is None else len(links))
        matrix.append(row)

    # Score each row relative to the company's link count with itself.
    mat = np.array(matrix, dtype=float)
    for i in range(num):
        diagonal = mat[i, i]
        if diagonal != 0:
            # Dividing the whole row is equivalent to dividing only the
            # off-diagonal entries, because the diagonal is overwritten below.
            mat[i] /= diagonal
        mat[i, i] = 1
        mat[i] = norm(mat[i], i, num)

    # This matrix contributes 40% of the final matrix.
    return np.multiply(mat, 0.4)
Пример #2
0
from graph import Node, Graph

# Link-count matrix, filled row by row below.
matrix = []

# Number of companies to process: optional second command-line argument,
# falling back to 504 when it is not supplied.
try:
    num = int(sys.argv[2])
except IndexError:
    num = 504

# The first command-line argument is the path of the JSON file that maps each
# company to its industry details.
with open(sys.argv[1]) as data_file:
    data = json.load(data_file)

# Snapshot the keys once. Subscripting data.keys() directly only works on
# Python 2 and rebuilds the whole key list on every access.
companies = list(data.keys())
# Never index past the companies that actually exist in the file; previously
# a too-large count raised IndexError while the matrix was being built.
num = min(num, len(companies))

# Create one node per company, then let the graph link them.
g = Graph()
for company in companies[:num]:
    g.add_nodes(company, data[company])

g.link_all_industry()


# matrix[i][j] is the number of links between company i and company j
# (0 when return_links reports None).
for i in range(num):
    matrix.append([])
    # Uncomment to trace progress: print(str(i)+"\t"+companies[i])
    for j in range(num):
        a = g.return_links(companies[i], companies[j])
        if a is None:
            a = []
        matrix[i].append(len(a))


mat = np.array(matrix,dtype=float)
for i in xrange(0,num):
    for j in xrange(0,num):