Exemplo n.º 1
0
def make_sim_graph(akw, col_graph):
	"""
	Build a keyword-similarity graph for the authors of a school.

	Every author in akw becomes a node (registered through add_sim_graph_node).
	Two authors are linked when the similarity score reported by
	tu.check_kw_sim for their stemmed keywords is strictly greater than 0.2.

	Args:
		akw: dict mapping each author to a record whose "keywords" entry is an
			iterable of that author's keywords.
		col_graph: the collaboration graph for the school; it is handed to
			add_sim_graph_node and queried with has_edge() to flag real coauthors.

	Returns:
		The similarity graph (nx.Graph). Each edge carries:
			"num_collabs"  - the similarity score of the two authors
			"sim_kw"       - the keywords that were found to be similar
			"areCoauthors" - True, set only when col_graph links the two authors

	NOTE(review): every author is now registered and stemmed exactly once instead
	of once per pair. This assumes add_sim_graph_node is idempotent and that
	tu.stem_word_list / tu.check_kw_sim do not mutate the lists passed to them -
	confirm against those helpers.
	"""

	sim_graph = nx.Graph()
	sim_threshold = 0.2

	# Materialise one (author, keywords, stemmed keywords) record per author.
	# Iterating items() also avoids indexing dict views, which only works on Python 2.
	records = []
	for author, info in akw.items():
		# A set removes duplicate keywords; it is turned back into a list so the
		# keywords can be looked up by position later (the order is arbitrary).
		keywords = list(set(info["keywords"]))
		# Add the author to the similarity graph
		add_sim_graph_node(author, keywords, sim_graph, col_graph)
		# Stem a copy so the unstemmed keywords stay available for reporting
		stemmed = tu.stem_word_list(keywords[:])
		records.append((author, keywords, stemmed))

	# Compare every unordered pair of authors exactly once
	for pos, (author1, keywords1, stemmed1) in enumerate(records):
		for author2, keywords2, stemmed2 in records[pos + 1:]:
			sim = tu.check_kw_sim(stemmed1, stemmed2)
			# the similarity score
			ratio = sim[0]
			# the indices (in the longest of the two author keyword lists) of the keywords that are similar
			indices = sim[1]

			# Only pairs above the threshold get an edge
			if ratio > sim_threshold:
				# On a tie in length the second author's list is used, as before
				if len(keywords1) > len(keywords2):
					longest = keywords1
				else:
					longest = keywords2

				# Translate the matched indices back into the unstemmed keywords
				matched_words = [longest[idx] for idx in indices]

				# Keyword attributes are accepted by both networkx 1.x and 2.x,
				# whereas a positional attribute dict is rejected by 2.x.
				sim_graph.add_edge(author1, author2, num_collabs=ratio, sim_kw=matched_words)
				# Indicate whether authors are actual coauthors
				if col_graph.has_edge(author1, author2):
					sim_graph[author1][author2]["areCoauthors"] = True

	return sim_graph
Exemplo n.º 2
0
	def test_check_sim(self):
		"""
		check_kw_sim on two overlapping keyword lists should report a score of 0.5
		and the positions [0, 2], which are where "python" and "django" sit in the
		longer list.
		"""
		shorter = ["java", "python", "django"]
		longer = ["python", "programming", "django", "graphs"]
		expected_score = 0.5
		expected_positions = [0, 2]

		outcome = tu.check_kw_sim(shorter, longer)

		# First element is the similarity score, second the matched indices
		self.assertEqual(outcome[0], expected_score)
		self.assertEqual(outcome[1], expected_positions)