Пример #1
0
def main():
	baseDir = "/Users/francis/Documents/cpe480_texts/basicmaterials/chemicals"

	os.chdir(baseDir)
	articles = glob.glob('*.txt')

	graph = nx.Graph()
	print "Let's get to work!"
	for index in range(0, len(articles)):
		start = time.time()
		article = articles[index]

		print article, "- (",index + 1,"of", len(articles),")"

		significant = blib.filterWords(article)
		blib.handleDocumentNouns(significant["NN"], graph)
		
		print article, "done."
		print "Time Elapsed:", time.time() - start, "seconds"
		print
		print
		# sentiment = determineSemtiment(significant[0])

	# Export graph data to a file
	edgeListFile = "chemicals.graph"
	print "Writing graph to file...."
	nx.write_gml(graph, edgeListFile)
	print "done"
Пример #2
0
def reinforce(graph, path, posReinforce):
    """Nudge the "value" of every graph node named by a noun in an article.

    The file at ``path`` is passed to ``blib.filterWords``; each item of the
    result's ``"NN"`` entry is looked up in ``graph.node`` and that node's
    ``"value"`` attribute is changed by one.

    Args:
        graph: graph object exposing a ``node`` mapping of node -> attribute
            dict. NOTE(review): ``Graph.node`` looks like the older NetworkX
            spelling of ``Graph.nodes`` -- confirm the installed version
            still provides it.
        path: location of the article, forwarded unchanged to
            ``blib.filterWords``.
        posReinforce: truthy to add 1 to each matching node's value, falsy
            to subtract 1.

    Nouns that are not in the graph, and nodes whose "value" is missing or
    not numeric, are skipped. Any other error propagates.
    """
    significant = blib.filterWords(path)
    delta = 1 if posReinforce else -1

    for noun in significant["NN"]:
        try:
            graph.node[noun]["value"] += delta
        except (KeyError, TypeError):
            # Best effort: unknown noun, or a node without a usable "value".
            # Deliberately not a bare except, so Ctrl-C and genuine bugs
            # (e.g. AttributeError) are no longer silently discarded.
            continue
Пример #3
0
def main():
    # List all the avaliable graphs and their names
    baseDir = "/Users/francis/Documents/cpe480_texts/"
    graphs = {
        "Basic Materials : Oil And Gas": baseDir + "basicmaterials/oilngasdrilling/oilngasdrilling.graph",
        "Basic Materials : Chemicals": baseDir + "basicmaterials/chemicals/chemicals.graph",
    }

    matchRate = {}

    currentArticleLoc = "/Users/francis/Documents/cpe480_texts/unmatched/" + sys.argv[1]

    articleWords = blib.filterWords(currentArticleLoc)
    # For each graph in graphs
    begin = time.time()
    print "Begin Matching..."
    for name, graphLoc in graphs.iteritems():
        print "Matching", name, "graph..."
        # load the graph
        graph = nx.read_gml(graphLoc)
        # find the match rate
        rate = findMatchRate(graph, articleWords["NN"])
        # add match rate to the match rate dictionary
        matchRate[name] = rate

        # Find highest and print Match Rate
    highest = ("", 0)
    for name, rate in matchRate.iteritems():
        if rate > highest[1]:
            highest = (name, rate)

        print name, ":", rate

    print sys.argv[1], "..."
    print "Matched to", highest[0], ": Match Rate", highest[1]
    print "Time Elapsed:", (time.time() - begin) / 60, "mins"