Exemplo n.º 1
0
def plotOne(arg):
	"""Plot how often the keyword occurs, per journal or per publication year.

	Runs the keyword hunter (``ka.writeOne``), reads the resulting document
	ids from ``<keyword>.txt`` and looks up column ``arg`` for each of them.
	The values are drawn on the module-level ``fig``/``ax``: a line plot of
	per-value counts when ``args.year`` is set, otherwise a histogram. The
	figure is saved to ``args.file`` (or ``<keyword>.png`` when that is not
	set) and then displayed.

	Relies on names defined outside this function: ``args``, ``ka``, ``cur``,
	``rows``, ``table``, ``docId``, ``tup2int``, ``fig``, ``ax`` and ``plt``.

	Args:
		arg: Name of the column to select from ``table``. It is interpolated
			into the SQL text as an identifier, so it must be a trusted value.
	"""
	# The keyword search lives in a separate module (here and in plotTwo) so
	# the plotting code stays simple and other programs can reuse the search.
	if args.ngram:
		ka.writeOne(args.keyword, args.database, "%s" % docId, ngram=True)
	else:
		ka.writeOne(args.keyword, args.database, "%s" % docId)

	# Close the id file deterministically and ignore blank lines, which
	# would otherwise make int() fail below.
	with open("%s.txt" % args.keyword) as handle:
		docnumbers = [line.strip() for line in handle if line.strip()]

	# NOTE(review): the fetched rows are never used. The call is kept because
	# it consumes whatever is pending on the shared cursor; confirm whether it
	# was meant to populate `rows`.
	cur.fetchall()

	# The lookup for a document does not depend on the row it is compared
	# with, so run each query once instead of once per (row, document) pair.
	matches = []
	for number in docnumbers:
		cur.execute("SELECT %s FROM %s WHERE %s = %d" % (arg, table, docId, int(number)))
		found = cur.fetchall()
		if found:
			matches.append(found[0])

	# Same ordering as before: grouped by row, then by document.
	stats = []
	for row in rows:
		for first in matches:
			if row == first:
				# Rows whose value is NULL are recorded as -1.
				stats.append(-1 if row[0] is None else tup2int(row))
	print(stats)

	rows_ = list(itertools.chain(*rows))
	if args.year:
		years = [stats.count(y) for y in rows_]
		plt.plot(rows_, years, marker='o', linestyle='--', color='r', label=args.keyword)
		ax.set_xticklabels(rows_, rotation=90, fontsize='small')
		if years:
			# max() of an empty list raises, so only scale when there is data.
			plt.ylim([0, max(years)])
	elif rows_:
		# One bin per integer value; +2 so the largest value gets its own bin.
		ax.hist(stats, bins=range(min(rows_), max(rows_) + 2, 1), alpha=0.5, label=args.keyword)
	ax.legend(loc='upper right')
	ax.set_xticks(rows_)
	ax.set_xlabel("JournalID")
	ax.set_ylabel("Frequency")
	ax.set_title("Occurrences of %s" % args.keyword)

	fig.savefig(args.file if args.file else "%s.png" % args.keyword)
	plt.show()
Exemplo n.º 2
0
def keywordCrawl(filename):
	"""Write citation links for the keyword-matching documents to a CSV file.

	Runs the keyword hunter (``ka.writeOne``) for ``args.keyword`` and reads
	the source document ids from ``<keyword>.txt``. For every source document
	the cited work titles are read from the ``citations`` table. Each title is
	then looked up in ``docs`` (per the original author's note, the papers
	published in Annual Reviews); when a document with a matching title
	exists, one ``source docID, cited docID`` row is written.

	Known limitation (from the original author): the stored work titles are
	sometimes only a few words long, so the ``LIKE`` lookup can match the
	wrong document.

	Relies on names defined outside this function: ``args``, ``ka``, ``cur``,
	``tup2str`` and ``tup2int``.

	Args:
		filename: Path of the CSV file to create.
	"""
	# TODO: plot the resulting citation web on a map (original author's note).
	# NOTE(review): 'wb' is the Python 2 csv idiom; under Python 3 the csv
	# module needs a text-mode file opened with newline=''.
	with open(filename, 'wb') as csvfile:
		writer = csv.writer(csvfile, delimiter=',')
		ka.writeOne(args.keyword, args.database, 'docID')

		# Strip the line endings so they do not leak into the SQL text or
		# into the CSV cells, and ignore blank lines.
		with open('%s.txt' % args.keyword, 'r') as id_file:
			source_ids = [line.strip() for line in id_file if line.strip()]

		for source_id in source_ids:
			cur.execute("SELECT work_title FROM citations WHERE docID = %s" % source_id)
			# Materialise the titles first: the cursor is reused for the
			# per-title lookups below.
			titles = [tup2str(row) for row in cur.fetchall()]

			# Only this document's citations are examined, so every CSV row
			# pairs a source with one of its own citations.
			for raw_title in titles:
				title = ''.join(ch for ch in raw_title if ch != '\n')
				# 'None' is the stringified NULL title; an empty title would
				# turn the pattern into "%%" and match every document.
				if not title or title == 'None':
					continue
				# NOTE(review): the title is interpolated into the SQL text,
				# so a title containing a double quote breaks the statement.
				# Switch to the driver's parameter placeholder once the
				# DB-API paramstyle in use is confirmed.
				cur.execute('SELECT docID FROM docs WHERE Title LIKE "%%%s%%"' % title)
				checks = cur.fetchall()
				if checks:
					# The cited docID comes from the lookup result, not from
					# the list of citation titles.
					writer.writerow(["%s" % source_id, "%d" % tup2int(checks[0])])