Exemplo n.º 1
0
def make_all_frequency(batch_size=10000):
	"""Compute term frequencies for every blog document not yet analyzed.

	Rows of ``blog_data`` whose ``analyzed_flag`` is 0 are read in batches.
	For each document the term frequencies returned by
	``vector.make_frequency(title, body, tags)`` are inserted into
	``ngram_relation`` and the document's ``analyzed_flag`` is set to 1.
	The transaction is committed once per document, so work that was
	already committed survives a failure on a later document.

	Args:
		batch_size: Maximum number of documents fetched per SELECT.
			Defaults to 10000, the value that used to be hard-coded.

	Raises:
		ValueError: If ``batch_size`` is smaller than 1.
	"""
	if batch_size < 1:
		# A non-positive batch would never reduce the remaining count.
		raise ValueError("batch_size must be a positive integer")

	vector = vector_search()
	conn = pymysql.connect(host=DB_HOST,
						   port=DB_PORT,
						   user=DB_USER,
						   passwd=DB_PW,
						   db=DB_NAME,
						   charset="utf8")
	# try/finally guarantees the cursor and the connection are released even
	# when analysis or a SQL statement raises half-way through a batch.
	try:
		cursor = conn.cursor()
		try:
			# Number of documents not yet reflected in the ngram_relation table
			cursor.execute("select count(*) from blog_data where analyzed_flag=0")
			amount = cursor.fetchone()[0]

			# Divide and process all documents
			while amount > 0:
				print(amount)
				limit = min(amount, batch_size)
				amount -= limit

				# The offset stays 0 on purpose: processed rows are flagged as
				# analyzed, so they drop out of the next SELECT by themselves.
				cursor.execute("select no, title, body, tags, url from blog_data "
							   "where analyzed_flag=0 limit 0, %s", (limit,))
				document_result = cursor.fetchall()
				if not document_result:
					# Nothing left to analyze (e.g. rows were flagged elsewhere
					# after the initial count), so further queries are pointless.
					break

				# Each document (url is selected but not needed here)
				for no, title, body, tags, url in document_result:
					print("%s %s" % (no, title))
					frequency_result = vector.make_frequency(title, body, tags)

					# Each term in a document
					# info[0]=frequency, info[1]=weight, info[2]=Ns(the number of combined terms)
					data_list = [(no, term, info[0], info[2]) for term, info in frequency_result]
					cursor.executemany("insert ignore into ngram_relation(blogid, term, frequency, Ns) values(%s, %s, %s, %s)", data_list)

					# After calculating the frequency of each term, set analyzed flag from 0 to 1
					cursor.execute("update blog_data set analyzed_flag=1 where no=%s", (no,))

					# Commit per document so the inserted terms and the flag
					# update become visible together.
					conn.commit()
		finally:
			cursor.close()
	finally:
		conn.close()
Exemplo n.º 2
0
def search():
	error = None
	# GET request
	search_query = request.args.get("q", '')

	## Get query data from GET request
	## Calculate the frequency of each term
	## Calculate TF, IDF, weight of each term
	vector = vector_search()

	# Make the frequency of each term
	analyzed_query = vector.make_frequency(title=search_query)
	analyzed_query = sorted(analyzed_query, key=lambda x:x[1][0], reverse=True)

	for i, (keyword, info) in enumerate(analyzed_query):
		print i, keyword, info

	# Get maximum value of frequency of terms
	try:
		max_f = analyzed_query[0][1][0]
	except Exception, e:
		print e
		error = u"검색 결과가 존재하지 않습니다."
		return render_template("search.html", error=error)