Пример #1
0
def crawl_now():
	"""Run the crawler and rebuild the `index_table` and `url_info` tables.

	Calls ``final_crawler.start_crawl()``, empties both tables, then inserts
	one row per indexed term (its URLs joined with ", ") and one row per
	crawled page (url, title, description), and finally commits.

	Returns:
		None.

	Raises:
		Any exception raised by ``MySQLdb`` or ``final_crawler`` propagates
		to the caller; the database connection is closed in every case.
	"""
	import final_crawler

	# NOTE(review): connection credentials are hard-coded here; consider
	# moving them to configuration.
	db = MySQLdb.connect("localhost", "root", "9314", "webbed", charset='utf8')
	try:
		cursor = db.cursor()
		index, pagedata = final_crawler.start_crawl()

		# NOTE(review): per the MySQL manual TRUNCATE TABLE causes an implicit
		# commit, so a failure during the inserts below would leave the
		# tables empty -- confirm this is acceptable.
		cursor.execute("TRUNCATE TABLE `index_table`")
		cursor.execute("TRUNCATE TABLE `url_info`")

		insert_term = """INSERT INTO index_table (term, urls) VALUES (%s, %s)"""
		for term, term_urls in index.items():
			cursor.execute(insert_term, (term, ', '.join(term_urls)))

		insert_page = """INSERT INTO url_info (url, title, description) VALUES (%s, %s, %s)"""
		for url, info in pagedata.items():
			# Positional access: info[0] is stored as the title and info[1]
			# as the description; any further elements are ignored.
			cursor.execute(insert_page, (url, info[0], info[1]))

		db.commit()
	finally:
		# Always release the connection, even when the crawl or a statement
		# fails part-way through.
		db.close()
	return None
Пример #2
0
def crawl_now(request):
	"""Run the crawler, rebuild the index tables and render a confirmation page.

	Calls ``final_crawler.start_crawl()``, empties `index_table` and
	`url_info`, then inserts one row per indexed term (its URLs joined with
	", ") and one row per crawled page (url, title, description, pagerank),
	and commits.

	Args:
		request: Incoming request object; it is not read by this function.

	Returns:
		The result of ``render_to_response('update_index.html')``.

	Raises:
		Any exception raised by ``MySQLdb`` or ``final_crawler`` propagates
		to the caller; the database connection is closed in every case.
	"""
	import final_crawler

	# NOTE(review): connection credentials are hard-coded here; consider
	# moving them to configuration.
	db = MySQLdb.connect("localhost", "root", "9314", "webbed", charset='utf8')
	try:
		cursor = db.cursor()
		index, pagedata = final_crawler.start_crawl()

		# NOTE(review): per the MySQL manual TRUNCATE TABLE causes an implicit
		# commit, so a failure during the inserts below would leave the
		# tables empty -- confirm this is acceptable.
		cursor.execute("TRUNCATE TABLE `index_table`")
		cursor.execute("TRUNCATE TABLE `url_info`")

		insert_term = """INSERT INTO index_table (term, urls) VALUES (%s, %s)"""
		for term, term_urls in index.items():
			cursor.execute(insert_term, (term, ', '.join(term_urls)))

		insert_page = """INSERT INTO url_info (url, title, description, pagerank) VALUES (%s, %s, %s, %s)"""
		for url, info in pagedata.items():
			# Positional access: info[0] is stored as the title, info[1] as
			# the description and info[2] as the pagerank.
			cursor.execute(insert_page, (url, info[0], info[1], info[2]))

		db.commit()
	finally:
		# Always release the connection, even when the crawl or a statement
		# fails part-way through.
		db.close()
	return render_to_response('update_index.html')
Пример #3
0
def test():
	"""Run the crawler and print each key of the mapping it returns."""
	import final_crawler

	# NOTE(review): other callers of final_crawler.start_crawl() in this file
	# unpack a 2-tuple from it; confirm the return value here really has
	# .items().
	crawl_result = final_crawler.start_crawl()
	for key, _ in crawl_result.items():
		print(key)