示例#1
0
文件: crawl.py 项目: Lhfcws/ai_hw
def _crawl_pages(M, conn, cursor, url_pattern, max_pages):
	"""Insert the users of at most ``max_pages`` pages, committing once per page.

	Stops early as soon as ``M.end()`` is true.  For every entry returned by
	``M.getUsers()`` one row is inserted into ``users``: the ``stat[0]``
	entry, the current keyword, and the matching ``stat[1]`` entry encoded as
	UTF-8 with everything matched by ``url_pattern`` removed.
	"""
	for _ in range(max_pages):
		if M.end():
			break
		# If we meet a captcha we get past it with the
		# save / logout / restart / login / load sequence.
		if M.captcha():
			M.save()
			M.logout()
			M.restart()
			M.login()
			M.load()

		stat = M.getUsers()

		# Write the user list into database
		for i, user in enumerate(stat[0]):
			text = url_pattern.sub("", stat[1][i].encode("utf-8"))
			cursor.execute("insert into users value(%s, %s, %s)", [user, M.getKeyword(), text])

		# Flip to next page.
		M.flip()

		conn.commit()


def main():
	"""Crawl up to ten pages of users for one keyword and store them in MySQL.

	Connects to the ``ai_hw`` database with the settings returned by
	``dbconfig()``, logs in through ``Main``, restricts the period to the
	hard-coded test day, sets the keyword taken from ``model(cursor)[1]`` and
	then stores the users page by page.

	``M.quit()`` is always called once the crawler has been started, and the
	cursor and connection are always closed, even when a step raises.
	"""
	# NOTE(review): inside [...] the '|' characters are literals, so this
	# class also matches '|' and spaces.  The pattern is kept unchanged so
	# exactly the same substrings are stripped as before.
	url_pattern = re.compile("http[0-9|:|/| |.|a-z|A-Z]*")

	# connect MYSQL
	res = dbconfig()
	conn = MySQLdb.connect(host=res[0], user=res[1], passwd=res[2])
	try:
		conn.select_db('ai_hw')
		cursor = conn.cursor()
		try:
			cursor.execute("set names 'utf8'")
			# Init
			# NOTE(review): the credentials are hard-coded here; consider
			# loading them from configuration instead.
			M = Main("*****@*****.**", "aiai2012")
			M.start()
			try:
				M.login()
				M.clear()

				# Test day
				M.period("2012-09-20", "2012-09-20")

				M.keyword(model(cursor)[1])
				M.config()

				_crawl_pages(M, conn, cursor, url_pattern, 10)
			finally:
				M.quit()
			# The 'delete from request' / 'insert into finish' bookkeeping
			# was commented out in the original and is still not done here.
		finally:
			cursor.close()
	finally:
		conn.close()
示例#2
0
文件: finish.py 项目: Lhfcws/ai_hw
def finish():
	"""Copy the keyword of one ``request`` row into the ``finish`` table.

	Connects to the ``ai_hw`` database with the settings returned by
	``dbconfig()``, reads a single row from ``request`` and inserts its
	second column into ``finish``, then commits.

	Returns None.  Nothing is inserted when ``request`` has no rows.  The
	cursor and connection are closed even when a statement raises.
	"""
	# connect MYSQL
	res = dbconfig()
	conn = MySQLdb.connect(host=res[0], user=res[1], passwd=res[2])
	try:
		conn.select_db('ai_hw')
		cursor = conn.cursor()
		try:
			cursor.execute("set names 'utf8'")
			cursor.execute("SELECT * FROM `request` LIMIT 1")
			rows = cursor.fetchall()
			if not rows:
				# No pending request, so there is no keyword to record.
				return
			keyword = rows[0][1]
			# Let the driver quote the value instead of splicing it into
			# the SQL text; a keyword containing a quote would otherwise
			# break (or alter) the statement.
			cursor.execute("INSERT INTO `finish` VALUES(%s)", (keyword,))
			conn.commit()
		finally:
			cursor.close()
	finally:
		conn.close()
示例#3
0
def main():
	"""Classify the stored weibo texts of the newest request keyword.

	Reads the keyword from the ``request`` row with the highest ``id``,
	loads the dictionaries under ``bayes/``, and scores every ``weibo`` value
	stored in ``users`` for that keyword with ``bayes.hitProbability`` and
	``bayes.missProbability``.  A line counts as a hit when
	``missP / hitP < 10.0``; hits are written to ``bayes/testResult.txt`` and
	all other lines to ``bayes/MisResult.txt``.  Finally one row (number of
	lines, number of hits, keyword) is inserted into ``music_love``.

	Returns None.  Nothing is done when ``request`` has no rows.  The cursor
	and connection are closed even when a step raises.
	"""
	con = dbconf.dbconfig()
	conn = MySQLdb.connect(host=con[0], user=con[1], passwd=con[2])
	try:
		conn.select_db("ai_hw")
		cursor = conn.cursor()
		try:
			cursor.execute("SET NAMES 'utf8'")
			conn.commit()
			cursor.execute("SELECT * FROM `request` order by id desc limit 1")
			rows = cursor.fetchall()
			if not rows:
				# No request row, hence no keyword to classify.
				return
			keyword = rows[0][1]
			print(keyword)

			P_HT = get_xml_data("bayes/music_HT_dict.xml")
			P_H = get_xml_data("bayes/music_H_dict.xml")
			P_MT = get_xml_data("bayes/music_MT_dict.xml")
			P_M = get_xml_data("bayes/music_M_dict.xml")
			store = get_xml_data("bayes/store.xml")

			P_H = P_H['hits']
			P_M = P_M['miss']
			P_HT = transinger.main(P_HT, keyword)
			P_MT = transinger.main(P_MT, keyword)

			# Parameterized so the driver quotes the keyword; splicing it
			# into the SQL text breaks on quotes and allows injection.
			cursor.execute("SELECT weibo FROM `users` WHERE users.keyword = %s", (keyword,))
			lines = [row[0] for row in cursor.fetchall()]

			hits = []
			misses = []
			for line in lines:
				tokens = bayes.segword(line)
				hitP = bayes.hitProbability(tokens, P_HT, P_MT, P_H, P_M, store)
				missP = bayes.missProbability(tokens, P_HT, P_MT, P_H, P_M, store)
				# A zero hit probability used to raise ZeroDivisionError;
				# such a line cannot satisfy the ratio test, so it is a miss.
				if hitP != 0 and missP / hitP < 10.0:
					hits.append(line + '\n')
				else:
					misses.append(line + '\n')

			bayes.writeFile(hits, "bayes/testResult.txt")
			bayes.writeFile(misses, "bayes/MisResult.txt")
			cursor.execute(
				"insert into `music_love` values(%s, %s, %s)",
				(len(lines), len(hits), keyword))
			conn.commit()
		finally:
			cursor.close()
	finally:
		conn.close()
示例#4
0
文件: plots.py 项目: Lhfcws/ai_hw
def main():
	"""Log in, set the keyword from ``model(cursor)`` and run ``trend``.

	Connects to the ``ai_hw`` database with the settings returned by
	``dbconfig()``, starts and logs in a ``Main`` session, uses
	``model(cursor)[1]`` as the keyword and calls ``trend`` for the
	hard-coded range 2012-09-20 .. 2012-09-30, committing after ``model``
	and after ``trend``.

	Returns None.  The cursor and connection are closed even when a step
	raises.
	"""
	# connect MYSQL
	res = dbconfig()
	conn = MySQLdb.connect(host=res[0], user=res[1], passwd=res[2])
	try:
		conn.select_db('ai_hw')
		cursor = conn.cursor()
		try:
			cursor.execute("set names 'utf8'")
			# init
			# NOTE(review): the credentials are hard-coded here; consider
			# loading them from configuration instead.
			# NOTE(review): M is started and logged in but never shut down
			# in this function; confirm whether the session should be closed.
			M = Main("*****@*****.**", "aiai2012")
			M.start()
			M.login()

			l = model(cursor)
			conn.commit()

			M.keyword(l[1])

			# get Plots; the date range is fixed and the return value of
			# trend() is not used.
			trend(cursor, M, date(2012,9,20), date(2012,9,30))
			conn.commit()
		finally:
			cursor.close()
	finally:
		conn.close()