示例#1
0
def getWeiboIds():
    """Return the weiboid of every ``profile`` row with ``is_education = -1``.

    Returns:
        list: the first column of each row the query returned; an empty
        list when the query returned nothing.
    """
    connection = GetConnect()
    rows = connection.getData(
        'select weiboid from profile where is_education = -1')
    if not rows:
        return []
    return [row[0] for row in rows]
def getWeiboIds(schooltable):
    """Return the weiboid of every *schooltable* row with ``is_profile = -1``.

    Args:
        schooltable: table name; it is interpolated verbatim into the SQL
            text.

    Returns:
        list: the first column of each row the query returned; an empty
        list when the query returned nothing.
    """
    connection = GetConnect()
    query = 'select weiboid from %s where is_profile = -1' % schooltable
    rows = connection.getData(query)
    return [row[0] for row in rows] if rows else []
def getWeiboIds():
    """Collect the weiboids of ``profile`` rows whose ``is_education`` is -1.

    Returns:
        list: one weiboid per returned row, in query order; empty when the
        query returned nothing.
    """
    collected = []
    rows = GetConnect().getData(
        'select weiboid from profile where is_education = -1')
    # A falsy result (None or empty) simply contributes no ids.
    for row in rows or ():
        collected.append(row[0])
    return collected
def get_school_weibo(schoolname):
    """Pickle the word-segmented weibo rows of one school.

    Selects ``segments`` and ``is_meaningful`` from the
    ``<schoolname>_wordsegment`` table, prints the row count and up to the
    first 100 rows, then pickles all rows to ``<schoolname>_seg_weibo.pkl``
    inside the ``dict weibo data`` directory.

    Args:
        schoolname: table-name prefix, also used in the output file name.
    """
    myconnect = GetConnect()
    school_weibo_table = schoolname + '_wordsegment'
    school_weibo = 'select segments, is_meaningful from %s' % school_weibo_table
    # Treat a missing result like an empty one instead of crashing on len().
    weibo_content_results = myconnect.getData(school_weibo) or []
    print(len(weibo_content_results))
    # Slice instead of indexing 0..99 so short tables do not raise IndexError.
    for row in weibo_content_results[:100]:
        print('%s %s' % (row[0], row[1]))

    # pickle expects a binary file; the with-block guarantees it is closed.
    with open('dict weibo data\\%s_seg_weibo.pkl' % (schoolname), 'wb') as out:
        pickle.dump(weibo_content_results, out)
def getSchooWeiboMeaning(schoolname):
    """Count a school's word-segmented weibos per ``is_meaningful`` value.

    Args:
        schoolname: table-name prefix; ``_wordsegment`` is appended to it.

    Returns:
        list: ``[count of the second group, count of the first group]``,
        with groups ordered by ascending ``is_meaningful``; an empty list
        when the query returned nothing.

    Raises:
        IndexError: when the query returns a single group, i.e. the table
            holds only one distinct ``is_meaningful`` value.
    """
    schoolname = schoolname + '_wordsegment'
    countMeaning = []
    myconnect = GetConnect()
    # The rows are read by position below, so the order must be explicit:
    # GROUP BY alone does not guarantee any row order.
    get_school_weibo_meaning_num = (
        'select count(*) as meaningcount,is_meaningful from %s '
        'group by is_meaningful order by is_meaningful;' % schoolname)
    meaning_num_results = myconnect.getData(get_school_weibo_meaning_num)
    if meaning_num_results:
        # NOTE(review): presumably the higher is_meaningful value marks the
        # meaningful weibos, making this [meaningful, not meaningful] - confirm.
        countMeaning.append(int(meaning_num_results[1][0]))
        countMeaning.append(int(meaning_num_results[0][0]))
    return countMeaning
def getWeiboIds(schoolname=None):
    """Return up to 20 randomly chosen weiboids from the *schoolname* table.

    The query applies no filter: it samples rows with
    ``order by rand() limit 20``.

    Args:
        schoolname: name of the table to sample; interpolated verbatim into
            the SQL text. When omitted or empty, no query is issued.

    Returns:
        list: the first column of each returned row; an empty list when no
        table name was given or the query returned nothing.
    """
    if not schoolname:
        # The default None would otherwise be formatted into the statement
        # as the literal table name "None".
        return []
    sql = "select weiboid from %s order by rand() limit 20" % schoolname
    myconnect = GetConnect()
    results = myconnect.getData(sql)
    if not results:
        return []
    return [r[0] for r in results]
示例#7
0
def getWeiboIds(schoolname=None):
    """Sample at most 20 random weiboids from the table named *schoolname*.

    No ``where`` clause is applied; rows are picked with
    ``order by rand() limit 20``.

    Args:
        schoolname: table to read from; it is placed verbatim into the SQL
            text. A missing or empty name short-circuits to an empty list.

    Returns:
        list: one weiboid per returned row, or an empty list when there is
        no table name or the query returned nothing.
    """
    weiboids = []
    if not schoolname:
        # Without this guard the default None is rendered as "from None".
        return weiboids
    sql = "select weiboid from %s order by rand() limit 20" % schoolname
    results = GetConnect().getData(sql)
    for r in results or ():
        weiboids.append(r[0])
    return weiboids
def get_one_weibo_data(schoolname, weiboid=None):
    """Fetch the stored weibo rows of one user of a school.

    When *weiboid* is ``None`` a random id is taken from the *schoolname*
    table among rows with ``is_wb_ori_no_pic = 1``. If that lookup returns
    nothing, a message is printed and a hard-coded id is used instead.

    The content table is ``wb_ori_no_pic`` for ``'dlut'`` and
    ``<schoolname>_wb_ori_no_pic`` for every other school.

    Args:
        schoolname: school table name / content-table prefix.
        weiboid: id to look up, or ``None`` to pick one at random.

    Returns:
        The rows of (content, upvotes, forwards, reviews, weiboid) returned
        by the query, or ``None`` when the query returned nothing.
    """
    connection = GetConnect()
    if weiboid is None:
        pick_sql = "select weiboid from %s where is_wb_ori_no_pic = 1 order by rand() limit 1;" % schoolname
        picked = connection.getData(pick_sql)
        if not picked:
            print("get weiboid wrong")
            weiboid = '2591961830'
        else:
            weiboid = picked[0][0]

    content_table = 'wb_ori_no_pic' if schoolname == 'dlut' else schoolname + '_wb_ori_no_pic'

    content_sql = "select content, upvotes, forwards, reviews, weiboid from %s where weiboid = %s" % (content_table, weiboid)
    rows = connection.getData(content_sql)
    return rows if rows else None
def getMarkedWeibo():
    """Group the labelled weibos by mark, drop stop words and pickle each group.

    ``markedweibo.mark`` values: -1 unknown, 1 positive, 2 negative,
    3 activity (contains a link, usually an advertisement), 4 objective
    (expresses no emotion). Only marks 1-4 are kept.

    Each kept weibo becomes a list of its items that are neither a sentiment
    stop word nor a single space. The four groups are written to the
    ``seg_*_result.pkl`` files in the ``maked weibo`` directory.

    The filtered lists are what gets pickled. Previously they were computed
    and then discarded, and the unfiltered content was written instead.
    """
    # mark value -> output file, in the order the files are written
    outputs = (
        (1, "maked weibo\\seg_pos_result.pkl"),  # positive emotion
        (2, "maked weibo\\seg_neg_result.pkl"),  # negative emotion
        (3, "maked weibo\\seg_act_result.pkl"),  # activity / advertisement
        (4, "maked weibo\\seg_obj_result.pkl"),  # objective, no emotion
    )
    # Read the txt file that contains the sentiment stop words.
    sentiment_stopwords = get_txt_data('sentiment_stopword.txt', 'lines')

    get_mark_weibo_sql = "select content, mark from markedweibo;"
    myconnect = GetConnect()
    # A missing result is handled like an empty table.
    results = myconnect.getData(get_mark_weibo_sql) or []

    filtered = dict((mark, []) for mark, _ in outputs)
    for weibo in results:
        bucket = filtered.get(weibo[1])
        if bucket is None:
            # Unknown (-1) or unexpected mark: not part of any group.
            continue
        # NOTE(review): if content is a plain string this iterates over
        # characters, not words - confirm the column is already segmented.
        bucket.append([word for word in weibo[0]
                       if word not in sentiment_stopwords and word != ' '])

    # Store each group as a pickle; binary mode plus `with` closes the files.
    for mark, path in outputs:
        with open(path, 'wb') as out:
            pickle.dump(filtered[mark], out)
def conveyToSchoolTable(schoolname, schooltable):
    """Add the weibo users of one school from ``education`` to its own table.

    Args:
        schoolname: value matched against ``education.school``.
        schooltable: destination table, passed to ``School_Db`` and used in
            the final count query.

    Returns:
        The result of ``getCount`` on ``select * from <schooltable>`` after
        the inserts, or ``None`` when the ``education`` query returned
        nothing.
    """
    query = "select * from education where school = '%s'" % schoolname
    connection = GetConnect()
    rows = connection.getData(query)
    target = School_Db(schooltable)
    if not rows:
        return None
    for row in rows:
        # Column 1 of the education row is what School_Info is built from.
        target.insertIntoDB(School_Info(row[1]))
    return connection.getCount("select * from %s" % schooltable)
示例#11
0
def conveyToSchoolTable(schoolname):
    """Add the weibo users of one school from ``education`` to ``dlut``.

    Args:
        schoolname: value matched against ``education.school``.

    Returns:
        The result of ``getCount`` on ``select * from dlut`` after the
        inserts, or ``None`` when the ``education`` query returned nothing.
    """
    query = "select * from education where school = '%s'" % schoolname
    db = GetConnect()
    matches = db.getData(query)
    dlut_store = Dlut_Db()
    if matches:
        for match in matches:
            # Column 1 of the education row is what Dlut is built from.
            record = Dlut(match[1])
            dlut_store.insertIntoDB(record)
        return db.getCount("select * from dlut")
    return None
示例#12
0
def conveyToSchoolTable(schoolname, schooltable):
    """Insert one school's ``education`` rows into the school's own table.

    Args:
        schoolname: school to select from ``education`` (``school`` column).
        schooltable: table that receives the rows and is counted afterwards.

    Returns:
        Whatever ``getCount`` reports for ``select * from <schooltable>``
        once the inserts are done; ``None`` if no ``education`` row matched.
    """
    select_sql = "select * from education where school = '%s'" % schoolname
    conn = GetConnect()
    education_rows = conn.getData(select_sql)
    school_db = School_Db(schooltable)
    total = None
    if education_rows:
        for education_row in education_rows:
            member = School_Info(education_row[1])
            school_db.insertIntoDB(member)
        count_sql = "select * from %s" % schooltable
        total = conn.getCount(count_sql)
    return total
def get_school_weibo_and_save(schoolname):
    """Pickle the content of every stored weibo of one school.

    Reads ``content`` from ``wb_ori_no_pic`` for ``'dlut'`` or from
    ``<schoolname>_wb_ori_no_pic`` for any other school, prints up to the
    first 100 entries UTF-8 encoded, and pickles the full list to
    ``<schoolname>_weibo.pkl`` in the ``machine learning data`` directory.

    Args:
        schoolname: school identifier used for the table and file names.
    """
    if schoolname == 'dlut':
        schooltable = 'wb_ori_no_pic'
    else:
        schooltable = schoolname + '_wb_ori_no_pic'
    get_weibo_sql = "select content from %s;" % schooltable
    myconnect = GetConnect()
    # A missing result is handled like an empty table instead of crashing.
    results = myconnect.getData(get_weibo_sql) or []
    school_weibo = [row[0] for row in results]
    # Preview only the first 100 entries.
    for content in school_weibo[:100]:
        print(content.encode('utf-8'))
    # pickle expects a binary file; the with-block guarantees it is closed.
    with open("machine learning data\\%s_weibo.pkl" % schoolname, 'wb') as out:
        pickle.dump(school_weibo, out)
def getWordFrequency(schoolname):
    """Count keyword occurrences of one school's weibos and pickle the counts.

    Reads ``keywords`` from ``<schoolname>_wordsegment`` rows with
    ``is_meaningful = 1``, splits each value on whitespace, and stores a
    ``{word: occurrences}`` dict in ``<schoolname>_worddict.pkl`` inside the
    ``weibo word contrast`` directory. Prints the row count, the total
    number of occurrences and the number of distinct words.

    Args:
        schoolname: table-name prefix, also used in the output file name.
    """
    get_keyword_sql = 'select keywords from %s where is_meaningful = 1' % (schoolname+'_wordsegment')
    myconnect = GetConnect()
    # A missing result is handled like an empty table instead of crashing.
    results = myconnect.getData(get_keyword_sql) or []
    worddict = {}  # every word seen so far -> number of occurrences
    print(len(results))
    index = 0
    for r in results:
        for w in r[0].split():
            index += 1
            worddict[w] = worddict.get(w, 0) + 1
    print('%s index' % index)  # total word occurrences (about 270k for dlut)
    print(len(worddict))  # distinct words stored (about 65k for dlut)
    # pickle expects a binary file; the with-block guarantees it is closed.
    with open('weibo word contrast\\%s_worddict.pkl' % schoolname, 'wb') as out:
        pickle.dump(worddict, out)
def getWordFrequency(schoolname):
    """Build and pickle a word-frequency dict for one school's weibos.

    The ``keywords`` column of every ``<schoolname>_wordsegment`` row with
    ``is_meaningful = 1`` is split on whitespace and each word is counted.
    The resulting ``{word: occurrences}`` dict is written to
    ``<schoolname>_worddict.pkl`` in the ``weibo word contrast`` directory.
    The row count, total occurrences and distinct-word count are printed.

    Args:
        schoolname: table-name prefix, also used in the output file name.
    """
    get_keyword_sql = 'select keywords from %s where is_meaningful = 1' % (
        schoolname + '_wordsegment')
    myconnect = GetConnect()
    # Fall back to an empty result so len() and the loop cannot hit None.
    results = myconnect.getData(get_keyword_sql) or []
    worddict = {}  # every word seen so far -> number of occurrences
    print(len(results))
    index = 0
    for r in results:
        for w in r[0].split():
            index += 1
            if w in worddict:
                worddict[w] += 1
            else:
                worddict[w] = 1
    print('%s index' % index)  # total word occurrences (about 270k for dlut)
    print(len(worddict))  # distinct words stored (about 65k for dlut)
    # Binary mode is what pickle expects; `with` closes the file afterwards.
    with open('weibo word contrast\\%s_worddict.pkl' % schoolname,
              'wb') as out:
        pickle.dump(worddict, out)