def check_monitor_status(mid_dic_batch):
    """Reset Information.monitor_status to 0 for rows whose related events are all inactive.

    :param mid_dic_batch: iterable of dicts, each carrying at least a "mid" key
    """
    midlist = [item["mid"] for item in mid_dic_batch]
    if not midlist:
        # An empty batch would otherwise produce an invalid "IN ()" clause.
        return
    cursor = pi_cur()
    # Parameterized IN-list instead of hand-quoted string interpolation
    # (avoids SQL injection and quoting bugs).
    placeholders = ",".join(["%s"] * len(midlist))
    sql = ("SELECT e.monitor_status, i.i_id FROM `Event` e "
           "JOIN Event_information ei ON e.e_id = ei.event_id "
           "JOIN Information i ON i.i_id = ei.information_id "
           "WHERE i.mid IN ({})".format(placeholders))
    cursor.execute(sql, midlist)
    # Group every related event's monitor_status per information id.
    check_dic = {}
    for item in cursor.fetchall():
        check_dic.setdefault(item["i_id"], []).append(item["monitor_status"])
    # An information row goes inactive only when ALL its events are inactive.
    params = [(0, i_id) for i_id, statuses in check_dic.items()
              if sum(statuses) == 0]
    sql = "UPDATE Information SET monitor_status = %s WHERE i_id = %s"
    cursor.executemany(sql, params)
    conn.commit()
def stop_monite():
    """Stop monitoring figures with no Information activity in the last 100 days."""
    cursor = conn.cursor()
    cursor.execute('select uid from Figure where monitorstatus=1')
    uid_list = [item['uid'] for item in cursor.fetchall()]
    if not uid_list:
        return
    # Midnight today minus 100 days, as a unix timestamp.
    day_timestamp = int(time.mktime(datetime.date.today().timetuple())) - 86400 * 100
    # Parameterized IN-list instead of string concatenation.
    placeholders = ','.join(['%s'] * len(uid_list))
    sql = ("select uid from Information where uid in ({}) "
           "and timestamp>%s ".format(placeholders))
    cursor.execute(sql, uid_list + [day_timestamp])
    recently_active = set(item['uid'] for item in cursor.fetchall())
    update_uid = list(set(uid_list).difference(recently_active))
    if update_uid:
        placeholders = ','.join(['%s'] * len(update_uid))
        sql = 'UPDATE Figure SET monitorstatus = 0 WHERE uid in ({})'.format(placeholders)
        cursor.execute(sql, update_uid)
        # Original commits only when something was updated; preserved.
        conn.commit()
def update_cal_status(mid_dic, cal_status):
    """Set Information.cal_status for every entry of mid_dic (dicts with "i_id")."""
    rows = []
    for entry in mid_dic:
        rows.append((cal_status, entry["i_id"]))
    db = pi_cur()
    db.executemany("UPDATE Information SET cal_status = %s WHERE i_id = %s", rows)
    conn.commit()
def update_process_status(eid, cal_status):
    """Set the process_status of one event's extended (store_type=2) positives.

    :param eid: event id
    :param cal_status: new process_status value
    """
    cursor = pi_cur()
    sql = "UPDATE EventPositive SET process_status = %s WHERE e_id = %s and store_type = 2"
    # Single row: plain execute instead of a one-element executemany.
    cursor.execute(sql, (cal_status, eid))
    conn.commit()
def get_user_political(uid_list, start_ts, end_ts):
    """Compute and store each user's political leaning.

    Aggregates per-user word counts over [start_ts, end_ts], classifies them
    with political_classify, and writes the result to Figure.political.
    (Original doc: left = left-leaning, mid = neutral, right = right-leaning.)
    """
    if not uid_list:
        # Avoids an invalid "IN ()" clause on an empty uid list.
        return
    # Parameterized IN-list instead of string concatenation.
    placeholders = ','.join(['%s'] * len(uid_list))
    sql = ('select uid,wordcount from WordCount where uid in ({}) '
           'and timestamp >= %s and timestamp <= %s').format(placeholders)
    cursor.execute(sql, list(uid_list) + [start_ts, end_ts])
    word_c = defaultdict(dict)
    for row in cursor.fetchall():
        counts = json.loads(row['wordcount'])
        bucket = word_c[row['uid']]
        for word, cnt in counts.items():
            # dict.get replaces the original bare try/except accumulation.
            bucket[word] = bucket.get(word, 0) + cnt
    political = political_classify(word_c)
    sql = 'UPDATE Figure SET political=%s where uid=%s'
    val = [(leaning, uid) for uid, leaning in political.items()]
    cursor.executemany(sql, val)
    print("update success")
    conn.commit()
def insert_uid(uid_list, e_id):
    """Link figures to an event, inserting only pairs not already present.

    :param uid_list: figure ids to link
    :param e_id: event id
    :return: 0 when uid_list is empty (kept for backward compatibility)
    """
    if len(uid_list) == 0:
        return 0
    cursor = pi_cur()
    # Parameterized query instead of string interpolation.
    placeholders = ','.join(['%s'] * len(uid_list))
    sql = ("select figure_id from Event_figure where figure_id in ({}) "
           "and event_id=%s".format(placeholders))
    cursor.execute(sql, list(uid_list) + [e_id])
    existing = set(item['figure_id'] for item in cursor.fetchall())
    val = [(uid, e_id) for uid in set(uid_list).difference(existing)]
    insert_sql = 'insert into Event_figure(figure_id,event_id) values (%s,%s)'
    try:
        cursor.executemany(insert_sql, val)
        conn.commit()
    except Exception:
        # Narrowed from a bare except; best-effort semantics preserved.
        print('错误')
        conn.rollback()
def update_cal_status(uid_list, computestatus):
    """Set Figure.computestatus for every uid in uid_list."""
    db = pi_cur()
    statement = "UPDATE Figure SET computestatus = %s WHERE uid = %s"
    rows = []
    for uid in uid_list:
        rows.append((computestatus, uid))
    db.executemany(statement, rows)
    conn.commit()
def update_cal_status(eid_dict, cal_status):
    """Set Event.cal_status for every entry of eid_dict (dicts with "e_id")."""
    statement = "UPDATE Event SET cal_status = %s WHERE e_id = %s"
    db = pi_cur()
    db.executemany(statement, [(cal_status, row['e_id']) for row in eid_dict])
    conn.commit()
def sql_insert_many(table_name, primary_key, data_dict):
    """REPLACE one row per entry of data_dict into table_name.

    :param table_name: destination table
    :param primary_key: column name for the dict keys
    :param data_dict: {id: {column: value, ...}, ...}
    NOTE(review): assumes every value dict carries the same keys in the same
    order; heterogeneous dicts would misalign columns -- verify with callers.
    """
    cursor = pi_cur()
    columns = [primary_key]
    params = []
    for item_id, item in data_dict.items():
        row = [item_id]
        for key, value in item.items():
            if key not in columns:
                columns.append(key)
            row.append(value)
        params.append(tuple(row))
    sql = 'replace into %s (%s) values (%s)' % (
        table_name, ",".join(columns), ",".join(["%s"] * len(columns)))
    if len(params):
        affected = cursor.executemany(sql, params)
        total = len(params)
        print("insert {} success, {} failed".format(total, total - affected))
        conn.commit()
    else:
        print('empty data')
def sql_insert_many(cursor, table_name, primary_key, data_dict):
    """INSERT one row per entry of data_dict into table_name using the given cursor.

    Commits only when the affected row count matches the number of rows sent.
    NOTE(review): assumes every value dict carries the same keys in the same
    order -- verify with callers.
    """
    columns = [primary_key]
    params = []
    for item_id, item in data_dict.items():
        row = [item_id]
        for key, value in item.items():
            if key not in columns:
                columns.append(key)
            row.append(value)
        params.append(tuple(row))
    sql = 'insert into %s (%s) values (%s)' % (
        table_name, ",".join(columns), ",".join(["%s"] * len(columns)))
    affected = cursor.executemany(sql, params)
    total = len(params)
    if affected == total:
        print("insert %d success" % total)
        conn.commit()
    else:
        print("failed")
def update_count_max(eid, max_num):
    """Set Event.count_max for one event.

    :param eid: event id
    :param max_num: new count_max value
    """
    cursor = pi_cur()
    # Single row: plain execute instead of a one-element executemany.
    cursor.execute("UPDATE Event SET count_max = %s WHERE e_id = %s", (max_num, eid))
    conn.commit()
def update_mysql_hazard_index():
    """One-off backfill: copy the latest non-predicted hazard_index from
    Informationspread into Information for one hard-coded event.

    Rows with no spread record get cal_status reset to 0 instead.
    """
    cur = conn.cursor()
    sql = ("SELECT * FROM Information, Event_information "
           "WHERE Event_information.event_id = 'event_ostudent' "
           "and Event_information.information_id = Information.i_id")
    cur.execute(sql)
    data = cur.fetchall()
    cursor = pi_cur()
    for index, item in enumerate(data):
        mid = item["mid"]
        i_id = item["i_id"]
        # Parameterized instead of str.format (mid could break the quoting).
        cursor.execute(
            "SELECT * FROM Informationspread WHERE mid = %s and predict = 0 "
            "order by store_date desc", (mid,))
        row = cursor.fetchone()
        if row is not None:
            # Explicit None check replaces the bare except that masked the
            # "no matching spread row" case.
            cursor.execute("UPDATE Information SET hazard_index=%s WHERE i_id=%s",
                           (row["hazard_index"], i_id))
            print(index)
        else:
            cursor.execute("UPDATE Information SET cal_status=0 WHERE i_id=%s",
                           (i_id,))
            print(index, "没找到。。")
    conn.commit()
def update_monitor_status(eid, status):
    """Set Event.monitor_status for one event.

    :param eid: event id
    :param status: new monitor_status value
    """
    cursor = pi_cur()
    # Single row: plain execute instead of a one-element executemany.
    cursor.execute("UPDATE Event SET monitor_status = %s WHERE e_id = %s", (status, eid))
    conn.commit()
def update(s_dict):
    """Write each figure's main domain back to Figure.domain.

    :param s_dict: mapping whose values are dicts with 'main_domain' and 'uid'
    """
    rows = [(entry['main_domain'], entry['uid']) for entry in s_dict.values()]
    cursor.executemany('update Figure set domain = %s where uid = %s', rows)
    conn.commit()
def update_cal_status(uid_list, computestatus):
    """Bulk-set Figure.computestatus for all given uids in a single statement."""
    if not uid_list:
        # Avoids an invalid "IN ()" clause on an empty uid list.
        return
    cursor = pi_cur()
    # Parameterized values instead of %-interpolating them into the SQL text.
    placeholders = ','.join(['%s'] * len(uid_list))
    sql = "UPDATE Figure SET computestatus = %s WHERE uid in ({})".format(placeholders)
    cursor.execute(sql, [computestatus] + list(uid_list))
    conn.commit()
def save_figure(save_dict):
    """Upsert figures with their identity status.

    New rows start with computestatus=0 and monitorstatus=1; existing rows get
    their identitystatus refreshed.
    :param save_dict: {uid: identitystatus}
    """
    rows = [(uid, uid, status) for uid, status in save_dict.items()]
    db = pi_cur()
    sql = "INSERT INTO Figure(f_id,uid,identitystatus,computestatus,monitorstatus) VALUE(%s,%s,%s,0,1) ON DUPLICATE KEY UPDATE " \
          "uid=values(uid),f_id=values(f_id),identitystatus=values(identitystatus)"
    db.executemany(sql, rows)
    conn.commit()
def store_pos(e_id, pos_data):
    """Store event-relevant positive samples into EventPositive.

    :param e_id: event id
    :param pos_data: DataFrame-like with 'text' and 'vec' columns, where 'vec'
        is a numpy array -- presumably an embedding; confirm with caller
    """
    cursor = pi_cur()
    t = nowts()
    sql = 'insert into EventPositive set e_id=%s,text=%s,vector=%s,store_timestamp=%s,store_type=0,process_status=1'
    val = []
    for index, row in pos_data.iterrows():
        # tobytes() replaces the deprecated numpy tostring() alias
        # (removed in recent numpy); identical byte output.
        val.append((e_id, row['text'], row['vec'].tobytes(), t))
    n = cursor.executemany(sql, val)
    print("事件相关正类入库成功 %d 条" % n)
    conn.commit()
def store_event_para(e_id, p_name):
    """Write one default parameter row for an event into EventParameter (replace into).

    :param p_name: which parameter to store; raises KeyError if unknown
    """
    defaults = {
        'sentiment_neg': (0.2, '信息情感极性负面阈值'),
        'sentiment_pos': (0.7, '信息情感极性正面阈值'),
        'pos_num': (1000, '敏感计算时正类数量'),
        'neg_num': (15000, '敏感计算时负类数量'),
        'weibo_num': (100000, '每日LDA聚类时采样的微博总数'),
        'stop_percent': (0.05, '停止监测的信息数量阈值(峰值的百分比)'),
        'pos_neg': (15, '敏感计算时负类比正类比例'),
        'extend_scale': (10, '扩线比例阈值'),
    }
    value, description = defaults[p_name]
    row = (e_id + '_' + p_name, p_name, value, e_id, description)
    db = pi_cur()
    db.executemany('replace into EventParameter values (%s, %s, %s, %s, %s)', [row])
    conn.commit()
def event_sensitivity(e_id, data_dict):
    """Link sensitive information items to an event via Event_information.

    Information ids are built as source + mid, matching sensitivity_store.
    """
    if len(data_dict) == 0:
        print('no data')
        return
    db = pi_cur()
    sql = 'insert into Event_information set event_id=%s,information_id=%s'
    rows = [(e_id, item.get('source', None) + mid)
            for mid, item in data_dict.items()]
    inserted = db.executemany(sql, rows)
    print("入库成功 %d 条" % inserted)
    conn.commit()
def event_semantic(e_id, e_name, data, thedate, WEIBO_NUM):
    """Run LDA topic analysis for one event-day and upsert Event_Semantic.

    :param data: raw text data handed to data_process
    :param thedate: day being analyzed (convertible by date2ts)
    :param WEIBO_NUM: sample size for the daily LDA run
    """
    corpus_tfidf, dictionary = data_process(data, int(WEIBO_NUM))
    result = lda_analyze(corpus_tfidf, dictionary, num_topics=5)
    result = json.dumps(result)
    timestamp = date2ts(thedate)
    # Row id is timestamp + event id, so one row per event per day.
    es_id = str(timestamp) + e_id
    sql = "replace into Event_Semantic values(%s,%s,%s,%s,%s,%s)"
    val = [(es_id, e_id, e_name, result, timestamp, thedate)]
    try:
        n = cursor.executemany(sql, val)
        print("insert %d success" % n)
        conn.commit()
    except Exception:
        # Narrowed from a bare except so KeyboardInterrupt/SystemExit propagate.
        print('出现数据库错误')
def store_event_para(e_id, p_name):
    """Write one default parameter row for an event into EventParameter (replace into).

    Duplicate of the earlier store_event_para with a smaller parameter set.
    :param p_name: which parameter to store; raises KeyError if unknown
    """
    defaults = {
        'sentiment_neg': (0.2, '信息情感极性负面阈值'),
        'sentiment_pos': (0.7, '信息情感极性正面阈值'),
        'pos_num': (1000, '敏感计算时正类数量'),
        'neg_num': (15000, '敏感计算时负类数量'),
        'weibo_num': (100000, '每日LDA聚类时采样的微博总数'),
    }
    value, description = defaults[p_name]
    row = (e_id + '_' + p_name, p_name, value, e_id, description)
    db = pi_cur()
    db.executemany('replace into EventParameter values (%s, %s, %s, %s, %s)', [row])
    conn.commit()
def update_mysql_geo():
    """One-off migration: normalize Information.geo via geo_transfer for one
    hard-coded event's information rows."""
    cur = conn.cursor()
    sql = ("SELECT * from Information, Event_information "
           "WHERE Event_information.event_id = 'xianggangshijian_1581919160' "
           "and Information.i_id = Event_information.information_id")
    cur.execute(sql)
    data = cur.fetchall()
    cur.close()
    cursor = pi_cur()
    for index, item in enumerate(data):
        geo_new = geo_transfer(item["geo"])
        # Parameterized: geo strings may contain quotes, which broke the
        # original format-string UPDATE.
        cursor.execute("UPDATE Information SET geo=%s WHERE i_id=%s",
                       (geo_new, item["i_id"]))
        print(index)
    conn.commit()
def save(save_dict):
    """Insert daily spread statistics (predict=1) into Informationspread.

    :param save_dict: {mid: {date: {'retweet', 'comment', 'message_type'}}}
    """
    val = []
    for mid in save_dict:
        print(mid)
        for date in save_dict[mid]:
            timestamp = date2ts(date)
            stats = save_dict[mid][date]
            val.append((mid + str(timestamp), mid, timestamp,
                        int(stats['retweet']), int(stats['comment']),
                        stats['message_type'], date, 1))
    sql = 'insert into Informationspread(is_id,mid,timestamp,retweet_count,comment_count,message_type,store_date,predict) values (%s,%s,%s,%s,%s,%s,%s,%s )'
    cursor = conn.cursor()
    try:
        cursor.executemany(sql, val)
        conn.commit()
    except Exception:
        # Narrowed from a bare except; best-effort semantics preserved.
        conn.rollback()
        print('错误')
def save_figure(save_dict):
    """Upsert figures together with their current event/information counts.

    For each uid, counts linked events and stored information rows, then
    upserts the Figure row with today's date; new rows start computestatus=0,
    monitorstatus=1.
    :param save_dict: {uid: identitystatus}
    """
    db = pi_cur()
    today = str(datetime.date.today())
    rows = []
    for uid, identity in save_dict.items():
        db.execute('select count(*) from Event_figure where figure_id = %s', uid)
        event_count = db.fetchone()['count(*)']
        db.execute('select count(*) from Information where uid = %s', uid)
        info_count = db.fetchone()['count(*)']
        rows.append((uid, uid, identity, info_count, event_count, today))
    sql = "INSERT INTO Figure(f_id,uid,identitystatus,info_count,event_count,into_date,computestatus,monitorstatus) VALUE(%s,%s,%s,%s,%s,%s,0,1) ON DUPLICATE KEY UPDATE " \
          "uid=values(uid),f_id=values(f_id),identitystatus=values(identitystatus),info_count=values(info_count),event_count=values(event_count),into_date=values(into_date)"
    db.executemany(sql, rows)
    conn.commit()
def sensitivity_store(data_dict):
    """Replace-insert raw sensitive information rows, then refresh nick names.

    Row ids are built as source + mid. New rows start monitor_status=1,
    hazard_index=NULL, cal_status=0, add_manully=0.
    :param data_dict: {mid: attribute dict}
    """
    if len(data_dict) == 0:
        print('no data')
        return
    db = pi_cur()
    sql = 'replace into Information set i_id=%s,uid=%s,root_uid=%s,mid=%s,text=%s,timestamp=%s,' \
          'send_ip=%s,geo=%s,message_type=%s,root_mid=%s,source=%s,monitor_status=1,hazard_index=NULL,' \
          'cal_status=0,add_manully=0'
    rows = []
    for mid, info in data_dict.items():
        rows.append((info.get('source', None) + mid, info.get('uid', None),
                     info.get('root_uid', None), mid, info.get('text', None),
                     info.get('timestamp', None), info.get('send_ip', None),
                     info.get('geo', None), info.get('message_type', None),
                     info.get('root_mid', None), info.get('source', None)))
    inserted = db.executemany(sql, rows)
    print("入库成功 %d 条" % inserted)
    conn.commit()
    uid_list = [int(info['uid']) for _mid, info in data_dict.items()
                if info.get('uid', None) is not None]
    nick_name(uid_list)
def store_extend_info(e_id, mid, data_dict):
    """Store extension-discovered messages into the ExtendReview table.

    :param e_id: related event id
    :param mid: iterable of message ids
    :param data_dict: {message id: attribute dict}
    :return: None
    """
    db = pi_cur()
    sql = 'replace into ExtendReview set ie_id=%s,e_id=%s,uid=%s,root_uid=%s,mid=%s,' \
          'text=%s,timestamp=%s,send_ip=%s,geo=%s,message_type=%s,root_mid=%s,' \
          'source=%s,process_status=0'
    rows = []
    for message_id in mid:
        info = data_dict[message_id]
        rows.append((e_id + message_id, e_id, info.get('uid', None),
                     info.get('root_uid', None), message_id,
                     info.get('text', None), info.get('timestamp', None),
                     info.get('send_ip', None), info.get('geo', None),
                     info.get('message_type', None), info.get('root_mid', None),
                     info.get('source', None)))
    inserted = db.executemany(sql, rows)
    print("入扩线新增库成功 %d 条" % inserted)
    conn.commit()
def nick_name(uid_list):
    """Fetch user nick names from Elasticsearch and write them into Information.

    :param uid_list: user ids to resolve (deduplicated internally)
    """
    uid_list = list(set(uid_list))
    query_body = {
        "query": {
            "bool": {
                "must": [
                    {"bool": {"should": [{"terms": {"u_id": uid_list}}]}}
                ]
            }
        }
    }
    hits = elasticsearch.helpers.scan(es, index="weibo_user_big", query=query_body)
    names = {}
    for hit in hits:
        names[hit['_source']["u_id"]] = hit['_source']['name']
    db = pi_cur()
    sql = 'UPDATE Information SET nick_name=%s where uid=%s'
    rows = [(name, str(uid)) for uid, name in names.items()]
    updated = db.executemany(sql, rows)
    print("update success %s" % updated)
    conn.commit()
def event_analyze(e_id, data, date=thedate):
    """Aggregate one day's statistics for an event and persist them.

    Builds a daily Event_Analyze row (hot/sensitive/negative indices, inland vs
    outland geo distributions for both the weibo sample and the stored
    Information rows, user/figure counts), then refreshes the cumulative
    sensitive ratios on the Event row.

    :param e_id: event id used in Event_information / Event_figure joins
    :param data: iterable of weibo dicts; assumed to carry
        "sentiment_polarity", "uid" and "geo" keys -- TODO confirm with caller
    :param date: 'YYYY-MM-DD' day to analyze (defaults to module-level thedate)
    NOTE(review): source formatting was collapsed; nesting of the geo-matching
    branches below was reconstructed (China-prefix -> inland) -- verify.
    """
    # End of the analyzed day as a unix timestamp; window is the last 24h.
    end_time = datetime.datetime.strptime(date + " 23:59:59",
                                          '%Y-%m-%d %H:%M:%S').timestamp()
    #end_time = int(time.mktime(date.timetuple()))
    start_time = end_time - 24 * 60 * 60
    #td = str(date) + " 23:59:59"
    #data_dict = defaultdict(list)
    #sdata_dict = defaultdict(list)
    #idata_dict = defaultdict(list)
    analyze_dict = {}
    geo_dict = {}
    out_dict = {}        # weibo geo counts, outside China (by country)
    in_dict = {}         # weibo geo counts, inside China (by province)
    in_info_dict = {}    # Information geo counts, inside China
    out_info_dict = {}   # Information geo counts, outside China
    sensitive = 0
    negative = 0
    weibo_count = len(data)
    '''
    cursor.execute('select Information.timestamp,Information.i_id from Event_information ei \
 left join Information on ei.information_id = Information.i_id \
 where ei.event_id = %s and Information.timestamp<=%s and Information.timestamp>=%s',(e_id,end_time,start_time))
    result = cursor.fetchall()
    for res in result:
        print(re)
        lt = time.localtime(res['timestamp'])
        day = time.strftime('%Y-%m-%d',lt)
    query_body = {
        "query": {
            "bool": {
                "must": [
                    {"range": {"time": {"lte": td}}},
                ]
            }
        }
    }
    if index_name ==" ":
        index_name = "weibo_all"
    r = scan(es, index=index_name, query=query_body)
    '''
    user_list = {}
    user_count = 0
    # pattern matches geo strings starting with "China"; pattern2 with "unknown".
    pattern = re.compile(r'(\u4e2d\u56fd)')
    pattern2 = re.compile(r'(\u672a\u77e5)')
    #pattern1 = re.compile(r'(\u9999\u6e2f|\u6fb3\u95e8|\u5b81\u590f|\u5e7f\u897f|\u65b0\u7586|\u897f\u85cf|\u5185\u8499\u53e4|\u9ed1\u9f99\u6c5f)')
    #pattern2 = re.compile(r'([\u4e00-\u9fa5]{2,5}?(\u7701|\u5e02|\u81ea\u6cbb\u533a))')
    #result = Event.objects.filter(e_id=eid).first().information.all().filter(timestamp__range=(start_time,end_time))
    #.count()
    cursor.execute(
        'select figure_id from Event_figure ef where ef.event_id = %s ', (e_id))
    figure_count = len(cursor.fetchall())
    cursor.execute(
        'select Information.geo from Event_information ei \
 left join Information on ei.information_id = Information.i_id \
 where ei.event_id = %s and Information.timestamp<=%s and Information.timestamp>=%s',
        (e_id, end_time, start_time))
    result = cursor.fetchall()
    info_count = len(result)
    if len(result):
        for i in result:
            #print(i['geo'])
            # Every stored Information row in the window counts as sensitive.
            sensitive += 1
            p = pattern.match(i['geo'])
            if p is None:
                p2 = pattern2.match(i['geo'])
                if p2 is None:
                    # Not China, not unknown: foreign, keyed by country
                    # (geo presumably formatted "country&region" -- verify).
                    geo = i['geo'].split('&')[0]
                    try:
                        out_info_dict[geo] += 1
                    except:
                        out_info_dict[geo] = 1
            else:
                # Starts with "China": inland, keyed by province.
                geo = i['geo'].split('&')[1]
                try:
                    in_info_dict[geo] += 1
                except:
                    in_info_dict[geo] = 1
    for item in data:
        #weibo_count += 1
        #day = item['_source']["time"][0:10]
        # For later use: if item["sentiment_polarity"]<0:
        if int(item["sentiment_polarity"]) < 0:
            negative += 1
        try:
            user_list[item["uid"]] = 1
        except:
            continue
        k = pattern.match(item["geo"])
        if k is None:
            k2 = pattern2.match(item['geo'])
            if k2 is None:
                geo = item['geo'].split('&')[0]
                try:
                    out_dict[geo] += 1
                except:
                    out_dict[geo] = 1
        else:
            geo = item['geo'].split('&')[1]
            try:
                in_dict[geo] += 1
            except:
                in_dict[geo] = 1
    user_count = len(user_list.keys())
    # Serialize the geo distributions for storage.
    in_json = json.dumps(in_dict)
    out_json = json.dumps(out_dict)
    in_info_json = json.dumps(in_info_dict)
    out_info_json = json.dumps(out_info_dict)
    # One Event_Analyze row per (day, event), keyed "<end_ts>_<e_id>".
    analyze_dict["%s_%s" % (str(end_time), e_id)] = {
        "event_name": e_id,
        "hot_index": weibo_count,
        "sensitive_index": sensitive,
        "negative_index": negative,
        "geo_weibo_inland": in_json,
        "geo_weibo_outland": out_json,
        "geo_info_inland": in_info_json,
        "geo_info_outland": out_info_json,
        "user_count": user_count,
        "weibo_count": weibo_count,
        "info_count": info_count,
        "figure_count": figure_count,
        "into_date": date,
        "timestamp": end_time
    }
    sql_insert_many("Event_Analyze", "e_id", analyze_dict)
    #cursor.execute('insert into Event(sensitive_figure_ratio,sensitive_info_ratio) values(%s,%s) where e_id = %s ',(figure_count/user_count,info_count/weibo_count,e_id))
    cursor.execute(
        'select sum(user_count) as uc,sum(weibo_count) as wc,sum(info_count) as ic from Event_Analyze where event_name = %s',
        (e_id))
    res = cursor.fetchone()
    # Map zero sums to None so the guard below skips the division.
    if res['uc'] == 0:
        res['uc'] = None
    if res['wc'] == 0:
        res['wc'] = None
    if len(res) and res['uc'] and res['ic'] and res['wc']:
        cursor.execute(
            'update Event set sensitive_figure_ratio=%s,sensitive_info_ratio=%s where e_id = %s ',
            (figure_count / res['uc'], res['ic'] / res['wc'], e_id))
        conn.commit()
        if cursor:
            print("insert into Event successfully")
        else:
            print("failed into Event")
    else:
        print("数据错误")
def cal_hazard_index(mid_dic, insert_dic, start_date, end_date):
    """Compute hazard indices for a batch of messages over a date range and
    write them to Informationspread and Information.

    :param mid_dic: list of dicts carrying "mid", "message_type" and "i_id"
    :param insert_dic: dict keyed by is_id ("<ts>_<mid>"); only spread rows
        present here receive an update
    :param start_date: first day (expanded via get_datelist_v2)
    :param end_date: last day
    NOTE(review): relies on module-level basic_fraction, max_count and
    decay_ratio constants not visible in this chunk -- confirm their values.
    """
    cursor = pi_cur()
    mid_type = {item["mid"]: item["message_type"] for item in mid_dic}
    mid_list = [item["mid"] for item in mid_dic]
    hazard_index_dic = defaultdict(dict)
    # Aggregate the trailing-30-day spread counts for each day in the range.
    # NOTE(review): mids are interpolated into the SQL text here, not
    # parameterized -- safe only if mids never contain quotes.
    for date in get_datelist_v2(start_date, end_date):
        agg_sql = "SELECT sum(comment_count), sum(retweet_count), mid from Informationspread WHERE `timestamp` >= {} and `timestamp` <= {} and mid in ('{}') GROUP BY mid".format(
            date2ts(date) - 30 * 86400, date2ts(date), "','".join(mid_list))
        cursor.execute(agg_sql)
        result = cursor.fetchall()
        for item in result:
            mid = item["mid"]
            count = item["sum(comment_count)"] + item["sum(retweet_count)"]
            # Hazard index formula: scale interaction volume into
            # [basic_fraction, 100]; non-type-1 messages are damped by
            # decay_ratio.
            basic_decay = 100 - basic_fraction
            if mid_type[mid] == 1:
                hazard_index = float(
                    count) * basic_decay / max_count + basic_fraction
            else:
                hazard_index = float(
                    count
                ) * basic_decay * decay_ratio / max_count + basic_fraction
            if hazard_index > 100:
                hazard_index = 100
            hazard_index_dic[date][mid] = hazard_index
    # Build the update payloads, restricted to spread rows in insert_dic.
    update_dic = {}
    update_dic_information = {}
    for date in get_datelist_v2(start_date, end_date):
        for mid in mid_type:
            is_id = "{}_{}".format(date2ts(date), mid)
            if is_id not in insert_dic:
                continue
            update_dic[is_id] = {"hazard_index": hazard_index_dic[date][mid]}
            # Later dates overwrite earlier ones, so the Information row ends
            # up with the most recent day's index.
            update_dic_information[mid] = {
                "hazard_index": hazard_index_dic[date][mid]
            }
    # Write per-day indices to the information-spread table.
    sql = "UPDATE Informationspread SET hazard_index = %s WHERE is_id = %s"
    params = [(update_dic[is_id]["hazard_index"], is_id)
              for is_id in update_dic]
    n = cursor.executemany(sql, params)
    conn.commit()
    # Write to the Information table, re-keying from mid to i_id; messages
    # with no computed index fall back to the initial basic_fraction.
    for item in mid_dic:
        if item["mid"] in update_dic_information:
            update_dic_information[item["i_id"]] = update_dic_information.pop(
                item["mid"])
        else:
            update_dic_information[item["i_id"]] = {
                "hazard_index": basic_fraction
            }
    sql = "UPDATE Information SET hazard_index = %s WHERE i_id = %s"
    params = [(update_dic_information[i_id]["hazard_index"], i_id)
              for i_id in update_dic_information]
    n = cursor.executemany(sql, params)
    conn.commit()
# NOTE(review): the original file had a stray `else: print("数据错误")` fragment
# here -- an orphaned duplicate of event_analyze's tail that is a syntax error
# at module level; it has been removed.
if __name__ == '__main__':
    #thedate = datetime.date.today()
    '''
    sql = 'select e_id,es_index_name from Event'
    cursor.execute(sql)
    result = cursor.fetchall()
    for re in result:
        event_analyze(re['es_index_name'],re['e_id'])
    eid = 'xianggangshijian_1581919160'
    event_analyze("weibo_all",eid,)
    '''
    #eid = 'xianggang_1582357500'
    e_id = 'xianggangshijian_1581919160'
    cursor.execute(
        'select sum(user_count) as uc,sum(weibo_count) as wc,sum(info_count) as ic,max(figure_count) as fc from Event_Analyze where event_name = %s',
        (e_id,))
    res = cursor.fetchone()
    print(res)
    #cursor.execute('update Event set sensitive_figure_ratio=%s,sensitive_info_ratio=%s where e_id = %s ',(res['fc']/res['uc'],res['ic']/res['wc'],e_id))
    cursor.execute(
        'update Event set sensitive_figure_ratio=%s,sensitive_info_ratio=%s where e_id = %s ',
        (res['fc'] / res['uc'], res['ic'] / res['wc'], e_id))
    conn.commit()
    if cursor:
        print("insert into Event successfully")
    #print(cursor.fetchone())