def generate_weibo_topics():
    """Build one keyword-topic row per day from the local ``weibo`` database.

    Reads every ``(publish_time, content)`` row of the ``weibo`` table in
    ascending ``publish_time`` order, concatenates the content of rows that
    share a calendar day, asks ``textrank`` for up to 20 weighted keywords of
    that text and inserts ``(date, str({keyword: weight}))`` into ``topic``
    through ``Database``.

    Grouping starts at the sentinel day 2019-12-31, so a row is also written
    for that day (from empty text) when the first message is dated
    differently or when the table is empty.
    """
    L.info("Start update weibo topic.")

    # Keyword arguments work on every PyMySQL release; 1.0+ no longer accepts
    # the connection parameters positionally.
    db_weibo = pymysql.connect(
        host="localhost", user="root", password="root", database="weibo")
    try:
        cursor = db_weibo.cursor()
        try:
            cursor.execute(
                "select publish_time, content from weibo order by publish_time asc")
            data = cursor.fetchall()
        finally:
            cursor.close()
    finally:
        # Everything needed is in ``data``; release the connection right away.
        db_weibo.close()

    db = Database()
    sql = "insert into topic (date, topic) values (%s, %s)"

    def store_day(day, text):
        """Extract the keywords of ``text`` and insert the row for ``day``."""
        topic = {
            keyword: weight
            for keyword, weight in textrank(text, topK=20, withWeight=True)
        }
        db.execute(sql, [day.strftime("%Y-%m-%d %H:%M:%S"), str(topic)])

    now = datetime.date(2019, 12, 31)
    news = ""
    for line in data:
        day = line[0].date()
        if day == now:
            news += line[1]
            continue
        # The day changed: flush the text collected so far, then start over.
        store_day(now, news)
        L.info("\tNow processing {}".format(now.strftime("%Y-%m-%d")))
        now = day
        news = line[1]

    # Flush the last (still open) day.
    store_day(now, news)
    L.info("\tFinished update weibo topic.")
def request_news_time_series():
    """Load ``./json/DXYNews-TimeSeries.json`` into the ``news`` table.

    Every record whose ``id`` is not yet present in ``news`` is inserted.
    Before the insert the record is normalised in place:

    * ``pubDate`` is divided by 1000 and converted with ``TS2S``;
    * an empty-string ``provinceId`` becomes ``None``;
    * ``summary`` is cut to its first 4096 characters;
    * a missing ``province`` key is filled with ``None``.

    The number of inserted records is logged at the end.
    """
    L.info("Start update news time series.")
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open('./json/DXYNews-TimeSeries.json', 'r', encoding='utf-8') as file:
        records = json.load(file)
    L.info("\tRead {} lines from file".format(len(records)))

    db = Database()
    columns = [
        'id', 'provinceId', 'title', 'summary', 'infoSource', 'sourceUrl',
        'pubDate', 'province'
    ]
    # The statement is identical for every record, so build it once.
    insert_sql = "insert into news (" + ','.join(columns) + ") values (" + \
        ', '.join(['%s'] * len(columns)) + ")"

    update_num = 0
    for record in records:
        # The id comes from a crawled file and ends up inside the SQL text;
        # int() guarantees that only a plain number can be interpolated.
        existing = db.select(
            "select * from news where id={}".format(int(record['id'])))
        if existing:
            continue

        update_num += 1
        record['pubDate'] = TS2S(record['pubDate'] / 1000.0)
        if record['provinceId'] == "":
            record['provinceId'] = None
        record['summary'] = record['summary'][0:4096]
        if 'province' not in record:
            record['province'] = None
        db.execute(insert_sql, [record[column] for column in columns])

    L.info('\tUpdate {} news data.'.format(update_num))
def generate_topics():
    """Rebuild the ``topic`` table from the summaries stored in ``news``.

    All existing ``topic`` rows are deleted first. The ``news`` rows are then
    read in ascending ``pubDate`` order and their summaries concatenated per
    calendar day. For each day one row is inserted containing:

    * ``date``  - the day, formatted ``%Y-%m-%d %H:%M:%S``;
    * ``topic`` - ``str`` of the ``{keyword: weight}`` dict from ``textrank``;
    * ``dead``  - ``str`` of the keywords of the previously stored day that
      are absent from this day;
    * ``new``   - ``str`` of this day's keywords that were absent from the
      previously stored day.

    Grouping starts at the sentinel day 2019-12-31, so a row is also written
    for that day (from empty text) when the first news item is dated
    differently or when ``news`` is empty.
    """
    L.info("Start update topic.")
    db = Database()
    db.run("delete from topic")
    data = db.select("select pubDate, summary from news order by pubDate asc")
    sql = "insert into topic (date, topic, dead, new) values (%s, %s, %s, %s)"

    # One keyword budget for every day. The final flush used to ask for only
    # 20 keywords while all earlier days used 40, which made about half of the
    # previous day's keywords show up as "dead" on the last day.
    top_k = 40

    def store_day(day, text, previous):
        """Insert the row for ``day`` and return that day's keyword list."""
        topic = {}
        current = []
        for keyword, weight in textrank(text, topK=top_k, withWeight=True):
            topic[keyword] = weight
            current.append(keyword)
        new = [keyword for keyword in current if keyword not in previous]
        dead = [keyword for keyword in previous if keyword not in current]
        db.execute(sql, [
            day.strftime("%Y-%m-%d %H:%M:%S"),
            str(topic),
            str(dead),
            str(new)
        ])
        return current

    now = datetime.date(2019, 12, 31)
    news = ""
    yesterday = []
    for line in data:
        day = line[0].date()
        if day == now:
            news += line[1]
            continue
        # The day changed: store the finished day and remember its keywords
        # as the baseline for the next comparison.
        yesterday = store_day(now, news, yesterday)
        L.info("\tNow processing {}".format(now.strftime("%Y-%m-%d")))
        now = day
        news = line[1]

    # Store the last (still open) day.
    store_day(now, news, yesterday)
    L.info("\tFinished update topic.")
def set_region_parent():
    """Fill the ``parent`` column of every row in ``region``.

    The parent is derived from the row's own ``code`` and ``level``:

    * level 1 -> ``86``;
    * level 2 -> first two characters of the code followed by ``"0000"``;
    * level 3 -> first four characters of the code followed by ``"00"``.

    Rows whose code has fewer than six characters, or whose level is not
    1, 2 or 3, are reported with ``print`` and left untouched.
    """
    db = Database()
    select_sql = "select name, code, level from region"
    update_sql = "update region set parent = %s where code = %s"

    for name, code, level in db.select(select_sql):
        code_text = str(code)
        if len(code_text) < 6:
            print('code short:', name, code, level)
            continue

        if level == 1:
            parent = 86
        elif level == 2:
            parent = code_text[:2] + "0000"
        elif level == 3:
            parent = code_text[:4] + "00"
        else:
            # Previously an unexpected level crashed with IndexError on an
            # empty list; skip the row like the short-code case instead.
            print('unknown level:', name, code, level)
            continue

        db.execute(update_sql, (parent, code))
def create(self):
    """Insert this object as a new row of the ``project`` table.

    The row is built from ``self.name``, ``self.create_user`` and
    ``self.description``.

    Returns:
        Whatever ``Database.execute`` returns for the insert; the same value
        is also written to standard output.
    """
    connection = Database()
    row = (self.name, self.create_user, self.description)
    result = connection.execute(
        "insert into project (name, createUser, description) values (%s, %s, %s)",
        row,
    )
    print(result)
    return result
def create(self):
    """Insert this object as a new row of the ``module`` table.

    The row is built from ``self.project_id``, ``self.name``,
    ``self.create_user`` and ``self.description``.

    Returns:
        Whatever ``Database.execute`` returns for the insert.
    """
    connection = Database()
    row = (self.project_id, self.name, self.create_user, self.description)
    return connection.execute(
        "insert into module (projectId, name, createUser, description) values (%s, %s, %s, %s)",
        row,
    )
def create(self):
    """Insert this object as a new row of the ``task`` table.

    The row is built from ``self.project_id``, ``self.module_id``,
    ``self.name``, ``self.create_user``, ``self.description``,
    ``self.plan_end_time`` and ``self.executor``.

    Returns:
        Whatever ``Database.execute`` returns for the insert.
    """
    db = Database()
    # Adjacent literals are joined by the parser. A backslash continuation
    # inside the quotes would instead leak source-code whitespace (or a stray
    # backslash) into the statement sent to the server.
    sql = ("insert into task (projectId, moduleId, name, createUser, "
           "description, planEndTime, executor) "
           "values (%s, %s, %s, %s, %s, %s, %s)")
    params = (self.project_id, self.module_id, self.name, self.create_user,
              self.description, self.plan_end_time, self.executor)
    return db.execute(sql, params)
def request_rumor_time_series():
    """Load ``./json/DXYRumors-TimeSeries.json`` into the ``rumor`` table.

    Every record whose ``id`` is not yet present in ``rumor`` is inserted.
    Before the insert the record is normalised in place:

    * ``crawlTime`` is divided by 1000 and converted with ``TS2S``;
    * ``mainSummary`` and ``body`` are each cut to their first 1024
      characters.

    The number of inserted records is logged at the end.
    """
    L.info("Start update rumor time series.")
    # JSON text is UTF-8; do not depend on the platform's default encoding.
    with open('./json/DXYRumors-TimeSeries.json', 'r', encoding='utf-8') as file:
        records = json.load(file)
    L.info("\tRead {} lines from file".format(len(records)))

    db = Database()
    columns = [
        'id', 'title', 'mainSummary', 'body', 'sourceUrl', 'rumorType',
        'crawlTime'
    ]
    # The statement is identical for every record, so build it once.
    insert_sql = "insert into rumor (" + ','.join(columns) + ") values (" + \
        ', '.join(['%s'] * len(columns)) + ")"

    update_num = 0
    for record in records:
        # The id comes from a crawled file and ends up inside the SQL text;
        # int() guarantees that only a plain number can be interpolated.
        existing = db.select(
            "select * from rumor where id={}".format(int(record['id'])))
        if existing:
            continue

        update_num += 1
        record['crawlTime'] = TS2S(record['crawlTime'] / 1000.0)
        record['mainSummary'] = record['mainSummary'][0:1024]
        record['body'] = record['body'][0:1024]
        db.execute(insert_sql, [record[column] for column in columns])

    L.info('\tUpdate {} rumor data.'.format(update_num))
def create(self):
    """Run the statement supplied by ``self.get_sql()`` through ``Database``.

    ``get_sql`` must return a ``(sql, params)`` pair; both are passed
    unchanged to ``Database.execute``.

    Returns:
        Whatever ``Database.execute`` returns.
    """
    connection = Database()
    statement, arguments = self.get_sql()
    return connection.execute(statement, arguments)
def createUser(username='admin', password='123456', level=5):
    """Insert one row into the ``user`` table.

    Called without arguments it creates the same account as before
    (``admin`` / ``123456`` / level 5).

    Args:
        username: Value stored in the ``username`` column.
        password: Value stored in the ``password`` column.
        level: Value stored in the ``level`` column.

    ``createTime`` is taken from ``TM()`` at call time.
    """
    # NOTE(review): the password is handed to the insert exactly as given; no
    # hashing happens in this function. Confirm how the login path compares
    # passwords before relying on the default credentials.
    user = [username, password, level, TM()]
    sql = "insert into user (username, password, level, createTime) values (%s, %s, %s, %s)"
    db = Database()
    db.execute(sql, user)