def get_event_info(subject, verb, object, event_negaword): """ 根据主语、谓语、宾语查询“事件信息表”。 :param subject: array.主语 :param verb: array.谓语 :param object: array.宾语 :param event_negaword: string.同义词 :return 事件信息表数据。 """ db = DatabaseWrapper() try: # cursor = connect.cursor(cursor_factory=psycopg2.extras.RealDictCursor) sql = f"SELECT * FROM ebm_event_info WHERE subject IN %s AND verb IN %s " \ f"AND object IN %s AND event_negaword = %s" logger.info(sql) result = db.query( sql, (tuple(subject), tuple(verb), tuple(object), event_negaword), QueryResultType.JSON) # cursor.execute(sql) # result = cursor.fetchall() # result = json.loads(json.dumps(result)) except Exception as e: raise RuntimeError(e) finally: db.close() return result
def insert_sentattribute_rel(shorten_ssentence, attribute_id, sentence_id): """ 插入数据到“事件句子属性关系表”。 :param shorten_ssentence: string.事件原始短语。该数据在事件归并时才有效,用于记录相似事件的事件短语。 :param attribute_id: string.情感分析 :param sentence_id: string.事件发生日期 :return 关系id。 """ db = DatabaseWrapper() try: rel_id = gener_id_by_uuid() # cursor = connect.cursor() db.execute( "INSERT INTO ebm_sentattribute_rel(rel_id, shorten_ssentence, relation_id, sentence_id) VALUES " "(%s, %s, %s, %s)", (rel_id, shorten_ssentence, attribute_id, sentence_id)) db.commit() except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close() return rel_id
def query_sub_models_by_model_id(model_id): """ 根据模型编号查询出综合排名前10的子模型。 :param model_id: 模型编号 :return array. t_event_model_detail表数据 """ db = DatabaseWrapper(dbname=event_dbname) try: sql = "SELECT t1.model_name, t1.detail_id, t1.lag_date, t1.pca, t1.kernel_size, t1.pool_size, t2.days, " \ "t2.model_dir FROM t_event_model_detail t1 JOIN t_event_model t2 ON t1.model_id = t2.model_id " \ "JOIN t_event_model_tot t3 ON t1.detail_id = t3.detail_id " \ "WHERE t1.model_id = %s ORDER BY t3.score DESC LIMIT 10" event_predict = db.query(sql, (model_id, ), result_type=QueryResultType.BEAN, wild_class=EventPredict) if len(event_predict) < 1: raise RuntimeError( f"Query t_event_model_detail error, the {model_id} cannot get multi-row" ) return event_predict except Exception as e: raise RuntimeError(e) finally: db.close()
def get_article_info_by_id(event_id): """ 根据事件id查询该事件的文章信息。 :param event_id: string.事件id :return 事件的文章信息。 """ db = DatabaseWrapper() try: # cursor = connect.cursor(cursor_factory=psycopg2.extras.RealDictCursor) result = db.query( f"SELECT t4.translated_title, t4.translated_content, t4.pub_time, t4.create_date FROM " f"(SELECT t2.article_id FROM ebm_eventsent_rel t1 JOIN ebm_event_sentence t2 ON " f"t1.sentence_id = t2.sentence_id WHERE t1.event_id = '{event_id}' " f"GROUP BY t2.article_id) t3 " f"JOIN t_article_msg t4 ON t3.article_id = t4.article_id", (), QueryResultType.JSON) # result = cursor.fetchall() # result = json.loads(json.dumps(result)) except Exception as e: raise RuntimeError(e) finally: db.close() return result
def insert_event_info(subject, verb, object, shorten_sentence, cameo_code, triggerloc_index, event_negaword): """ 插入数据到“事件信息表”。 :param subject: string.主语 :param verb: string.谓语 :param object: string.宾语 :param shorten_sentence: string.事件短语 :param cameo_code: string.cameo编号 :param triggerloc_index: string.谓语下标 :param event_negaword: string.否定词 :return 事件id。 """ db = DatabaseWrapper() try: triggerloc_index = str(triggerloc_index).replace("'", "\"") event_id = gener_id_by_uuid() # cursor = connect.cursor() db.execute( "INSERT INTO ebm_event_info(event_id, subject, verb, object, shorten_sentence, cameo_id, " "triggerloc_index, event_negaword) VALUES (%s,%s,%s,%s,%s,%s,%s,%s)", (event_id, subject, verb, object, shorten_sentence, cameo_code, triggerloc_index, event_negaword)) db.commit() except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close() return event_id
def insert_event_sentence(article_id, event_sentence): """ 插入数据到“事件句子表”。 :param article_id: string.文章编号 :param event_sentence: string.事件句子 :return 文本编号、句子编号。 """ db = DatabaseWrapper() try: event_sentence = str(event_sentence).replace("'", "\"") sentence_id = gener_id_by_uuid() if article_id is None or article_id == '': article_id = gener_id_by_uuid() # cursor = connect.cursor() db.execute( "INSERT INTO ebm_event_sentence(sentence_id, event_sentence, article_id) VALUES " "(%s, %s, %s)", (sentence_id, event_sentence, article_id)) db.commit() except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close() return article_id, sentence_id
def get_articles_from_db(): import time db = DatabaseWrapper('mng') sql = 'select article_id, content from t_article_msg' t1 = time.time() rst = db.query(sql, result_type=QueryResultType.DB_NATURE) t2 = time.time() print(f'finished query, used {t2 - t1} secs') return rst
def get_event_rel(event_id): db = DatabaseWrapper() try: # 查询事件属性 event_rel = db.query( f"select * from ebm_eventsent_rel where event_id='{event_id}'", (), QueryResultType.DICT) except Exception as e: raise RuntimeError(e) finally: db.close() return event_rel
def get_article(): db = DatabaseWrapper() try: article = db.query(f"select article_id,content from t_article_msg " f"where is_clean='0'") return article except Exception as e: raise RuntimeError(e) finally: db.close() return article
def article_clean(article_id, content): db = DatabaseWrapper() try: db.execute( "update t_article_msg set is_clean='1',content_cleared=%s," "clean_finish_date=%s,clean_finish_time=%s " "where article_id=%s", (content, date_util.sys_date("%Y-%m-%d"), date_util.sys_time("%H:%M:%S"), article_id)) db.commit() except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close()
def query_table_2pandas(table_name): # 每次查询建立一次连接 database_config = DatabaseConfig() database_config.name = 'alg' db = DatabaseWrapper(database_config) try: sql = f"SELECT * FROM {table_name}" result = db.query(sql, (), result_type=QueryResultType.PANDAS) return result except Exception as e: raise RuntimeError(f"Query table error! {str(e)}") finally: # db不可能为None db.close()
def model_eval_done(top_scores, events_num): """ 更新t_event_model表的模型状态、训练开始日期、训练结束日期。 :param top_scores: :param events_num: """ db = DatabaseWrapper(dbname=event_dbname) try: sql = "insert into t_event_model_tot(tot_id, num, false_rate, recall_rate, false_alarm_rate, " \ "tier_precision, tier_recall, bleu, score, status, detail_id, create_date, create_time) values " \ "(%s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s, %s)" params = [] for score, bleu_summary, tier_precision_summary, tier_recall_summary, fr_summary, rc_summary, \ fa_summary, detail_id in top_scores: num_events = np.sum( [v for k, v in events_num.items() if str(k) != '0']) param = (UuidHelper.guid(), str(num_events), str(fr_summary), str(rc_summary), str(fa_summary), str(tier_precision_summary), str(tier_recall_summary), str(bleu_summary), str(score), DataStatus.SUCCESS.value, detail_id, sys_date(sys_date_formatter), sys_time(sys_time_formatter)) params.append(param) db.executemany(sql, params) db.commit() except Exception as e: raise RuntimeError(e) finally: db.close()
def query(sql, db: str = 'alg', parameter: tuple = ()): """ 从数据库查询数据 Args: sql: parameter: db: 连接的数据库名称. 'alg' 为算法所需源数据所在库, 如需指定算法生成数据及应用端数据 所在库, 将此参数指定为其他名称即可, 推荐使用 'mng' Returns: 查询结果 """ if not sql: raise RuntimeError("The sql must be not none!") database_config = DatabaseConfig() database_config.name = db db = DatabaseWrapper(database_config) try: result = db.query(sql, parameter, QueryResultType.DB_NATURE) result = [list(r.values()) for r in result] return result except Exception as e: raise RuntimeError(f"The query error! {e}") finally: db.close()
def insert_event_attribute(sentiment_analysis, event_date, event_local, event_state, nentity_place, nentity_org, nentity_person, nentity_misc, event_id): """ 插入数据到“事件属性表”。 :param sentiment_analysis: string.情感分析 :param event_date: string.事件发生日期 :param event_local: string.事件发生地点 :param event_state: string.事件状态 :param nentity_place: string.命名实体-地点 :param nentity_org: string.命名实体-组织机构 :param nentity_person: string.命名实体-人 :param nentity_misc: string.命名实体-杂项 :param event_id: string.事件编号 :return 属性id。 """ db = DatabaseWrapper() try: if not nentity_place: nentity_place = '' if not nentity_org: nentity_org = '' if not nentity_person: nentity_person = '' event_local = str(event_local).replace("'", "\"") nentity_place = str(nentity_place).replace("'", "\"") nentity_org = str(nentity_org).replace("'", "\"") nentity_person = str(nentity_person).replace("'", "\"") attritute_id = gener_id_by_uuid() create_date = date_util.sys_datetime("%Y-%m-%d") # cursor = connect.cursor() db.execute( "INSERT INTO ebm_eventsent_rel(relation_id, sentiment_analysis, event_date, event_local, " "event_state, nentity_place, nentity_org, nentity_person, nentity_misc, " "create_date, event_id) VALUES (%s,%s,%s,%s,%s,%s,%s,%s,%s,%s,%s)", (attritute_id, sentiment_analysis, event_date, event_local, event_state, nentity_place, nentity_org, nentity_person, nentity_misc, create_date, event_id)) db.commit() except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close() return attritute_id
def get_event_attribute(event_id, event_local, event_date, nentity_org, nentity_person): """ 根据事件id及事件发生地点、组织机构、人物查询事件信息,主要用于验证是否有重复事件。 :param event_id: string.事件id :param event_local: string.事件发生地点 :param event_date: string.事件发生日期 :param nentity_org: list.命名实体-组织机构 :param nentity_person: list.命名实体-人物 :return 事件信息表数据。 """ db = DatabaseWrapper() try: check_sql = f"SELECT * FROM ebm_eventsent_rel WHERE event_id = '{event_id}' " \ f"AND event_local = '{event_local}' " \ f"AND event_date = '{event_date}'" for org in nentity_org: check_sql = check_sql + f" AND (nentity_org LIKE '%%,{org}%%' " \ f"OR nentity_org LIKE '%%{org},%%' OR " \ f"nentity_org = '{org}')" for person in nentity_person: check_sql = check_sql + f" AND (nentity_person LIKE '%%,{person}%%' " \ f"OR nentity_person LIKE '%%{person},%%' " \ f"OR nentity_person = '{person}')" result = db.query(check_sql, (), QueryResultType.JSON) except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close() return result
def insert_corecontent(article_id, content): """ 插入数据到“t_article_msg_zh”表。 :param article_id: string.文章id :param content: string.文本内容 """ db = DatabaseWrapper() try: db.execute( "INSERT INTO t_article_msg_en(article_id, content) VALUES (%s, %s)", (article_id, content)) db.commit() except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close()
def query_data_table_2pandas(table_name): """ 查询数据库中指定数据表的所有数据。该方法会将查询到的数据中date_col字段数据转换为字符串类型。 :param table_name: string. 数据表名 :return dataframe.指定表的dataframe形式。 """ # 每次查询建立一次连接 db = DatabaseWrapper(dbname=data_dbname) try: sql = f"SELECT * FROM {table_name} ORDER BY rqsj ASC" result = db.query(sql, (), result_type=QueryResultType.PANDAS) return result except Exception as e: raise RuntimeError(f"Query table error! {str(e)}") finally: # db不可能为None db.close()
def get_synonym(): """ 查询“同义词表”获取同义词数据。 :return 同义词数据。 """ db = DatabaseWrapper() try: # cursor = connect.cursor(cursor_factory=psycopg2.extras.RealDictCursor) result = db.query("SELECT * FROM ebm_synonym_storage", (), QueryResultType.JSON) # result = cursor.fetchall() # result = json.loads(json.dumps(result)) except Exception as e: raise RuntimeError(e) finally: db.close() return result
def query_teventmodel_by_id(model_id): """ 根据模型编号查询t_event_model表数据。若该模型编号查询出的数据行数不为1则抛出RuntimeError异常。 :param model_id: string. 模型编号 :return EventModel.实体对象。 """ db = DatabaseWrapper(dbname=event_dbname) try: sql = "SELECT * FROM t_event_model WHERE model_id = %s" t_event_model = db.query(sql, (model_id, ), result_type=QueryResultType.BEAN, wild_class=EventModel) if len(t_event_model) != 1: raise RuntimeError( f"Query t_event_model error, the {model_id} cannot get only one row" ) return t_event_model[0] except Exception as e: raise RuntimeError(e) finally: db.close()
def get_sentence_attributes_by_id(event_id): """ 根据事件id查询该事件的详细信息。 :param event_id: string.事件id :return 事件的详细信息。 """ db = DatabaseWrapper() try: # cursor = connect.cursor(cursor_factory=psycopg2.extras.RealDictCursor) result = db.query( f"SELECT t1.*, t2.event_sentence, t2.article_id FROM ebm_eventsent_rel t1 " f"JOIN ebm_event_sentence t2 ON t1.sentence_id = t2.sentence_id " f"WHERE t1.event_id = '{event_id}'", (), QueryResultType.JSON) # result = cursor.fetchall() # result = json.loads(json.dumps(result)) except Exception as e: raise RuntimeError(e) finally: db.close() return result
def query_teventtask_by_id(task_id): """ 根据模型编号查询t_event_task表数据。若该模型编号查询出的数据行数不为1则抛出RuntimeError异常。 :param task_id: string. 预测任务编号 :return EventModel.实体对象。 """ db = DatabaseWrapper(dbname=event_dbname) try: sql = "SELECT t2.event_type, t2.model_type, t2.model_dir, t2.event, t1.* FROM t_event_task t1 " \ "JOIN t_event_model t2 ON t1.model_id = t2.model_id WHERE t1.task_id = %s" t_event_task = db.query(sql, (task_id, ), result_type=QueryResultType.BEAN, wild_class=EventTask) if len(t_event_task) != 1: raise RuntimeError( f"Query t_event_task error, the {task_id} cannot get only one row" ) return t_event_task[0] except Exception as e: raise RuntimeError(e) finally: db.close()
def query_event_table_2pandas(table_name, event_col, date_col): """ 查询数据库中指定事件表的所有数据,若传入了not_none_columns参数,则该方法不会返回指定参数中的列名在数据表为空的行。 :param table_name: string. 事件表名 :param event_col: string. 事件类别字段名。 :param date_col: string. 事件日期字段名。 :return dataframe.指定表的dataframe形式。 """ # 每次查询建立一次连接 db = DatabaseWrapper(dbname=data_dbname) try: sql = f"SELECT {event_col}, CAST({date_col} AS VARCHAR) AS {date_col} FROM {table_name} " \ f"WHERE qssj IS NOT NULL AND ({event_col} IS NOT NULL AND {event_col} <> '') AND " \ f"({date_col} IS NOT NULL AND CAST({date_col} AS VARCHAR) <> '') AND " \ f"LENGTH(CAST({date_col} AS VARCHAR)) > 8 ORDER BY {date_col} ASC" result = db.query(sql, (), result_type=QueryResultType.PANDAS) return result except Exception as e: raise RuntimeError(f"Query table error! {str(e)}") finally: # db不可能为None db.close()
def __modify(sql: str, parameter: tuple, error=''): """ 根据 sql 对数据进行增删改操作 Args: sql: str. sql字符串 parameter: tuple. 参数列表 error: 操作出错时则日志中输出的错误信息 """ if not sql: raise RuntimeError("The sql must be not none!") db = DatabaseWrapper(dbname=event_dbname) try: db.execute(sql, parameter) db.commit() # 配置文件中设置不自动提交,所以手动提交 except Exception as e: raise RuntimeError(f'{error}: {str(e)} ' if error else error) finally: db.close()
def insert_event_copy(copy_event_id, event_id): """ 插入数据到“事件信息镜像校验表”。 :param copy_event_id: string.镜像事件编号 :param event_id: string.事件编号 :return 镜像id。 """ db = DatabaseWrapper() try: copy_id = gener_id_by_uuid() # cursor = connect.cursor() db.execute( "INSERT INTO ebm_event_copy(copy_id, copy_event_id, event_id) VALUES (%s, %s, %s)", (copy_id, copy_event_id, event_id)) except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close() return copy_id
def get_article_list(event_id): db = DatabaseWrapper() article = [] try: if event_id != ['']: # 根据多个事件id,查询文章列表 article_ids = db.query( f"select distinct(a.article_id) from ebm_event_sentence a " f"join ebm_sentattribute_rel b on a.sentence_id=b.sentence_id " f"join ebm_eventsent_rel c on b.relation_id=c.relation_id " f"join ebm_event_info d on c.event_id=d.event_id " f"where d.event_id in (%s)" % ','.join(['%s'] * len(event_id)), event_id, QueryResultType.PANDAS) article_id_arr = [] for ids in article_ids.article_id: article_id_arr.append(ids) if article_id_arr: # 查询文章信息 article = db.query( "select a.article_id,b.title as translated_title,b.content_summary,a.spider_time,a.source " "from t_article_msg a " "left join t_article_msg_zh b on a.article_id=b.article_id " "where a.article_id in (%s) limit 10" % ','.join(['%s'] * len(article_id_arr)), article_id_arr, QueryResultType.JSON) # 查询文章关联的事件,用于前端连线 for a in article: article_event = db.query( f"select distinct(a.event_id) from ebm_event_info a " f"join ebm_eventsent_rel b on a.event_id=b.event_id " f"join ebm_sentattribute_rel c on b.relation_id=c.relation_id " f"join ebm_event_sentence d on c.sentence_id=d.sentence_id " f"where d.article_id='{a['article_id']}'", (), QueryResultType.JSON) a.update({"event_ids": article_event}) except Exception as e: raise RuntimeError(e) finally: db.close() return article
def get_article_info(article_id, event_id): db = DatabaseWrapper() try: # 查询指代消解后的文章详情 article = db.query( f"select content from t_article_msg_zh where article_id='{article_id}'", (), QueryResultType.JSON) # 查询事件关联的句子,用于页面高亮 sentence = db.query( f"select distinct(a.event_sentence) from ebm_event_sentence a " f"join ebm_sentattribute_rel b on a.sentence_id=b.sentence_id " f"join ebm_eventsent_rel c on b.relation_id=c.relation_id " f"join ebm_event_info d on c.event_id=d.event_id " f"where a.article_id='{article_id}' " f"and d.event_id in (%s)" % ','.join(['%s'] * len(event_id)), event_id, QueryResultType.JSON) except Exception as e: raise RuntimeError(e) finally: db.close() return article, sentence
def test_executemany(): db_config = __set_dbinfo(autocommit=False) db = DatabaseWrapper(db_config) try: db.execute( "CREATE TABLE db_wrapper_test(id int null, name varchar(64) null)") parameters = [] for i in range(0, 200): parameters.append((i, 'abc')) inser_num = db.executemany( "INSERT INTO db_wrapper_test(id, name) VALUES(%s, %s)", parameters) assert inser_num == 200 db.commit() result = db.query("SELECT COUNT(*) AS num FROM db_wrapper_test", (), QueryResultType.JSON) assert result[0]["num"] == 200 except Exception as e: raise RuntimeError(e) finally: db.execute("DROP TABLE db_wrapper_test") db.commit() db.close()
def test_execute(): db_config = __set_dbinfo(autocommit=True) db = DatabaseWrapper(db_config) try: db.execute( "CREATE TABLE db_wrapper_test(id int null, name varchar(64) null)") inser_num = db.execute( "INSERT INTO db_wrapper_test(id, name) VALUES(%s, %s)", (1, 'abc')) assert inser_num == 1 update_num = db.execute( "UPDATE db_wrapper_test SET name = %s WHERE id = %s", ('def', 1)) assert update_num == 1 delete_num = db.execute("DELETE FROM db_wrapper_test") assert delete_num == 1 # 因为设置了自动提交,所以不再手动调用commit() except Exception as e: raise RuntimeError(e) finally: db.execute("DROP TABLE db_wrapper_test") db.close()
def test_execute_by_config_file(): db = DatabaseWrapper() try: db.execute( "CREATE TABLE db_wrapper_test(id int null, name varchar(64) null)") # oracle的写法:db.execute("INSERT INTO db_wrapper_test(id, name) VALUES(:1, :2)", (1, 'abc')) inser_num = db.execute( "INSERT INTO db_wrapper_test(id, name) VALUES(%s, %s)", (1, 'abc')) assert inser_num == 1 update_num = db.execute( "UPDATE db_wrapper_test SET name = %s WHERE id = %s", ('def', 1)) assert update_num == 1 delete_num = db.execute("DELETE FROM db_wrapper_test") assert delete_num == 1 # 对于pgsql来说,CREATE TABLE不在事务控制范围内;对于mysql来说,CREATE TABLE、DROP TABLE不在事务控制范围内 # 所以下一行代码在测试pgsql时必须要提交,在测试mysql时可以放在finally中 # oracle语法不支持DROP TABLE IF EXISTS db.execute("DROP TABLE db_wrapper_test") db.commit() except Exception as e: db.rollback() raise RuntimeError(e) finally: db.close()
def test_query(): db_config = __set_dbinfo(autocommit=True) db = DatabaseWrapper(db_config) try: db.execute( "CREATE TABLE db_wrapper_test(id int null, name varchar(64) null)") # ORACLE写法:db.execute("INSERT INTO db_wrapper_test(id, name) VALUES(:1, :2)", (1, 'abc')) db.execute("INSERT INTO db_wrapper_test(id, name) VALUES(%s, %s)", (1, 'abc')) db.execute("INSERT INTO db_wrapper_test(id, name) VALUES(%s, %s)", (2, 'def')) db.execute("INSERT INTO db_wrapper_test(id, name) VALUES(%s, %s)", (3, 'ghi')) df = db.query( "SELECT * FROM db_wrapper_test WHERE id BETWEEN %s AND %s", (0, 4), QueryResultType.PANDAS) # 行数及列数 assert df.shape[0] == 3 assert df.shape[1] == 2 json_result = db.query( "SELECT * FROM db_wrapper_test WHERE id BETWEEN %s AND %s", (0, 4), QueryResultType.JSON) assert len(json_result) == 3 assert len(json_result[0].keys()) == 2 dict_result = db.query( "SELECT * FROM db_wrapper_test WHERE id BETWEEN %s AND %s", (0, 4), QueryResultType.DICT) assert len(dict_result) == 3 assert len(dict_result[0].keys()) == 2 class DbWrapperTest(object): def __init__(self): self._id = None self._name = None @property def id(self): return self._id @id.setter def id(self, id): self._id = id @property def name(self): return self._name @name.setter def name(self, name): self._name = name bean_result = db.query( "SELECT * FROM db_wrapper_test WHERE id BETWEEN %s AND %s", (0, 4), QueryResultType.BEAN, DbWrapperTest) assert len(bean_result) == 3 assert bean_result[0].id == 1 # 因为设置了自动提交,所以不再手动调用commit() except Exception as e: raise RuntimeError(e) finally: db.execute("DROP TABLE db_wrapper_test") db.close()