def getDatasetfromDB(vocabDic_path, properties_path): ''' connect to data base then build dataset ''' vocabDics = preprocessing.loadVocabDic(["pos", "rel"], vocabDic_path) pos2idx = vocabDics["pos"] rel2idx = vocabDics["rel"] dataset = [] cycle_counter = 0 try: properties = preprocessing.readDictionary(properties_path) connection = connect2DB(properties) with connection.cursor() as cursor: sentence_query = "SELECT * FROM sentence" annotation_query = "SELECT relation_root FROM annotation WHERE sid=%s" cursor.execute(sentence_query) results = cursor.fetchall() for result in results: print("#============building semantic graph [{}]============#". format(result['sid'])) cursor.execute(annotation_query, (result['sid'], )) annotations = cursor.fetchall() if len(annotations) == 0: annotations = [{'relation_root': 0}] listLabel = set([ annotation['relation_root'] for annotation in annotations ]) graph = preprocessing.buildSemanticGraph( DependencyTreeStr=result['dpTreeStr'], listLabel=listLabel, use_word=False, pos2idx=pos2idx, rel2idx=rel2idx, sid=result['sid']) if graph.hasCycle: cycle_counter += 1 else: print(graph) dataset.append( preprocessing.DataTuple( indexedWords=graph.indexedWords, graph=graph)) print("There are {} graphs has cycle then drop them from dataset!". format(cycle_counter)) except Exception as err: print(err) except ConnectionError as connerr: print(connerr) print("[Error] build semantic graph from db failed!") else: return dataset finally: connection.close()
options) print( "Epoch Eval Metrics: ACC[{:.2f}%] Loss[{:.6f}] \n\t\t\t\t\tP[{:.2f}%], R[{:.2f}%], F1[{:.2f}%]\n" .format((acc * 100), valid_loss, (p * 100), (r * 100), (F1 * 100))) saveMetrics(metrics, step_losses, local_time, options) plotMetrics(metrics, step_losses, local_time, options) print(" - [Info] Training stage ends") if __name__ == "__main__": # load options from properties file options_dic = preprocessing.readDictionary( "../src/properties/options.properties") fpath = preprocessing.readDictionary("../src/properties/fpath.properties") # set cuda devices if options_dic['use_cuda']: os.environ["CUDA_DEVICE_ORDER"] = "PCI_BUS_ID" os.environ["CUDA_VISIBLE_DEVICES"] = options_dic['cuda_device'] # get vocabulary vocabDics = preprocessing.loadVocabDic(["pos", "rel", "act"], fpath['vocabDic_path']) word2idx = vocabDics["word"] pos2idx = vocabDics["pos"] rel2idx = vocabDics["rel"] label2idx = vocabDics["act"]
def load_angelis_dataset(vocabDic_path, properties_path, test_id_path): vocabDics = preprocessing.loadVocabDic(["pos", "rel"], vocabDic_path) pos2idx = vocabDics["pos"] rel2idx = vocabDics["rel"] test_set = [] test_set_id = [] cycle_counter = 0 if not os.path.exists(test_id_path): raise Exception(test_id_path + " did not exit, please change to full data load mode!") with open(test_id_path, mode="r", encoding="utf-8") as t_id: t_id.readline() for line in t_id.readlines(): if str(line) != "\n": test_set_id.append(str(line).split("\n")[0]) try: properties = preprocessing.readDictionary(properties_path) connection = connect2DB(properties) with connection.cursor() as cursor: sentence_query = "SELECT * FROM sentence" annotation_query = "SELECT relation_root FROM annotation WHERE sid=%s" pred_query = "SELECT relation_root FROM angelis_preds WHERE sid=%s" cursor.execute(sentence_query) results = cursor.fetchall() for result in results: print("#============building semantic graph [{}]============#". format(result['sid'])) cursor.execute(annotation_query, (result['sid'], )) annotations = cursor.fetchall() if len(annotations) == 0: annotations = [{'relation_root': 0}] listLabel = set([ annotation['relation_root'] for annotation in annotations ]) graph = preprocessing.buildSemanticGraph( DependencyTreeStr=result['dpTreeStr'], listLabel=listLabel, use_word=False, pos2idx=pos2idx, rel2idx=rel2idx, sid=result['sid']) if graph.hasCycle: cycle_counter += 1 else: print(graph) if result["sid"] in test_set_id: cursor.execute(pred_query, result['sid']) preds = cursor.fetchall() list_pred = set( [pred['relation_root'] for pred in preds]) for word in graph.indexedWords: if word.sentence_index in list_pred: word.pred = 1 test_set.append( preprocessing.DataTuple( indexedWords=graph.indexedWords, graph=graph)) print("There are {} graphs has cycle then drop them from dataset!". format(cycle_counter)) except Exception as err: print(err) except ConnectionError as connerr: print(connerr) print("[Error] build semantic graph from db failed!") else: return test_set finally: connection.close()