import os
import shutil
import logging

from tqdm import tqdm

# loadJson, dumpJson, PathManager, deleteDatasetSplit etc. are project-local
# helpers assumed to be importable from this repository's utility modules.


def generateConfigReport(dataset, include_result=False, dump_path=None):
    mng = PathManager(dataset)
    report = {}
    warnings = []

    for doc_dir in tqdm(os.listdir(mng.DocBase())):
        config_path = mng.DocBase() + doc_dir + '/'
        try:
            cfg = loadJson(config_path + 'config.json')
            report[int(cfg['version'])] = {
                '__model': cfg['modelName'],    # underscore prefixes keep these keys first when sorted
                '_k-n-qk': '-'.join([str(cfg['k']), str(cfg['n']), str(cfg['qk'])]),
                'desc': cfg['description']
            }
            if include_result:
                res = loadJson(config_path + 'testResult.json')
                report[int(cfg['version'])]['results'] = res['results']
        except Exception as e:
            warnings.append('Error occurred when processing %s: %s' % (doc_dir, str(e)))

    for w in warnings:
        logging.warning(w)

    dump_path = mng.DatasetBase() + 'summary.json' if dump_path is None else dump_path
    dumpJson(report, dump_path, sort=True)
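# Usage sketch (hedged; the dataset name below is illustrative, not taken
# from this repo's configs). With dump_path=None the report is written to
# <dataset base>/summary.json:
#
#   generateConfigReport(dataset='virushare-20-3gram-tfidf',
#                        include_result=True)
#
# The dumped summary maps every experiment version to its model name, its
# 'k-n-qk' setting string and its description (plus test results if requested).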
def revertDatasetSplit(dataset, dump_path):
    man = PathManager(dataset)
    split_dump = loadJson(dump_path)

    deleteDatasetSplit(man.DatasetBase())

    for typ in ['train', 'validate', 'test']:
        # delete the existing split
        # os.system('rm -rf {path}/*'.format(path=man.DatasetBase()+typ))
        print(typ)
        for folder in split_dump[typ]:
            shutil.copytree(src=man.DatasetBase() + 'all/' + folder + '/',
                            dst=man.DatasetBase() + typ + '/' + folder + '/')

    print('-- Done --')
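# Usage sketch (hedged; the dump file name is illustrative). The dump is
# expected to map 'train'/'validate'/'test' to lists of class-folder names
# that still exist under <dataset base>/all/:
#
#   revertDatasetSplit(dataset='virushare-20-3gram-tfidf',
#                      dump_path='split_dump.json')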
# `t` is torch (imported as `t` earlier in this script); model, optimizer,
# loss, stat and the task tensors all come from the surrounding script.
for i in range(ft_epoch):
    # fine-tune on the support set
    sup_preds = model(supports.view(n * k, -1), support_len)
    loss_val = loss(sup_preds, support_labels)

    optimizer.zero_grad()   # clear stale gradients before each update
    loss_val.backward()
    optimizer.step()

# evaluate the fine-tuned model on the query set
model.eval()
with t.no_grad():   # no gradient tracking needed during evaluation
    preds = model(queries, query_len)
    loss_val = loss(preds, query_labels)

acc_val += test_task.metrics(preds, acc_only=False)

# record the batch-averaged accuracy and loss of this task
stat.record(acc_val[0], loss_val.item(), total_step=TestingEpoch)
metrics += acc_val

desc = cfg.desc()
desc.append(f"{k}-shot {n}-way")
desc.append('using %s' % USED_SUB_DATASET)

stat.report(doc_path=test_path_manager.Doc(), desc=desc)

metrics /= TestingEpoch
print('Precision:', metrics[1] * 100)
print('Recall:', metrics[2] * 100)
print('F1-Score:', metrics[3] * 100)

t.save(model.state_dict(), test_path_manager.DatasetBase() + f'/models/FT_v{version}.0')
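# Sketch of the accumulator setup this fragment assumes (inferred, not shown
# in the excerpt): acc_val/metrics hold [acc, precision, recall, f1], and the
# fine-tune/evaluate/record portion above runs once per testing episode while
# the report, averaging and prints run after the episode loop:
#
#   import numpy as np
#   metrics = np.zeros(4)
#   for episode in range(TestingEpoch):
#       acc_val = 0.
#       ...  # fine-tune on supports, evaluate on queries, stat.record(...)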
            # tail of makeGeneralTestDataset(): this copy runs inside the
            # per-item loops above; `pj` is assumed to alias os.path.join
            shutil.copy(pj(base_dataset_path, tp, folder, item),
                        pj(new_dataset_path, tp, folder, item))

    # copy the shared embedding matrix and word-index map into the new dataset
    shutil.copy(base_dataset_path + 'data/matrix.npy',
                new_dataset_path + 'data/matrix.npy')
    shutil.copy(base_dataset_path + 'data/wordMap.json',
                new_dataset_path + 'data/wordMap.json')


if __name__ == '__main__':
    original_dataset_name = 'virushare-20-3gram-tfidf'
    new_dataset_name = 'virushare-20-3gram-tfidf-general'
    N = 10          # training samples per class
    seq_len = 200   # maximum sequence length

    pm = PathManager(dataset=original_dataset_name)
    makeGeneralTestDataset(base_dataset_path=pm.DatasetBase(),
                           new_dataset_path=pm.ParentBase() + new_dataset_name + '/',
                           train_num_per_class=N,
                           include_test=False)

    for d_type in ['train', 'validate', 'test']:
        manager = PathManager(dataset=new_dataset_name, d_type=d_type)
        makeDataFile(json_path=manager.Folder(),
                     w2idx_path=manager.WordIndexMap(),
                     seq_length_save_path=manager.FileSeqLen(),
                     data_save_path=manager.FileData(),
                     idx2cls_mapping_save_path=manager.FileIdx2Cls(),
                     num_per_class=N,
                     max_seq_len=seq_len)

    # fetchPeFolders(json_path='D:/datasets/virushare-20-3gram/all/',
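# Resulting layout (sketch, assuming PathManager's usual directory scheme):
#
#   <parent>/virushare-20-3gram-tfidf-general/
#       train/ | validate/ | test/    # class folders copied per split
#       data/matrix.npy               # shared embedding matrix
#       data/wordMap.json             # token -> index map
#
# plus the per-split sequence/length/index files written by makeDataFile.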