예제 #1
0
def generateConfigReport(dataset, include_result=False, dump_path=None):
    """Summarize every run's config.json under a dataset's doc base.

    Scans each document directory, builds a version-keyed summary of model
    name, k/n/qk settings and description, optionally attaching test results.
    Directories that fail to parse are logged as warnings instead of
    aborting the scan.

    Parameters:
        dataset: dataset name handed to PathManager.
        include_result: if True, also read testResult.json and attach
            its 'results' entry.
        dump_path: output JSON file; defaults to <dataset base>/summary.json.
    """
    manager = PathManager(dataset)
    summary = {}
    problems = []

    for doc_dir in tqdm(os.listdir(manager.DocBase())):
        run_base = manager.DocBase() + doc_dir + '/'
        try:
            cfg = loadJson(run_base + 'config.json')
            version = int(cfg['version'])
            # Leading underscores keep these keys first under sorted dumping.
            summary[version] = {
                '__model': cfg['modelName'],
                '_k-q-qk': '-'.join(
                    [str(cfg['k']), str(cfg['n']), str(cfg['qk'])]),
                'desc': cfg['description'],
            }
            if include_result:
                # Read after the base entry is stored, matching best-effort
                # semantics: a missing result file leaves the entry in place.
                results = loadJson(run_base + 'testResult.json')
                summary[version]['results'] = results['results']
        except Exception as err:
            problems.append('Error occurred when process %s: %s' %
                            (doc_dir, str(err)))

    for message in problems:
        logging.warning(message)

    if dump_path is None:
        dump_path = manager.DatasetBase() + 'summary.json'
    dumpJson(summary, dump_path, sort=True)
예제 #2
0
def revertDatasetSplit(dataset, dump_path):
    """Restore a train/validate/test split recorded in a split dump file.

    Deletes the dataset's current split folders, then copies every folder
    named in the dump back from the 'all' pool into its split directory.

    Parameters:
        dataset: dataset name handed to PathManager.
        dump_path: JSON file mapping 'train'/'validate'/'test' to
            lists of folder names.
    """
    manager = PathManager(dataset)
    split_record = loadJson(dump_path)

    # Remove the existing split before re-materializing it.
    deleteDatasetSplit(manager.DatasetBase())

    for split_name in ['train', 'validate', 'test']:
        print(split_name)
        for item in split_record[split_name]:
            shutil.copytree(src=manager.DatasetBase() + 'all/' + item + '/',
                            dst=manager.DatasetBase() + split_name + '/' + item + '/')

    print('-- Done --')
예제 #3
0
        # Fine-tune the model on the support set for a fixed number of epochs.
        # NOTE(review): optimizer.zero_grad() is never called, so gradients
        # accumulate across iterations — confirm this is intentional.
        for i in range(ft_epoch):
            sup_preds = model(supports.view(n*k,-1), support_len)
            loss_val = loss(sup_preds, support_labels)
            loss_val.backward()

            optimizer.step()

        # Evaluate the adapted model on the query set.
        # NOTE(review): evaluation is not wrapped in no_grad(), so the graph
        # is still built here — verify this is acceptable for memory use.
        model.eval()
        preds = model(queries, query_len)
        loss_val = loss(preds, query_labels)

        acc_val += test_task.metrics(preds, acc_only=False)

        # Record the task batch's average accuracy and loss value.
        stat.record(acc_val[0], loss_val.item(), total_step=TestingEpoch)
        metrics += acc_val

# Build the run description and report the aggregated statistics.
desc = cfg.desc()
desc.append(f"{k}-shot {n}-way")
desc.append('使用%s'%USED_SUB_DATASET)
stat.report(doc_path=test_path_manager.Doc(),
            desc=desc)

# Average the accumulated metrics over all testing episodes.
# Assumes metrics indexes are [acc, precision, recall, f1] — TODO confirm.
metrics /= TestingEpoch
print('Precision:', metrics[1]*100)
print('Recall:', metrics[2]*100)
print('F1-Score:', metrics[3]*100)

# Persist the fine-tuned weights under the dataset's models directory.
t.save(model.state_dict(), test_path_manager.DatasetBase()+f'/models/FT_v{version}.0')

예제 #4
0
                shutil.copy(pj(base_dataset_path, tp, folder, item),
                            pj(new_dataset_path, tp, folder, item))

    # Copy the shared embedding matrix and word-index map into the new dataset.
    # NOTE(review): base path joins 'data/...' without a separator while the
    # new path inserts '/data/...' — assumes base_dataset_path ends with '/'
    # and tolerates a doubled slash on the destination; confirm both roots.
    shutil.copy(base_dataset_path+'data/matrix.npy',
                new_dataset_path+'/data/matrix.npy')
    shutil.copy(base_dataset_path+'data/wordMap.json',
                new_dataset_path+'/data/wordMap.json')

if __name__ == '__main__':
    # Build a "general" variant of the virushare 3-gram tf-idf dataset, then
    # regenerate the packed data files for each split of the new dataset.
    original_dataset_name = 'virushare-20-3gram-tfidf'
    new_dataset_name = 'virushare-20-3gram-tfidf-general'
    N = 10  # items taken per class when building the new dataset
    seq_len = 200  # maximum sequence length when packing the data files

    pm = PathManager(dataset=original_dataset_name)
    # Copy the base dataset into the new layout; test items are excluded here.
    makeGeneralTestDataset(base_dataset_path=pm.DatasetBase(),
                           new_dataset_path=pm.ParentBase()+new_dataset_name+'/',
                           train_num_per_class=N,
                           include_test=False)
    for d_type in ['train', 'validate', 'test']:
        manager = PathManager(dataset=new_dataset_name, d_type=d_type)

        # Pack this split's JSON items into data/sequence-length/index files
        # using the shared word-index map.
        makeDataFile(json_path=manager.Folder(),
                     w2idx_path=manager.WordIndexMap(),
                     seq_length_save_path=manager.FileSeqLen(),
                     data_save_path=manager.FileData(),
                     idx2cls_mapping_save_path=manager.FileIdx2Cls(),
                     num_per_class=N,
                     max_seq_len=seq_len)

    # fetchPeFolders(json_path='D:/datasets/virushare-20-3gram/all/',