示例#1
0
# Resolve all dataset/model file locations for this run.
path_man = PathManager(dataset=dataset_name,
                       d_type=dataset_subtype,
                       model_name=model_name_norm,
                       version=version)
################################################
#-------------- Read model parameters ----------
################################################

# SIMPLE keeps its config beside its doc folder; all other models
# fall back to the shared run config (path relative to the CWD —
# NOTE(review): breaks if the script is launched from elsewhere).
if model_name == 'SIMPLE':
    model_cfg = TrainingConfigManager(path_man.Doc() + 'config.json')
else:
    model_cfg = TrainingConfigManager('../run/runConfig.json')

modelParams = model_cfg.modelParams()

# Sequence dataset batched in groups of N, collated without padding.
dataset = SeqFileDataset(path_man.FileData(), path_man.FileSeqLen(), N=N)
dataloader = DataLoader(dataset,
                        batch_size=N,
                        collate_fn=batchSequenceWithoutPad)

# Embedding weights: taken from the trained checkpoint, except for the
# 'Random' baseline which loads the raw pre-trained embedding matrix.
if model_name != 'Random':
    state_dict = t.load(path_man.Model() + '_v%s.0' % version)
    word_matrix = state_dict['Embedding.weight']
else:
    word_matrix = t.Tensor(
        np.load(path_man.WordEmbedMatrix(), allow_pickle=True))

# NLL loss on GPU (assumes CUDA is available — TODO confirm).
loss_fn = t.nn.NLLLoss().cuda()

if model_name == 'SIMPLE':
    model = SIMPLE(word_matrix, **modelParams)
示例#2
0
# Data and model may live under different dataset roots, so two managers.
data_path_man = PathManager(dataset=data_dataset_name,
                           d_type=dataset_subtype)
model_path_man = PathManager(dataset=model_dataset_name,
                             version=version,
                             model_name=model_name)

################################################
#-------------- Read model parameters ----------
################################################

model_cfg = TrainingConfigManager(model_path_man.Doc()+'config.json')

modelParams = model_cfg.modelParams()

dataset = SeqFileDataset(data_path_man.FileData(), data_path_man.FileSeqLen(), N=N)

# Load the versioned checkpoint and reuse its embedding weights
# when constructing the fresh model below.
state_dict = t.load(model_path_man.Model() + '_v%s.0' % version)
# state_dict = t.load(path_man.DatasetBase()+'models/ProtoNet_v105.0')
word_matrix = state_dict['Embedding.weight']

# Dispatch on model_name to instantiate the requested architecture,
# all sharing the checkpoint's embedding matrix.
if model_name == 'IMP':
    model = IMP(word_matrix,
                **modelParams)
elif model_name == 'SIMPLE':
    model = SIMPLE(word_matrix,
                   **modelParams)
elif model_name == 'HybridIMP':
    model = HybridIMP(word_matrix,
                      **modelParams)
elif model_name == 'ProtoNet':
示例#3
0
# Pick the training criterion from the config: NLL for 'nll', MSE otherwise.
loss = t.nn.NLLLoss().cuda() \
    if loss_func=='nll' else \
    t.nn.MSELoss().cuda()

printState('init managers...')
# Separate path managers for the train and validation splits of the
# same dataset folder.
train_path_manager = PathManager(dataset=data_folder,
                                 d_type='train',
                                 model_name=model_name,
                                 version=version)
val_path_manager = PathManager(dataset=data_folder,
                               d_type='validate',
                               model_name=model_name,
                               version=version)

train_dataset = SeqFileDataset(train_path_manager.FileData(),
                               train_path_manager.FileSeqLen(),
                               N)
val_dataset = SeqFileDataset(val_path_manager.FileData(),
                               val_path_manager.FileSeqLen(),
                               N)
# train_dataset = ImageFileDataset(train_path_manager.FileData(), N, rd_crop_size=224)
# val_dataset = ImageFileDataset(val_path_manager.FileData(), N, rd_crop_size=224)

# train_task = MatrixProtoEpisodeTask(k ,qk, n, N,
#                         dataset=train_dataset,
#                         cuda=True,
#                         label_expand=expand,
#                         unsqueeze=False)
# val_task = MatrixProtoEpisodeTask(k ,qk, n, N,
#                         dataset=val_dataset,
示例#4
0
                                d_type=USED_SUB_DATASET,
                                model_name=model_name,
                                version=version)

################################################
#-------------- Read model parameters ----------
################################################

model_cfg = TrainingConfigManager(test_path_manager.Doc() + 'config.json')

modelParams = model_cfg.modelParams()

# Unpack the training hyper-parameters stored in the run's config file.
LRDecayIters, LRDecayGamma, optimizer_type,\
weight_decay, loss_func, default_lr, lrs, taskBatchSize = model_cfg.trainingParams()

test_dataset = SeqFileDataset(test_path_manager.FileData(),
                              test_path_manager.FileSeqLen(), N)

# Expand labels when the loss is MSE (presumably one-hot targets — verify
# against the task implementation).
expand = True if loss_func == 'mse' else False

# Models listed in ADAPTED_MODELS get the adaptation-style episode task;
# everything else gets the prototypical episode task.
if model_type in ADAPTED_MODELS:
    test_task = AdaptEpisodeTask(k,
                                 qk,
                                 n,
                                 N,
                                 test_dataset,
                                 cuda=True,
                                 expand=expand)
else:
    test_task = ProtoEpisodeTask(k,
                                 qk,
示例#5
0
# splitDatas(src=man.DatasetBase()+'train/',
#            dest=man.DatasetBase()+'test/',
#            ratio=30,
#            mode='x',
#            is_dir=True)
################################################################

# Build the index-based dataset files
################################################################
# Produce the index-based data files for every split of the
# 'virushare-20-3gram-tfidf' dataset: 20 samples per class,
# sequences truncated/limited to 700 tokens.
for split in ('train', 'validate', 'test'):
    split_man = PathManager(dataset='virushare-20-3gram-tfidf', d_type=split)

    makeDataFile(
        json_path=split_man.Folder(),
        w2idx_path=split_man.WordIndexMap(),
        seq_length_save_path=split_man.FileSeqLen(),
        data_save_path=split_man.FileData(),
        idx2cls_mapping_save_path=split_man.FileIdx2Cls(),
        num_per_class=20,
        max_seq_len=700,
    )
################################################################

# renameItemFolder('/home/asichurter/datasets/JSONs/LargePE-100-original/')

# Inspect the distribution of API-call sequence lengths
################################################################
# apiStat('/home/asichurter/datasets/JSONs/HKS/all/',
#         ratio_stairs=[50, 100, 200, 400, 500, 1000, 2000, 5000],
#         dump_report_path=None,#'/home/asichurter/datasets/reports/HKS_3gram_tfidf_api_report.json',#None,#
#         dump_apiset_path=None,#'/home/asichurter/datasets/reports/HKS_3gram_tfidf_api_set.json',#None
#         class_dir=True)
################################################################