def test_eval(config):
    """ test evaluate """
    # Build the evaluation dataset from the configured location.
    # NOTE(review): sibling eval entry points in this file build the eval
    # dataset with epochs=1 — confirm epochs=2 is intended here.
    ds_eval = create_dataset(config.data_path, train_mode=False, epochs=2,
                             batch_size=config.batch_size)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    # Construct the train/eval network pair and restore the trained weights
    # into the evaluation graph only.
    builder = ModelBuilder()
    train_net, eval_net = builder.get_net(config)
    load_param_into_net(eval_net, load_checkpoint(config.ckpt_path))

    # Evaluate with AUC as the reported metric.
    metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": metric})
    model.eval(ds_eval, callbacks=EvalCallBack(model, ds_eval, metric, config))
def test_eval(config):
    """ test evaluate """
    data_path = config.data_path
    batch_size = config.batch_size
    # Map the configured dataset-type string onto the loader's DataType enum;
    # anything other than tfrecord/mindrecord falls back to H5.
    if config.dataset_type == "tfrecord":
        dataset_type = DataType.TFRECORD
    elif config.dataset_type == "mindrecord":
        dataset_type = DataType.MINDRECORD
    else:
        dataset_type = DataType.H5
    ds_eval = create_dataset(data_path, train_mode=False, epochs=1,
                             batch_size=batch_size, data_type=dataset_type)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))
    net_builder = ModelBuilder()
    train_net, eval_net = net_builder.get_net(config)
    ckpt_path = config.ckpt_path
    # A ';'-separated ckpt_path means the checkpoint was saved as per-device
    # slices under model parallelism: gather each parameter's slices from all
    # files, then merge them back into full tensors using the saved strategy.
    if ";" in ckpt_path:
        ckpt_paths = ckpt_path.split(';')
        param_list_dict = {}
        strategy = build_searched_strategy(config.stra_ckpt)
        for slice_path in ckpt_paths:
            param_slice_dict = load_checkpoint(slice_path)
            for key, value in param_slice_dict.items():
                # Optimizer state is not needed for evaluation.
                if 'optimizer' in key:
                    continue
                if key not in param_list_dict:
                    param_list_dict[key] = []
                param_list_dict[key].append(value)
        param_dict = {}
        for key, value in param_list_dict.items():
            # Parameters covered by the strategy are merged according to it;
            # all others are merged with the default layout.
            if key in strategy:
                merged_parameter = merge_sliced_parameter(value, strategy)
            else:
                merged_parameter = merge_sliced_parameter(value)
            param_dict[key] = merged_parameter
    else:
        # Single (unsliced) checkpoint file.
        param_dict = load_checkpoint(ckpt_path)
    load_param_into_net(eval_net, param_dict)
    auc_metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})
    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)
    model.eval(ds_eval, callbacks=eval_callback)
def __eval(self):
    """Restore weights from the configured checkpoint and evaluate the network."""
    # import
    from mindspore import Model, load_param_into_net, load_checkpoint
    from mindspore.nn.metrics import Accuracy

    # Guard clause: without a checkpoint there is nothing to evaluate.
    if not self.__ckpt_path:
        print(
            "Warning: `ckpt_path` is None, Please call func: `set_ckpt_path($ckpt_path)`."
        )
        return
    load_param_into_net(self.__network, load_checkpoint(self.__ckpt_path))

    # Fall back to plain accuracy when the caller supplied no metrics.
    chosen_metrics = ({"Accuracy": Accuracy()}
                      if self.__metrics is None else self.__metrics)
    model = Model(self.__network,
                  loss_fn=self.__loss_fn,
                  optimizer=self.__optimizer,
                  metrics=chosen_metrics)

    # eval
    print(">>>>>>>>>>>>>>>>>>>>> eval start ... <<<<<<<<<<<<<<<<<<<<<<")
    result = model.eval(self.__dataset)
    print(
        ">>>>>>>>>>>>>>>>>>>>> eval success ~ <<<<<<<<<<<<<<<<<<<<<<: result=",
        result)
def test_net(network, data_path, ckpt):
    """Evaluate `network` on the test split and print its accuracy.

    Args:
        network: network instance to evaluate (weights are loaded in place).
        data_path: root directory of the evaluation dataset.
        ckpt: path of the checkpoint file to restore.
    """
    print("============== Starting Testing ==============")
    # Load the saved model weights directly into the network under test.
    load_checkpoint(ckpt, net=network)
    # Load the testing dataset.
    ds_eval = create_dataset(False, data_path)
    net_loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    # BUG FIX: wrap the `network` parameter — the original referenced an
    # undefined global `resnet` here. (Dead commented-out GPT config removed.)
    model = Model(network, net_loss, metrics={"Accuracy": Accuracy()})
    acc = model.eval(ds_eval, dataset_sink_mode=False)
    print("============== Accuracy:{} ==============".format(acc))
def train_and_eval(config):
    """ test_train_eval """
    np.random.seed(1000)
    num_epochs = config.epochs
    print("epochs is {}".format(num_epochs))

    # Per-rank sharded pipelines; the eval dataset gets one extra pass,
    # presumably covering the initial model.eval() below plus the per-epoch
    # evaluations driven by the callback.
    shard_kwargs = dict(batch_size=config.batch_size,
                        rank_id=get_rank(), rank_size=get_group_size())
    ds_train = create_dataset(config.data_path, train_mode=True,
                              epochs=num_epochs, **shard_kwargs)
    ds_eval = create_dataset(config.data_path, train_mode=False,
                             epochs=num_epochs + 1, **shard_kwargs)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    train_net, eval_net = ModelBuilder().get_net(config)
    train_net.set_train()

    metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": metric})
    eval_cb = EvalCallBack(model, ds_eval, metric, config)
    loss_cb = LossCallBack(config=config)
    ckpt_cb = ModelCheckpoint(
        prefix='widedeep_train', directory=config.ckpt_path,
        config=CheckpointConfig(
            save_checkpoint_steps=ds_train.get_dataset_size(),
            keep_checkpoint_max=5))

    # Sanity evaluation of the freshly initialized model before training.
    out = model.eval(ds_eval)
    print("=====" * 5 + "model.eval() initialized: {}".format(out))
    model.train(num_epochs, ds_train,
                callbacks=[TimeMonitor(ds_train.get_dataset_size()),
                           eval_cb, loss_cb, ckpt_cb])
def test_train_eval():
    """ test_train_eval """
    np.random.seed(1000)
    config = WideDeepConfig()
    data_path = config.data_path
    batch_size = config.batch_size
    epochs = config.epochs
    print("epochs is {}".format(epochs))
    # Distributed data pipelines: each rank reads its own shard.
    ds_train = create_dataset(data_path, train_mode=True, epochs=1,
                              batch_size=batch_size, rank_id=get_rank(),
                              rank_size=get_group_size())
    ds_eval = create_dataset(data_path, train_mode=False, epochs=1,
                             batch_size=batch_size, rank_id=get_rank(),
                             rank_size=get_group_size())
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))
    net_builder = ModelBuilder()
    train_net, eval_net = net_builder.get_net(config)
    train_net.set_train()
    auc_metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})
    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)
    callback = LossCallBack(config=config)
    # Checkpoint once per epoch (every ds_train.get_dataset_size() steps).
    ckptconfig = CheckpointConfig(
        save_checkpoint_steps=ds_train.get_dataset_size(),
        keep_checkpoint_max=5)
    ckpoint_cb = ModelCheckpoint(prefix='widedeep_train',
                                 directory=config.ckpt_path,
                                 config=ckptconfig)
    # Sanity evaluation of the randomly initialized model before training.
    out = model.eval(ds_eval)
    print("=====" * 5 + "model.eval() initialized: {}".format(out))
    model.train(epochs, ds_train,
                callbacks=[
                    TimeMonitor(ds_train.get_dataset_size()), eval_callback,
                    callback, ckpoint_cb
                ])
    # Expected AUC trajectories (presumably one value per epoch recorded by
    # the eval callback); only ranks 0 and 6 are spot-checked.
    expect_out0 = [0.792634, 0.799862, 0.803324]
    expect_out6 = [0.796580, 0.803908, 0.807262]
    if get_rank() == 0:
        assert np.allclose(eval_callback.eval_values, expect_out0)
    if get_rank() == 6:
        assert np.allclose(eval_callback.eval_values, expect_out6)
def resnet50_train(args):
    """Training the ResNet-50."""
    # NOTE(review): `device_id` and `device_num` are not defined in this
    # function — presumably module-level globals set at import time; confirm.
    epoch_size = args.epoch_size
    batch_size = 32
    class_num = 10
    loss_scale_num = 1024
    local_data_path = '/cache/data'

    # set graph mode and parallel mode
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                        save_graphs=False)
    context.set_context(device_id=device_id)
    if device_num > 1:
        context.set_auto_parallel_context(
            device_num=device_num,
            parallel_mode=ParallelMode.DATA_PARALLEL,
            gradients_mean=True)
        init()
        # Give each device its own private copy of the downloaded data.
        local_data_path = os.path.join(local_data_path, str(device_id))

    # data download
    print('Download data.')
    mox.file.copy_parallel(src_url=args.data_url, dst_url=local_data_path)

    # create dataset
    print('Create train and evaluate dataset.')
    train_dataset = create_dataset(dataset_path=local_data_path, do_train=True,
                                   repeat_num=1, batch_size=batch_size)
    eval_dataset = create_dataset(dataset_path=local_data_path, do_train=False,
                                  repeat_num=1, batch_size=batch_size)
    train_step_size = train_dataset.get_dataset_size()
    print('Create dataset success.')

    # create model
    net = resnet50(class_num=class_num)
    # reduction='mean' means that apply reduction of mean to loss
    loss = SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean')
    lr = Tensor(get_lr(global_step=0, total_epochs=epoch_size,
                       steps_per_epoch=train_step_size))
    opt = Momentum(net.trainable_params(), lr, momentum=0.9, weight_decay=1e-4,
                   loss_scale=loss_scale_num)
    loss_scale = FixedLossScaleManager(loss_scale_num, False)

    # amp_level="O2" means that the hybrid precision of O2 mode is used for
    # training the whole network except that batchnorm will be cast into
    # float16 format and dynamic loss scale will be used
    # 'keep_batchnorm_fp32 = False' means that use the float16 format
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'},
                  amp_level="O2", keep_batchnorm_fp32=False,
                  loss_scale_manager=loss_scale)

    # define performance callback to show ips and loss callback to show loss
    # for every epoch
    performance_cb = PerformanceCallback(batch_size)
    loss_cb = LossMonitor()
    cb = [performance_cb, loss_cb]

    print(f'Start run training, total epoch: {epoch_size}.')
    model.train(epoch_size, train_dataset, callbacks=cb)

    # Only device 0 runs evaluation in the multi-device case.
    if device_num == 1 or device_id == 0:
        print(f'Start run evaluation.')
        output = model.eval(eval_dataset)
        print(f'Evaluation result: {output}.')
def test_train_eval(config):
    """ test_train_eval """
    # Resolve the dataset backend from the config string (H5 is the default).
    backend = {"tfrecord": DataType.TFRECORD,
               "mindrecord": DataType.MINDRECORD}.get(config.dataset_type,
                                                      DataType.H5)

    ds_train = create_dataset(config.data_path, train_mode=True, epochs=1,
                              batch_size=config.batch_size, data_type=backend)
    ds_eval = create_dataset(config.data_path, train_mode=False, epochs=1,
                             batch_size=config.batch_size, data_type=backend)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    train_net, eval_net = ModelBuilder().get_net(config)
    train_net.set_train()

    metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": metric})
    eval_cb = EvalCallBack(model, ds_eval, metric, config)
    loss_cb = LossCallBack(config=config)
    steps_per_epoch = ds_train.get_dataset_size()
    ckpt_cb = ModelCheckpoint(
        prefix='widedeep_train', directory=config.ckpt_path,
        config=CheckpointConfig(save_checkpoint_steps=steps_per_epoch,
                                keep_checkpoint_max=5))

    # Evaluate once before training as a sanity check.
    out = model.eval(ds_eval)
    print("=====" * 5 + "model.eval() initialized: {}".format(out))
    # Sink mode is disabled when config.sparse is set (presumably the sparse
    # pipeline does not support dataset sinking).
    model.train(config.epochs, ds_train,
                callbacks=[TimeMonitor(steps_per_epoch), eval_cb,
                           loss_cb, ckpt_cb],
                dataset_sink_mode=(not config.sparse))
def test_eval(config):
    """ test evaluate """
    data_path = config.data_path
    ckpt_path = config.ckpt_path
    batch_size = config.batch_size
    # Map the configured dataset-type string to the loader enum (H5 default).
    if config.dataset_type == "tfrecord":
        dataset_type = DataType.TFRECORD
    elif config.dataset_type == "mindrecord":
        dataset_type = DataType.MINDRECORD
    else:
        dataset_type = DataType.H5
    # data upload
    print('Upload data from obs to modelarts server.')
    mox.file.copy_parallel(src_url=config.data_url, dst_url=data_path)
    mox.file.copy_parallel(src_url=config.ckpt_url, dst_url=ckpt_path)
    # NOTE(review): the plain string concatenations below assume data_path
    # ends with a path separator — confirm against the config defaults.
    tar_file = data_path + "train_demo.tar.gz"
    untar(tar_file, data_path)
    data_path = data_path + config.dataset_type
    ds_eval = create_dataset(data_path, train_mode=False, epochs=1,
                             batch_size=batch_size, data_type=dataset_type)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))
    net_builder = ModelBuilder()
    train_net, eval_net = net_builder.get_net(config)
    # find_ckpt presumably locates the checkpoint file inside the downloaded
    # directory; weights go into the eval graph only.
    param_dict = load_checkpoint(find_ckpt(ckpt_path))
    load_param_into_net(eval_net, param_dict)
    auc_metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": auc_metric})
    eval_callback = EvalCallBack(model, ds_eval, auc_metric, config)
    model.eval(ds_eval, callbacks=eval_callback)
def test_model_eval_error():
    """ test_model_eval_error """
    fake_types = (np.float32, np.float32)
    fake_shapes = ((32, 3, 224, 224), (32, 3))
    fake_ds = MindData(size=2, batch_size=32, np_types=fake_types,
                       output_shapes=fake_shapes, input_indexs=())
    relu = nn.ReLU()
    ce_loss = nn.SoftmaxCrossEntropyWithLogits()
    context.set_context(mode=context.GRAPH_MODE)

    # eval() must reject a model constructed without any metrics ...
    no_metrics_model = Model(relu, ce_loss)
    with pytest.raises(ValueError):
        no_metrics_model.eval(fake_ds)

    # ... and likewise one constructed with an empty metrics dict.
    empty_metrics_model = Model(relu, ce_loss, metrics={})
    with pytest.raises(ValueError):
        empty_metrics_model.eval(fake_ds)
def run_eval():
    """Evaluate a trained NCF model and append HR/NDCG to the result file."""
    if not os.path.exists(config.output_path):
        os.makedirs(config.output_path)
    context.set_context(mode=context.GRAPH_MODE, device_target="Davinci",
                        save_graphs=False, device_id=get_device_id())

    layers = config.layers
    num_factors = config.num_factors
    topk = rconst.TOP_K
    num_eval_neg = rconst.NUM_EVAL_NEGATIVES

    ds_eval, num_eval_users, num_eval_items = create_dataset(
        test_train=False, data_dir=config.data_path, dataset=config.dataset,
        train_epochs=0, eval_batch_size=config.eval_batch_size)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    ncf_net = NCFModel(num_users=num_eval_users,
                       num_items=num_eval_items,
                       num_factors=num_factors,
                       model_layers=layers,
                       mf_regularization=0,
                       mlp_reg_layers=[0.0, 0.0, 0.0, 0.0],
                       mf_dim=16)
    param_dict = load_checkpoint(config.checkpoint_file_path)
    load_param_into_net(ncf_net, param_dict)

    loss_net = NetWithLossClass(ncf_net)
    train_net = TrainStepWrap(loss_net)
    eval_net = PredictWithSigmoid(ncf_net, topk, num_eval_neg)

    ncf_metric = NCFMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"ncf": ncf_metric})
    ncf_metric.clear()
    out = model.eval(ds_eval)

    # FIX: open the result file with a context manager so the handle is
    # closed even if the write raises (the original leaked it on error).
    eval_file_path = os.path.join(config.output_path, config.eval_file_name)
    with open(eval_file_path, "a+") as eval_file:
        eval_file.write("EvalCallBack: HR = {}, NDCG = {}\n".format(
            out['ncf'][0], out['ncf'][1]))
    print("EvalCallBack: HR = {}, NDCG = {}".format(out['ncf'][0],
                                                    out['ncf'][1]))
    print("=" * 100 + "Eval Finish!" + "=" * 100)
def test_eval():
    """Model.eval must raise ValueError for each misconfigured metric setup."""
    dataset_types = (np.float32, np.float32)
    dataset_shapes = ((32, 3, 224, 224), (32, 3))
    dataset = MindData(size=2, batch_size=32, np_types=dataset_types,
                       output_shapes=dataset_shapes, input_indexs=(0, 1))
    net = Net()
    context.set_context(mode=context.GRAPH_MODE)

    model = Model(net, loss_fn=nn.SoftmaxCrossEntropyWithLogits(),
                  metrics={"loss"})
    with pytest.raises(ValueError):
        model.eval(dataset)

    net2 = LossNet()
    model2 = Model(net2, eval_network=net2, eval_indexes=[0, 1, 2],
                   metrics={"loss"})
    with pytest.raises(ValueError):
        model2.eval(dataset)

    # FIX: removed a dead `_ = LossNet()` statement whose result was unused.
    model3 = Model(net2, eval_network=net2, metrics={"loss"})
    with pytest.raises(ValueError):
        model3.eval(dataset)
def test_net(data_dir, ckpt_path, cross_valid_ind=1, cfg=None):
    """Evaluate a trained UNet on the validation split and print the dice score."""
    unet = UNet(n_channels=cfg['num_channels'], n_classes=cfg['num_classes'])
    load_param_into_net(unet, load_checkpoint(ckpt_path))

    loss = CrossEntropyWithLogits()
    _, valid_ds = create_dataset(data_dir, 1, 1, False, cross_valid_ind, False)
    evaluator = Model(unet, loss_fn=loss, metrics={"dice_coeff": dice_coeff()})

    print("============== Starting Evaluating ============")
    score = evaluator.eval(valid_ds, dataset_sink_mode=False)
    print("Cross valid dice coeff is:", score)
def train_and_eval(config):
    """ test_train_eval """
    set_seed(1000)
    epochs = config.epochs
    # Resolve the dataset backend from the config string (H5 is the default).
    backend = {"tfrecord": DataType.TFRECORD,
               "mindrecord": DataType.MINDRECORD}.get(config.dataset_type,
                                                      DataType.H5)
    print("epochs is {}".format(epochs))

    # Per-rank sharded train/eval pipelines.
    shard = dict(batch_size=config.batch_size, rank_id=get_rank(),
                 rank_size=get_group_size(), data_type=backend)
    ds_train = create_dataset(config.data_path, train_mode=True,
                              epochs=1, **shard)
    ds_eval = create_dataset(config.data_path, train_mode=False,
                             epochs=1, **shard)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    train_net, eval_net = ModelBuilder().get_net(config)
    train_net.set_train()

    metric = AUCMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"auc": metric})
    eval_cb = EvalCallBack(model, ds_eval, metric, config)
    loss_cb = LossCallBack(config=config)
    steps = ds_train.get_dataset_size()
    ckpt_cb = ModelCheckpoint(
        prefix='widedeep_train',
        directory=config.ckpt_path + '/ckpt_' + str(get_rank()) + '/',
        config=CheckpointConfig(save_checkpoint_steps=steps,
                                keep_checkpoint_max=5))

    # Sanity evaluation of the untrained model.
    out = model.eval(ds_eval)
    print("=====" * 5 + "model.eval() initialized: {}".format(out))

    # Only rank 0 writes checkpoints; every rank keeps timing/eval/loss.
    callbacks = [TimeMonitor(steps), eval_cb, loss_cb]
    if get_rank() == 0:
        callbacks.append(ckpt_cb)
    model.train(epochs, ds_train, callbacks=callbacks, sink_size=steps)
def test_net(data_dir, ckpt_path, cross_valid_ind=1, cfg=None):
    """Evaluate the configured UNet variant; print dice coefficient and IOU."""
    arch = cfg['model']
    if arch == 'unet_medical':
        net = UNetMedical(n_channels=cfg['num_channels'],
                          n_classes=cfg['num_classes'])
    elif arch == 'unet_nested':
        net = NestedUNet(in_channel=cfg['num_channels'],
                         n_class=cfg['num_classes'],
                         use_deconv=cfg['use_deconv'],
                         use_bn=cfg['use_bn'],
                         use_ds=False)
    elif arch == 'unet_simple':
        net = UNet(in_channel=cfg['num_channels'], n_class=cfg['num_classes'])
    else:
        raise ValueError("Unsupported model: {}".format(cfg['model']))

    load_param_into_net(net, load_checkpoint(ckpt_path))
    net = UnetEval(net)

    # The Cell_nuclei dataset has a dedicated loader; everything else goes
    # through the generic cross-validation split.
    if 'dataset' in cfg and cfg['dataset'] == "Cell_nuclei":
        valid_dataset = create_cell_nuclei_dataset(
            data_dir, cfg['img_size'], 1, 1, is_train=False,
            eval_resize=cfg["eval_resize"], split=0.8)
    else:
        _, valid_dataset = create_dataset(data_dir, 1, 1, False,
                                          cross_valid_ind, False,
                                          do_crop=cfg['crop'],
                                          img_size=cfg['img_size'])

    evaluator = Model(net, loss_fn=TempLoss(),
                      metrics={"dice_coeff": dice_coeff()})
    print("============== Starting Evaluating ============")
    scores = evaluator.eval(valid_dataset, dataset_sink_mode=False)["dice_coeff"]
    print("============== Cross valid dice coeff is:", scores[0])
    print("============== Cross valid IOU is:", scores[1])
def test_net(data_dir, ckpt_path, cross_valid_ind=1, cfg=None):
    """Evaluate the configured UNet variant and print its dice score."""
    arch = cfg['model']
    if arch == 'unet_medical':
        net = UNetMedical(n_channels=cfg['num_channels'],
                          n_classes=cfg['num_classes'])
    elif arch == 'unet_nested':
        net = NestedUNet(in_channel=cfg['num_channels'],
                         n_class=cfg['num_classes'])
    elif arch == 'unet_simple':
        net = UNet(in_channel=cfg['num_channels'], n_class=cfg['num_classes'])
    else:
        raise ValueError("Unsupported model: {}".format(cfg['model']))

    load_param_into_net(net, load_checkpoint(ckpt_path))

    loss = CrossEntropyWithLogits()
    _, valid_ds = create_dataset(data_dir, 1, 1, False, cross_valid_ind, False,
                                 do_crop=cfg['crop'], img_size=cfg['img_size'])
    evaluator = Model(net, loss_fn=loss, metrics={"dice_coeff": dice_coeff()})

    print("============== Starting Evaluating ============")
    score = evaluator.eval(valid_ds, dataset_sink_mode=False)
    print("============== Cross valid dice coeff is:", score)
# Sentiment-analysis (LSTM) evaluation script body: optionally preprocess,
# rebuild the network, restore a checkpoint and report test accuracy.
if args.preprocess == "true":
    print("============== Starting Data Pre-processing ==============")
    convert_to_mindrecord(cfg.embed_size, args.aclimdb_path,
                          args.preprocess_path, args.glove_path)

# Pre-trained embedding table written by the preprocessing step.
embedding_table = np.loadtxt(os.path.join(args.preprocess_path,
                                          "weight.txt")).astype(np.float32)
network = SentimentNet(vocab_size=embedding_table.shape[0],
                       embed_size=cfg.embed_size,
                       num_hiddens=cfg.num_hiddens,
                       num_layers=cfg.num_layers,
                       bidirectional=cfg.bidirectional,
                       num_classes=cfg.num_classes,
                       weight=Tensor(embedding_table),
                       batch_size=cfg.batch_size)
loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True)
opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum)
loss_cb = LossMonitor()
model = Model(network, loss, opt, {'acc': Accuracy()})
print("============== Starting Testing ==============")
ds_eval = create_dataset(args.preprocess_path, cfg.batch_size, training=False)
param_dict = load_checkpoint(args.ckpt_path)
load_param_into_net(network, param_dict)
# Sink mode is disabled on CPU (presumably dataset sinking is unsupported
# there — confirm against the target MindSpore version).
if args.device_target == "CPU":
    acc = model.eval(ds_eval, dataset_sink_mode=False)
else:
    acc = model.eval(ds_eval)
print("============== Accuracy:{} ==============".format(acc))
# DeepLabV3 evaluation entry: parse CLI arguments, set up the Ascend context,
# then build the network, restore the checkpoint and run mIOU evaluation.
from src.config import config

parser = argparse.ArgumentParser(description="Deeplabv3 evaluation")
parser.add_argument('--epoch_size', type=int, default=2, help='Epoch size.')
parser.add_argument("--device_id", type=int, default=0,
                    help="Device id, default is 0.")
parser.add_argument('--batch_size', type=int, default=2, help='Batch size.')
parser.add_argument('--data_url', required=True, default=None,
                    help='Evaluation data url')
parser.add_argument('--checkpoint_url', default=None, help='Checkpoint path')
args_opt = parser.parse_args()
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend",
                    device_id=args_opt.device_id)
print(args_opt)

if __name__ == "__main__":
    # The configured crop size doubles as the base size for evaluation.
    args_opt.crop_size = config.crop_size
    args_opt.base_size = config.crop_size
    eval_dataset = create_dataset(args_opt, args_opt.data_url,
                                  args_opt.epoch_size, args_opt.batch_size,
                                  usage="eval")
    net = deeplabv3_resnet50(config.seg_num_classes,
                             [args_opt.batch_size, 3, args_opt.crop_size,
                              args_opt.crop_size],
                             infer_scale_sizes=config.eval_scales,
                             atrous_rates=config.atrous_rates,
                             decoder_output_stride=config.decoder_output_stride,
                             output_stride=config.output_stride,
                             fine_tune_batch_norm=config.fine_tune_batch_norm,
                             image_pyramid=config.image_pyramid)
    param_dict = load_checkpoint(args_opt.checkpoint_url)
    load_param_into_net(net, param_dict)
    # Mean IOU over the segmentation classes is the evaluation metric.
    mIou = MiouPrecision(config.seg_num_classes)
    metrics = {'mIou': mIou}
    loss = OhemLoss(config.seg_num_classes, config.ignore_label)
    model = Model(net, loss, metrics=metrics)
    model.eval(eval_dataset)
# ResNet-50 / CIFAR-10 script body: optional training phase followed by an
# optional evaluation phase, both gated by CLI flags.
init()
epoch_size = args_opt.epoch_size
net = resnet50(args_opt.batch_size, args_opt.num_classes)
ls = SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
# Only parameters that require gradients are handed to the optimizer.
opt = Momentum(filter(lambda x: x.requires_grad, net.get_parameters()),
               0.01, 0.9)
model = Model(net, loss_fn=ls, optimizer=opt, metrics={'acc'})

# as for train, users could use model.train
if args_opt.do_train:
    dataset = create_dataset()
    batch_num = dataset.get_dataset_size()
    # Checkpoint once per epoch, keeping at most 35 checkpoint files.
    config_ck = CheckpointConfig(save_checkpoint_steps=batch_num,
                                 keep_checkpoint_max=35)
    ckpoint_cb = ModelCheckpoint(prefix="train_resnet_cifar10",
                                 directory="./", config=config_ck)
    loss_cb = LossMonitor()
    model.train(epoch_size, dataset, callbacks=[ckpoint_cb, loss_cb])

# as for evaluation, users could use model.eval
if args_opt.do_eval:
    # Restore weights only when a checkpoint path was supplied; otherwise
    # the (possibly just-trained) in-memory weights are evaluated.
    if args_opt.checkpoint_path:
        param_dict = load_checkpoint(args_opt.checkpoint_path)
        load_param_into_net(net, param_dict)
    eval_dataset = create_dataset(training=False)
    res = model.eval(eval_dataset)
    print("result: ", res)
keep_checkpoint_max=35) ckpoint_cb = ModelCheckpoint(prefix="train_resnet50", directory=args.checkpoint_path, config=config_ck) loss_cb = LossMonitor() print("begin train") model.train(epoch_size, train_dataset, callbacks=[ckpoint_cb, loss_cb]) mox.file.copy_parallel(src_url=args.checkpoint_path, dst_url=args.train_url) # as for evaluation, users could use model.eval if args.do_eval: print("Testing Model:") if args.checkpoint_path: param_dict = load_checkpoint(dog_model_path) load_param_into_net(net, param_dict) eval_dataset = create_dataset(dog_dataset_path, os.path.join(dog_dataset_path, "validation.csv"), batch_size=args.batch_size, repeat_size=args.batch_size, device_num=args.device_num, rank_id=device_id) print("begin eval") res = model.eval(eval_dataset, dataset_sink_mode=False) # 测试网络性能,并把结果保存到res_metric print( "============== Test result:{} ==============".format(res_metric)) print(f"Total time:{int(time.time() - start_time)}")
def export_air_file():
    """Export the NCF eval network to an AIR file (after a sanity eval)."""
    parser = argparse_init()
    args, _ = parser.parse_known_args()
    if not os.path.exists(args.output_path):
        os.makedirs(args.output_path)
    layers = args.layers
    num_factors = args.num_factors
    topk = rconst.TOP_K
    num_eval_neg = rconst.NUM_EVAL_NEGATIVES

    ds_eval, num_eval_users, num_eval_items = create_dataset(
        test_train=False, data_dir=args.data_path, dataset=args.dataset,
        train_epochs=0, eval_batch_size=args.eval_batch_size)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    ncf_net = NCFModel(num_users=num_eval_users,
                       num_items=num_eval_items,
                       num_factors=num_factors,
                       model_layers=layers,
                       mf_regularization=0,
                       mlp_reg_layers=[0.0, 0.0, 0.0, 0.0],
                       mf_dim=16)
    param_dict = load_checkpoint(args.checkpoint_file_path)
    load_param_into_net(ncf_net, param_dict)

    loss_net = NetWithLossClass(ncf_net)
    train_net = TrainStepWrap(loss_net)
    train_net.set_train()
    eval_net = PredictWithSigmoid(ncf_net, topk, num_eval_neg)

    # Sanity evaluation before export.
    ncf_metric = NCFMetric()
    model = Model(train_net, eval_network=eval_net, metrics={"ncf": ncf_metric})
    ncf_metric.clear()
    out = model.eval(ds_eval)

    # FIX: context manager closes the result file even on a failed write.
    eval_file_path = os.path.join(args.output_path, args.eval_file_name)
    with open(eval_file_path, "a+") as eval_file:
        eval_file.write("EvalCallBack: HR = {}, NDCG = {}\n".format(
            out['ncf'][0], out['ncf'][1]))
    print("EvalCallBack: HR = {}, NDCG = {}".format(out['ncf'][0],
                                                    out['ncf'][1]))

    # Reload parameters into the eval graph for export.
    param_dict = load_checkpoint(args.checkpoint_file_path)
    # load the parameter into net
    load_param_into_net(eval_net, param_dict)

    # Grab one batch from the eval pipeline to serve as example inputs.
    input_tensor_list = []
    for data in ds_eval:
        for j in data:
            input_tensor_list.append(Tensor(j))
        # BUG FIX: the original printed `len(a)` where `a` is undefined.
        print(len(input_tensor_list))
        break
    print(input_tensor_list)
    export(eval_net, *input_tensor_list, file_name='NCF.air', file_format='AIR')
def train_eval(config):
    """ test evaluate """
    data_path = config.data_path + config.dataset_type
    ckpt_path = config.ckpt_path
    # Resolve the dataset backend from the config string (H5 is the default).
    backend = {"tfrecord": DataType.TFRECORD,
               "mindrecord": DataType.MINDRECORD}.get(config.dataset_type,
                                                      DataType.H5)

    ds_train = create_dataset(data_path, train_mode=True, epochs=1,
                              batch_size=config.batch_size, data_type=backend)
    print("ds_train.size: {}".format(ds_train.get_dataset_size()))
    ds_eval = create_dataset(data_path, train_mode=False, epochs=1,
                             batch_size=config.batch_size, data_type=backend)
    print("ds_eval.size: {}".format(ds_eval.get_dataset_size()))

    train_net, eval_net = ModelBuilder().get_net(config)
    train_net.set_train()

    # Training phase: loss logging plus per-epoch checkpointing.
    train_model = Model(train_net)
    steps = ds_train.get_dataset_size()
    ckpt_cb = ModelCheckpoint(
        prefix='widedeep_train', directory=config.ckpt_path,
        config=CheckpointConfig(save_checkpoint_steps=steps,
                                keep_checkpoint_max=1))
    train_model.train(config.epochs, ds_train,
                      callbacks=[TimeMonitor(steps),
                                 LossCallBack(config=config), ckpt_cb])

    # data download
    print('Download data from modelarts server to obs.')
    mox.file.copy_parallel(src_url=config.ckpt_path, dst_url=config.train_url)

    # Evaluation phase: restore the freshly written checkpoint into the eval
    # graph and report AUC through the eval callback.
    load_param_into_net(eval_net, load_checkpoint(find_ckpt(ckpt_path)))
    metric = AUCMetric()
    eval_model = Model(train_net, eval_network=eval_net,
                       metrics={"auc": metric})
    eval_model.eval(ds_eval,
                    callbacks=EvalCallBack(eval_model, ds_eval, metric, config))
print("========== The Training Model is Defined. ==========") # train the model and export the encrypted CheckPoint file through Callback config_ck = CheckpointConfig(save_checkpoint_steps=1875, keep_checkpoint_max=10, enc_key=b'0123456789ABCDEF', enc_mode='AES-GCM') ckpoint_cb = ModelCheckpoint(prefix='lenet_enc', directory=None, config=config_ck) model.train(10, train_dataset, dataset_sink_mode=False, callbacks=[ckpoint_cb, LossMonitor(1875)]) acc = model.eval(eval_dataset, dataset_sink_mode=False) print("Accuracy: {}".format(acc["Accuracy"])) # export the encrypted CheckPoint file through save_checkpoint save_checkpoint(network, 'lenet_enc.ckpt', enc_key=b'0123456789ABCDEF', enc_mode='AES-GCM') # load encrypted CheckPoint file and eval param_dict = load_checkpoint('lenet_enc-10_1875.ckpt', dec_key=b'0123456789ABCDEF', dec_mode='AES-GCM') load_param_into_net(network, param_dict) acc = model.eval(eval_dataset, dataset_sink_mode=False) print("Accuracy loading encrypted CheckPoint: {}".format(acc["Accuracy"]))
# LeNet script body: build loss/optimizer/model, then dispatch on
# `args.mode` to either train or evaluate.
net_loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction="mean")
repeat_size = 1
net_opt = nn.Momentum(network.trainable_params(), cfg.lr, cfg.momentum)
model = Model(network, net_loss, net_opt, metrics={"Accuracy": Accuracy()})

if args.mode == 'train':  # train
    ds_train = create_dataset(os.path.join(args.data_path, args.mode),
                              batch_size=cfg.batch_size,
                              repeat_size=repeat_size)
    print("============== Starting Training ==============")
    config_ck = CheckpointConfig(
        save_checkpoint_steps=cfg.save_checkpoint_steps,
        keep_checkpoint_max=cfg.keep_checkpoint_max)
    ckpoint_cb = ModelCheckpoint(prefix="checkpoint_lenet",
                                 config=config_ck,
                                 directory=args.ckpt_path)
    model.train(cfg['epoch_size'], ds_train,
                callbacks=[ckpoint_cb, LossMonitor()],
                dataset_sink_mode=args.dataset_sink_mode)
elif args.mode == 'test':  # test
    print("============== Starting Testing ==============")
    # NOTE(review): here `args.ckpt_path` is used as a checkpoint FILE while
    # the train branch uses it as a checkpoint directory — confirm callers
    # pass the right thing for each mode.
    param_dict = load_checkpoint(args.ckpt_path)
    load_param_into_net(network, param_dict)
    ds_eval = create_dataset(os.path.join(args.data_path, "test"), 32, 1)
    acc = model.eval(ds_eval, dataset_sink_mode=args.dataset_sink_mode)
    print("============== Accuracy:{} ==============".format(acc))
else:
    raise RuntimeError(
        'mode should be train or test, rather than {}'.format(args.mode))
"""
import mindspore.nn as nn
from mindspore.nn import Momentum, SoftmaxCrossEntropyWithLogits
from mindspore import Model, context
from mindspore.train.callback import ModelCheckpoint, CheckpointConfig, LossMonitor

from src.dataset import create_train_dataset, create_eval_dataset
from src.net import Net

if __name__ == "__main__":
    context.set_context(mode=context.GRAPH_MODE)
    ds_train = create_train_dataset()
    ds_eval = create_eval_dataset()
    net = Net()
    net_opt = Momentum(net.trainable_params(), 0.01, 0.9)
    net_loss = SoftmaxCrossEntropyWithLogits(reduction='mean')
    # Track several classification metrics in a single evaluation pass.
    metrics = {
        'Accuracy': nn.Accuracy(),
        'Loss': nn.Loss(),
        'Precision': nn.Precision(),
        'Recall': nn.Recall(),
        'F1_score': nn.F1()
    }
    config_ck = CheckpointConfig(save_checkpoint_steps=1000,
                                 keep_checkpoint_max=10)
    ckpoint = ModelCheckpoint(prefix="CKPT", config=config_ck)
    model = Model(network=net, loss_fn=net_loss, optimizer=net_opt,
                  metrics=metrics)
    # Train with checkpointing and loss logging, then evaluate all metrics.
    model.train(epoch=2, train_dataset=ds_train,
                callbacks=[ckpoint, LossMonitor()])
    result = model.eval(ds_eval)
    print(result)