def test_bert_performance():
    """Train BERT in sink mode on Ascend and check loss, overflow, loss scale and timing.

    The trainable weights are re-initialised through ``weight_variable`` before
    training. The values recorded by the callbacks are then compared with the
    hard-coded expectations at the end of the test.
    """
    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        reserve_class_name_in_scope=False)
    data_set, new_repeat_count, sink_size = me_de_train_dataset(sink_mode=True)
    config = get_config(version=os.getenv('VERSION', 'large'))
    netwithloss = BertNetworkWithLoss(config, True)
    lr = BertLearningRate(decay_steps=sink_size * new_repeat_count,
                          learning_rate=5e-5,
                          end_learning_rate=1e-9,
                          power=10.0,
                          warmup_steps=0)

    def _is_excluded_from_decay(param):
        # Parameters whose name mentions layernorm or bias go to the group without weight decay.
        lowered = param.name.lower()
        return 'layernorm' in lowered or 'bias' in lowered

    decay_params = [p for p in netwithloss.trainable_params()
                    if not _is_excluded_from_decay(p)]
    other_params = [p for p in netwithloss.trainable_params()
                    if _is_excluded_from_decay(p)]
    group_params = [
        {'params': decay_params, 'weight_decay': 0.01},
        {'params': other_params},
        {'order_params': netwithloss.trainable_params()},
    ]
    optimizer = Lamb(group_params, lr)

    scale_window = 3
    scale_manager = DynamicLossScaleManager(2**16, 2, scale_window)
    netwithgrads = BertTrainOneStepWithLossScaleCell(
        netwithloss,
        optimizer=optimizer,
        scale_update_cell=scale_manager.get_update_cell())
    netwithgrads.set_train(True)
    model = Model(netwithgrads)
    callback = ModelCallback()

    # Re-initialise every Tensor-valued parameter; non-Tensor values are left untouched.
    for param in netwithloss.trainable_params():
        value = param.data
        name = param.name
        if not isinstance(value, Tensor):
            continue
        name_parts = name.split('.')
        if name_parts[-1] != 'weight':
            logger.info(
                "***************** BERT param name is 3 {}".format(name))
            param.set_data(weight_variable(value.asnumpy().shape))
            continue
        if name_parts[-3] == 'cls2':
            logger.info(
                "***************** BERT param name is 1 {}".format(name))
            param.set_data(weight_variable(value.asnumpy().shape))
        else:
            logger.info(
                "***************** BERT param name is 2 {}".format(name))
            # Generate the weight with swapped dimensions, then transpose it back.
            tempshape = value.asnumpy().shape
            swapped_shape = (tempshape[1], tempshape[0])
            weight_value = weight_variable(swapped_shape).asnumpy()
            param.set_data(Tensor(np.transpose(weight_value, [1, 0])))

    time_monitor_callback = TimeMonitor(sink_size)
    model.train(new_repeat_count,
                data_set,
                callbacks=[time_monitor_callback, callback],
                dataset_sink_mode=True,
                sink_size=sink_size)

    # assertion occurs while the loss value, overflow state or loss_scale value is wrong
    loss_value = np.array(callback.loss_list)
    expect_loss_value = [11.325791, 11.285011, 11.284766]
    print("loss value: {}".format(loss_value))
    assert np.allclose(loss_value, expect_loss_value, rtol=0, atol=0.0005)

    overflow = np.array(callback.overflow_list)
    expect_overflow = [True, True, True]
    print("overflow: {}".format(overflow))
    assert (overflow == expect_overflow).all()

    loss_scale = np.array(callback.lossscale_list)
    expect_loss_scale = [65536.0, 65536.0, 65536.0]
    print("loss scale: {}".format(loss_scale))
    assert np.allclose(loss_scale, expect_loss_scale, rtol=0, atol=0)

    # Timing is checked on the third recorded entry (index 2) only.
    epoch_mseconds = np.array(time_monitor_callback.epoch_mseconds_list)[2]
    expect_epoch_mseconds = 1400
    print("epoch mseconds: {}".format(epoch_mseconds))
    assert epoch_mseconds <= expect_epoch_mseconds + 5

    per_step_mseconds = np.array(
        time_monitor_callback.per_step_mseconds_list)[2]
    expect_per_step_mseconds = 14
    print("per step mseconds: {}".format(per_step_mseconds))
    assert per_step_mseconds <= expect_per_step_mseconds + 1
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint, load_param_into_net
from src.config import cifar_cfg as cfg
from src.dataset import vgg_create_dataset
from src.vgg import vgg16

if __name__ == '__main__':
    # Command-line options, registered in the order listed here.
    option_table = (
        ('--device_target', dict(type=str, default='Ascend', choices=['Ascend', 'GPU'],
                                 help='device where the code will be implemented. (Default: Ascend)')),
        ('--data_path', dict(type=str, default='./cifar',
                             help='path where the dataset is saved')),
        ('--checkpoint_path', dict(type=str, default=None,
                                   help='checkpoint file path.')),
        ('--device_id', dict(type=int, default=None,
                             help='device id of GPU or Ascend. (Default: None)')),
    )
    parser = argparse.ArgumentParser(description='Cifar10 classification')
    for option_flag, option_kwargs in option_table:
        parser.add_argument(option_flag, **option_kwargs)
    args_opt = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target)
    context.set_context(device_id=args_opt.device_id)

    # Build the network, optimizer and loss, then wrap them in a Model with an accuracy metric.
    net = vgg16(num_classes=cfg.num_classes)
    trainable = filter(lambda x: x.requires_grad, net.get_parameters())
    opt = Momentum(trainable, 0.01, cfg.momentum, weight_decay=cfg.weight_decay)
    loss = nn.SoftmaxCrossEntropyWithLogits(sparse=True, reduction='mean', is_grad=False)
    model = Model(net, loss_fn=loss, optimizer=opt, metrics={'acc'})

    # Load the checkpoint into the network and switch it out of training mode.
    load_param_into_net(net, load_checkpoint(args_opt.checkpoint_path))
    net.set_train(False)

    dataset = vgg_create_dataset(args_opt.data_path, 1, False)
    res = model.eval(dataset)
    print("result: ", res)
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================

import numpy as np
import pytest

import mindspore.context as context
import mindspore.nn as nn
from mindspore import Tensor
from mindspore.ops import operations as P

context.set_context(mode=context.GRAPH_MODE, device_target="GPU")


class SoftplusNet(nn.Cell):
    """Cell that applies the ``P.Softplus`` operator to its input."""

    def __init__(self):
        super(SoftplusNet, self).__init__()
        self.softplus = P.Softplus()

    def construct(self, x):
        """Return ``Softplus(x)``."""
        return self.softplus(x)


def SoftplusCompute(x):
    """NumPy reference for softplus: ``log(1 + exp(x))``.

    Computed as ``logaddexp(0, x)`` so that large positive ``x`` does not
    overflow in ``exp`` (the naive form returns ``inf``) and very negative
    ``x`` does not lose precision in ``1 + exp(x)``.

    Args:
        x: scalar or array-like of real numbers.

    Returns:
        The element-wise softplus of ``x``.
    """
    return np.logaddexp(0, x)
def setup_module(module):
    """Switch the MindSpore context to PyNative mode; ``module`` is not used."""
    pynative_mode = context.PYNATIVE_MODE
    context.set_context(mode=pynative_mode)
def test_sqrt_grad_ascend():
    """Run ``test_sqrt_grad`` with 16x16 shapes in Ascend graph mode, for float16 then float32."""
    context.set_context(mode=context.GRAPH_MODE, device_target="Ascend")
    shape = (16, 16)
    for dtype in (np.float16, np.float32):
        test_sqrt_grad(shape, shape, dtype)
import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor, Parameter from mindspore.common.initializer import initializer from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore.ops import functional as F from mindspore.ops.operations import _grad_ops as G from mindspore.ops import prim_attr_register, PrimitiveWithInfer from ..ut_filter import non_graph_engine from ....mindspore_test_framework.mindspore_test import mindspore_test from ....mindspore_test_framework.pipeline.forward.compile_forward \ import pipeline_for_compile_forward_ge_graph_for_case_by_case_config from ....mindspore_test_framework.pipeline.forward.verify_exception \ import pipeline_for_verify_exception_for_case_by_case_config context.set_context(mode=context.GRAPH_MODE, save_graphs=True) def conv3x3(in_channels, out_channels, stride=1, padding=1): """3x3 convolution """ return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding) def conv1x1(in_channels, out_channels, stride=1, padding=0): """1x1 convolution""" return nn.Conv2d(in_channels, out_channels, kernel_size=1, stride=stride, padding=padding) class ResidualBlock(nn.Cell): """
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import numpy as np import mindspore as ms import mindspore.nn as nn from mindspore import Tensor from mindspore import context from mindspore.common.api import _executor from mindspore.ops import composite as C from mindspore.ops import operations as P from mindspore.nn.wrap.cell_wrapper import _VirtualDatasetCell context.set_context(mode=context.GRAPH_MODE) grad_all = C.GradOperation(get_all=True) class NetWithLoss(nn.Cell): def __init__(self, network, strategy3, strategy4, axis): super(NetWithLoss, self).__init__() self.one_hot = P.OneHot(axis=axis).shard(strategy3) self.on_value = Tensor(2.0, ms.float32) self.off_value = Tensor(1.0, ms.float32) self.loss = P.SoftmaxCrossEntropyWithLogits().shard(strategy4) self.network = network def construct(self, x, y, b): predict = self.network(x, y)
# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ import numpy as np import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor from mindspore.common.api import ms_function from mindspore.ops import operations as P context.set_context(device_target="Ascend") class Net(nn.Cell): def __init__(self): super(Net, self).__init__() self.upsample = P.ResizeNearestNeighbor((2, 2)) @ms_function def construct(self, x): return self.upsample(x) def test_net(): x = np.random.random(size=(32, 3, 32, 32)).astype(np.float32) upsample = Net()
def setup_module(module):
    """Select Ascend graph mode without graph saving, then call ``_reset_op_id()``.

    ``module`` is not used.
    """
    context_options = {
        "mode": context.GRAPH_MODE,
        "device_target": "Ascend",
        "save_graphs": False,
    }
    context.set_context(**context_options)
    _reset_op_id()
def test_exec():
    """Select graph mode and return the ``test_exec_case`` configuration."""
    graph_mode = context.GRAPH_MODE
    context.set_context(mode=graph_mode)
    cases = test_exec_case
    return cases
def _str2bool(value):
    """Convert a command-line string such as ``true``/``False``/``1``/``0`` to a bool.

    ``type=bool`` treats every non-empty string (including "False") as True;
    this converter parses the text instead and rejects unknown spellings.
    """
    if isinstance(value, bool):
        return value
    lowered = value.strip().lower()
    if lowered in ("true", "t", "yes", "y", "1"):
        return True
    # The empty string stays falsy, as it was with ``bool("")``.
    if lowered in ("false", "f", "no", "n", "0", ""):
        return False
    raise argparse.ArgumentTypeError(
        "Boolean value expected, got {!r}.".format(value))


def main():
    """Parse the command line, create the MindRecord files if missing, and train YOLOv3.

    Steps:
      1. Parse options and configure the Ascend context (plus data-parallel
         settings when ``--distribute`` is true).
      2. Create ``yolo.mindrecord*`` files in ``--mindrecord_dir`` from
         ``--image_dir`` / ``--anno_path`` when the first file does not exist.
      3. Unless ``--only_create_dataset`` is true, build the network, optionally
         load a pretrained checkpoint, and run ``Model.train``.

    Raises:
        KeyError: if ``--pre_trained`` is given and ``--pre_trained_epoch_size`` is not positive.
    """
    parser = argparse.ArgumentParser(description="YOLOv3 train")
    parser.add_argument("--only_create_dataset", type=_str2bool, default=False,
                        help="If set it true, only create "
                        "Mindrecord, default is false.")
    parser.add_argument("--distribute", type=_str2bool, default=False,
                        help="Run distribute, default is false.")
    parser.add_argument("--device_id", type=int, default=0,
                        help="Device id, default is 0.")
    parser.add_argument("--device_num", type=int, default=1,
                        help="Use device nums, default is 1.")
    parser.add_argument("--lr", type=float, default=0.001,
                        help="Learning rate, default is 0.001.")
    parser.add_argument("--mode", type=str, default="sink",
                        help="Run sink mode or not, default is sink")
    parser.add_argument("--epoch_size", type=int, default=10,
                        help="Epoch size, default is 10")
    parser.add_argument("--batch_size", type=int, default=32,
                        help="Batch size, default is 32.")
    parser.add_argument("--pre_trained", type=str, default=None,
                        help="Pretrained checkpoint file path")
    parser.add_argument("--pre_trained_epoch_size", type=int, default=0,
                        help="Pretrained epoch size")
    parser.add_argument("--save_checkpoint_epochs", type=int, default=5,
                        help="Save checkpoint epochs, default is 5.")
    parser.add_argument("--loss_scale", type=int, default=1024,
                        help="Loss scale, default is 1024.")
    parser.add_argument(
        "--mindrecord_dir", type=str, default="./Mindrecord_train",
        help="Mindrecord directory. If the mindrecord_dir is empty, it wil generate mindrecord file by"
        "image_dir and anno_path. Note if mindrecord_dir isn't empty, it will use mindrecord_dir "
        "rather than image_dir and anno_path. Default is ./Mindrecord_train")
    parser.add_argument("--image_dir", type=str, default="",
                        help="Dataset directory, "
                        "the absolute image path is joined by the image_dir "
                        "and the relative path in anno_path")
    parser.add_argument("--anno_path", type=str, default="",
                        help="Annotation path.")
    args_opt = parser.parse_args()

    context.set_context(mode=context.GRAPH_MODE,
                        device_target="Ascend",
                        device_id=args_opt.device_id)
    if args_opt.distribute:
        device_num = args_opt.device_num
        context.reset_auto_parallel_context()
        context.set_auto_parallel_context(
            parallel_mode=ParallelMode.DATA_PARALLEL,
            mirror_mean=True,
            device_num=device_num)
        init()
        rank = args_opt.device_id % device_num
    else:
        rank = 0
        device_num = 1

    print("Start create dataset!")

    # It will generate mindrecord file in args_opt.mindrecord_dir,
    # and the file name is yolo.mindrecord0, 1, ... file_num.
    os.makedirs(args_opt.mindrecord_dir, exist_ok=True)

    prefix = "yolo.mindrecord"
    mindrecord_file = os.path.join(args_opt.mindrecord_dir, prefix + "0")
    if not os.path.exists(mindrecord_file):
        if os.path.isdir(args_opt.image_dir) and os.path.exists(
                args_opt.anno_path):
            print("Create Mindrecord.")
            data_to_mindrecord_byte_image(args_opt.image_dir,
                                          args_opt.anno_path,
                                          args_opt.mindrecord_dir,
                                          prefix=prefix,
                                          file_num=8)
            print("Create Mindrecord Done, at {}".format(
                args_opt.mindrecord_dir))
        else:
            print("image_dir or anno_path not exits.")

    if args_opt.only_create_dataset:
        return

    loss_scale = float(args_opt.loss_scale)

    # When creating the MindDataset, use the first mindrecord file, such as yolo.mindrecord0.
    dataset = create_yolo_dataset(mindrecord_file,
                                  repeat_num=args_opt.epoch_size,
                                  batch_size=args_opt.batch_size,
                                  device_num=device_num,
                                  rank=rank)
    dataset_size = dataset.get_dataset_size()
    print("Create dataset done!")

    net = yolov3_resnet18(ConfigYOLOV3ResNet18())
    net = YoloWithLossCell(net, ConfigYOLOV3ResNet18())
    init_net_param(net, "XavierUniform")

    # checkpoint
    ckpt_config = CheckpointConfig(save_checkpoint_steps=dataset_size *
                                   args_opt.save_checkpoint_epochs)
    ckpoint_cb = ModelCheckpoint(prefix="yolov3",
                                 directory=None,
                                 config=ckpt_config)

    if args_opt.pre_trained:
        if args_opt.pre_trained_epoch_size <= 0:
            raise KeyError(
                "pre_trained_epoch_size must be greater than 0.")
        param_dict = load_checkpoint(args_opt.pre_trained)
        load_param_into_net(net, param_dict)

    # The learning-rate schedule spans 160 epochs when distributed, 60 otherwise.
    total_epoch_size = 160 if args_opt.distribute else 60
    lr = Tensor(
        get_lr(learning_rate=args_opt.lr,
               start_step=args_opt.pre_trained_epoch_size * dataset_size,
               global_step=total_epoch_size * dataset_size,
               decay_step=1000,
               decay_rate=0.95,
               steps=True))
    opt = nn.Adam(filter(lambda x: x.requires_grad, net.get_parameters()),
                  lr,
                  loss_scale=loss_scale)
    net = TrainingWrapper(net, opt, loss_scale)

    callback = [
        TimeMonitor(data_size=dataset_size),
        LossMonitor(), ckpoint_cb
    ]

    model = Model(net)
    dataset_sink_mode = False
    if args_opt.mode == "sink":
        print("In sink mode, one epoch return a loss.")
        dataset_sink_mode = True
    print(
        "Start train YOLOv3, the first epoch will be slower because of the graph compilation."
    )
    model.train(args_opt.epoch_size,
                dataset,
                callbacks=callback,
                dataset_sink_mode=dataset_sink_mode)
def __del__(self):
    """Turn profiling off: set ``PROFILING_MODE`` to "false" and disable it in the context."""
    profiling_off = "false"
    os.environ['PROFILING_MODE'] = profiling_off
    context.set_context(enable_profiling=False)
from mindspore import Tensor from mindspore.ops import operations as P from mindspore.nn.optim.momentum import Momentum from mindspore.common.initializer import TruncatedNormal from mindspore.train.model import Model, ParallelMode from mindspore import context import os import re import mindspore.ops.functional as F from mindspore.nn.loss.loss import _Loss from mindspore.parallel._utils import _reset_op_id as resset_op_id from mindspore.common.api import _executor from mindspore.parallel import set_algo_parameters from mindspore.parallel import _cost_model_context as cost_model_context context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") context.set_context(enable_hccl=True) context.set_context(enable_task_sink=True, device_id= 0) context.set_context(enable_ir_fusion=True) context.set_context(enable_loop_sink=False) def weight_variable(shape, factor=0.1): return TruncatedNormal(0.02) def _conv3x3(in_channels, out_channels, stride=1, padding=0, pad_mode='same'): """Get a conv2d layer with 3x3 kernel size.""" init_value = weight_variable((out_channels, in_channels, 3, 3)) return nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=stride, padding=padding, pad_mode=pad_mode, weight_init=init_value) def _conv1x1(in_channels, out_channels, stride=1, padding=0, pad_mode='same'):
def run_ner():
    """Parse the command line and run NER fine-tuning and/or evaluation.

    Raises:
        ValueError: if neither training nor evaluation is requested, or if a
            file path required by the selected options is empty.
    """
    on_off = ["true", "false"]
    parser = argparse.ArgumentParser(description="run classifier")
    parser.add_argument("--device_target", type=str, default="Ascend", choices=["Ascend", "GPU"],
                        help="Device type, default is Ascend")
    parser.add_argument("--assessment_method", type=str, default="F1", choices=["F1", "clue_benchmark"],
                        help="assessment_method include: [F1, clue_benchmark], default is F1")
    parser.add_argument("--do_train", type=str, default="false", choices=on_off,
                        help="Eable train, default is false")
    parser.add_argument("--do_eval", type=str, default="false", choices=on_off,
                        help="Eable eval, default is false")
    parser.add_argument("--use_crf", type=str, default="false", choices=on_off,
                        help="Use crf, default is false")
    parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
    parser.add_argument("--epoch_num", type=int, default="1", help="Epoch number, default is 1.")
    parser.add_argument("--num_class", type=int, default="2", help="The number of class, default is 2.")
    parser.add_argument("--train_data_shuffle", type=str, default="true", choices=on_off,
                        help="Enable train data shuffle, default is true")
    parser.add_argument("--eval_data_shuffle", type=str, default="false", choices=on_off,
                        help="Enable eval data shuffle, default is false")
    # All remaining options are string paths defaulting to the empty string.
    path_options = (
        ("--vocab_file_path", "Vocab file path, used in clue benchmark"),
        ("--label2id_file_path", "label2id file path, used in clue benchmark"),
        ("--save_finetune_checkpoint_path", "Save checkpoint path"),
        ("--load_pretrain_checkpoint_path", "Load checkpoint file path"),
        ("--load_finetune_checkpoint_path", "Load checkpoint file path"),
        ("--train_data_file_path", "Data path, it is better to use absolute path"),
        ("--eval_data_file_path", "Data path, it is better to use absolute path"),
        ("--schema_file_path", "Schema path, it is better to use absolute path"),
    )
    for option_flag, option_help in path_options:
        parser.add_argument(option_flag, type=str, default="", help=option_help)
    args_opt = parser.parse_args()

    epoch_num = args_opt.epoch_num
    assessment_method = args_opt.assessment_method.lower()
    load_pretrain_checkpoint_path = args_opt.load_pretrain_checkpoint_path
    save_finetune_checkpoint_path = args_opt.save_finetune_checkpoint_path
    load_finetune_checkpoint_path = args_opt.load_finetune_checkpoint_path

    train_opt = args_opt.do_train.lower()
    eval_opt = args_opt.do_eval.lower()
    train_requested = train_opt == "true"
    eval_requested = eval_opt == "true"
    crf_requested = args_opt.use_crf.lower() == "true"
    clue_requested = assessment_method == "clue_benchmark"

    # Validate option combinations before touching the device or any file.
    if train_opt == "false" and eval_opt == "false":
        raise ValueError("At least one of 'do_train' or 'do_eval' must be true")
    if train_requested and args_opt.train_data_file_path == "":
        raise ValueError("'train_data_file_path' must be set when do finetune task")
    if eval_requested and args_opt.eval_data_file_path == "":
        raise ValueError("'eval_data_file_path' must be set when do evaluation task")
    if clue_requested and args_opt.vocab_file_path == "":
        raise ValueError("'vocab_file_path' must be set to do clue benchmark")
    if crf_requested and args_opt.label2id_file_path == "":
        raise ValueError("'label2id_file_path' must be set to use crf")
    if clue_requested and args_opt.label2id_file_path == "":
        raise ValueError("'label2id_file_path' must be set to do clue benchmark")

    target = args_opt.device_target
    if target == "Ascend":
        context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
    elif target == "GPU":
        context.set_context(mode=context.GRAPH_MODE, device_target="GPU")
        if bert_net_cfg.compute_type != mstype.float32:
            logger.warning('GPU only support fp32 temporarily, run with fp32.')
            bert_net_cfg.compute_type = mstype.float32
    else:
        raise Exception("Target error, GPU or Ascend is supported.")

    if crf_requested:
        # With CRF the label count comes from the label map plus two extra tags.
        with open(args_opt.label2id_file_path) as json_file:
            tag_to_index = json.load(json_file)
        max_val = max(tag_to_index.values())
        tag_to_index["<START>"] = max_val + 1
        tag_to_index["<STOP>"] = max_val + 2
        number_labels = len(tag_to_index)
    else:
        tag_to_index = None
        number_labels = args_opt.num_class

    netwithloss = BertNER(bert_net_cfg, True, num_labels=number_labels,
                          use_crf=crf_requested,
                          tag_to_index=tag_to_index, dropout_prob=0.1)

    if train_requested:
        ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                assessment_method=assessment_method,
                                data_file_path=args_opt.train_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.train_data_shuffle.lower() == "true"))
        do_train(ds, netwithloss, load_pretrain_checkpoint_path, save_finetune_checkpoint_path, epoch_num)

        if eval_requested:
            # Evaluate the checkpoint just produced by training, not the one given on the command line.
            if save_finetune_checkpoint_path == "":
                load_finetune_checkpoint_dir = _cur_dir
            else:
                load_finetune_checkpoint_dir = make_directory(save_finetune_checkpoint_path)
            load_finetune_checkpoint_path = LoadNewestCkpt(load_finetune_checkpoint_dir,
                                                           ds.get_dataset_size(), epoch_num, "ner")

    if eval_requested:
        ds = create_ner_dataset(batch_size=bert_net_cfg.batch_size, repeat_count=1,
                                assessment_method=assessment_method,
                                data_file_path=args_opt.eval_data_file_path,
                                schema_file_path=args_opt.schema_file_path,
                                do_shuffle=(args_opt.eval_data_shuffle.lower() == "true"))
        do_eval(ds, BertNER, args_opt.use_crf, number_labels, assessment_method,
                args_opt.eval_data_file_path, load_finetune_checkpoint_path,
                args_opt.vocab_file_path, args_opt.label2id_file_path, tag_to_index)
parser.add_argument('--dataset_path', type=str, default='', help='Dataset path') parser.add_argument('--platform', type=str, default='GPU', choices=('Ascend', 'GPU'), help='run platform') args_opt = parser.parse_args() if args_opt.platform != 'GPU': raise ValueError("Only supported GPU training.") context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.platform, device_id=0) net = ShuffleNetV2(n_class=cfg.num_classes) ckpt = load_checkpoint(args_opt.checkpoint) load_param_into_net(net, ckpt) net.set_train(False) dataset = create_dataset(args_opt.dataset_path, False, 0, 1) loss = CrossEntropySmooth(sparse=True, reduction='mean', smooth_factor=0.1, num_classes=cfg.num_classes) eval_metrics = { 'Loss': nn.Loss(), 'Top1-Acc': nn.Top1CategoricalAccuracy(), 'Top5-Acc': nn.Top5CategoricalAccuracy() }
type=str, default="./", help='path where the dataset is saved') parser.add_argument('--ckpt_path', type=str, default="./ckpt", help='if is test, must provide\ path where the trained ckpt file') parser.add_argument('--dataset_sink_mode', type=bool, default=False, help='dataset_sink_mode is False or True') args = parser.parse_args() context.set_context(mode=context.GRAPH_MODE, device_target=args.device_target, enable_mem_reuse=False) network = AlexNet(cfg.num_classes) loss = nn.SoftmaxCrossEntropyWithLogits(is_grad=False, sparse=True, reduction="mean") repeat_size = cfg.epoch_size opt = nn.Momentum(network.trainable_params(), cfg.learning_rate, cfg.momentum) model = Model(network, loss, opt, metrics={"Accuracy": Accuracy()}) # test print("============== Starting Testing ==============") param_dict = load_checkpoint(args.ckpt_path) load_param_into_net(network, param_dict) ds_eval = create_dataset(args.data_path, cfg.batch_size, 1, "test")
def test_tensor_auto_cast():
    """Check result dtypes of the auto-cast cells and the TypeError cases.

    Covers ``TensorAutoCast`` on pairs of tensors, ``TensorIntAutoCast`` and
    ``TensorFPAutoCast`` on single tensors, then the inputs for which each cell
    (including ``TensorBoolAutoCast``) is expected to raise ``TypeError``.
    """
    context.set_context(mode=context.GRAPH_MODE)
    Tensor([True, False], mstype.bool_)

    def ones_tensor(dtype):
        return Tensor(np.ones([2, 1, 2, 2]), dtype)

    t_uint8 = ones_tensor(mstype.uint8)
    t_int8 = ones_tensor(mstype.int8)
    t_int16 = ones_tensor(mstype.int16)
    t_int32 = ones_tensor(mstype.int32)
    t_int64 = ones_tensor(mstype.int64)
    t_fp16 = ones_tensor(mstype.float16)
    t_fp32 = ones_tensor(mstype.float32)
    t_fp64 = ones_tensor(mstype.float64)

    int_tensors = (t_uint8, t_int8, t_int16, t_int32, t_int64)
    fp_tensors = (t_fp16, t_fp32, t_fp64)

    # (left operand, right operand, expected result dtype)
    net = TensorAutoCast()
    pair_cases = (
        (t_uint8, t_int8, mstype.int16),
        (t_uint8, t_int16, mstype.int16),
        (t_uint8, t_int32, mstype.int32),
        (t_uint8, t_int64, mstype.int64),
        (t_int8, t_int16, mstype.int16),
        (t_int8, t_int32, mstype.int32),
        (t_int8, t_int64, mstype.int64),
        (t_int16, t_int32, mstype.int32),
        (t_int16, t_int64, mstype.int64),
        (t_int32, t_int64, mstype.int64),
        (t_fp16, t_fp32, mstype.float32),
        (t_fp16, t_fp64, mstype.float64),
        (t_fp32, t_fp64, mstype.float64),
        (t_uint8, t_fp16, mstype.float16),
        (t_uint8, t_fp32, mstype.float32),
        (t_uint8, t_fp64, mstype.float64),
        (t_int8, t_fp64, mstype.float64),
        (t_int16, t_fp64, mstype.float64),
        (t_int32, t_fp64, mstype.float64),
        (t_int64, t_fp64, mstype.float64),
        (t_fp16, t_int8, mstype.float16),
        (t_fp16, t_uint8, mstype.float16),
        (t_fp16, t_int16, mstype.float16),
        (t_fp16, t_int32, mstype.float16),
        (t_fp16, t_int64, mstype.float16),
    )
    for lhs, rhs, expected in pair_cases:
        rs = net(lhs, rhs)
        assert rs.dtype() == expected

    # TensorIntAutoCast: the result keeps the input dtype.
    tint = TensorIntAutoCast()
    tint_cases = (
        (t_uint8, mstype.uint8),
        (t_int8, mstype.int8),
        (t_int16, mstype.int16),
        (t_int32, mstype.int32),
        (t_int64, mstype.int64),
        (t_fp16, mstype.float16),
        (t_fp32, mstype.float32),
        (t_fp64, mstype.float64),
    )
    for operand, expected in tint_cases:
        rs = tint(operand)
        assert rs.dtype() == expected

    # TensorFPAutoCast: float32 for everything except a float64 input.
    tfp = TensorFPAutoCast()
    tfp_cases = (
        (t_uint8, mstype.float32),
        (t_int8, mstype.float32),
        (t_int16, mstype.float32),
        (t_int32, mstype.float32),
        (t_int64, mstype.float32),
        (t_fp16, mstype.float32),
        (t_fp32, mstype.float32),
        (t_fp64, mstype.float64),
    )
    for operand, expected in tfp_cases:
        rs = tfp(operand)
        assert rs.dtype() == expected

    t_uint16 = ones_tensor(mstype.uint16)
    t_uint32 = ones_tensor(mstype.uint32)
    t_uint64 = ones_tensor(mstype.uint64)
    wide_unsigned = (t_uint16, t_uint32, t_uint64)

    # uint16/uint32/uint64 as the left operand must be rejected, first against
    # every integer tensor and then against every float tensor.
    for rhs_group in (int_tensors, fp_tensors):
        for lhs in wide_unsigned:
            for rhs in rhs_group:
                with pytest.raises(TypeError):
                    net(lhs, rhs)

    for cell in (tfp, tint):
        for operand in wide_unsigned:
            with pytest.raises(TypeError):
                cell(operand)

    bnet = TensorBoolAutoCast()
    for operand in int_tensors + fp_tensors:
        with pytest.raises(TypeError):
            bnet(operand)
help='path where the dataset is stored.') parser.add_argument('--glove_path', type=str, default="./glove", help='path where the GloVe is stored.') parser.add_argument('--preprocess_path', type=str, default="./preprocess", help='path where the pre-process data is stored.') parser.add_argument('--ckpt_path', type=str, default="./", help='the path to save the checkpoint file.') parser.add_argument('--pre_trained', type=str, default=None, help='the pretrained checkpoint file path.') parser.add_argument('--device_target', type=str, default="Ascend", choices=['GPU', 'CPU', 'Ascend'], help='the target device to run, support "GPU", "CPU". Default: "Ascend".') parser.add_argument('--train_url', required=True, default=None, help='Location of training outputs.') args = parser.parse_args() context.set_context( mode=context.GRAPH_MODE, save_graphs=False, device_target=args.device_target) # import moxing as mox # mox.file.copy_parallel(src_url='s3://zhengnj-course/lstm/glove', dst_url=args.glove_path) if args.device_target == 'Ascend': cfg = lstm_cfg_ascend else: cfg = lstm_cfg if args.preprocess == "true": print("============== Starting Data Pre-processing ==============") convert_to_mindrecord(cfg.embed_size, args.data_url, args.preprocess_path, args.glove_path)
def test_compile():
    """Select Ascend graph mode and return the ``test_cases`` configuration."""
    context_options = {"mode": context.GRAPH_MODE, "device_target": "Ascend"}
    context.set_context(**context_options)
    cases = test_cases
    return cases
from src.config import config_gpu from src.mobilenetV3 import mobilenet_v3_large parser = argparse.ArgumentParser(description='Image classification') parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path') parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') parser.add_argument('--device_target', type=str, default="GPU", help='run device_target') args_opt = parser.parse_args() if __name__ == '__main__': config = None if args_opt.device_target == "GPU": config = config_gpu context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False) else: raise ValueError("Unsupported device_target.") loss = nn.SoftmaxCrossEntropyWithLogits( is_grad=False, sparse=True, reduction='mean') net = mobilenet_v3_large(num_classes=config.num_classes) dataset = create_dataset(dataset_path=args_opt.dataset_path, do_train=False, config=config, device_target=args_opt.device_target, batch_size=config.batch_size) step_size = dataset.get_dataset_size() if args_opt.checkpoint_path:
parser = argparse.ArgumentParser(description='Image classification') parser.add_argument('--run_distribute', type=bool, default=False, help='Run distribute') parser.add_argument('--device_num', type=int, default=1, help='Device num.') parser.add_argument('--do_train', type=bool, default=True, help='Do train or not.') parser.add_argument('--do_eval', type=bool, default=False, help='Do eval or not.') parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') parser.add_argument('--device_target', type=str, default='Ascend', help='Device target') parser.add_argument('--pre_trained', type=str, default=None, help='Pretrained checkpoint path') args_opt = parser.parse_args() if __name__ == '__main__': target = args_opt.device_target if not args_opt.do_eval and args_opt.run_distribute: if target == "Ascend": device_id = int(os.getenv('DEVICE_ID')) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=device_id, enable_auto_mixed_precision=True) init() context.set_auto_parallel_context(device_num=args_opt.device_num, parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True) auto_parallel_context().set_all_reduce_fusion_split_indices([107, 160]) ckpt_save_dir = config.save_checkpoint_path elif target == "GPU": context.set_context(mode=context.GRAPH_MODE, device_target="GPU", save_graphs=False) init("nccl") context.set_auto_parallel_context(device_num=get_group_size(), parallel_mode=ParallelMode.DATA_PARALLEL, mirror_mean=True) ckpt_save_dir = config.save_checkpoint_path + "ckpt_" + str(get_rank()) + "/" epoch_size = config.epoch_size net = resnet50(class_num=config.class_num)
from src.crossentropy import CrossEntropy
from src.utils import _load_param_into_net
from models.resnet_quant import resnet50_quant
from mindspore import context
from mindspore.train.model import Model
from mindspore.train.serialization import load_checkpoint
from mindspore.train.quant import quant

# Command-line interface of the evaluation script.
parser = argparse.ArgumentParser(description='Image classification')
parser.add_argument('--checkpoint_path', type=str, default=None, help='Checkpoint file path')
parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path')
parser.add_argument('--device_target', type=str, default='Ascend', help='Device target')
args_opt = parser.parse_args()

# The execution context is configured at import time, before any network is built.
context.set_context(mode=context.GRAPH_MODE, device_target=args_opt.device_target, save_graphs=False)
# Pick the quantization-aware or the plain configuration from the same switch
# that later decides whether the network is converted.
config = config_quant if quant_set.quantization_aware else config_noquant
if args_opt.device_target == "Ascend":
    # NOTE(review): int(os.getenv(...)) raises TypeError when DEVICE_ID is not
    # set -- confirm the launcher always exports it for Ascend runs.
    device_id = int(os.getenv('DEVICE_ID'))
    context.set_context(device_id=device_id)

if __name__ == '__main__':
    # define fusion network
    net = resnet50_quant(class_num=config.class_num)
    if quant_set.quantization_aware:
        # convert fusion network to quantization aware network
        # NOTE(review): the two-element lists presumably configure weights and
        # activations separately -- confirm against the quant API.
        net = quant.convert_quant_network(net, bn_fold=True, per_channel=[True, False], symmetric=[True, False])
def get_output(x, dout, enable_graph_kernel=False):
    """Run ``Net`` on ``(x, dout)`` with graph-kernel fusion on or off.

    The flag is written to the global context on every call.  Previously it
    was only ever switched on, so once a call had enabled it, every later
    "baseline" call with ``enable_graph_kernel=False`` silently kept running
    with graph-kernel fusion enabled.

    Args:
        x: first input forwarded to ``Net``.
        dout: second input forwarded to ``Net``.
        enable_graph_kernel: whether graph-kernel fusion is enabled for this run.

    Returns:
        The output of ``Net()(x, dout)``.
    """
    # bool() keeps truthy non-bool arguments working as they did before.
    context.set_context(enable_graph_kernel=bool(enable_graph_kernel))
    net = Net()
    return net(x, dout)
def _rank_result_paths(rank, model_md5):
    """Return the (top1, top5, img_tot) ``.npy`` paths under /cache for one rank."""
    return (
        '/cache/top1_rank_{}_{}.npy'.format(rank, model_md5),
        '/cache/top5_rank_{}_{}.npy'.format(rank, model_md5),
        '/cache/img_tot_rank_{}_{}.npy'.format(rank, model_md5),
    )


def test(cloud_args=None):
    """Evaluate one or more checkpoints and log top-1 / top-5 accuracy.

    Steps:
      1. Parse the arguments and configure the MindSpore context (and the
         data-parallel context when ``args.is_distributed`` is set).
      2. Collect the checkpoints: every ``*.ckpt`` in ``args.pretrained`` when
         it is a directory (sorted by the number embedded in the file name,
         largest first), otherwise the single path itself.
      3. For each checkpoint, load it into a fresh network, run the eval
         dataset and count top-1 / top-5 hits.
      4. In distributed mode each rank writes its counters to ``.npy`` files
         under ``/cache``, waits until every rank's files exist, then sums them.

    Args:
        cloud_args: forwarded unchanged to ``parse_args``.

    Raises:
        NotImplementedError: if ``get_network`` returns ``None`` for the backbone.
    """
    args = parse_args(cloud_args)
    context.set_context(mode=context.GRAPH_MODE, enable_auto_mixed_precision=True,
                        device_target=args.platform, save_graphs=False)
    # Only bind a device id when DEVICE_ID is present and numeric.
    if os.getenv('DEVICE_ID', "not_set").isdigit():
        context.set_context(device_id=int(os.getenv('DEVICE_ID')))

    # init distributed
    if args.is_distributed:
        init()
        args.rank = get_rank()
        args.group_size = get_group_size()
        parallel_mode = ParallelMode.DATA_PARALLEL
        context.set_auto_parallel_context(parallel_mode=parallel_mode, device_num=args.group_size,
                                          parameter_broadcast=True, gradients_mean=True)
    else:
        args.rank = 0
        args.group_size = 1

    args.outputs_dir = os.path.join(args.log_path,
                                    datetime.datetime.now().strftime('%Y-%m-%d_time_%H_%M_%S'))

    args.logger = get_logger(args.outputs_dir, args.rank)
    args.logger.save_args(args)

    # network
    args.logger.important_info('start create network')
    if os.path.isdir(args.pretrained):
        models = list(glob.glob(os.path.join(args.pretrained, '*.ckpt')))
        print(models)
        # Sort key: the integer embedded in the file stem, negated so the
        # largest number comes first.
        if args.graph_ckpt:
            # stem -> text after the last '-' -> text before the first '_'
            f = lambda x: -1 * int(os.path.splitext(os.path.split(x)[-1])[0].split('-')[-1].split('_')[0])
        else:
            # stem -> text after the last '_'
            f = lambda x: -1 * int(os.path.splitext(os.path.split(x)[-1])[0].split('_')[-1])
        args.models = sorted(models, key=f)
    else:
        args.models = [args.pretrained,]

    for model in args.models:
        de_dataset = classification_dataset(args.data_dir, image_size=args.image_size,
                                            per_batch_size=args.per_batch_size,
                                            max_epoch=1, rank=args.rank, group_size=args.group_size,
                                            mode='eval')
        eval_dataloader = de_dataset.create_tuple_iterator(output_numpy=True)
        network = get_network(args.backbone, args.num_classes, platform=args.platform)
        if network is None:
            raise NotImplementedError('not implement {}'.format(args.backbone))

        # Drop the 'moments.*' entries and strip the 'network.' prefix so the
        # remaining keys match the bare network's parameter names.
        param_dict = load_checkpoint(model)
        param_dict_new = {}
        for key, values in param_dict.items():
            if key.startswith('moments.'):
                continue
            elif key.startswith('network.'):
                param_dict_new[key[8:]] = values
            else:
                param_dict_new[key] = values

        load_param_into_net(network, param_dict_new)
        args.logger.info('load model {} success'.format(model))

        img_tot = 0
        top1_correct = 0
        top5_correct = 0
        if args.platform == "Ascend":
            network.to_float(mstype.float16)
        else:
            auto_mixed_precision(network)
        network.set_train(False)
        t_end = time.time()
        it = 0
        for data, gt_classes in eval_dataloader:
            output = network(Tensor(data, mstype.float32))
            output = output.asnumpy()

            top1_output = np.argmax(output, (-1))
            top5_output = np.argsort(output)[:, -5:]

            t1_correct = np.equal(top1_output, gt_classes).sum()
            top1_correct += t1_correct
            top5_correct += get_top5_acc(top5_output, gt_classes)
            img_tot += args.per_batch_size

            # On rank 0 the timer is restarted after the first batch, so the
            # throughput below excludes that batch (hence the subtraction).
            if args.rank == 0 and it == 0:
                t_end = time.time()
                it = 1
        if args.rank == 0:
            time_used = time.time() - t_end
            fps = (img_tot - args.per_batch_size) * args.group_size / time_used
            args.logger.info('Inference Performance: {:.2f} img/sec'.format(fps))
        results = [[top1_correct], [top5_correct], [img_tot]]
        args.logger.info('before results={}'.format(results))
        if args.is_distributed:
            model_md5 = model.replace('/', '')
            tmp_dir = '/cache'
            # exist_ok avoids the check-then-create race between ranks that
            # share this directory.
            os.makedirs(tmp_dir, exist_ok=True)
            top1_correct_npy, top5_correct_npy, img_tot_npy = _rank_result_paths(args.rank, model_md5)
            np.save(top1_correct_npy, top1_correct)
            np.save(top5_correct_npy, top5_correct)
            np.save(img_tot_npy, img_tot)

            # File-based barrier: wait until every rank has published all
            # three files.  Sleep between polls instead of spinning.
            while not all(os.path.exists(path)
                          for other_rank in range(args.group_size)
                          for path in _rank_result_paths(other_rank, model_md5)):
                time.sleep(0.1)

            top1_correct_all = 0
            top5_correct_all = 0
            img_tot_all = 0
            for other_rank in range(args.group_size):
                top1_correct_npy, top5_correct_npy, img_tot_npy = _rank_result_paths(other_rank, model_md5)
                top1_correct_all += np.load(top1_correct_npy)
                top5_correct_all += np.load(top5_correct_npy)
                img_tot_all += np.load(img_tot_npy)
            results = [[top1_correct_all], [top5_correct_all], [img_tot_all]]
            results = np.array(results)
        else:
            results = np.array(results)

        args.logger.info('after results={}'.format(results))
        top1_correct = results[0, 0]
        top5_correct = results[1, 0]
        img_tot = results[2, 0]
        acc1 = 100.0 * top1_correct / img_tot
        acc5 = 100.0 * top5_correct / img_tot
        args.logger.info('after allreduce eval: top1_correct={}, tot={},'
                         'acc={:.2f}%(TOP1)'.format(top1_correct, img_tot, acc1))
        args.logger.info('after allreduce eval: top5_correct={}, tot={},'
                         'acc={:.2f}%(TOP5)'.format(top5_correct, img_tot, acc5))
    if args.is_distributed:
        release()
from mindspore.communication.management import init random.seed(1) np.random.seed(1) de.config.set_seed(1) parser = argparse.ArgumentParser(description='Image classification') parser.add_argument('--dataset_path', type=str, default=None, help='Dataset path') args_opt = parser.parse_args() device_id = int(os.getenv('DEVICE_ID')) rank_id = int(os.getenv('RANK_ID')) rank_size = int(os.getenv('RANK_SIZE')) run_distribute = rank_size > 1 context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=device_id, save_graphs=False) context.set_context(enable_task_sink=True) context.set_context(enable_loop_sink=True) context.set_context(enable_mem_reuse=True) class Monitor(Callback): """ Monitor loss and time. Args: lr_init (numpy array): train lr Returns: None.
from mindspore.context import ParallelMode from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.common import dtype as mstype import mindspore.dataset as de from src.data_preprocess import SingleScaleTrans from src.config import config from src.FaceDetection.yolov3 import HwYolov3 as backbone_HwYolov3 from src.FaceDetection import voc_wrapper from src.network_define import BuildTestNetwork, get_bounding_boxes, tensor_to_brambox, \ parse_gt_from_anno, parse_rets, calc_recall_precision_ap plt.switch_backend('agg') devid = int(os.getenv('DEVICE_ID')) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", save_graphs=False, device_id=devid) def parse_args(): '''parse_args''' parser = argparse.ArgumentParser('Yolov3 Face Detection') parser.add_argument('--mindrecord_path', type=str, default='', help='dataset path, e.g. /home/data.mindrecord') parser.add_argument('--pretrained', type=str, default='', help='pretrained model to load')
# Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ import numpy as np import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor from mindspore.common import dtype as mstype from mindspore.ops import composite as C from mindspore.common import set_seed context.set_context(mode=context.GRAPH_MODE, device_target="Ascend") set_seed(20) class Net(nn.Cell): def __init__(self, shape, seed=0): super(Net, self).__init__() self.shape = shape self.seed = seed def construct(self, alpha, beta): return C.gamma(self.shape, alpha, beta, self.seed) def test_net_1D(): seed = 10 shape = (3, 2, 4)
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ import numpy as np import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor from mindspore.common.initializer import initializer from mindspore.common.parameter import Parameter from mindspore.communication.management import init, NCCL_WORLD_COMM_GROUP, get_rank, get_group_size from mindspore.ops import operations as P context.set_context(mode=context.GRAPH_MODE, device_target='GPU') init() rank = get_rank() size = get_group_size() x = np.ones([3, 1, 3, 3]).astype(np.float32) * 0.01 * (rank + 1) class Net(nn.Cell): def __init__(self): super(Net, self).__init__() self.x1 = Parameter(initializer(Tensor(x), x.shape), name='x1') self.x2 = Parameter(initializer(Tensor(x), x.shape), name='x2') self.x3 = Parameter(initializer(Tensor(x), x.shape), name='x3') self.op0 = "sum"
# distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ import numpy as np import pytest import mindspore.context as context import mindspore.nn as nn from mindspore import Tensor from mindspore.common.api import ms_function from mindspore.ops import operations as P context.set_context(device_target='GPU') class Net(nn.Cell): def __init__(self): super(Net, self).__init__() self.add = P.AddN() @ms_function def construct(self, x, y, z): return self.add((x, y, z)) @pytest.mark.level0 @pytest.mark.platform_x86_gpu_training @pytest.mark.env_onecard
# # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # ============================================================================ """lenet_export.""" from mindspore import context, Tensor import mindspore.common.dtype as mstype from mindspore.train.serialization import export from lenet import LeNet5 import numpy as np from train_utils import TrainWrap n = LeNet5() n.set_train() context.set_context(mode=context.PYNATIVE_MODE, device_target="GPU", save_graphs=False) BATCH_SIZE = 32 x = Tensor(np.ones((BATCH_SIZE, 1, 32, 32)), mstype.float32) label = Tensor(np.zeros([BATCH_SIZE, 10]).astype(np.float32)) net = TrainWrap(n) export(net, x, label, file_name="lenet_tod.mindir", file_format='MINDIR') print("finished exporting")