def download_file():
    """Print the disabled-unit-test list as one alternation regex, then exit.

    The list file is chosen from the platform and build flags: the Windows
    list on ``win32``, the default list otherwise, then overridden by the
    ROCm list and finally by the NPU list when Paddle reports those builds.
    The downloaded names (one per line) are printed joined as
    ``^name1$|^name2$|...``.

    This function never returns normally: it calls ``sys.exit(1)`` when the
    response body is blank or the HTTP status is not 200, and
    ``sys.exit(0)`` after printing otherwise.
    """
    # Replaces the default HTTPS context factory with the unverified one for
    # the whole process.
    ssl._create_default_https_context = ssl._create_unverified_context

    list_name = 'disable_ut_win' if sys.platform == 'win32' else 'disable_ut'
    # Later checks win: an NPU build overrides a ROCm build, which overrides
    # the platform default.
    if paddle.is_compiled_with_rocm():
        list_name = 'disable_ut_rocm'
    if paddle.is_compiled_with_npu():
        list_name = 'disable_ut_npu'
    url = "https://sys-p0.bj.bcebos.com/prec/{}".format(list_name)

    response = requests.get(url)
    body = response.text.strip()
    if not body or response.status_code != 200:
        sys.exit(1)

    pattern = '^' + '$|^'.join(body.split('\n')) + '$'
    print(pattern)
    sys.exit(0)
def setUp(self):
    """Prepare the test fixture.

    Stores the axis (1), the iteration count (3), a random float32 input of
    shape ``[2, 3]`` and the execution place (NPU 0 when Paddle is compiled
    with NPU support, CPU otherwise), then calls ``self.set_program()``.
    """
    self.axis = 1
    self.iter_num = 3
    self.input_shape = [2, 3]
    self.x = np.random.random(self.input_shape).astype("float32")

    if paddle.is_compiled_with_npu():
        self.place = paddle.NPUPlace(0)
    else:
        self.place = paddle.CPUPlace()

    self.set_program()
def test_dygraph_fp16(self):
    """Run a float16 ``paddle.matmul`` in dygraph mode on NPU 0.

    Does nothing when Paddle is not compiled with NPU support. The product
    is computed but not compared against a reference here.
    """
    if not paddle.is_compiled_with_npu():
        return

    with fluid.dygraph.guard(paddle.NPUPlace(0)):
        lhs_data = np.random.random([4, 3]).astype("float16")
        rhs_data = np.random.random([3, 4]).astype("float16")
        lhs = paddle.to_tensor(lhs_data)
        rhs = paddle.to_tensor(rhs_data)
        # Only successful execution of the op is exercised.
        result = paddle.matmul(lhs, rhs)
def get_env_device():
    """
    Return the device name of the running environment.

    The Paddle build flags are probed in a fixed order and the first one
    that reports true decides the result: CUDA -> 'gpu', NPU -> 'npu',
    ROCm -> 'rocm', XPU -> 'xpu'. Returns 'cpu' when none of them match.
    """
    # (name of the paddle probe, device string), in priority order. The
    # probe is looked up lazily so later probes are untouched once one hits.
    probes = (
        ('is_compiled_with_cuda', 'gpu'),
        ('is_compiled_with_npu', 'npu'),
        ('is_compiled_with_rocm', 'rocm'),
        ('is_compiled_with_xpu', 'xpu'),
    )
    for probe_name, device in probes:
        if getattr(paddle, probe_name)():
            return device
    return 'cpu'
def __init__(self, sparse_feature_number, sparse_feature_dim,
             dense_feature_dim, sparse_num_field):
    """Create the first-order and embedding parameters of the FM layer.

    Args:
        sparse_feature_number: number of rows of both embedding tables.
        sparse_feature_dim: width of the second embedding table; also used
            as ``dense_emb_dim`` and in the initializer's std.
        dense_feature_dim: length of the dense weight vectors.
        sparse_num_field: stored on the instance as ``sparse_num_field``.

    Every parameter is initialized with a truncated normal of mean 0 and
    std ``init_value_ / sqrt(sparse_feature_dim)``. Sparse embedding
    updates are switched off when Paddle is compiled with NPU support.
    """
    super(FM, self).__init__()
    self.sparse_feature_number = sparse_feature_number
    self.sparse_feature_dim = sparse_feature_dim
    self.dense_feature_dim = dense_feature_dim
    self.dense_emb_dim = self.sparse_feature_dim
    self.sparse_num_field = sparse_num_field
    self.init_value_ = 0.1

    def make_initializer():
        # A fresh initializer object per parameter, all with the same spread.
        return paddle.nn.initializer.TruncatedNormal(
            mean=0.0,
            std=self.init_value_ /
            math.sqrt(float(self.sparse_feature_dim)))

    use_sparse = not paddle.is_compiled_with_npu()

    # sparse coding
    self.embedding_one = paddle.nn.Embedding(
        sparse_feature_number,
        1,
        padding_idx=0,
        sparse=use_sparse,
        weight_attr=paddle.ParamAttr(initializer=make_initializer()))

    self.embedding = paddle.nn.Embedding(
        self.sparse_feature_number,
        self.sparse_feature_dim,
        sparse=use_sparse,
        padding_idx=0,
        weight_attr=paddle.ParamAttr(initializer=make_initializer()))

    # dense coding
    self.dense_w_one = paddle.create_parameter(
        shape=[self.dense_feature_dim],
        dtype='float32',
        default_initializer=make_initializer())

    self.dense_w = paddle.create_parameter(
        shape=[1, self.dense_feature_dim, self.dense_emb_dim],
        dtype='float32',
        default_initializer=make_initializer())
def __init__(self, func):
    """Initialise the API test harness around ``func``.

    Seeds NumPy's RNG with 33, sets the default switches and tolerances,
    classifies ``func`` via ``self._layertypes``, runs the user ``hook()``,
    validates ``self.types``, picks the execution places and configures
    logging.

    Args:
        func: the Paddle API under test; stored as ``self.func``.

    Raises:
        TypeError: if ``self.types`` is not a list, or is empty after
            ``hook()`` has run.
    """
    self.seed = 33
    np.random.seed(self.seed)
    # debug mode
    self.debug = False
    # when debugging, choose whether to test dygraph or static mode
    self.static = True
    self.enable_backward = True
    self.dtype = None
    # function for paddle api
    self.func = func
    self.types = []
    self.places = []
    self.backward_dtype = [np.float16, np.float32, np.float64]
    # variables that must not receive gradients
    self.no_grad_var = []
    # gradient-check parameters; subclasses may override these in hook()
    self.delta = 1e-6
    self.gap = 0.001
    self.rtol = 1e-7
    # choose layer type (functional or class-based)
    self._layertypes(func)
    # run hook: user-defined variables and initial values
    self.hook()

    # check self.types
    if not isinstance(self.types, list):
        raise TypeError("Types must be a list.")
    if not self.types:
        raise TypeError("You must define types in hook function.")

    # Set the execution device. Defaults apply only when hook() left
    # ``places`` empty; previously the final ``else`` also fired for a
    # non-empty list and silently replaced user-chosen places with CPU.
    # NOTE(review): device index 7 is hard-coded for both CUDA and NPU.
    if not self.places:
        if fluid.is_compiled_with_cuda() is True:
            self.places = [fluid.CUDAPlace(7)]
        elif paddle.is_compiled_with_npu() is True:
            self.places = [paddle.NPUPlace(7)]
        else:
            self.places = [fluid.CPUPlace()]

    log_level = logging.DEBUG if self.debug else logging.ERROR
    logging.basicConfig(
        level=log_level,
        format='%(asctime)s - %(levelname)s - %(message)s')
def check_npu(use_npu):
    """
    Log an error and exit when use_npu is set on a non-NPU Paddle build.

    Nothing happens when ``use_npu`` is falsy or when Paddle reports NPU
    support. Otherwise the error is logged and the process exits with
    status 1. Any ``Exception`` raised during the check is swallowed;
    the ``SystemExit`` raised by ``sys.exit`` is not an ``Exception``
    subclass, so it still propagates.
    """
    message = ("Config use_npu cannot be set as true while you are "
               "using paddlepaddle cpu/gpu/xpu version ! \nPlease try: \n"
               "\t1. Install paddlepaddle-npu to run model on NPU \n"
               "\t2. Set use_npu as false in config file to run "
               "model on CPU/GPU/XPU")

    try:
        if not use_npu:
            return
        if paddle.is_compiled_with_npu():
            return
        logger.error(message)
        sys.exit(1)
    except Exception:
        # Best-effort check: failures while probing or logging are ignored.
        pass
def __init__(self,
             sparse_feature_number,
             sparse_feature_dim,
             dense_feature_dim,
             num_field,
             layer_sizes,
             sync_mode=None):
    """Build the embedding table and the MLP stack of the DNN layer.

    Args:
        sparse_feature_number: number of rows of the embedding table.
        sparse_feature_dim: width of each embedding vector.
        dense_feature_dim: number of dense input features.
        num_field: number of sparse fields; with the two dims above it
            determines the MLP input width
            ``sparse_feature_dim * num_field + dense_feature_dim``.
        layer_sizes: list of hidden layer widths; a final layer of width 2
            is appended.
        sync_mode: stored on the instance as ``sync_mode``.

    Each hidden ``Linear`` is followed by a ``ReLU``; the last ``Linear``
    has no activation. All layers are registered as sublayers and also
    collected, in order, in ``self._mlp_layers``.
    """
    super(DNNLayer, self).__init__()
    self.sync_mode = sync_mode
    self.sparse_feature_number = sparse_feature_number
    self.sparse_feature_dim = sparse_feature_dim
    self.dense_feature_dim = dense_feature_dim
    self.num_field = num_field
    self.layer_sizes = layer_sizes

    # Sparse embedding updates are disabled on NPU builds.
    use_sparse = not paddle.is_compiled_with_npu()

    self.embedding = paddle.nn.Embedding(
        self.sparse_feature_number,
        self.sparse_feature_dim,
        sparse=use_sparse,
        weight_attr=paddle.ParamAttr(
            name="SparseFeatFactors",
            initializer=paddle.nn.initializer.Uniform()))

    input_width = sparse_feature_dim * num_field + dense_feature_dim
    widths = [input_width] + self.layer_sizes + [2]
    hidden_count = len(self.layer_sizes)

    self._mlp_layers = []
    for idx, (fan_in, fan_out) in enumerate(zip(widths[:-1], widths[1:])):
        linear = paddle.nn.Linear(
            in_features=fan_in,
            out_features=fan_out,
            weight_attr=paddle.ParamAttr(
                initializer=paddle.nn.initializer.Normal(
                    std=1.0 / math.sqrt(fan_in))))
        self.add_sublayer('linear_%d' % idx, linear)
        self._mlp_layers.append(linear)

        # Every layer except the output layer gets a ReLU.
        if idx < hidden_count:
            relu = paddle.nn.ReLU()
            self.add_sublayer('act_%d' % idx, relu)
            self._mlp_layers.append(relu)
* Copyright (c) 2019 Baidu.com, Inc. All Rights Reserved * @file test_sgd.py * @author [email protected] * @date 2021-03-24 15:46 * @brief * **************************************************************************/ """ import paddle import numpy as np import paddle.fluid as fluid # global params types = [np.float64, np.float32] if fluid.is_compiled_with_cuda() is True: places = [fluid.CPUPlace(), fluid.CUDAPlace(0)] elif paddle.is_compiled_with_npu() is True: places = [fluid.CPUPlace(), paddle.NPUPlace(7)] else: places = [fluid.CPUPlace()] def test_static_learning_rate(): """ test_static_learning_rate """ for place in places: for t in types: paddle.enable_static() paddle.set_default_dtype(t) main_program = fluid.Program() startup_program = fluid.Program()
# See the License for the specific language governing permissions and # limitations under the License. from __future__ import print_function import unittest import numpy as np import sys sys.path.append("..") from op_test import OpTest import paddle paddle.enable_static() @unittest.skipIf(not paddle.is_compiled_with_npu(), "core is not compiled with NPU") class TestEmpty(OpTest): def setUp(self): self.set_npu() self.init_dtype() self.op_type = "is_empty" self.set_data() def set_npu(self): self.__class__.use_npu = True self.place = paddle.NPUPlace(0) def init_dtype(self): self.dtype = np.float32
def setUp(self):
    """Collect the places to test on: always CPU, plus NPU 0 when Paddle
    is compiled with NPU support."""
    self.places = [paddle.CPUPlace()]
    if not paddle.is_compiled_with_npu():
        return
    self.places.append(paddle.NPUPlace(0))
def backward(self, grad_tensor=None, retain_graph=False):
    """
    Run the backward pass of the current graph, starting from this Tensor.

    New gradients accumulate on top of previous gradients.
    You can clear the gradient with ``Tensor.clear_grad()`` .

    Args:
        grad_tensor(Tensor, optional): initial gradient values of the current Tensor.
            If `grad_tensor` is None, the initial gradient values of the current
            Tensor would be a Tensor filled with 1.0; if `grad_tensor` is not None,
            it must have the same shape as the current Tensor. The default value
            is None.

        retain_graph(bool, optional): If False, the graph used to compute grads will be freed.
            If you would like to add more ops to the built graph after calling this
            method( :code:`backward` ), set the parameter :code:`retain_graph` to True,
            then the grads will be retained. Thus, setting it to False is much more
            memory-efficient. Defaults to False.

    Returns:
        NoneType: None

    Raises:
        ValueError: if called outside of dygraph mode.

    Examples:
        .. code-block:: python

            import paddle
            x = paddle.to_tensor(5., stop_gradient=False)
            for i in range(5):
                y = paddle.pow(x, 4.0)
                y.backward()
                print("{}: {}".format(i, x.grad))
            # 0: [500.]
            # 1: [1000.]
            # 2: [1500.]
            # 3: [2000.]
            # 4: [2500.]

            x.clear_grad()
            print("{}".format(x.grad))
            # 0.

            grad_tensor=paddle.to_tensor(2.)
            for i in range(5):
                y = paddle.pow(x, 4.0)
                y.backward(grad_tensor)
                print("{}: {}".format(i, x.grad))
            # 0: [1000.]
            # 1: [2000.]
            # 2: [3000.]
            # 3: [4000.]
            # 4: [5000.]

    """
    if not framework.in_dygraph_mode():
        raise ValueError(
            "Variable.backward() is only available in DyGraph mode")

    if grad_tensor is not None:
        # The accepted tensor class depends on whether eager mode is active.
        if core._in_eager_mode():
            expected_type = core.eager.EagerTensor
        else:
            expected_type = paddle.Tensor
        assert isinstance(
            grad_tensor,
            expected_type), "The type of grad_tensor must be paddle.Tensor"
        assert grad_tensor.shape == self.shape, \
            "Tensor shape not match, Tensor of grad_tensor [ {} ] with shape {} mismatch Tensor [ {} ] with shape {}".format(
            grad_tensor.name, grad_tensor.shape, self.name, self.shape)

    # The eager entry point takes a (possibly empty) list of grad tensors.
    if core._in_eager_mode():
        grad_tensor = [] if grad_tensor is None else [grad_tensor]

    # TODO(liuyuhui): Currently only for xpu. Will be removed in the future.
    if paddle.is_compiled_with_xpu() or paddle.is_compiled_with_npu():
        root = scale_loss(self)
    else:
        root = self

    if core._in_eager_mode():
        core.eager.run_backward([root], grad_tensor, retain_graph)
    else:
        core.dygraph_run_backward([root], [grad_tensor], retain_graph,
                                  framework._dygraph_tracer())
def _setup_nccl_op(self, startup_program, main_program, build_strategy):
    """Add the communicator-id variables and generator op to the startup program.

    On CUDA builds this declares the ``NCCLID*`` (and, with hierarchical
    allreduce, the ``Hierarchical_inter/exter_NCCLID_*``) RAW variables and
    appends a ``gen_nccl_id`` op. On XPU builds it declares ``BKCLID*`` and
    appends a ``gen_bkcl_id`` op. Trainer 0 first waits for the other
    trainers' endpoints unless Paddle is compiled with NPU support.

    Args:
        startup_program: program whose global block receives the variables
            and the generator op.
        main_program: not used by this method.
        build_strategy: supplies the communicator counts and the
            hierarchical-allreduce settings.

    Raises:
        ValueError: if Paddle is compiled with neither CUDA nor XPU.
    """
    trainer_endpoints = self.role_maker._get_trainer_endpoints()
    peers = copy.copy(trainer_endpoints)
    trainer_id = self.role_maker._worker_index()
    own_endpoint = self.role_maker._get_trainer_endpoints()[trainer_id]
    peers.remove(own_endpoint)

    # NOTE(review): the next two values are computed but never read below.
    trainer_endpoints_env = ",".join(trainer_endpoints)
    trainers_num = self.role_maker._worker_num()

    def declare_raw_var(name):
        # Persistable RAW variable in the startup program's global block.
        return startup_program.global_block().create_var(
            name=name, persistable=True, type=core.VarDesc.VarType.RAW)

    def comm_id_attrs():
        # Attributes shared by the gen_nccl_id and gen_bkcl_id ops.
        return {
            "trainers": trainer_endpoints,
            "trainer_id": trainer_id,
            "nccl_comm_num": build_strategy.nccl_comm_num,
            "use_hierarchical_allreduce":
            build_strategy.use_hierarchical_allreduce,
            "hierarchical_allreduce_inter_ranks":
            build_strategy.hierarchical_allreduce_inter_nranks
        }

    # NOTE(wangxi): npu don't need to wait server ready
    if trainer_id == 0 and not paddle.is_compiled_with_npu():
        wait_server_ready(peers)

    if core.is_compiled_with_cuda():
        comm_id_var = declare_raw_var("NCCLID")

        for idx in range(1, build_strategy.nccl_comm_num):
            declare_raw_var("NCCLID_{}".format(idx))

        if build_strategy.use_hierarchical_allreduce:
            for idx in range(0, build_strategy.nccl_comm_num):
                declare_raw_var("Hierarchical_inter_NCCLID_{}".format(idx))
                declare_raw_var("Hierarchical_exter_NCCLID_{}".format(idx))

        startup_program.global_block().append_op(
            type="gen_nccl_id",
            inputs={},
            outputs={"NCCLID": comm_id_var},
            attrs=comm_id_attrs())
    elif core.is_compiled_with_xpu():
        comm_id_var = declare_raw_var("BKCLID")

        # NOTE(liuyuhui) Baidu Kunlun Communication Library(BKCL) currently do not support multi machines.
        assert build_strategy.bkcl_comm_num == 1, \
            "Baidu Kunlun Communication Library(BKCL) currently do not support multi machines."
        for idx in range(1, build_strategy.bkcl_comm_num):
            declare_raw_var("BKCLID_{}".format(idx))

        startup_program.global_block().append_op(
            type="gen_bkcl_id",
            inputs={},
            outputs={"BKCLID": comm_id_var},
            attrs=comm_id_attrs())
    else:
        raise ValueError(
            "comm_id must be generated in paddlepaddle-xpu or paddlepaddle-gpu."
        )
def run_check():
    """
    Check whether PaddlePaddle is installed correctly and running successfully
    on your system.

    A single-device static and dygraph run is executed first; a parallel
    static run over all selected places follows. A failure of the parallel
    run is reported as a warning rather than raised.

    Examples:
        .. code-block:: python

            import paddle

            paddle.utils.run_check()
            # Running verify PaddlePaddle program ...
            # W1010 07:21:14.972093  8321 device_context.cc:338] Please NOTE: device: 0, CUDA Capability: 70, Driver API Version: 11.0, Runtime API Version: 10.1
            # W1010 07:21:14.979770  8321 device_context.cc:346] device: 0, cuDNN Version: 7.6.
            # PaddlePaddle works well on 1 GPU.
            # PaddlePaddle works well on 8 GPUs.
            # PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now.
    """

    print("Running verify PaddlePaddle program ... ")

    # At most one accelerator flag is set: the first compiled-in backend
    # (CUDA, then XPU, then NPU) is asked whether a device is available.
    use_cuda = use_xpu = use_npu = False
    if paddle.is_compiled_with_cuda():
        use_cuda = _is_cuda_available()
    elif paddle.is_compiled_with_xpu():
        use_xpu = _is_xpu_available()
    elif paddle.is_compiled_with_npu():
        use_npu = _is_npu_available()

    if use_cuda:
        label, places = "GPU", paddle.static.cuda_places()
    elif use_xpu:
        label, places = "XPU", paddle.static.xpu_places()
    elif use_npu:
        label, places = "NPU", paddle.static.npu_places()
    else:
        label, places = "CPU", paddle.static.cpu_places(device_count=2)
    place_count = len(places)

    _run_static_single(use_cuda, use_xpu, use_npu)
    _run_dygraph_single(use_cuda, use_xpu, use_npu)
    print("PaddlePaddle works well on 1 {}.".format(label))

    try:
        _run_static_parallel(use_cuda, use_xpu, use_npu, places)
        print("PaddlePaddle works well on {} {}s.".format(place_count, label))
        print(
            "PaddlePaddle is installed successfully! Let's start deep learning with PaddlePaddle now."
        )
    except Exception as err:
        logging.warning(
            "PaddlePaddle meets some problem with {} {}s. This may be caused by:"
            "\n 1. There is not enough GPUs visible on your system"
            "\n 2. Some GPUs are occupied by other process now"
            "\n 3. NVIDIA-NCCL2 is not installed correctly on your system. Please follow instruction on https://github.com/NVIDIA/nccl-tests "
            "\n to test your NCCL, or reinstall it following https://docs.nvidia.com/deeplearning/sdk/nccl-install-guide/index.html"
            .format(place_count, label))

        logging.warning("\n Original Error is: {}".format(err))
        print("PaddlePaddle is installed successfully ONLY for single {}! "
              "Let's start deep learning with PaddlePaddle now.".format(label))