def linear_static(func, device, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device(device)
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=[None, np_x.shape[1]], dtype=dtype)
            weight = static.data(name="weight", shape=np_weight.shape, dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            x.stop_gradient = False
            weight.stop_gradient = False
            bias.stop_gradient = False
            out = func(x, weight, bias)
            mean_out = paddle.mean(out)
            static.append_backward(mean_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v, weight_grad_v, bias_grad_v = exe.run(
                static.default_main_program(),
                feed={
                    "x": np_x.astype(dtype),
                    "weight": np_weight.astype(dtype),
                    "bias": np_bias.astype(dtype)
                },
                fetch_list=[
                    out.name,
                    x.name + "@GRAD",
                    weight.name + "@GRAD",
                    bias.name + "@GRAD"
                ])
    paddle.disable_static()
    return out_v, x_grad_v, weight_grad_v, bias_grad_v
def check_static_result(self, place):
    from paddle.distributed.fleet.meta_parallel.parallel_layers.random import dropout
    with static.program_guard(static.Program(), static.Program()):
        input = static.data(name="input", shape=[40, 40], dtype="float32")
        res1 = dropout(
            input,
            p=0.3,
            training=True,
            mode='upscale_in_train',
            rng_name='seed0')
        res2 = dropout(
            input,
            p=0.3,
            training=True,
            mode='upscale_in_train',
            rng_name='seed1')
        res3 = dropout(input, p=0.3)

        in_np = np.random.random([40, 40]).astype("float32")

        exe = static.Executor(place)
        res_list = [res1, res2]
        for i in range(2):
            out1, out2 = exe.run(static.default_main_program(),
                                 feed={"input": in_np},
                                 fetch_list=res_list)
            self.assertTrue(np.allclose(out1, out2))
def __init__(self,
             model=None,
             inputs_spec=None,
             labels_spec=None,
             cluster=None,
             strategy=None):
    self.model = model
    self.inputs_spec = self._validate_spec(inputs_spec)
    self.labels_spec = self._validate_spec(labels_spec)
    self.cluster = cluster
    # if self.cluster is None:
    #     self.cluster = get_default_cluster()
    self.strategy = strategy
    if self.strategy is None:
        self.strategy = fleet.DistributedStrategy()

    self._executor = None
    self._cur_rank = paddle.distributed.get_rank()
    self._nranks = paddle.distributed.get_world_size()
    self._saver = DistributedSaver()
    self._logger = get_logger(logging.INFO)

    self._default_strategy = None
    self._orig_main_prog = static.default_main_program()
    self._orig_startup_prog = static.default_startup_program()
    self._orig_dist_context = get_default_distributed_context()
    self._dist_contexts = {}
    self._serial_main_progs = {}
    self._serial_startup_progs = {}
    self._dist_main_progs = defaultdict(dict)  # dist main programs
    self._dist_startup_progs = defaultdict(dict)  # dist startup programs
    self._feed_vars = {}
    self._fetch_vars = {}
def custom_relu_static(func, device, dtype, np_x, use_func=True, test_infer=False):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())
            # in static mode, the data of x is overwritten by out
            out_v = exe.run(static.default_main_program(),
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
def setUp(self):
    self._places = [paddle.CPUPlace()]
    if paddle.device.is_compiled_with_cuda():
        self._places.append(paddle.CUDAPlace(0))
    self._ema_decay = 0.999
    self._param_name = "fc.weight"
    self._train_program = static.Program()
    self._startup_prog = static.Program()

    strategy = paddle.distributed.fleet.DistributedStrategy()
    strategy.without_graph_optimization = True
    paddle.distributed.fleet.init(is_collective=True, strategy=strategy)

    with static.program_guard(self._train_program, self._startup_prog):
        with utils.unique_name.guard():
            data = static.data(name='x', shape=[-1, 5], dtype='float32')
            hidden = static.nn.fc(x=data, size=10, weight_attr=self._param_name)
            cost = paddle.mean(hidden)

            self._test_program = static.default_main_program().clone(
                for_test=True)

            optimizer = paddle.optimizer.Adam(learning_rate=0.001)
            optimizer = paddle.distributed.fleet.distributed_optimizer(
                optimizer, strategy)
            optimizer.minimize(cost)

            self._ema = static.ExponentialMovingAverage(self._ema_decay)
            self._ema.update()
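# A minimal sketch (not part of the original setUp) of how the EMA object
# created above is typically exercised: ExponentialMovingAverage.apply()
# swaps parameters for their moving averages inside a context and restores
# the originals on exit. The method name and feed data are assumptions.
import numpy as np

def run_with_ema(self, place):
    exe = static.Executor(place)
    exe.run(self._startup_prog)
    # ... training steps on self._train_program would go here ...
    with self._ema.apply(exe):
        # inside this context "fc.weight" holds its moving average,
        # so the test program evaluates with smoothed parameters
        exe.run(program=self._test_program,
                feed={'x': np.random.random(size=(8, 5)).astype('float32')})
    # on exiting the context the original parameter values are restored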
def custom_relu_static_pe(func, device, dtype, np_x, use_func=True):
    paddle.enable_static()
    paddle.set_device(device)
    places = static.cpu_places() if device == 'cpu' else static.cuda_places()

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = func(x) if use_func else paddle.nn.functional.relu(x)
            static.append_backward(out)

            exe = static.Executor()
            exe.run(static.default_startup_program())
            # in static mode, the data of x is overwritten by out
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)
            out_v = exe.run(compiled_prog,
                            feed={'X': np_x},
                            fetch_list=[out.name])

    paddle.disable_static()
    return out_v
def custom_relu_static_inference(func, device, np_data, np_label, path_prefix):
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            # simple module
            data = static.data(name='data', shape=[None, 1, 28, 28], dtype='float32')
            label = static.data(name='label', shape=[None, 1], dtype='int64')

            hidden = static.nn.fc(data, size=128)
            hidden = func(hidden)
            hidden = static.nn.fc(hidden, size=128)
            predict = static.nn.fc(hidden, size=10, activation='softmax')
            # predict already went through softmax, so skip it in cross_entropy
            loss = paddle.nn.functional.cross_entropy(
                input=predict, label=label, use_softmax=False)
            avg_loss = paddle.mean(loss)

            opt = paddle.optimizer.SGD(learning_rate=0.1)
            opt.minimize(avg_loss)

            # run startup program
            exe = static.Executor()
            exe.run(static.default_startup_program())
            # train
            for i in range(4):
                avg_loss_v = exe.run(static.default_main_program(),
                                     feed={'data': np_data, 'label': np_label},
                                     fetch_list=[avg_loss])

            # save inference model
            static.save_inference_model(path_prefix, [data], [predict], exe)

            # get train predict value
            predict_v = exe.run(static.default_main_program(),
                                feed={'data': np_data, 'label': np_label},
                                fetch_list=[predict])

    return predict_v
def test_relu2_static(device, dtype):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = librelu2_op.relu2(x)
            static.append_backward(out)
            print(static.default_main_program())

            exe = static.Executor()
            exe.run(static.default_startup_program())

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(static.default_main_program(),
                           feed={'X': x},
                           fetch_list=[out.name])
            print(out)
def check_static_result(self, place):
    import paddle.distributed.fleet.meta_parallel.parallel_layers.random as random
    with static.program_guard(static.Program(), static.Program()):
        res1 = random.determinate_seed('seed0')

        exe = static.Executor(place)
        res_list = [res1]
        for i in range(2):
            out1, = exe.run(static.default_main_program(),
                            fetch_list=res_list)
            self.assertEqual(out1, np.cast['int32'](self.rng1.random()))
def test_relu2_static(device, dtype, use_custom=True):
    paddle.enable_static()
    paddle.set_device(device)

    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name='X', shape=[None, 8], dtype=dtype)
            x.stop_gradient = False
            out = custom_relu_op_rf.relu2(
                x) if use_custom else paddle.nn.functional.relu(x)
            static.append_backward(out)
            print(static.default_main_program())

            places = static.cuda_places()
            print(places)

            exe = static.Executor()
            compiled_prog = static.CompiledProgram(
                static.default_main_program()).with_data_parallel(
                    loss_name=out.name, places=places)

            x = np.random.uniform(-1, 1, [4, 8]).astype(dtype)
            out, = exe.run(compiled_prog,
                           feed={'X': x},
                           fetch_list=[out.name])
            print(out)
def run_inference(drop_last):
    loader = paddle.io.DataLoader.from_generator(feed_list=[x],
                                                 capacity=8,
                                                 drop_last=drop_last)
    loader.set_batch_generator(batch_generator, static.cpu_places())

    exe = static.Executor(paddle.CPUPlace())
    prog = static.CompiledProgram(static.default_main_program())
    prog = prog.with_data_parallel()

    result = []
    for data in loader():
        each_ret, = exe.run(prog, feed=data, fetch_list=[y])
        result.extend(each_ret)
    return result
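# run_inference above assumes that the feed variable `x`, the fetch target
# `y`, and `batch_generator` already exist. A minimal sketch of such
# definitions (names, shapes, and the toy graph are assumptions):
import numpy as np
import paddle
import paddle.static as static

paddle.enable_static()

x = static.data(name='x', shape=[None, 10], dtype='float32')
y = paddle.mean(x)  # any graph that consumes x would do

def batch_generator():
    # yield one list per batch, with one numpy array per feed variable
    for _ in range(8):
        yield [np.random.random([4, 10]).astype('float32')]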
def concat_static(func, dtype, np_inputs, axis_v, with_attr=False):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x1 = static.data(name="x1", shape=[2, 3], dtype=dtype)
            x2 = static.data(name="x2", shape=[2, 3], dtype=dtype)
            if with_attr:
                axis = axis_v
            else:
                axis = paddle.full(shape=[1], dtype='int64', fill_value=axis_v)
            x1.stop_gradient = False
            x2.stop_gradient = False

            total_time = 0
            for i in range(TEST_TIME):
                start = time.time()
                out = func([x1, x2], axis)
                total_time += time.time() - start
            print("- static mode concat time cost: {} s".format(total_time /
                                                                TEST_TIME))

            # mean only supports float, so use sum here
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            if with_attr:
                feed_dict = {
                    "x1": np_inputs[0].astype(dtype),
                    "x2": np_inputs[1].astype(dtype)
                }
            else:
                feed_dict = {
                    "x1": np_inputs[0].astype(dtype),
                    "x2": np_inputs[1].astype(dtype),
                    "axis": axis
                }
            out_v, x1_grad_v, x2_grad_v = exe.run(
                static.default_main_program(),
                feed=feed_dict,
                fetch_list=[out.name, x1.name + "@GRAD", x2.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x1_grad_v, x2_grad_v
def conj_static(func, shape, dtype, np_input):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=shape, dtype=dtype)
            x.stop_gradient = False
            out = func(x)
            sum_out = paddle.sum(out)
            static.append_backward(sum_out)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, x_grad_v = exe.run(static.default_main_program(),
                                      feed={"x": np_input},
                                      fetch_list=[out.name, x.name + "@GRAD"])
    paddle.disable_static()
    return out_v, x_grad_v
def linear_static(func, dtype, np_x, np_weight, np_bias):
    paddle.enable_static()
    paddle.set_device("cpu")
    with static.scope_guard(static.Scope()):
        with static.program_guard(static.Program()):
            x = static.data(name="x", shape=np_x.shape, dtype=dtype)
            weight = static.data(
                name="weight", shape=np_weight.shape, dtype=dtype)
            bias = static.data(name="bias", shape=np_bias.shape, dtype=dtype)
            out = func(x, weight, bias)

            exe = static.Executor()
            exe.run(static.default_startup_program())

            out_v, = exe.run(static.default_main_program(),
                             feed={
                                 "x": np_x.astype(dtype),
                                 "weight": np_weight.astype(dtype),
                                 "bias": np_bias.astype(dtype)
                             },
                             fetch_list=[out.name])
    paddle.disable_static()
    return out_v
import paddle
import paddle.static as static

paddle.enable_static()

startup_prog = static.Program()
main_prog = static.Program()
# program_guard takes the main program first, then the startup program
with static.program_guard(main_prog, startup_prog):
    x = static.data(name='X', shape=[1000, 784], dtype='float32')
    y = static.data(name='Y', shape=[784, 100], dtype='float32')
    z = paddle.matmul(x=x, y=y)

    binary_str = static.default_main_program().desc.serialize_to_string()
    prog_restored = static.default_main_program().parse_from_string(binary_str)

    print(static.default_main_program())
    print(prog_restored)
import os
import paddle
import paddle.static as static

paddle.enable_static()

os.environ['CPU_NUM'] = str(2)
places = static.cpu_places()

data = static.data(name="x", shape=[None, 1], dtype="float32")
hidden = static.nn.fc(x=data, size=10)
loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

build_strategy = static.BuildStrategy()
build_strategy.enable_inplace = True
build_strategy.memory_optimize = True
build_strategy.reduce_strategy = static.BuildStrategy.ReduceStrategy.Reduce
program = static.CompiledProgram(static.default_main_program())
program = program.with_data_parallel(loss_name=loss.name,
                                     build_strategy=build_strategy,
                                     places=places)
image = static.data(name='image', shape=[None, 784], dtype='float32')
label = static.data(name='label', shape=[None, 1], dtype='int64')

# Define DataLoader
loader = paddle.io.DataLoader.from_generator(feed_list=[image, label],
                                             capacity=16,
                                             iterable=ITERABLE)

# Define network
loss = simple_net(image, label)

# Set data source of DataLoader
#
# If DataLoader is iterable, places must be given and their number must match the device count.
#  - If you are using GPU, call `paddle.static.cuda_places()` to get all GPU places.
#  - If you are using CPU, call `paddle.static.cpu_places()` to get all CPU places.
#
# If DataLoader is not iterable, places can be None.
places = static.cuda_places() if USE_GPU else static.cpu_places()
set_data_source(loader, places)

exe = static.Executor(places[0])
exe.run(static.default_startup_program())

prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name)

if loader.iterable:
    train_iterable(exe, prog, loss, loader)
else:
    train_non_iterable(exe, prog, loss, loader)
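# The example above leaves simple_net, set_data_source, train_iterable and
# train_non_iterable undefined. A minimal sketch of what they might look
# like (all names, shapes, and the toy data source are assumptions):
import numpy as np

def simple_net(image, label):
    hidden = static.nn.fc(x=image, size=100, activation='relu')
    logits = static.nn.fc(x=hidden, size=10)
    loss = paddle.nn.functional.cross_entropy(input=logits, label=label)
    return paddle.mean(loss)

def set_data_source(loader, places):
    # a sample generator yields one example per call; DataLoader batches it
    def sample_generator():
        for _ in range(100):
            yield (np.random.random([784]).astype('float32'),
                   np.random.randint(0, 10, [1]).astype('int64'))
    loader.set_sample_generator(sample_generator, batch_size=32, places=places)

def train_iterable(exe, prog, loss, loader):
    # iterable mode: the loop itself produces the feed dict
    for data in loader():
        exe.run(prog, feed=data, fetch_list=[loss])

def train_non_iterable(exe, prog, loss, loader):
    # non-iterable mode: start the loader, run until it signals EOF
    loader.start()
    try:
        while True:
            exe.run(prog, fetch_list=[loss])
    except paddle.fluid.core.EOFException:
        loader.reset()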
import os
import numpy
import paddle
import paddle.static as static

paddle.enable_static()

use_cuda = paddle.device.is_compiled_with_cuda()
place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
exe = static.Executor(place)

# NOTE: If you run the program on CPU, you need to specify CPU_NUM,
# otherwise paddle uses all logical cores as CPU_NUM. In that case,
# the batch size of the input must be greater than CPU_NUM, or the
# program fails with an exception.
if not use_cuda:
    os.environ['CPU_NUM'] = str(2)
    places = static.cpu_places()
else:
    places = static.cuda_places()

data = static.data(name='X', shape=[None, 1], dtype='float32')
hidden = static.nn.fc(x=data, size=10)
loss = paddle.mean(hidden)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe.run(static.default_startup_program())

build_strategy = static.BuildStrategy()
build_strategy.gradient_scale_strategy = \
    static.BuildStrategy.GradientScaleStrategy.Customized
compiled_prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy, places=places)

dev_count = len(places)
x = numpy.random.random(size=(10, 1)).astype('float32')
loss_grad = numpy.ones((dev_count)).astype("float32") * 0.01
loss_grad_name = loss.name + "@GRAD"
loss_data = exe.run(compiled_prog,
                    feed={"X": x, loss_grad_name: loss_grad},
                    fetch_list=[loss.name, loss_grad_name])
# NOTE: If you run the program on CPU, you need to specify CPU_NUM,
# otherwise paddle uses all logical cores as CPU_NUM. In that case,
# the batch size of the input must be greater than CPU_NUM, or the
# program fails with an exception.
if not use_cuda:
    os.environ['CPU_NUM'] = str(2)

exe = static.Executor(place)

data = static.data(name='X', shape=[None, 1], dtype='float32')
hidden = static.nn.fc(x=data, size=10)
loss = paddle.mean(hidden)

test_program = static.default_main_program().clone(for_test=True)
paddle.optimizer.SGD(learning_rate=0.01).minimize(loss)

exe.run(static.default_startup_program())
compiled_train_prog = static.CompiledProgram(
    static.default_main_program()).with_data_parallel(loss_name=loss.name,
                                                      places=parallel_places)
# NOTE: if share_vars_from=compiled_train_prog is not set, the parameters
# used in the test process are different from those used in the train
# process
compiled_test_prog = static.CompiledProgram(test_program).with_data_parallel(
    share_vars_from=compiled_train_prog, places=parallel_places)

train_data = numpy.random.random(size=(10, 1)).astype('float32')
loss_data, = exe.run(compiled_train_prog,
                     feed={"X": train_data},
                     fetch_list=[loss.name])
def sampling_id(probs):
    prog = static.default_main_program()
    sampling_ids = prog.current_block().create_var(name="sampling_ids",
                                                   dtype="int64",
                                                   shape=[-1])
    static.py_func(func=_sampling_id, x=probs, out=sampling_ids)
    return sampling_ids
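# sampling_id relies on a Python callback `_sampling_id` that is not shown
# here. A hypothetical sketch of such a callback: static.py_func hands it
# the input tensor at run time and writes the returned numpy array into
# the `out` variable.
import numpy as np

def _sampling_id(probs):
    probs = np.array(probs)  # the tensor passed by py_func converts to numpy
    # draw one class index per row, weighted by that row's probabilities
    return np.array(
        [np.random.choice(len(p), p=p / p.sum()) for p in probs],
        dtype='int64')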
import paddle
import paddle.static as static

paddle.enable_static()

img = static.data(name='image', shape=[None, 784], dtype='float32')
pred = static.nn.fc(x=img, size=10, activation='relu')
loss = paddle.mean(pred)
# Here we use clone before Momentum
test_program = static.default_main_program().clone(for_test=True)
optimizer = paddle.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
optimizer.minimize(loss)
# coding=utf-8
import numpy
import paddle
import paddle.static as static
import paddle.nn.functional as F

# enable static graph mode
paddle.enable_static()
paddle.set_device('cpu')

# define the network
x = static.data(name='X', shape=[None, 13], dtype='float32')
y = static.data(name='Y', shape=[None, 1], dtype='float32')
predict = static.nn.fc(x=x, size=1)
loss = F.square_error_cost(input=predict, label=y)
avg_loss = paddle.mean(loss)

# prepare the execution environment
exe = static.Executor(paddle.CPUPlace())
exe.run(static.default_startup_program())

# run the network; X and Y must share the same batch size
x = numpy.random.random(size=(8, 13)).astype('float32')
y = numpy.random.random(size=(8, 1)).astype('float32')
loss_data, = exe.run(static.default_main_program(),
                     feed={'X': x, 'Y': y},
                     fetch_list=[avg_loss.name])
import paddle
import paddle.static as static

paddle.enable_static()

prog = static.default_main_program()
num_blocks = prog.num_blocks
print(num_blocks)
import paddle
import paddle.static as static

paddle.enable_static()

x1 = static.data(name='x1', shape=[2, 3], dtype='float32')
x2 = static.data(name='x2', shape=[2, 3], dtype='float32')
x3 = static.data(name='x3', shape=[2, 3], dtype='float32')
out1 = paddle.concat(x=[x1, x2, x3], axis=-1)
print(static.default_main_program())