def add(self, name, seed):
    """Register a new named CUDA RNG state initialized from ``seed``.

    Args:
        name: unique key for the new tracked state.
        seed: unique integer seed used to generate the state.

    Raises:
        ValueError: if either ``seed`` or ``name`` is already registered.
    """
    # Validate BOTH inputs before mutating anything: the original added the
    # seed to ``self.seeds_`` before checking ``name``, so a duplicate-name
    # error left the tracker inconsistent (seed recorded, state missing).
    if seed in self.seeds_:
        raise ValueError('seed {} already exists'.format(seed))
    if name in self.states_:
        raise ValueError('state {} already exists'.format(name))
    self.seeds_.add(seed)
    # Seed the generator to produce the new state, then restore the caller's
    # RNG state so registering a state has no observable side effect.
    orig_rng_state = paddle.get_cuda_rng_state()
    paddle.seed(seed)
    self.states_[name] = paddle.get_cuda_rng_state()
    paddle.set_cuda_rng_state(orig_rng_state)
def rng_state(self, name=MODEL_PARALLEL_RNG):
    """Context manager: run the body under the tracked state ``name``.

    On exit the (possibly advanced) state is written back to the tracker
    and the caller's CUDA RNG state is restored.

    Raises:
        ValueError: if ``name`` was never registered.
    """
    if name not in self.states_:
        raise ValueError('state {} does not exist'.format(name))
    outer_state = paddle.get_cuda_rng_state()
    paddle.set_cuda_rng_state(self.states_[name])
    try:
        yield
    finally:
        # Persist whatever the tracked state advanced to, then give the
        # caller back the state it entered with.
        self.states_[name] = paddle.get_cuda_rng_state()
        paddle.set_cuda_rng_state(outer_state)
def swith_rng_state(rng_state):
    """Context manager: temporarily install ``rng_state`` as the CUDA RNG
    state, restoring the caller's state on exit.

    NOTE(review): 'swith' is the established public name — kept as-is.
    """
    saved_state = paddle.get_cuda_rng_state()
    paddle.set_cuda_rng_state(rng_state)
    try:
        yield
    finally:
        paddle.set_cuda_rng_state(saved_state)
def _swith_rng_state_tracker(rng_state, tracker):
    """Context manager: swap in both a CUDA RNG state and a tracker-state
    snapshot for the duration of the body, then restore the originals."""
    saved_state = paddle.get_cuda_rng_state()
    saved_tracker = get_rng_state_tracker().get_states_tracker()
    paddle.set_cuda_rng_state(rng_state)
    get_rng_state_tracker().set_states_tracker(tracker)
    try:
        yield
    finally:
        # Restore in the same order the originals were captured.
        paddle.set_cuda_rng_state(saved_state)
        get_rng_state_tracker().set_states_tracker(saved_tracker)
def forward(ctx, run_function, preserve_rng_state, *args):
    """Run ``run_function`` under ``no_grad``, stashing on ``ctx`` everything
    backward() needs to recompute the activations.

    Args:
        ctx: PyLayer context object used to carry state to backward().
        run_function: callable whose forward is re-executed in backward().
        preserve_rng_state: when True, capture the CUDA RNG state so the
            recomputation replays identical randomness (e.g. dropout).
        *args: inputs forwarded to ``run_function``; tensor args are saved
            for backward, non-tensor args are kept inline.

    Raises:
        RuntimeError: if RNG preservation is requested on a non-GPU device.
        ValueError: on an unrecognized AMP level.
    """
    if framework._dygraph_tracer()._has_grad:
        check_recompute_necessary(args)

    # store for recomputing
    ctx.run_function = run_function
    ctx.preserve_rng_state = preserve_rng_state

    # NOTE the number of outputs of backward() should be equal to the number of tensors in forward()'s input
    # the order of tensors in backward()'s output should be the same as tensors in forward()'s input
    # None tensor inputs will be filtered in backward inputs.

    # save input for backward
    ctx.inputs = []
    ctx.tensor_indices = []
    tensor_inputs = []
    for i, arg in enumerate(args):
        if paddle.is_tensor(arg):
            tensor_inputs.append(arg)
            ctx.tensor_indices.append(i)
            # Placeholder keeps positional alignment with *args.
            ctx.inputs.append(None)
        else:
            ctx.inputs.append(arg)
    ctx.save_for_backward(*tensor_inputs)

    # NOTE recompute with restored RNG only supports one scenario: one
    # process per CUDA GPU. One process with multiple GPUs and mixed
    # GPU/CPU scenarios are not supported.
    if ctx.preserve_rng_state:
        cur_device = paddle.get_device()
        if 'gpu:' not in cur_device:
            # Fixed typo/grammar in the user-facing message
            # (was: "RNG perserve is not support current device").
            raise RuntimeError(
                "Recompute with RNG preserve is not supported on current device: {}.".
                format(cur_device))
        ctx.fw_cuda_rng_state = paddle.get_cuda_rng_state()

    # TODO support AMP
    tracer = framework._dygraph_tracer()
    ctx.is_fw_autocast = False if tracer._amp_level == core.AmpLevel.O0 else True
    if tracer._amp_level == core.AmpLevel.O2:
        ctx.amp_level = 'O2'
    elif tracer._amp_level in (core.AmpLevel.O1, core.AmpLevel.O0):
        ctx.amp_level = 'O1'
    else:
        raise ValueError("unsupported amp level: {}".format(
            tracer._amp_level))
    ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

    with paddle.no_grad():
        outputs = run_function(*args)
    return outputs
def test_gen_dropout_dygraph(self):
    """Restoring the CUDA RNG state must reproduce the same dropout output
    over the same sequence of uniform samples."""
    gen = paddle.seed(12343)
    fluid.enable_dygraph()
    gen.manual_seed(111111111)
    saved_state = paddle.get_cuda_rng_state()

    def _uniform():
        # One [2, 10] float32 sample in [0, 1).
        return fluid.layers.uniform_random(
            [2, 10], dtype="float32", min=0.0, max=1.0)

    first, second, third = _uniform(), _uniform(), _uniform()
    print("x: {}".format(first.numpy()))
    print("x_again: {}".format(second.numpy()))
    y = fluid.layers.dropout(first + second + third, 0.5)

    # Replay: same state -> same three uniforms -> same dropout mask.
    paddle.set_cuda_rng_state(saved_state)
    r1, r2, r3 = _uniform(), _uniform(), _uniform()
    y1 = fluid.layers.dropout(r1 + r2 + r3, 0.5)

    y_np = y.numpy()
    y1_np = y1.numpy()
    if core.is_compiled_with_cuda():
        print(">>>>>>> dropout dygraph >>>>>>>")
        self.assertTrue(np.allclose(y_np, y1_np))
def test_gen_TruncatedNormal_initializer(self):
    """Re-seeding must make TruncatedNormal-initialized fc weights
    reproducible across two static-graph runs."""
    fluid.disable_dygraph()
    # Dropped the unused locals `gen` (binding of paddle.seed's return) and
    # `cur_state` (dead read of get_cuda_rng_state); the seeding side
    # effect is what the test depends on.
    paddle.seed(123123143)
    startup_program = fluid.Program()
    train_program = fluid.Program()
    with fluid.program_guard(train_program, startup_program):
        # example 1:
        # attr shape is a list which doesn't contain tensor Variable.
        x = fluid.layers.uniform_random(shape=[2, 10])
        result_1 = fluid.layers.fc(
            input=x,
            size=10,
            param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))
        result_2 = fluid.layers.fc(
            input=x,
            size=10,
            param_attr=fluid.initializer.TruncatedNormal(loc=0.0, scale=2.0))

        exe = fluid.Executor(fluid.CPUPlace())
        exe.run(startup_program)
        out1 = exe.run(train_program, feed={}, fetch_list=[result_1, result_2])

    # Second run from the same seed must reproduce the first.
    paddle.seed(123123143)
    with fluid.program_guard(train_program, startup_program):
        exe.run(startup_program)
        out2 = exe.run(train_program, feed={}, fetch_list=[result_1, result_2])

    out1_res1 = np.array(out1[0])
    out1_res2 = np.array(out1[1])
    out2_res1 = np.array(out2[0])
    out2_res2 = np.array(out2[1])

    if core.is_compiled_with_cuda():
        print(">>>>>>> truncated normal static >>>>>>>")
        # Same seed -> identical weights; the two fc layers themselves
        # must still differ from each other.
        self.assertTrue(np.allclose(out1_res1, out2_res1))
        self.assertTrue(np.allclose(out1_res2, out2_res2))
        self.assertTrue(not np.allclose(out1_res2, out1_res1))
def main():
    """Entry point: parse args, persist the run configuration, seed RNGs,
    and launch the training worker(s)."""
    args = parser.parse_args()
    os.makedirs(args.save, exist_ok=True)

    # save the configurations
    stamp = time.strftime('%b-%d-%Y_%H%M', time.localtime())
    args_file = os.path.join(args.save, 'args-{}.txt'.format(stamp))
    with open(args_file, 'w') as fh:
        json.dump(args.__dict__, fh, indent=2)
    print('Start at : {}'.format(stamp))

    # show non-default args
    default_args = parser.parse_args([args.data, args.save])
    for key, value in args.__dict__.items():
        if value != default_args.__dict__[key]:
            print('{}: {} | default ({})'.format(
                key, value, default_args.__dict__[key]))

    if args.seed is not None:
        random.seed(args.seed)
        paddle.seed(args.seed)
        warnings.warn('You have chosen to seed training. '
                      'This will turn on the CUDNN deterministic setting, '
                      'which can slow down your training considerably! '
                      'You may see unexpected behavior when restarting '
                      'from checkpoints.')

    if args.gpu is not None:
        warnings.warn('You have chosen a specific GPU. This will completely '
                      'disable data parallelism.')

    # NOTE(review): one CUDA RNG state per visible GPU — used here as a
    # device count; confirm this assumption holds for this paddle version.
    ngpus_per_node = len(paddle.get_cuda_rng_state())
    print('ngpus per node is {}'.format(ngpus_per_node))

    if args.distributed:
        dist.spawn(main_worker,
                   nprocs=ngpus_per_node,
                   args=(args.gpu, ngpus_per_node, args),
                   started_port=6671)
    else:
        # Simply call main_worker function
        main_worker(args.gpu, ngpus_per_node, args)
def test_generator_gaussian_random_dygraph(self):
    """Test Generator seed."""
    fluid.enable_dygraph()

    paddle.seed(12312321111)
    x = fluid.layers.gaussian_random([120], dtype="float32")
    checkpoint = paddle.get_cuda_rng_state()
    x1 = fluid.layers.gaussian_random([120], dtype="float32")
    # Restoring the checkpoint must replay x1; reseeding must replay x.
    paddle.set_cuda_rng_state(checkpoint)
    x2 = fluid.layers.gaussian_random([120], dtype="float32")
    paddle.seed(12312321111)
    x3 = fluid.layers.gaussian_random([120], dtype="float32")

    x_np, x1_np, x2_np, x3_np = (t.numpy() for t in (x, x1, x2, x3))
    if core.is_compiled_with_cuda():
        print(">>>>>>> gaussian random dygraph >>>>>>>")
        self.assertTrue(np.allclose(x1_np, x2_np))
        self.assertTrue(np.allclose(x_np, x3_np))
def forward(ctx, run_function, all_outputs, *args):
    """Run ``run_function`` under ``no_grad``, stashing inputs, RNG states
    and AMP config on ``ctx`` so backward() can recompute activations.

    Tensor inputs may be partitioned and/or offloaded to CPU after the
    forward pass (controlled by the module-level ``_recompute_partition``
    and ``_recompute_offload`` flags). Outputs are also appended to
    ``all_outputs`` for the caller.

    Raises:
        RuntimeError: when not running on a GPU device.
        ValueError: on an unrecognized AMP level.
    """
    check_recompute_necessary(args)

    # store for recomputing
    ctx.run_function = run_function

    # store the rng states
    ctx.fwd_cuda_rng_state = paddle.get_cuda_rng_state()
    ctx.fwd_cuda_rng_state_tracker = get_rng_state_tracker(
    ).get_states_tracker()

    # save input for backward
    ctx.inputs = []
    ctx.tensor_indices = []
    ctx.tensor_shapes = []
    tensor_inputs = []

    cur_device = paddle.get_device()
    # Reuse the device string just fetched (the original queried
    # paddle.get_device() a second time inside the check) and raise
    # explicitly instead of `assert`, which is stripped under `python -O`.
    # This also matches the RuntimeError used by the non-pipeline recompute
    # forward for the same condition. Message grammar fixed.
    if 'gpu:' not in cur_device:
        raise RuntimeError(
            "Recompute with RNG is not supported on current device: {}.".
            format(cur_device))

    # TODO support AMP
    tracer = framework._dygraph_tracer()
    ctx.is_fw_autocast = False if tracer._amp_level == core.AmpLevel.O0 else True
    if tracer._amp_level == core.AmpLevel.O2:
        ctx.amp_level = 'O2'
    elif tracer._amp_level in (core.AmpLevel.O1, core.AmpLevel.O0):
        ctx.amp_level = 'O1'
    else:
        raise ValueError("unsupported amp level: {}".format(
            tracer._amp_level))
    ctx.amp_white_list, ctx.amp_black_list = tracer._get_amp_op_list()

    with paddle.no_grad():
        outputs = run_function(*args)

    for i, arg in enumerate(args):
        if paddle.is_tensor(arg):
            # Preserve stop_gradient across the partition/offload copies.
            state = arg.stop_gradient
            if _recompute_partition:
                ctx.tensor_shapes.append(arg.shape)
                partition = _split_activation(arg.detach()).clone()
                # TODO(shenliang03) not use calculate stream to D2H to speed
                arg = partition.cpu() if _recompute_offload else partition
            else:
                arg = arg.cpu() if _recompute_offload else arg
            arg.stop_gradient = state
            tensor_inputs.append(arg)
            ctx.tensor_indices.append(i)
            # Placeholder keeps positional alignment with *args.
            ctx.inputs.append(None)
        else:
            ctx.inputs.append(arg)
    ctx.save_for_backward(*tensor_inputs)

    if paddle.is_tensor(outputs):
        all_outputs += [outputs]
        return outputs
    else:
        all_outputs += outputs
        return tuple(outputs)
def get_rng_state():
    """Return the current RNG state.

    Thin wrapper delegating to ``get_cuda_rng_state`` — presumably paddle's
    CUDA RNG state getter imported at module level; verify against the
    file's imports.
    """
    return get_cuda_rng_state()