# limitations under the License.
import numbers
import unittest

import numpy as np
import paddle
import scipy.special
import scipy.stats
from paddle.distribution import kl

import config
import mock_data as mock
import parameterize as param

paddle.set_default_dtype('float64')


@param.place(config.DEVICES)
@param.parameterize_cls((param.TEST_CASE_NAME, 'a1', 'b1', 'a2', 'b2'), [
    ('test_regular_input', 6.0 * np.random.random((4, 5)) + 1e-4,
     6.0 * np.random.random((4, 5)) + 1e-4,
     6.0 * np.random.random((4, 5)) + 1e-4,
     6.0 * np.random.random((4, 5)) + 1e-4),
])
class TestKLBetaBeta(unittest.TestCase):

    def setUp(self):
        self.p = paddle.distribution.Beta(paddle.to_tensor(self.a1),
                                          paddle.to_tensor(self.b1))
        self.q = paddle.distribution.Beta(paddle.to_tensor(self.a2),
                                          paddle.to_tensor(self.b2))
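    # A minimal sketch, not part of the original test file: one plausible test
    # body that checks paddle.distribution.kl.kl_divergence against the
    # closed-form KL(Beta(a1, b1) || Beta(a2, b2)) evaluated with scipy. The
    # method name and the tolerance are assumptions, not values taken from the
    # original suite.
    def test_kl_divergence(self):

        def scipy_kl_beta_beta(a1, b1, a2, b2):
            # KL = ln B(a2, b2) - ln B(a1, b1) + (a1 - a2) psi(a1)
            #      + (b1 - b2) psi(b1) + (a2 - a1 + b2 - b1) psi(a1 + b1)
            return (scipy.special.betaln(a2, b2) -
                    scipy.special.betaln(a1, b1) +
                    (a1 - a2) * scipy.special.digamma(a1) +
                    (b1 - b2) * scipy.special.digamma(b1) +
                    (a2 - a1 + b2 - b1) * scipy.special.digamma(a1 + b1))

        np.testing.assert_allclose(
            kl.kl_divergence(self.p, self.q).numpy(),
            scipy_kl_beta_beta(self.a1, self.b1, self.a2, self.b2),
            rtol=1e-10)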
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import paddle

paddle.set_default_dtype("float64")
from paddle.fluid.layers import sequence_mask

paddle.enable_static()

import numpy as np
import unittest

from convert import convert_params_for_net_static
from rnn_numpy import SimpleRNN, LSTM, GRU

bidirectional_list = ["bidirectional", "bidirect"]


class TestSimpleRNN(unittest.TestCase):

    def __init__(self, time_major=True, direction="forward", place="cpu"):
        super(TestSimpleRNN, self).__init__("runTest")
def setUp(self): paddle.set_default_dtype("float64") self.places = [fluid.CPUPlace()] if core.is_compiled_with_cuda(): self.places.append(fluid.CUDAPlace(0))
def tearDown(self):
    paddle.set_default_dtype(self.original_dtype)
def setUp(self):
    self.original_dtype = paddle.get_default_dtype()
    paddle.set_default_dtype("float32")
    self.places = [fluid.CPUPlace()]
    if core.is_compiled_with_mlu():
        self.places.append(fluid.MLUPlace(0))
def __init__(self,
             num_features,
             T_max,
             momentum=0.1,
             epsilon=1e-5,
             weight_attr=None,
             bias_attr=None,
             data_format='NCHW',
             name=None):
    super(Recurrent_BatchNorm3D, self).__init__()
    self._num_features = num_features
    self.T_max = T_max
    self._weight_attr = weight_attr
    self._bias_attr = bias_attr
    if paddle.get_default_dtype() == 'float16':
        paddle.set_default_dtype('float32')

    param_shape = [num_features]

    # Create the affine parameters. Passing weight_attr/bias_attr as False
    # freezes them; a zero learning rate in the attr also freezes them.
    if weight_attr is False:
        self.weight = self.create_parameter(
            attr=None,
            shape=param_shape,
            default_initializer=paddle.nn.initializer.Constant(0.1))
        self.weight.stop_gradient = True
    else:
        self.weight = self.create_parameter(
            attr=self._weight_attr,
            shape=param_shape,
            default_initializer=paddle.nn.initializer.Constant(0.1))
        self.weight.stop_gradient = (
            self._weight_attr is not None
            and self._weight_attr.learning_rate == 0.)

    if bias_attr is False:
        self.bias = self.create_parameter(
            attr=None,
            shape=param_shape,
            default_initializer=paddle.nn.initializer.Constant(0.0),
            is_bias=True)
        self.bias.stop_gradient = True
    else:
        self.bias = self.create_parameter(attr=self._bias_attr,
                                          shape=param_shape,
                                          is_bias=True)
        self.bias.stop_gradient = (
            self._bias_attr is not None
            and self._bias_attr.learning_rate == 0.)

    # Register one pair of running-statistics buffers per time step.
    # (An earlier, commented-out revision created them as non-trainable
    # parameters via ParamAttr, with the mean initialized to 0.0 and the
    # variance to 1.0.)
    for i in range(self.T_max):
        self.register_buffer('running_mean_{}'.format(i),
                             paddle.zeros(param_shape))
        self.register_buffer('running_var_{}'.format(i),
                             paddle.zeros(param_shape))

    self.reset_parameters()
    self._data_format = data_format
    self._in_place = False
    self._momentum = momentum
    self._epsilon = epsilon
    self._fuse_with_relu = False
    self._name = name
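# A minimal sketch, not part of the original class: one plausible forward() for
# recurrent batch norm, which looks up the running statistics registered for the
# requested time step (reusing the last pair for time >= T_max) and applies
# paddle.nn.functional.batch_norm. The (input, time) signature and the momentum
# conversion are assumptions.
def forward(self, input, time):
    # Clamp the index so steps beyond T_max share the last set of statistics.
    time = min(time, self.T_max - 1)
    running_mean = getattr(self, 'running_mean_{}'.format(time))
    running_var = getattr(self, 'running_var_{}'.format(time))
    return paddle.nn.functional.batch_norm(
        input,
        running_mean,
        running_var,
        weight=self.weight,
        bias=self.bias,
        training=self.training,
        # Paddle's momentum convention is the complement of the PyTorch-style
        # value stored in self._momentum.
        momentum=1.0 - self._momentum,
        epsilon=self._epsilon,
        data_format=self._data_format)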
def train(args):
    rank = int(os.getenv("PADDLE_TRAINER_ID", 0))
    world_size = int(os.getenv("PADDLE_TRAINERS_NUM", 1))
    gpu_id = int(os.getenv("FLAGS_selected_gpus", 0))
    place = paddle.CUDAPlace(gpu_id)

    RELATED_FLAGS_SETTING = {}
    if args.seed == 0:
        RELATED_FLAGS_SETTING['FLAGS_cudnn_deterministic'] = 1
        RELATED_FLAGS_SETTING['FLAGS_benchmark'] = 1
        args.num_workers = 0
    else:
        # args.seed is None or args.seed != 0
        RELATED_FLAGS_SETTING['FLAGS_cudnn_exhaustive_search'] = 1
        RELATED_FLAGS_SETTING['FLAGS_cudnn_batchnorm_spatial_persistent'] = 1
        RELATED_FLAGS_SETTING['FLAGS_max_inplace_grad_add'] = 8
    paddle.fluid.set_flags(RELATED_FLAGS_SETTING)

    if args.seed is not None:
        args.seed = args.seed + rank
        paddle.seed(args.seed)
        np.random.seed(args.seed)
        random.seed(args.seed)

    if world_size > 1:
        import paddle.distributed.fleet as fleet

        strategy = fleet.DistributedStrategy()
        strategy.without_graph_optimization = True
        fleet.init(is_collective=True, strategy=strategy)

    if args.use_synthetic_dataset:
        trainset = datasets.SyntheticDataset(args.num_classes, fp16=args.fp16)
    else:
        trainset = eval("datasets.{}".format(args.dataset_type))(
            root_dir=args.data_dir,
            label_file=args.label_file,
            rank=rank,
            world_size=world_size,
            fp16=args.fp16,
            is_bin=args.is_bin,
            seed=args.seed)

    num_image = trainset.total_num_samples
    total_batch_size = args.batch_size * world_size
    steps_per_epoch = num_image // total_batch_size
    if args.train_unit == 'epoch':
        warmup_steps = steps_per_epoch * args.warmup_num
        total_steps = steps_per_epoch * args.train_num
        decay_steps = [x * steps_per_epoch for x in args.decay_boundaries]
        total_epoch = args.train_num
    else:
        warmup_steps = args.warmup_num
        total_steps = args.train_num
        decay_steps = [x for x in args.decay_boundaries]
        total_epoch = (total_steps + steps_per_epoch - 1) // steps_per_epoch

    logging.info('world_size: {}'.format(world_size))
    logging.info('total_batch_size: {}'.format(total_batch_size))
    logging.info('warmup_steps: {}'.format(warmup_steps))
    logging.info('steps_per_epoch: {}'.format(steps_per_epoch))
    logging.info('total_steps: {}'.format(total_steps))
    logging.info('total_epoch: {}'.format(total_epoch))
    logging.info('decay_steps: {}'.format(decay_steps))

    base_lr = total_batch_size * args.lr / 512
    lr_scheduler = paddle.optimizer.lr.PiecewiseDecay(
        boundaries=decay_steps,
        values=[
            base_lr * (args.lr_decay**i) for i in range(len(decay_steps) + 1)
        ])
    if warmup_steps > 0:
        lr_scheduler = paddle.optimizer.lr.LinearWarmup(
            lr_scheduler, warmup_steps, 0, base_lr)

    if args.fp16:
        paddle.set_default_dtype("float16")

    margin_loss_params = eval("losses.{}".format(args.loss))()
    backbone = eval("backbones.{}".format(args.backbone))(
        num_features=args.embedding_size,
        dropout=args.dropout,
        data_format=args.data_format)
    classifier = eval("classifiers.{}".format(args.classifier))(
        rank=rank,
        world_size=world_size,
        num_classes=args.num_classes,
        margin1=margin_loss_params.margin1,
        margin2=margin_loss_params.margin2,
        margin3=margin_loss_params.margin3,
        scale=margin_loss_params.scale,
        sample_ratio=args.sample_ratio,
        embedding_size=args.embedding_size,
        fp16=args.fp16,
        numpy_init=args.lsc_init_from_numpy,
    )

    backbone.train()
    classifier.train()

    optimizer = HybridOptimizer(parameters=[{
        'params': backbone.parameters(),
    }, {
        'params': classifier.parameters(),
    }],
                                learning_rate=lr_scheduler,
                                momentum=args.momentum,
                                weight_decay=args.weight_decay)

    if args.do_validation_while_train:
        callback_verification = CallBackVerification(
            args.validation_interval_step,
            rank,
            world_size,
            args.batch_size,
            args.val_targets,
            args.data_dir,
            fp16=args.fp16,
        )
    callback_logging = CallBackLogging(args.log_interval_step, rank,
                                       world_size, total_steps,
                                       args.batch_size)

    checkpoint = Checkpoint(
        rank=rank,
        world_size=world_size,
        embedding_size=args.embedding_size,
        num_classes=args.num_classes,
        model_save_dir=os.path.join(args.output, args.backbone),
        checkpoint_dir=args.checkpoint_dir,
        max_num_last_checkpoint=args.max_num_last_checkpoint)

    start_epoch = 0
    global_step = 0
    loss_avg = AverageMeter()
    if args.resume:
        extra_info = checkpoint.load(backbone,
                                     classifier,
                                     optimizer,
                                     for_train=True)
        start_epoch = extra_info['epoch'] + 1
        lr_state = extra_info['lr_state']
        # Here last_epoch means last_step for PiecewiseDecay,
        # since we always use a step-style lr_scheduler.
        global_step = lr_state['last_epoch']
        lr_scheduler.set_state_dict(lr_state)

    batch_sampler = eval("paddle.io.{}".format(args.batch_sampler))(
        dataset=trainset,
        batch_size=args.batch_size,
        shuffle=True,
        drop_last=True)
    train_loader = paddle.io.DataLoader(trainset,
                                        places=place,
                                        num_workers=args.num_workers,
                                        batch_sampler=batch_sampler)

    scaler = HybridParallelGradScaler(
        enable=args.fp16,
        init_loss_scaling=args.init_loss_scaling,
        incr_ratio=args.incr_ratio,
        decr_ratio=args.decr_ratio,
        incr_every_n_steps=args.incr_every_n_steps,
        decr_every_n_nan_or_inf=args.decr_every_n_nan_or_inf,
        use_dynamic_loss_scaling=args.use_dynamic_loss_scaling,
        grad_norm_clip=args.grad_norm_clip,
        grad_norm_clip_max=args.grad_norm_clip_max,
        world_size=world_size,
    )
    scaler.sync_params_buffers(backbone)

    for epoch in range(start_epoch, total_epoch):
        for step, (img, label) in enumerate(train_loader):
            global_step += 1
            with paddle.amp.auto_cast(enable=args.fp16):
                features = backbone(img)
                loss_v = classifier(features, label)

            scaler.scale(loss_v).backward()
            classifier.set_attr_for_sparse_momentum()
            scaler.sync_gradient_and_unscale(optimizer)
            scaler.step(optimizer)
            optimizer.clear_grad()

            lr_value = optimizer.get_lr()
            loss_avg.update(loss_v.item(), 1)
            callback_logging(global_step, loss_avg, epoch, lr_value)
            if args.do_validation_while_train:
                best_metric = callback_verification(global_step, backbone)
                if best_metric is not None and len(best_metric) > 0:
                    for ver_dataset in best_metric:
                        checkpoint.save(backbone,
                                        classifier,
                                        optimizer,
                                        epoch=epoch,
                                        for_train=True,
                                        best_metric=best_metric[ver_dataset])
            lr_scheduler.step()

            if global_step >= total_steps:
                break

        sys.stdout.flush()
        checkpoint.save(backbone,
                        classifier,
                        optimizer,
                        epoch=epoch,
                        for_train=True)
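# A small, self-contained illustration, not part of the original script, of the
# learning-rate schedule assembled in train(): the base lr is linearly scaled by
# the global batch size (reference batch size 512), decayed piecewise at the
# given boundaries, and wrapped in a linear warmup. All numbers below are
# made-up example arguments.
import paddle

total_batch_size = 512            # e.g. args.batch_size 128 * world_size 4
lr, lr_decay = 0.1, 0.1           # args.lr, args.lr_decay
decay_steps = [10000, 20000]      # step-style args.decay_boundaries
warmup_steps = 1000               # step-style args.warmup_num

base_lr = total_batch_size * lr / 512
values = [base_lr * (lr_decay**i) for i in range(len(decay_steps) + 1)]
# base_lr == 0.1; values is approximately [0.1, 0.01, 0.001]

lr_scheduler = paddle.optimizer.lr.PiecewiseDecay(boundaries=decay_steps,
                                                  values=values)
lr_scheduler = paddle.optimizer.lr.LinearWarmup(lr_scheduler, warmup_steps,
                                                0.0, base_lr)

for step in range(5):
    # get_lr() reports the current value; step() advances the schedule, so the
    # lr rises linearly from 0.0 toward base_lr during the warmup phase.
    print(step, lr_scheduler.get_lr())
    lr_scheduler.step()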