def optimizer_function(outputs):
    # opt_cls, opt_kws, fc_layers, dense_grad_enabled and png_queue are
    # captured from the enclosing scope.
    with tf.variable_scope("training", reuse=tf.AUTO_REUSE, use_resource=True):
        optimizer = optimizers.SparseOptimizer(opt_cls)(
            learning_rate=outputs['lr'], **opt_kws, name='optimise',
            sparse_layers=fc_layers.values(),
            dense_gradient_condition=(
                outputs['last_itr'] if dense_grad_enabled else None),
            prune_and_grow_outfeed=png_queue)
    return pipelining_ops.OptimizerFunctionOutput(optimizer, outputs['mean_loss'])
def optimize_func(self, giou_loss, conf_loss, prob_loss, lr):
    self.loss = giou_loss + conf_loss + prob_loss
    self.loss = self.loss * self.loss_scaling
    if self.opts["train"]["freeze_pretrain"]:
        # With the freeze_pretrain option, only the newly added parameters
        # are trained; the restored (pretrained) variables stay frozen.
        restored_variables = get_restore_variables(
            self.opts["train"]["load_type"])
        var_list = [
            var for var in tf.trainable_variables()
            if var not in restored_variables
        ]
        logger.info("Variables to be trained:")
        for var in var_list:
            logger.info(var.name)
    else:
        var_list = tf.trainable_variables()
    if self.opts["train"]["optimizer"] == "adamw":
        # AdamW scales its update by the second-moment estimate, so larger
        # gradient magnitudes do not inflate the step size.
        optimizer = AdamWeightDecayOptimizer(
            lr,
            use_moving_avg=self.opts["yolo"]["use_moving_avg"],
            moving_avg_decay=self.opts["yolo"]["moving_avg_decay"],
            darknet_gn=self.opts["yolo"]["darknet_gn"],
            upsample_gn=self.opts["yolo"]["upsample_gn"])
    elif self.opts["train"]["optimizer"] == "momentum":
        optimizer = MomentumOptimizer(
            lr,
            use_moving_avg=self.opts["yolo"]["use_moving_avg"],
            moving_avg_decay=self.opts["yolo"]["moving_avg_decay"],
            loss_scaling=self.loss_scaling,
            momentum=0.9,
            backbone_gn=self.opts["yolo"]["backbone_gn"],
            upsample_gn=self.opts["yolo"]["upsample_gn"])
    else:
        raise ValueError("Unexpected optimizer config: {}".format(
            self.opts["train"]["optimizer"]))
    return pipelining_ops.OptimizerFunctionOutput(
        IPUOptimizer(
            optimizer,
            sharded=False,
            replicas=self.opts["train"]["total_replicas"],
            gradient_accumulation_count=self.opts["train"]["pipeline_depth"],
            pipelining=True,
            var_list=var_list),
        self.loss,
    )
def optimizer_function(loss):
    opt = gradient_descent.GradientDescentOptimizer(0.01)
    return pipelining_ops.OptimizerFunctionOutput(opt, loss)
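For context, here is a minimal sketch of how an optimizer_function like the one above is consumed, assuming the Graphcore IPU extensions to TensorFlow 1.x: pipelining_ops.pipeline calls it with the final stage's outputs and uses the returned OptimizerFunctionOutput to build the weight-update step. The stage bodies, dataset shapes, and accumulation count below are illustrative only, and depending on the SDK version the queues may additionally require a feed_name argument (and gradient_accumulation_count may be named pipeline_depth).

import tensorflow.compat.v1 as tf
from tensorflow.python import ipu
from tensorflow.python.ipu import (ipu_compiler, ipu_infeed_queue,
                                   ipu_outfeed_queue, pipelining_ops)
from tensorflow.python.training import gradient_descent

# Illustrative dataset: batches of 4 feature vectors with integer labels.
dataset = tf.data.Dataset.from_tensors(
    (tf.zeros([4, 8]), tf.zeros([4], dtype=tf.int32))).repeat()
infeed_queue = ipu_infeed_queue.IPUInfeedQueue(dataset)
outfeed_queue = ipu_outfeed_queue.IPUOutfeedQueue()


def stage1(features, labels):
    # First pipeline stage: a small hidden layer.
    hidden = tf.layers.dense(features, 16, activation=tf.nn.relu)
    return hidden, labels


def stage2(hidden, labels):
    # Final stage: the loss it returns is what optimizer_function receives.
    logits = tf.layers.dense(hidden, 10)
    return tf.reduce_mean(
        tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits))


def train_net():
    return pipelining_ops.pipeline(
        computational_stages=[stage1, stage2],
        gradient_accumulation_count=8,
        infeed_queue=infeed_queue,
        outfeed_queue=outfeed_queue,
        optimizer_function=optimizer_function,  # e.g. the SGD variant above
        name="train_pipeline")


# The pipeline op must be compiled for the IPU device.
with ipu.scopes.ipu_scope("/device:IPU:0"):
    train_op = ipu_compiler.compile(train_net, inputs=[])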
def make_pipeline_opt(outputs):
    optimizer = make_optimizer(outputs["learning_rate"], outputs["last_itr"])
    return pipelining_ops.OptimizerFunctionOutput(optimizer,
                                                  outputs["training_loss"])
def optimizer_function(loss):
    # `optimizer` is a free variable captured from the enclosing scope.
    return pipelining_ops.OptimizerFunctionOutput(
        optimizer, loss)
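A minimal sketch of that closure pattern (the factory name make_optimizer_function is hypothetical): the optimizer is constructed once in an enclosing scope, and the returned inner function, which closes over it, is what gets passed as the pipeline's optimizer_function.

from tensorflow.python.ipu import pipelining_ops
from tensorflow.python.training import gradient_descent


def make_optimizer_function(learning_rate):
    # Build the optimizer once; the inner function closes over it.
    optimizer = gradient_descent.GradientDescentOptimizer(learning_rate)

    def optimizer_function(loss):
        return pipelining_ops.OptimizerFunctionOutput(optimizer, loss)

    return optimizer_function


# Usage: pass the closure to pipelining_ops.pipeline(optimizer_function=...).
optimizer_function = make_optimizer_function(0.01)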