def record_to_tensorboard(loss):
    """Log *loss* to tensorboard and periodically checkpoint the network.

    Uses the module-level ``backward_counter`` as the tensorboard global
    step and increments it once per call.  Every ``record_step`` calls the
    network parameters are saved under ``backup_dir`` with a filename
    suffixed by the checkpoint index.
    """
    global backward_counter
    n = ['score', 'class']
    yolo_gluon.record_loss(loss, n, summary_writer, step=backward_counter)
    backward_counter += 1
    if backward_counter % record_step == 0:
        # BUG FIX: the original built the path as args.version + '_%d' with
        # no '%' substitution, so every checkpoint was written to the same
        # literal file '<version>_%d', overwriting the previous one.  Format
        # the index exactly like the sibling save logic elsewhere in this
        # file (exp + '_%d' % idx).
        idx = backward_counter // record_step
        save_path = os.path.join(backup_dir, args.version + '_%d' % idx)
        net.collect_params().save(save_path)
def _record_to_tensorboard_and_save(self, loss):
    """Bump the step counter, then log / validate / checkpoint on schedule.

    Every 10 backward steps the loss is written to tensorboard; every
    ``valid_step`` steps a validation IoU pass runs; every ``record_step``
    steps the network parameters are saved to ``backup_dir``.
    """
    self.backward_counter += 1  # do not save at first step
    step = self.backward_counter

    if step % 10 == 0:
        yolo_gluon.record_loss(
            loss, self.loss_name, self.sw, step=step, exp=self.exp)

    if step % self.valid_step == 0:
        self._valid_iou()

    if step % self.record_step == 0:
        idx = step // self.record_step
        save_path = os.path.join(self.backup_dir, self.exp + '_%d' % idx)
        self.net.collect_params().save(save_path)
def _train_batch_LP(self, bxs, bys):
    """Run one multi-GPU training step for the license-plate (LP) head.

    ``bxs``/``bys`` are per-GPU lists of input batches and targets (one
    entry per context in ``self.ctx``).  Each GPU does a forward pass and
    backward pass inside the autograd record scope; a single optimizer
    step then applies the accumulated gradients.
    """
    # NOTE(review): all_gpu_loss is allocated but never assigned below —
    # looks like leftover scaffolding; confirm before removing.
    all_gpu_loss = [None] * len(self.ctx)
    with mxnet.autograd.record():
        for gpu_i, (bx, by) in enumerate(zip(bxs, bys)):
            # ---------- Forward ---------- #
            by_ = self.net(bx)
            by_ = self.slice_out(by_)
            # Target/mask construction must not be traced by autograd.
            with mxnet.autograd.pause():
                by, mask = self._loss_mask_LP(by, gpu_i)
                ones = nd.ones_like(mask)
                # Per-element sample weight: positive cells get
                # LP_positive_weight, the rest LP_negative_weight.
                s_weight = nd.where(
                    mask > 0,
                    ones * self.LP_positive_weight,
                    ones * self.LP_negative_weight,
                    ctx=self.ctx[gpu_i])
            loss = self._get_loss_LP(by_, by, s_weight, mask)
            # ---------- Backward ---------- #
            sum(loss).backward()
    # ---------- Update Weights ---------- #
    # One step for the whole batch, after every GPU has run backward.
    self.trainer.step(self.batch_size)
    self.backward_counter += 1  # do not save at first step
    # ---------- Record Loss ---------- #
    if self.record and self.backward_counter % 10 == 0:
        # only record last gpu loss
        yolo_gluon.record_loss(loss, self.loss_name, self.sw,
                               step=self.backward_counter, exp=self.exp)
    # ---------- Save Weights ---------- #
    if self.backward_counter % self.record_step == 0:
        idx = self.backward_counter // self.record_step
        path = os.path.join(self.backup_dir, self.exp + '_%d' % idx)
        self.net.collect_params().save(path)