Example #1
0
    def forward(self,
                inputs,
                length,
                initial_inputs=None,
                static_inputs=None,
                initial_seq_inputs=None):
        """Unroll the cell over `length` timesteps, feeding cell outputs back in autoregressively.

        :param inputs: dict of tensors sliced by time; time is the second dimension
        :param length: rollout length (number of cell invocations)
        :param initial_inputs: dict, not sliced; overridden by cell output after the first step
        :param initial_seq_inputs: dict of partial sequences; cell output is used after these end
        :param static_inputs: dict, not sliced; never overridden by cell output
        :return: cell outputs stacked along time (dim=1)
        """
        # NOTE! Unrolling the cell directly will result in crash as the hidden state is not being reset
        # Use this function or CustomLSTMCell.unroll if needed
        # Use a None sentinel instead of a mutable default dict (shared across calls).
        if initial_seq_inputs is None:
            initial_seq_inputs = {}
        initial_inputs, static_inputs = self.assert_begin(
            inputs, initial_inputs, static_inputs)

        step_inputs = initial_inputs.copy()
        step_inputs.update(static_inputs)
        lstm_outputs = []
        for t in range(length):
            step_inputs.update(map_dict(lambda x: x[:, t], inputs))  # Slicing
            # Use provided partial sequences while they still cover timestep t.
            step_inputs.update(
                map_dict(
                    lambda x: x[:, t],
                    filter_dict(lambda x: t < x[1].shape[1],
                                initial_seq_inputs)))
            output = self.cell(**step_inputs)

            self.assert_post(output, inputs, initial_inputs, static_inputs)
            # TODO Test what signature does with *args
            # Feed back only the outputs that the cell's forward() actually accepts.
            autoregressive_output = subdict(
                output,
                output.keys() & signature(self.cell.forward).parameters)
            step_inputs.update(autoregressive_output)
            lstm_outputs.append(output)

        lstm_outputs = rmap_list(lambda *x: stack(x, dim=1), lstm_outputs)

        self.cell.reset()
        return lstm_outputs
Example #2
0
File: core.py  Project: orybkin/blox-nn
 def recursive_map(tensors):
     """Apply the closed-over `fn` elementwise across matching nested structures.

     `tensors` is a sequence of structurally identical containers: lists/tuples
     are transposed and recursed into, lists of dicts go through
     listdict2dictlist, TENSOR leaves are passed to fn directly, and objects
     exposing to_dict() are round-tripped through their dict form.

     :param tensors: sequence of structurally identical nested containers
     :return: one structure of the same shape with fn applied at the leaves
     """
     if tensors is None:
         return tensors
     elif isinstance(tensors[0], (list, tuple)):
         # Transpose the outer sequence so matching elements line up, then recurse.
         return type(tensors[0])(map(recursive_map, zip(*tensors)))
     elif isinstance(tensors[0], dict):
         return map_dict(recursive_map, listdict2dictlist(tensors))
     elif isinstance(tensors[0], TENSOR):
         return fn(*tensors)
     elif hasattr(tensors[0], 'to_dict'):
         # Round-trip custom containers through their dict representation,
         # then rebuild the original type from the mapped result.
         old_type = type(tensors[0])
         tensors = type(tensors)(map(lambda x: x.to_dict(), tensors))
         return old_type(**map_dict(recursive_map, listdict2dictlist(tensors)))
     else:
         try:
             return fn(*tensors)
         except Exception as e:
             print("The following error was raised when recursively applying a function:")
             print(e)
             # Chain the original exception so the root cause is not lost.
             raise ValueError("Type {} not supported for recursive map".format(type(tensors))) from e
Example #3
0
 def cat(*argv):
     """Concatenate several SubgoalTreeLayer instances attribute-by-attribute into a new tree."""
     merged = SubgoalTreeLayer()
     first = argv[0]
     for name, value in first.__dict__.items():
         if value is None or np.isscalar(value):
             # Scalars and None are taken verbatim from the first layer.
             merged.__dict__[name] = value
         elif name == 'subgoals':
             # Merge the per-layer subgoal dicts key-wise.
             subgoal_dicts = [layer.subgoals for layer in argv]
             merged.__dict__[name] = map_dict(concat, listdict2dictlist(subgoal_dicts))
         elif name == 'child_layer':
             # Recurse down the tree, concatenating the children.
             child_layers = [layer.child_layer for layer in argv]
             merged.__dict__[name] = SubgoalTreeLayer.cat(*child_layers)
         else:
             raise ValueError("Cannot handle data type {} during tree concatenation!".format(type(value)))
     return merged
Example #4
0
def num_parameters(model, level=0):
    """  Returns the number of parameters used in a module.

    Known bug: if some of the submodules are repeated, their parameters will be double counted
    :param model: a torch.nn.Module
    :param level: if level==1, returns a dictionary of submodule names and corresponding parameter counts
    :return: total parameter count (int), or a dict of per-child counts when level > 0
    """
    # BUG FIX: named_children() returns a generator, so len() on it raised
    # TypeError for any level >= 1. Materialize it once and test emptiness.
    children = dict(model.named_children())
    if level == 0 or not children:
        return sum(p.numel() for p in model.parameters())
    else:
        return map_dict(lambda x: num_parameters(x, level - 1), children)
Example #5
0
File: core.py  Project: orybkin/blox-nn
 def recursive_map(tensors):
     """Recursively apply the closed-over `fn` to every `target_class` leaf in a nested structure.

     Containers (lists, tuples, dicts, to_dict-capable objects) are rebuilt
     with their original type; non-container, non-target leaves are returned
     unchanged when `only_target` is set, otherwise `fn` is attempted on them
     unless `strict` forbids it.

     :param tensors: arbitrarily nested structure (or a bare target / leaf value)
     :return: structure of identical shape with fn applied to matching leaves
     """
     if isinstance(tensors, target_class):
         # Direct hit: apply the function with the captured extra arguments.
         return fn(tensors, *argv, **kwargs)
     elif tensors is None:
         return tensors
     elif isinstance(tensors, list) or isinstance(tensors, tuple):
         return type(tensors)(map(recursive_map, tensors))
     elif isinstance(tensors, dict):
         return type(tensors)(map_dict(recursive_map, tensors))
     elif hasattr(tensors, 'to_dict'):
         # Rebuild custom containers from their mapped dict representation.
         return type(tensors)(**map_dict(recursive_map, tensors.to_dict()))
     else:
         # Misc elements - neither collections nor targets
         if only_target:
             return tensors
             
         try:
             # NOTE(review): `assert not strict` is control flow — in strict
             # mode it routes into the except branch below. It is stripped
             # under `python -O`, silently disabling strictness.
             assert not strict
             return fn(tensors, *argv, **kwargs)
         except Exception as e:
             print("The following error was raised when recursively applying a function:")
             print(e)
             raise ValueError("Type {} not supported for recursive map".format(type(tensors)))
Example #6
0
    def val(self, test_control=True):
        """Run validation: test-mode inference plus a train-model loss pass, then log averages.

        :param test_control: unused in this body — presumably consumed by an override; TODO confirm
        """
        print('Running Testing')
        if self.cmd_args.test_prediction:
            start = time.time()
            losses_meter = RecursiveAverageMeter()
            infer_time = AverageMeter()

            # self.model.eval()
            with autograd.no_grad():
                for batch_idx, sample_batched in enumerate(self.val_loader):
                    inputs = AttrDict(
                        map_dict(self.try_move_to_dev, sample_batched))
                    # Test-mode forward pass, timed separately from the loss pass below.
                    with self.model.val_mode(pred_length=False):
                        infer_start = time.time()
                        output = self.model(inputs, 'test')
                        infer_time.update(time.time() - infer_start)
                        if self.evaluator is not None:  # force eval on all batches for reduced noise
                            self.evaluator.eval(inputs, output, self.model)
                    # run train model to get NLL on validation data
                    output_train_mdl = self.model(inputs)
                    losses = self.model.loss(inputs, output_train_mdl)
                    losses.total = self.model.get_total_loss(inputs, losses)
                    losses_meter.update(losses)
                    # Free per-batch references promptly to limit memory growth.
                    del losses
                    del output_train_mdl

                    # if batch_idx == 0:
                    #     break

                if not self.cmd_args.dont_save:
                    if self.evaluator is not None:
                        self.evaluator.dump_results(self.global_step)
                    if self.cmd_args.metric:
                        print("Finished Evaluation! Exiting...")
                        exit(0)

                    # NOTE(review): `output`/`inputs` here are from the LAST batch only;
                    # losses_meter.avg is the epoch-wide average.
                    self.model.log_outputs(output,
                                           inputs,
                                           losses_meter.avg,
                                           self.global_step,
                                           log_images=self.cmd_args.log_images,
                                           phase='val')
                    print((
                        '\nTest set: Average loss: {:.4f} in {:.2f}s\n'.format(
                            losses_meter.avg.total.value.item(),
                            time.time() - start)))
                    if self.cmd_args.verbose_timing:
                        print("avg Inference time: {:.3f}s/batch".format(
                            infer_time.avg))
            # NOTE(review): raises NameError if val_loader is empty — output never bound.
            del output
Example #7
0
    def sample_max_len_video(self, data_dict, start_ind, end_ind):
        """ This function processes data tensors so as to have length equal to max_seq_len
        by sampling / padding if necessary """
        max_len = self.spec['max_seq_len']
        surplus = (end_ind - start_ind + 1) - max_len

        # During training, start at a random offset inside the surplus frames;
        # otherwise keep the sequence anchored at zero.
        offset = 0
        if self.phase == 'train':
            offset = start_ind + max(0, int(np.random.rand() * (surplus + 1)))

        data_dict = map_dict(
            lambda tensor: self._maybe_pad(tensor, offset, max_len), data_dict)
        if 'actions' in data_dict:
            # One fewer action than frames: drop the trailing action.
            data_dict.actions = data_dict.actions[:-1]
        end_ind = min(end_ind - offset, max_len - 1)

        return 0, end_ind, data_dict        # start index gets 0 by design
Example #8
0
    def train_epoch(self, epoch):
        """Run one full pass over the training loader: forward, loss, backward, step, logging.

        :param epoch: current epoch index; used for progress printing and the ETA estimate
        """
        self.model.train()
        epoch_len = len(self.train_loader)
        end = time.time()
        # Meters tracking where wall-clock time goes within each batch.
        batch_time = AverageMeter()
        upto_log_time = AverageMeter()
        data_load_time = AverageMeter()
        forward_backward_time = AverageMeter()
        # Log images this many batches apart (imepoch image-logs per epoch).
        self.log_images_interval = int(epoch_len / self.cmd_args.imepoch)

        print('starting epoch ', epoch)

        for self.batch_idx, sample_batched in enumerate(self.train_loader):
            data_load_time.update(time.time() - end)
            inputs = AttrDict(map_dict(self.try_move_to_dev, sample_batched))
            with self.training_context():
                self.optimizer.zero_grad()
                start_fw_bw = time.time()
                output = self.model(inputs)
                losses = self.model.loss(inputs, output)
                losses.total = self.model.get_total_loss(inputs, losses)
                losses.total.value.backward()
                # Hooks run after backward but before the optimizer step.
                self.call_hooks(inputs, output, losses, epoch)
                self.optimizer.step()
                self.model.step()
                forward_backward_time.update(time.time() - start_fw_bw)

            if self.cmd_args.train_loop_pdb:
                import pdb
                pdb.set_trace()

            upto_log_time.update(time.time() - end)
            if self.log_outputs_now and not self.cmd_args.dont_save:
                self.model.log_outputs(output,
                                       inputs,
                                       losses,
                                       self.global_step,
                                       log_images=self.log_images_now,
                                       phase='train')
            batch_time.update(time.time() - end)
            end = time.time()

            if self.log_outputs_now:
                print('GPU {}: {}'.format(
                    os.environ["CUDA_VISIBLE_DEVICES"]
                    if self.use_cuda else 'none', self._hp.exp_path))
                print(
                    ('itr: {} Train Epoch: {} [{}/{} ({:.0f}%)]\tLoss: {:.6f}'.
                     format(self.global_step, epoch, self.batch_idx,
                            len(self.train_loader),
                            100. * self.batch_idx / len(self.train_loader),
                            losses.total.value.item())))

                print(
                    'avg time for loading: {:.2f}s, logs: {:.2f}s, compute: {:.2f}s, total: {:.2f}s'
                    .format(data_load_time.avg,
                            batch_time.avg - upto_log_time.avg,
                            upto_log_time.avg - data_load_time.avg,
                            batch_time.avg))
                # ETA assumes remaining epochs take as long per batch as the average so far.
                togo_train_time = batch_time.avg * (self._hp.num_epochs -
                                                    epoch) * epoch_len / 3600.
                print('ETA: {:.2f}h'.format(togo_train_time))
                if self.cmd_args.verbose_timing:
                    print("avg FW/BW time: {:.3f}s/batch".format(
                        forward_backward_time.avg))

            # Release per-batch graph references before the next iteration.
            del output, losses
            self.global_step = self.global_step + 1