def test_concat_backward():
    """Check gradients flowing back through ``cat`` against ``torch.cat``.

    For each group of shapes, random mytorch tensors are created together
    with matching torch tensors, concatenated along the paired dimension,
    reduced to a scalar via ``(x ** 2).sum()`` and back-propagated. The
    gradient of every input is then checked with ``check_grad``.

    Returns:
        bool: True when every assertion passed.
    """
    # shape of tensor to test
    tensor_shapes = [[(1, 4, 3), (1, 8, 3), (1, 5, 3)], [(2, 3, 4), (1, 3, 4)],
                     [(6, 7, 8, 9), (6, 7, 8, 1), (6, 7, 8, 2)],
                     [(1, 2, 3), (1, 2, 4), (1, 2, 3), (1, 2, 4)]]

    # concatenation dimension paired with each shape group above
    cat_dims = [1, 0, 3, 2]

    for tensor_shapes_cur, d_cur in zip(tensor_shapes, cat_dims):
        # get mytorch and torch tensor: 'a'
        a = [Tensor.randn(*shape_i) for shape_i in tensor_shapes_cur]
        for a_i in a:
            a_i.requires_grad = True

        a_torch = [get_same_torch_tensor(a_i) for a_i in a]

        c = cat(a, d_cur)
        c_torch = torch.cat(a_torch, dim=d_cur)

        loss = (c**2).sum()
        loss_torch = (c_torch**2).sum()

        # Without these calls no gradient is ever populated on the inputs,
        # so the comparison below would not exercise cat's backward at all.
        loss.backward()
        loss_torch.backward()

        for a_i, a_torch_i in zip(a, a_torch):
            assert check_grad(a_i, a_torch_i, eps=eps)

    return True
    def forward(self, input, hidden=None):
        """Run ``self.unit`` over every timestep of a packed batch.

        NOTE: Please get a good grasp on util.PackedSequence before attempting this.

        Args:
            input (PackedSequence): input.data is tensor of shape ( total number of timesteps (sum) across all samples in the batch, input_size)
            hidden (Tensor, None): (batch_size, hidden_size)

        Returns:
            PackedSequence: ( total number of timesteps (sum) across all samples in the batch, hidden_size)
            Tensor (batch_size,hidden_size): This is the hidden generated by the last time step for each sample joined together. Samples are ordered in descending order based on number of timesteps. This is a slight deviation from PyTorch.
        """
        # Resolve the PackedSequence into its components
        data, sorted_indices, batch_sizes = input

        # Iterate over appropriate segments of the "data" tensor to pass same timesteps across all samples in the batch simultaneously to the unit for processing.
        # Remember to account for scenarios when effective_batch_size changes between one iteration to the next
        step_outputs = []
        # One slot per sample (in sorted order); each slot ends up holding the
        # hidden row from the last timestep in which that sample was active.
        last_hidden = [None] * len(sorted_indices)
        hidden_shape = None
        start = 0
        for batch_size in batch_sizes:
            end = start + batch_size
            step_input = data[start:end]
            if hidden is not None:
                # Drop rows whose sequences have already ended.
                hidden = hidden[:step_input.shape[0]]
            hidden = self.unit.forward(step_input, hidden)
            step_outputs.append(hidden)
            start = end

            if hidden_shape is None:
                # Shape at the first timestep, i.e. with the full batch present.
                hidden_shape = hidden.shape

            for row in range(min(len(last_hidden), hidden.shape[0])):
                last_hidden[row] = hidden[row]

        # Reuse the incoming ordering/batch metadata so the packed output lines
        # up row-for-row with the packed input.
        packed_output = PackedSequence(
            tensor.cat(step_outputs), sorted_indices, batch_sizes)
        final_hidden = tensor.cat(last_hidden).reshape(*hidden_shape)
        return packed_output, final_hidden
def unpack_sequence(ps):
    """Given a PackedSequence, this unpacks this into the original list of tensors.

    NOTE: Attempt this only after you have completed pack_sequence and understand how it works.

    Args:
        ps (PackedSequence)

    Returns:
        list of Tensors
    """
    # This operation is just the reverse operation of pack_sequence
    # Use ps.batch_sizes to determine number of time steps in each tensor of the original list (assuming the tensors were sorted in a descending fashion based on number of timesteps)
    # Construct these individual tensors using tensor.cat
    # Re-arrange this list of tensors based on ps.sorted_indices
    sequences = [None] * ps.sorted_indices.shape[0]

    for rank, original_index in enumerate(ps.sorted_indices):
        rows = []
        offset = 0  # first row of the current timestep's slab in ps.data
        for batch_size in ps.batch_sizes:
            if rank >= batch_size:
                # This sample is not present at this timestep; batch sizes are
                # assumed non-increasing (longest-first packing), so it is
                # absent from all later timesteps too.
                break
            row = offset + rank
            rows.append(ps.data[row:row + 1])
            offset += batch_size

        # Put the rebuilt sequence back at its position in the original list.
        sequences[original_index] = tensor.cat(rows)

    return sequences
def pack_sequence(sequence):
    """Constructs a packed sequence from an input sequence of tensors.

    By default assumes enforce_sorted ( compared to PyTorch ) is False
    i.e the length of tensors in the sequence need not be sorted (desc).

    Args:
        sequence (list of Tensor): ith tensor in the list is of shape (Ti,K) where Ti is the number of time steps in sample i and K is the # features

    Returns:
        PackedSequence: data attribute of the result is of shape ( total number of timesteps (sum) across all samples in the batch, # features )
    """
    # Find the sorted indices based on number of time steps in each sample
    # Extract slices from each sample and properly order them for the construction of the packed tensor. __getitem__ you defined for Tensor class will come in handy
    # Use the tensor.cat function to create a single tensor from the re-ordered segments
    # Finally construct the PackedSequence object
    # REMEMBER: All operations here should be able to construct a valid autograd graph.
    lengths = np.asarray([seq.shape[0] for seq in sequence])
    # argsort is ascending; reverse it to get longest-first order.
    sorted_indices = np.argsort(lengths)[::-1]

    packed_rows = []
    batch_sizes = []
    longest = int(lengths[sorted_indices[0]])
    for step in range(longest):
        # Samples that still have a row at this timestep, in sorted order.
        active = [j for j in sorted_indices if step < lengths[j]]
        packed_rows.extend(sequence[j][step:step + 1] for j in active)
        batch_sizes.append(len(active))

    pack_seq = tensor.cat(packed_rows)

    return PackedSequence(pack_seq, sorted_indices, np.asarray(batch_sizes))