Example #1
from numpy import (arange as np_arange, asarray as np_asarray,
                   ceil as np_ceil, ndim as np_ndim,
                   prod as np_prod, zeros as np_zeros)


def parse_matrix_part(matrix, szSub, ovSub):
    assert matrix.ndim == 3
    assert np_ndim(szSub) == 1
    assert len(szSub) == 3
    assert np_ndim(ovSub) == 1
    assert len(ovSub) == 3

    matrix_shape = np_asarray(matrix.shape, dtype=int)
    len_each_section, _, _ = szSub
    shift_length, _, _ = ovSub

    len_each_section_range = np_arange(len_each_section)

    # Number of overlapping sections along each axis.
    section_counts = np_ceil((matrix_shape - szSub + 1) / ovSub).astype(int)
    num_rows_overlap, num_elements, num_beams = section_counts
    result_matrix = np_zeros((np_prod(szSub), np_prod(section_counts)))
    cnt = 0
    for i in range(num_beams):
        for j in range(num_elements):
            for k in range(num_rows_overlap):
                index_1 = len_each_section_range + k * shift_length
                index_2 = j
                index_3 = i
                # Each column of the result holds one section of the input.
                tmp = matrix[index_1, index_2, index_3]
                result_matrix[:, cnt] = tmp
                cnt += 1

    return result_matrix
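
A minimal usage sketch (shapes chosen for illustration, not taken from the original source): split a (10, 2, 2) volume into length-4 sections whose start points step by 2 samples along the first axis.

matrix = np_arange(40).reshape(10, 2, 2)
sections = parse_matrix_part(matrix, np_asarray([4, 1, 1]), np_asarray([2, 1, 1]))
print(sections.shape)  # (4, 16): 4 samples per section, 4 * 2 * 2 sections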
Example #2
from typing import Optional

from numpy import prod as np_prod

# `Tensor` and `_InnerProd` are defined elsewhere in the surrounding package.


def prod(*inputs: Tensor, dim: Optional[int] = None, keepdim=False) -> Tensor:
    ''' Product of tensor(s)

    Parameters:
    -----------
     - inputs : varargs, tensors to be multiplied;
       if a single tensor is passed, its elements will be multiplied
     - dim : int (optional), dimension to reduce over
     - keepdim : bool, whether to keep `dim`

    Returns:
    --------
     - result : Tensor

    '''

    if len(inputs) == 1:
        # Reduce a single tensor over `dim` (or all elements if dim is None).
        return _InnerProd(inputs[0], dim=dim, keepdim=keepdim)()
    else:
        # Multiple tensors are treated as stacked along a new leading axis,
        # so dim=0 gives their elementwise product.
        return np_prod(inputs, axis=dim, keepdims=keepdim)
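
For reference, the multi-tensor branch relies on NumPy treating a sequence of arrays as if stacked along a new leading axis, so `dim` counts that stacking axis as dimension 0. A self-contained sketch in plain NumPy (not the package's Tensor type):

import numpy as np

a = np.array([[1., 2.], [3., 4.]])
b = np.array([[5., 6.], [7., 8.]])

print(np.prod((a, b)))          # 40320.0 -- product of all eight elements
print(np.prod((a, b), axis=0))  # [[ 5. 12.] [21. 32.]] -- elementwise a * b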
Example #3
from typing import Optional

from numpy import prod as np_prod, sum as np_sum

# `Tensor` and `_InnerSum` are defined elsewhere in the surrounding package.


def mean(*inputs: Tensor, dim: Optional[int] = None, keepdim=False) -> Tensor:
    ''' Mean of tensor(s)

    Parameters:
    -----------
     - inputs : varargs, tensors to compute the mean of;
       if a single tensor is passed, the mean of its elements will be computed
     - dim : int (optional), dimension to reduce over
     - keepdim : bool, whether to keep `dim`

    Returns:
    --------
     - result : Tensor

    '''

    if len(inputs) == 1:
        # Divide the inner sum by the number of elements reduced over.
        n = np_prod(inputs[0].shape) if dim is None else inputs[0].shape[dim]
        return _InnerSum(inputs[0], dim=dim, keepdim=keepdim)() / n
    else:
        # Multiple tensors are treated as stacked along a new leading axis,
        # so dim=0 yields their elementwise mean.
        return np_sum(inputs, axis=dim, keepdims=keepdim) / len(inputs)
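
The same stacking behavior applies to the multi-tensor branch here: with dim=0 it computes the elementwise mean of its arguments. A plain-NumPy sketch with illustrative values:

import numpy as np

a = np.array([1., 2., 3.])
b = np.array([3., 4., 5.])

print(np.sum((a, b), axis=0) / 2)  # [2. 3. 4.] -- the elementwise mean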
Example #4
    def __init__(self, filter_sz, n_lc_in, n_lc_out, lc_upsample_filt_sizes,
            lc_upsample_strides, n_res, n_dil, n_skp, n_post, n_quant,
            n_blocks, n_block_layers, jitter_prob, n_speakers, n_global_embed,
            bias=True, parent_rf=None):
        super(WaveNet, self).__init__()

        self.n_blocks = n_blocks
        self.n_block_layers = n_block_layers
        self.n_quant = n_quant
        self.quant_onehot = None
        self.bias = bias
        self.jitter = Jitter(jitter_prob)
        post_jitter_filt_sz = 3
        # Total upsampling factor of the local-conditioning stack.
        lc_input_stepsize = np_prod(lc_upsample_strides)

        lc_conv_name = 'LC_Conv(filter_size={})'.format(post_jitter_filt_sz)
        self.lc_conv = Conv1dWrap(n_lc_in, n_lc_out,
                kernel_size=post_jitter_filt_sz, stride=1, bias=self.bias)

        cur_rf = rfield.Rfield(filter_info=post_jitter_filt_sz,
                stride=1, parent=parent_rf, name=lc_conv_name)
        self.beg_rf = cur_rf
        
        # This RF is the first processing of the local conditioning after the
        # Jitter. It is the starting point for the commitment loss aggregation
        self.pre_upsample_rf = cur_rf
        self.lc_upsample = nn.Sequential()

        # WaveNet is a stand-alone model, so parent_rf is None
        # The Autoencoder model in model.py will link parent_rfs together.
        for i, (filt_sz, stride) in enumerate(zip(lc_upsample_filt_sizes,
                                                  lc_upsample_strides)):
            name = 'Upsampling_{}(filter_sz={}, stride={})'.format(
                    i, filt_sz, stride)
            mod = Upsampling(n_lc_out, filt_sz, stride, cur_rf, name=name)
            self.lc_upsample.add_module(str(i), mod)
            cur_rf = mod.rf

        # This rf describes the bounds of the input wav corresponding to the
        # local conditioning vectors
        self.last_upsample_rf = cur_rf
        self.cond = Conditioning(n_speakers, n_global_embed)
        self.base_layer = Conv1dWrap(n_quant, n_res, kernel_size=1, stride=1,
                dilation=1, bias=self.bias)

        self.conv_layers = nn.ModuleList() 
        n_cond = n_lc_out + n_global_embed

        for b in range(self.n_blocks):
            for bl in range(self.n_block_layers):
                dil = 2**bl
                name = 'GRCC_{},{}(dil={})'.format(b, bl, dil)
                grc = GatedResidualCondConv(n_cond, n_res, n_dil, n_skp, 1,
                        dil, filter_sz, bias, cur_rf, name)
                self.conv_layers.append(grc)
                cur_rf = grc.rf

        self.last_grcc_rf = cur_rf

        # Each module in the stack needs to know the dimensions of
        # the input and output of the overall stack, in order to trim
        # residual connections
        beg_grcc_rf = self.conv_layers[0].rf
        end_grcc_rf = self.conv_layers[-1].rf 
        for mod in self.conv_layers.children():
            mod.init_bound_rfs(beg_grcc_rf, end_grcc_rf)

        self.relu = nn.ReLU()
        self.post1 = Conv1dWrap(n_skp, n_post, 1, bias=bias)
        self.post2 = Conv1dWrap(n_post, n_quant, 1, bias=bias)
        self.logsoftmax = nn.LogSoftmax(1) # (B, Q, N)
        self.rf = cur_rf
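
The dil = 2**bl schedule above is the standard WaveNet doubling of dilation within each block. A self-contained sketch of the receptive field such a stack produces (plain Python, independent of the rfield bookkeeping; the constants in the example call are illustrative):

def dilated_stack_receptive_field(filter_sz, n_blocks, n_block_layers):
    # Each layer with kernel size k and dilation d widens the receptive
    # field by (k - 1) * d samples; dilations run 1, 2, 4, ... per block.
    rf = 1
    for _ in range(n_blocks):
        for bl in range(n_block_layers):
            rf += (filter_sz - 1) * 2**bl
    return rf

print(dilated_stack_receptive_field(2, 2, 10))  # 2047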
Example #5
    def size(self):
        """Returns the number of elements in the array."""
        return np_prod(self.shape)
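
A self-contained illustration of the same pattern; the Array class below is hypothetical, made up only to host the method:

import numpy as np


class Array:
    def __init__(self, shape):
        self.shape = shape

    def size(self):
        """Returns the number of elements in the array."""
        return np.prod(self.shape)


print(Array((3, 4, 5)).size())  # 60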
Example #6
    def __init__(self, hps, parent_vc=None):
        super(WaveNet, self).__init__()

        self.n_blocks = hps.n_blocks
        self.n_block_layers = hps.n_block_layers
        self.n_skp = hps.n_skp
        self.n_res = hps.n_res
        self.n_quant = hps.n_quant

        self.bias = hps.bias
        post_jitter_filt_sz = 3
        # Total upsampling factor of the local-conditioning stack.
        lc_input_stepsize = np_prod(hps.lc_upsample_strides)

        lc_conv_name = f'LC_Conv(filter_size={post_jitter_filt_sz})'
        self.lc_conv = Conv1dWrap(lc_conv_name,
                                  parent_vc,
                                  in_channels=hps.n_lc_in,
                                  out_channels=hps.n_lc_out,
                                  kernel_size=post_jitter_filt_sz,
                                  stride=1,
                                  bias=hps.bias)

        self.vc = dict()
        self.vc['beg'] = self.lc_conv.vc
        cur_vc = self.vc['beg']

        # This VC is the first processing of the local conditioning after the
        # Jitter. It is the starting point for the commitment loss aggregation
        self.lc_upsample = nn.Sequential()

        # WaveNet is a stand-alone model, so parent_vc is None
        # The Autoencoder model in model.py will link parent_vcs together.
        iterator = enumerate(
            zip(hps.lc_upsample_filt_sizes, hps.lc_upsample_strides))
        for i, (filt_sz, stride) in iterator:
            name = f'Upsampling_{i}(filter_sz={filt_sz}, stride={stride})'
            mod = Upsampling(hps.n_lc_out, filt_sz, stride, cur_vc, name=name)
            self.lc_upsample.add_module(str(i), mod)
            cur_vc = mod.vc

        # This vc describes the bounds of the input wav corresponding to the
        # local conditioning vectors
        self.vc['last_upsample'] = cur_vc
        self.cond = Conditioning(hps.n_speakers, hps.n_global_embed)
        self.base_layer = Conv1dWrap('Base Layer',
                                     cur_vc,
                                     in_channels=hps.n_quant,
                                     out_channels=hps.n_res,
                                     kernel_size=1,
                                     stride=1,
                                     dilation=1,
                                     bias=self.bias)

        self.base_layer.vc.do_trim_input = True
        cur_vc = self.base_layer.vc

        self.conv_layers = nn.ModuleList()
        n_cond = hps.n_lc_out + hps.n_global_embed

        for b in range(self.n_blocks):
            for bl in range(self.n_block_layers):
                dil = 2**bl
                name = f'GRCC_{b},{bl}(dil={dil})'
                final_layer = (b + 1 == self.n_blocks
                               and bl + 1 == self.n_block_layers)
                grc = GatedResidualCondConv(self.vc,
                                            hps,
                                            n_cond=n_cond,
                                            stride=1,
                                            dil=dil,
                                            final_layer=final_layer,
                                            parent_vc=cur_vc,
                                            name=name)
                self.conv_layers.append(grc)
                cur_vc = grc.vc

        # Each module in the stack needs to know the dimensions of
        # the input and output of the overall stack, in order to trim
        # residual connections
        self.vc['beg_grcc'] = self.conv_layers[0].vc
        self.vc['end_grcc'] = self.conv_layers[-1].vc

        self.relu = nn.ReLU()
        self.post1 = Conv1dWrap('Post1',
                                cur_vc,
                                in_channels=hps.n_skp,
                                out_channels=hps.n_post,
                                kernel_size=1,
                                stride=1,
                                bias=hps.bias)

        self.post2 = Conv1dWrap('Post2',
                                self.post1.vc,
                                in_channels=hps.n_post,
                                out_channels=hps.n_quant,
                                kernel_size=1,
                                stride=1,
                                bias=hps.bias)
        self.logsoftmax = nn.LogSoftmax(1)  # (B, Q, N)
        self.vc['main'] = self.post2.vc
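
Here np_prod(hps.lc_upsample_strides) is the total upsampling factor of the stacked Upsampling modules, since the strides of cascaded upsampling stages compose multiplicatively. A one-line sketch with illustrative stride values:

import numpy as np

# Two upsampling stages with strides 5 and 4 advance the conditioning
# input by 5 * 4 = 20 wav samples per local-conditioning step.
print(np.prod([5, 4]))  # 20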