Example #1
def fetch_blizzard_unify_spec(data_path, sz=8000, timestep=79, frame_size=200, overlap=100,
                                batch_size=100, file_name="blizzard_unify_spec.h5"):
    hdf5_path = os.path.join(data_path, file_name)
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dirnames, filenames in os.walk(data_path):
            for filename in fnmatch.filter(filenames, 'data_*.npy'):
                data_matches.append(os.path.join(root, filename))
        # sort in proper order
        data_matches = sorted(data_matches,
                              key=lambda x: int(
                                  x.split("/")[-1].split("_")[-1].split(".")[0]))

        # setup tables
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, timestep, frame_size),
                                      filters=compression_filter,)

        for n, f in enumerate(data_matches):
            print("Reading file %s" % (f))
            with open(f, 'rb') as fp:
                # Array of arrays, ragged
                d = np.load(fp)
                large_d = d[0]
                for i in xrange(1, len(d)):
                    print("Processing line %i of %i" % (i+1, len(d)))
                    di = d[i]
                    if len(di.shape) > 1:
                        di = di[:, 0]
                    large_d = np.concatenate([large_d, di])
                chunk_size = len(large_d) // batch_size
                seg_d = segment_axis(large_d, chunk_size, 0)
                num_batch = (seg_d.shape[-1] - 1) // sz
                for i in range(num_batch):
                    batch = seg_d[:, i*sz:(i+1)*sz]
                    batch = np.array([segment_axis(x, frame_size, overlap,
                                                   end='pad') for x in batch])
                    batch = apply_window(batch)
                    batch = apply_fft(batch)
                    batch = log_magnitude(batch)
                    batch = apply_ifft(batch)
                    for j in range(batch_size):
                        data.append(batch[j][None])
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    data = hdf5_file.root.data
    X = data
    return X
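Every snippet on this page leans on `segment_axis`, which slices a signal into (optionally overlapping) frames along a new axis. As a point of reference, here is a minimal sketch of the core behaviour for 1-D input with the default end='cut' handling; the real implementation (from cle / scikits.talkbox) also supports end='pad' and end='wrap' and multi-dimensional arrays:

import numpy as np

def segment_axis_sketch(x, length, overlap=0):
    # Frames of `length` samples, stepping by length - overlap;
    # trailing samples that do not fill a whole frame are dropped.
    step = length - overlap
    n_frames = 1 + (len(x) - length) // step
    return np.stack([x[i * step:i * step + length] for i in range(n_frames)])

print(segment_axis_sketch(np.arange(10), 4, 2))
# [[0 1 2 3]
#  [2 3 4 5]
#  [4 5 6 7]
#  [6 7 8 9]]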
Example #2
 def load(self, data_path):
     data, tags = fetch_onomatopoeia(data_path)
     # hardcode split for now
     random_state = np.random.RandomState(1999)
     indices = np.arange(len(data))
     random_state.shuffle(indices)
     if self.name == "train":
         idx = int(.9 * float(len(data)))
         assert idx != len(data)
         data = data[indices[:idx]]
     elif self.name == "valid":
         idx = int(.1 * float(len(data)))
         assert idx != 0
         data = data[indices[-idx:]]
     else:
         raise ValueError("name = %s is not supported!" % self.name)
     raw_X = []
     for x in data:
         if len(x) < self.thresh:
             raw_X.append(np.asarray(x, dtype=theano.config.floatX))
         else:
             raw_X.append(np.asarray(x[:self.thresh], dtype=theano.config.floatX))
     raw_X = np.array(raw_X)
     pre_X, self.X_mean, self.X_std = self.global_normalize(raw_X, self.X_mean, self.X_std)
     X = np.array([segment_axis(x, self.frame_size, 0) for x in pre_X])
     return [X]
Example #3
    def apply_fft(self, batch):

        batch = np.array(
            [self.numpy_rfft(self.window * segment_axis(x, self.frame_size, self.overlap, end="pad")) for x in batch]
        )

        return batch
Example #4
    def apply_window(self, batch):

        batch = np.array([self.window * segment_axis(x, self.frame_size,
                                                     self.overlap, end='pad')
                          for x in batch])

        return batch
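Examples #3 and #4 are easier to read with the shapes spelled out. A stand-alone sketch of the same windowed framing, with the window assumed to be Hann (the classes above configure self.window elsewhere):

import numpy as np
from cle.cle.utils import segment_axis

frame_size, overlap = 200, 100
window = np.hanning(frame_size)     # assumption; self.window is set elsewhere
x = np.random.randn(8000)
frames = segment_axis(x, frame_size, overlap, end='pad')  # (n_frames, frame_size)
windowed = window * frames          # (frame_size,) broadcasts across frames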
Example #5
    def load(self, data_path):
        data = np.load(data_path)
        if self.data_mode == 'words':
            if self.name == 'train':
                raw_data = data['train_words']
            elif self.name == 'valid':
                raw_data = data['valid_words']
            elif self.name == 'test':
                raw_data = data['test_words']
            self._max_labels = data['n_words']
        elif self.data_mode == 'chars':
            if self.name == 'train':
                raw_data = data['train_chars']
            elif self.name == 'valid':
                raw_data = data['valid_chars']
            elif self.name == 'test':
                raw_data = data['test_chars']
        chunk_size = len(raw_data) // self.batch_size
        raw_data = segment_axis(raw_data, chunk_size, 0)

        X = []
        y = []
        for i in range((raw_data.shape[1] - 1) // self.context_len):
            X.extend(raw_data[:, :-1][:, i * self.context_len:(i + 1) * self.context_len,
                                      np.newaxis])
            y.extend(raw_data[:, 1:][:, i * self.context_len:(i + 1) * self.context_len,
                                     np.newaxis])
        X = np.asarray(X)
        y = np.asarray(y)
        return [X, y]
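The chunking arithmetic in this loader is easier to see with toy numbers. A sketch assuming batch_size=2 and context_len=3 on a 13-token stream (names are illustrative):

import numpy as np
from cle.cle.utils import segment_axis

raw = np.arange(13)
chunk_size = len(raw) // 2              # batch_size = 2 -> 6 tokens per row
seg = segment_axis(raw, chunk_size, 0)  # shape (2, 6); token 12 is cut
# (seg.shape[1] - 1) // 3 = 1 context window per row
X = seg[:, :-1][:, 0:3]                 # inputs:  [[0 1 2], [6 7 8]]
y = seg[:, 1:][:, 0:3]                  # targets: [[1 2 3], [7 8 9]]  (shifted by one)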
Example #6
    def load(self, data_path):
        data = np.load(data_path)
        if self.data_mode == 'words':
            if self.name == 'train':
                raw_data = data['train_words']
            elif self.name == 'valid':
                raw_data = data['valid_words']
            elif self.name == 'test':
                raw_data = data['test_words']
            self._max_labels = data['n_words']
        elif self.data_mode == 'chars':
            if self.name == 'train':
                raw_data = data['train_chars']
            elif self.name == 'valid':
                raw_data = data['valid_chars']
            elif self.name == 'test':
                raw_data = data['test_chars']
        chunk_size = len(raw_data) // self.batch_size
        raw_data = segment_axis(raw_data, chunk_size, 0)

        X = []
        y = []
        for i in range((raw_data.shape[1] - 1) // self.context_len):
            X.extend(raw_data[:, :-1][:, i * self.context_len:(i + 1) *
                                      self.context_len, np.newaxis])
            y.extend(raw_data[:, 1:][:, i * self.context_len:(i + 1) *
                                     self.context_len, np.newaxis])
        X = np.asarray(X)
        y = np.asarray(y)
        return [X, y]
Example #7
 def slices(self, start, end):
     batch = np.array(self.data[self.idx[start:end]], dtype=theano.config.floatX)
     batch -= self.X_mean
     batch /= self.X_std
     batch = np.asarray([segment_axis(x, self.inpsz, 0) for x in batch])
     batch = batch.transpose(1, 0, 2)
     return totuple(batch)
Example #8
 def slices(self, start, end):
     batch = np.array(self.data[start:end], dtype=theano.config.floatX)
     batch -= self.X_mean
     batch /= self.X_std
     batch = np.asarray([segment_axis(x, self.frame_size, 0) for x in batch])
     # ipdb.set_trace()  # leftover debugger breakpoint; remove for normal use
     batch = batch.transpose(1, 0, 2)
     return totuple(batch)
Example #9
    def apply_window(self, batch):

        batch = np.array([
            self.window *
            segment_axis(x, self.frame_size, self.overlap, end='pad')
            for x in batch
        ])

        return batch
Example #10
 def slices(self, start, end):
     batch = np.array(self.data[start:end], dtype=theano.config.floatX)
     if self.use_spec:
         batch = self._use_spec(batch)
         batch = self._log_magnitude(batch)
         batch = self._concatenate(batch)
     else:
         batch -= self.X_mean
         batch /= self.X_std
         if self.use_window:
             batch = self._use_window(batch)
         else:
             batch = np.asarray([segment_axis(x, self.frame_size, 0) for x in batch])
     batch = batch.transpose(1, 0, 2)
     return totuple(batch)
Example #11
 def load(self, data_path):
     if self.name == 'train':
         data_path = os.path.join(data_path, 'sf_train_segmented_0.npy')
     elif self.name == 'valid':
         data_path = os.path.join(data_path, 'sf_valid_segmented_0.npy')
     data = np.load(data_path)
     raw_X = []
     for x in data:
         if len(x) < 50000:
             raw_X.append(np.asarray(x, dtype=theano.config.floatX))
         else:
             half_len = len(x) // 2
             raw_X.append(np.asarray(x[:half_len], dtype=theano.config.floatX))
             raw_X.append(np.asarray(x[half_len:], dtype=theano.config.floatX))
     raw_X = np.array(raw_X)
     if self.shuffle:
         idx = np.random.permutation(len(raw_X))
         raw_X = raw_X[idx]
     pre_X = self.apply_preprocessing(raw_X)
     if self.multi_source:
         X = [np.array([segment_axis(x, self.frame_size, 0) for x in X]) for X in pre_X]
     else:
         X = [np.array([segment_axis(x, self.frame_size, 0) for x in pre_X])]
     return X
Example #12
def fetch_accent_tbptt(data_path, sz=8000, batch_size=100,
                       file_name="accent_tbptt.h5"):
    hdf5_path = os.path.join(data_path, file_name)
    if not os.path.exists(hdf5_path):
        data_matches = []
        for root, dirnames, filenames in os.walk(data_path):
            for filename in fnmatch.filter(filenames, '*.wav'):
                if '._' not in filename:
                    data_matches.append(os.path.join(root, filename))
        # Just group same languages, numbering will be in *alpha* not numeric
        # order within each language
        data_matches = sorted(data_matches)
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, sz),
                                      filters=compression_filter,)
        large_d = None
        for n, f in enumerate(data_matches):
            print("Processing file %i of %i" % (n+1, len(data_matches)))
            try:
                sr, d = wavfile.read(f)
                if len(d.shape) > 1:
                    d = d[:, 0]
                if large_d is None:
                    large_d = d
                else:
                    large_d = np.concatenate([large_d, d])
            except ValueError:
                print("Not a proper wave file.")
        chunk_size = len(large_d) // batch_size
        seg_d = segment_axis(large_d, chunk_size, 0)
        num_batch = (seg_d.shape[-1] - 1) // sz
        for i in range(num_batch):
            this_batch = seg_d[:, i*sz:(i+1)*sz]
            for j in range(batch_size):
                data.append(this_batch[j][None])
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    data = hdf5_file.root.data
    X = data
    return X
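The layout this builds is what makes truncated BPTT work: each row of the EArray is one contiguous slice of the concatenated audio, so minibatch i+1 resumes exactly where minibatch i stopped in all batch_size parallel streams. The same slicing on toy sizes (values assumed for illustration):

import numpy as np
from cle.cle.utils import segment_axis

stream = np.arange(44)                  # stand-in for the concatenated audio
batch_size, sz = 4, 5
chunk_size = len(stream) // batch_size  # 11 samples per parallel stream
rows = segment_axis(stream, chunk_size, 0)   # shape (4, 11)
for i in range((rows.shape[-1] - 1) // sz):  # 2 minibatches
    minibatch = rows[:, i * sz:(i + 1) * sz]  # (4, 5); continues the previous one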
Example #13
    def slices(self, start, end):

        batch = np.array(self.data[start:end], dtype=theano.config.floatX)

        if self.use_spec:
            batch = self.apply_fft(batch)
            batch = self.log_magnitude(batch)
            batch = self.concatenate(batch)
        else:
            batch -= self.X_mean
            batch /= self.X_std
            if self.use_window:
                batch = self.apply_window(batch)
            else:
                batch = np.asarray(
                    [segment_axis(x, self.frame_size, 0) for x in batch])

        batch = batch.transpose(1, 0, 2)

        return totuple(batch)
Example #14
 def slices(self, start, end):
     batches = [mat[start:end] for mat in self.data]
     if self.use_spec:
         batches[0] = self._use_spec(batches[0])
         batches[0] = self._log_magnitude(batches[0])
         batches[0] = self._concatenate(batches[0])
     else:
         batches[0] -= self.X_mean
         batches[0] /= self.X_std
         if self.use_window:
             batches[0] = self._use_window(batches[0])
         else:
             batches[0] = np.asarray([segment_axis(x, self.frame_size, 0) for x in batches[0]])
     mask = self.create_mask(batches[0].swapaxes(0, 1))
     if self.load_spk_info:
         # capture speaker info before re-binding `batches` to the padded list
         spk = batches[-1]
         batches = [self.zero_pad(batch) for batch in batches[:-1]]
         return totuple(batches + [spk, mask])
     else:
         batches = [self.zero_pad(batch) for batch in batches]
         return totuple(batches + [mask])
Example #15
def fetch_timit(data_path, shuffle=0, frame_size=200, this_set="train",
                use_n_gram=1, file_name='_timit.h5'):
    file_name = this_set + file_name
    hdf5_path = os.path.join(data_path, file_name)
    if not os.path.exists(hdf5_path):
        raw_name = os.path.join(data_path, this_set + '_x_raw.npy')
        pho_name = os.path.join(data_path, this_set + '_x_phonemes.npy')
        raw_data = np.load(raw_name)
        pho_data = np.load(pho_name)
        if shuffle:
            idx = np.random.permutation(len(raw_data))
            raw_data = raw_data[idx]
            pho_data = pho_data[idx]
        len_pho = np.array([np.unique(x).max() for x in pho_data]).max() + 1
        pho_data = np.array([segment_axis(y, frame_size, 0) for y in pho_data])
        if use_n_gram:
            pho_data = assign_n_gram_per_frame(pho_data, len_pho)
        else:
            pho_data = assign_phoneme_per_frame(pho_data, len_pho)

        # setup tables
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        raw = hdf5_file.createVLArray(hdf5_file.root, 'raw',
                                      tables.Int16Atom(shape=()),
                                      filters=compression_filter,)
        pho = hdf5_file.createVLArray(hdf5_file.root, 'pho',
                                      tables.Int16Atom(shape=()),
                                      filters=compression_filter,)
        for x, y in zip(raw_data, pho_data):
            raw.append(x)
            pho.append(y.flatten())
        hdf5_file.close()
    hdf5_file = tables.openFile(hdf5_path, mode='r')
    X = hdf5_file.root.raw
    y = hdf5_file.root.pho
    return X, y
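Because raw and pho are VLArrays, each utterance keeps its native length on disk and can be indexed lazily. A sketch of reading one utterance back, with the data path assumed:

X, y = fetch_timit('/data/timit/', this_set='train')
utt = np.asarray(X[0])      # int16 raw samples for utterance 0, native length
labels = np.asarray(y[0])   # its flattened per-frame label rows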
Example #16
def _segment_axis(data):
    x = tuple([numpy.array([segment_axis(x, frame_size, 0) for x in var]) for var in data])
    return x
Example #17
def fetch_blizzard_tbptt(data_path,
                         sz=8000,
                         batch_size=100,
                         file_name="blizzard_tbptt.h5"):

    hdf5_path = os.path.join(data_path, file_name)

    if not os.path.exists(hdf5_path):
        data_matches = []

        for root, dir_names, file_names in os.walk(data_path):
            for filename in fnmatch.filter(file_names, 'data_*.npy'):
                data_matches.append(os.path.join(root, filename))

        # sort in proper order
        data_matches = sorted(
            data_matches,
            key=lambda x: int(x.split("/")[-1].split("_")[-1].split(".")[0]))

        # setup tables
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(
            hdf5_file.root,
            'data',
            tables.Int16Atom(),
            shape=(0, sz),
            filters=compression_filter,
        )

        for n, f in enumerate(data_matches):
            print("Reading file %s" % (f))

            with open(f, 'rb') as fp:
                # Array of arrays, ragged
                d = np.load(fp)
                large_d = d[0]

                for i in xrange(1, len(d)):
                    print("Processing line %i of %i" % (i + 1, len(d)))
                    di = d[i]

                    if len(di.shape) > 1:
                        di = di[:, 0]

                    large_d = np.concatenate([large_d, di])

                chunk_size = len(large_d) // batch_size
                seg_d = segment_axis(large_d, chunk_size, 0)
                num_batch = (seg_d.shape[-1] - 1) // sz

                for i in range(num_batch):
                    batch = seg_d[:, i * sz:(i + 1) * sz]

                    for j in range(batch_size):
                        data.append(batch[j][None])

        hdf5_file.close()

    hdf5_file = tables.openFile(hdf5_path, mode='r')

    return hdf5_file.root.data
Example #18
def _segment_axis(data):
    x = numpy.array([segment_axis(x, frame_size, 0) for x in data[0]])
    return (x,)
Example #19
 def _segment_axis(data):
     # Defined inside so that frame_size is available
     x = tuple([numpy.array([segment_axis(x, frame_size, 0) for x in var])
                for var in data])
     return x
Example #20
    def load(self, data_path):
        if self.name not in ['train', 'valid', 'test']:
            raise ValueError(self.name + " is not a recognized value. " +
                             "Valid values are ['train', 'valid', 'test'].")

        speaker_info_list_path = os.path.join(data_path, 'spkrinfo.npy')
        #phoneme_list_path = os.path.join(data_path, 'reduced_phonemes.pkl')
        #word_list_path = os.path.join(data_path, 'words.pkl')
        #speaker_features_list_path = os.path.join(data_path,
        #                                          'spkr_feature_names.pkl')
        speaker_id_list_path = os.path.join(data_path, 'speakers_ids.pkl')
        raw_path = os.path.join(data_path, self.name + '_x_raw.npy')
        phoneme_path = os.path.join(data_path, self.name + '_x_phonemes.npy')
        #phone_path = os.path.join(data_path, self.name + '_x_phones.npy')
        #word_path = os.path.join(data_path, self.name + '_x_words.npy')
        speaker_path = os.path.join(data_path, self.name + '_spkr.npy')

        raw = np.load(raw_path)
        raw_X = []
        for x in raw:
            raw_X.append(np.asarray(x, dtype=theano.config.floatX))
        raw_X = np.array(raw_X)

        if self.shuffle:
            idx = np.random.permutation(len(raw_X))
            raw_X = raw_X[idx]
        else:
            idx = np.arange(len(raw_X))

        if not self.use_spec:
            pre_X, self.X_mean, self.X_std =\
                self.global_normalize(raw_X, self.X_mean, self.X_std)

        if self.use_window:
            if self.use_spec:
                X = self._use_spec(raw_X)
                X = self._log_magnitude(X)
                X = self._concatenate(X)
            else:
                X = self._use_window(pre_X)
        else:
            # assumes use_spec implies use_window; otherwise pre_X is undefined here
            X = np.asarray([segment_axis(x, self.frame_size, 0) for x in pre_X])
 
        if self.load_spk_info:
            spk = np.load(speaker_path)
            spk = spk[idx]
            S = np.zeros((len(spk), 630))
            for i, s in enumerate(spk):
                S[i, s] = 1
 
        if self.load_phonetic_label:
            #pho = np.load(phone_path)
            pho = np.load(phoneme_path)
            self.len_pho = np.array([np.unique(x).max() for x in pho]).max() + 1
            unseg_Y = []
            for y in pho:
                unseg_Y.append(np.asarray(y, dtype=theano.config.floatX))
            unseg_Y = np.array(unseg_Y)
            unseg_Y = unseg_Y[idx]
            unseg_Y = np.array([segment_axis(y, self.frame_size, 0) for y in unseg_Y])
            if self.use_n_gram:
                Y = self.assign_n_gram_per_frame(unseg_Y)
            else:
                Y = self.assign_phoneme_per_frame(unseg_Y)

        if self.load_spk_info and self.load_phonetic_label:
            return [X, Y, S]
        elif self.load_spk_info and not self.load_phonetic_label:
            return [X, S]
        elif not self.load_spk_info and self.load_phonetic_label:
            return [X, Y]
        elif not self.load_spk_info and not self.load_phonetic_label:
            return [X]
Example #21
import numpy as np
import pysptk as SPTK
from scipy.io import wavfile

fs, x = wavfile.read('test.wav')
assert fs == 16000

x = 1. * x  # change to float64

from cle.cle.utils import segment_axis

frame_length = 1024
hopsize = 80
noverlap = frame_length - hopsize

frames = segment_axis(x, frame_length, noverlap).astype('float64').T
frames = frames * SPTK.blackman(frame_length).reshape((frame_length, 1))

#frames = frames.T
#frames = frames.copy(order='C')
frames = frames.T

order = 20
alpha = 0.41
stage = 4
gamma = -1.0 / stage

mgc = np.apply_along_axis(SPTK.mgcep, 1, frames, order, alpha, gamma)
mgc_sp = np.apply_along_axis(SPTK.mgc2sp, 1, mgc, alpha, gamma,
                             frame_length).real
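Two details worth flagging here: segment_axis takes the overlap, not the hop, so noverlap = frame_length - hopsize yields an 80-sample (5 ms at 16 kHz) hop; and the Blackman window, reshaped to (1024, 1), broadcasts across the frame axis of the transposed frames. A few check lines one could append to the script, under those assumptions:

n_frames = 1 + (len(x) - frame_length) // hopsize  # end='cut' frame count
assert frames.shape == (n_frames, frame_length)
assert mgc.shape == (n_frames, order + 1)          # mgcep yields order+1 coefficients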
Example #22
    def load(self, data_path):
        dataset = 'audio.tar.gz'
        datafile = os.path.join(data_path, dataset)
        if not os.path.isfile(datafile):
            try:
                # Python 2: urlretrieve lives on the urllib module itself
                import urllib
                urllib.urlretrieve
            except AttributeError:
                # Python 3: it moved to urllib.request
                import urllib.request as urllib
            url = 'https://dl.dropboxusercontent.com/u/15378192/audio.tar.gz'
            print("Downloading data from %s" % url)
            urllib.urlretrieve(url, datafile)
        if not os.path.exists(os.path.join(data_path, "audio")):
            tar = tarfile.open(datafile)
            os.chdir(data_path)
            tar.extractall()
            tar.close()
        h5_file_path = os.path.join(data_path, "saved_fruit.h5")
        if not os.path.exists(h5_file_path):
            data_path = os.path.join(data_path, "audio")
            audio_matches = []
            for root, dirnames, filenames in os.walk(data_path):
                for filename in fnmatch.filter(filenames, '*.wav'):
                    audio_matches.append(os.path.join(root, filename))
            random.seed(1999)
            random.shuffle(audio_matches)
            # http://mail.scipy.org/pipermail/numpy-discussion/2011-March/055219.html
            h5_file = tables.openFile(h5_file_path, mode='w')
            data_x = h5_file.createVLArray(h5_file.root, 'data_x',
                                           tables.Float32Atom(shape=()),
                                           filters=tables.Filters(1))
            data_y = h5_file.createVLArray(h5_file.root, 'data_y',
                                           tables.Int32Atom(shape=()),
                                           filters=tables.Filters(1))
            for wav_path in audio_matches:
                # Convert chars to int classes
                word = wav_path.split(os.sep)[-1][:6]
                chars = [ord(c) - 97 for c in word]
                data_y.append(np.array(chars, dtype='int32'))
                fs, d = wavfile.read(wav_path)
                data_x.append(d.astype(theano.config.floatX))
            h5_file.close()
        h5_file = tables.openFile(h5_file_path, mode='r')
        raw_X = np.array([np.asarray(x) for x in h5_file.root.data_x])
        cls = np.array([''.join([chr(y+97) for y in Y]) for Y in h5_file.root.data_y])

        if self.name != 'all':
            fruit_list = []
            if isinstance(self.name, (list, tuple)):
                # a list of fruit names was given
                for i, fruit_name in enumerate(cls):
                    for name in self.name:
                        if name in fruit_name:
                            fruit_list.append(i)
            else:
                # a single fruit name was given
                for i, fruit_name in enumerate(cls):
                    if self.name in fruit_name:
                        fruit_list.append(i)
        else:
            fruit_list = tolist(np.arange(len(raw_X)))
        raw_X = raw_X[fruit_list]
        if self.prep == 'normalize':
            pre_X, self.X_mean, self.X_std = self.global_normalize(raw_X)
        elif self.prep == 'standardize':
            pre_X, self.X_max, self.X_min = self.standardize(raw_X)
        X = np.array([segment_axis(x, self.frame_size, 0) for x in pre_X])
           
        return [X]
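A hypothetical call for context, assuming this load is a method of a dataset class exposing name, prep and frame_size (the class itself is not part of the snippet):

ds = FruitDataset(name='apple', prep='normalize', frame_size=200)  # hypothetical class
X, = ds.load('/tmp/fruit_data')
# X[i] holds framed clip i: shape (n_frames_i, 200) after segment_axis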
Example #23
def _segment_axis(data):
    x = numpy.array([segment_axis(x, frame_size, 0) for x in data[0]])
    return (x, )
Example #24
 def _use_spec(self, batch):
     batch = np.asarray([self.numpy_rfft(self.window *
                                         segment_axis(x, self.frame_size,
                                                      self.overlap, end='pad'))
                         for x in batch])
     return batch
Example #25
def fetch_blizzard_tbptt(data_path, sz=8000, batch_size=100, file_name="blizzard_tbptt.h5"):

    hdf5_path = os.path.join(data_path, file_name)
    print("looking for ", hdf5_path)

    if not os.path.exists(hdf5_path):
        data_matches = []

        for root, dir_names, file_names in os.walk(data_path):
            for filename in fnmatch.filter(file_names, '*.npy'):
                data_matches.append(os.path.join(root, filename))

        # sort in proper order
        '''
        data_matches = sorted(data_matches,
                              key=lambda x: int(
                                  x.split("/")[-1].split("_")[-1][0]))
        '''
        # print(data_matches)

        # setup tables
        compression_filter = tables.Filters(complevel=5, complib='blosc')
        hdf5_file = tables.openFile(hdf5_path, mode='w')
        data = hdf5_file.createEArray(hdf5_file.root, 'data',
                                      tables.Int16Atom(),
                                      shape=(0, sz),
                                      filters=compression_filter,)

        for n, f in enumerate(data_matches):
            print("Reading file %s" % (f))

            with open(f, 'rb') as fp:
                # Array of arrays, ragged
                large_d = np.load(fp)
                '''
                d = np.load(fp)
                large_d = d[0]

                for i in xrange(1, len(d)):
                    print("Processing line %i of %i" % (i+1, len(d)))
                    di = d[i]

                    if len(di.shape) > 1:
                        di = di[:, 0]

                    large_d = np.concatenate([large_d, di])
                '''
                chunk_size = len(large_d) // batch_size
                seg_d = segment_axis(large_d, chunk_size, 0)
                num_batch = (seg_d.shape[-1] - 1) // sz

                for i in range(num_batch):
                    batch = seg_d[:, i*sz:(i+1)*sz]

                    for j in range(batch_size):
                        data.append(batch[j][None])

        hdf5_file.close()

    hdf5_file = tables.openFile(hdf5_path, mode='r')

    return hdf5_file.root.data
Example #26
def _segment_axis(data):
    x = tuple([
        numpy.array([segment_axis(x, frame_size, 0) for x in var])
        for var in data
    ])
    return x
Example #27
import numpy as np
import pysptk as SPTK
from scipy.io import wavfile

fs, x = wavfile.read('test.wav')
assert fs == 16000

x = 1. * x  # change to float64

from cle.cle.utils import segment_axis

frame_length = 1024
hopsize = 80
noverlap = frame_length - hopsize

frames = segment_axis(x, frame_length, noverlap).astype('float64').T
frames = frames * SPTK.blackman(frame_length).reshape((frame_length, 1))

#frames = frames.T
#frames = frames.copy(order='C')
frames = frames.T

order = 20
alpha = 0.41
stage = 4
gamma = -1.0 / stage

mgc = np.apply_along_axis(SPTK.mgcep, 1, frames, order, alpha, gamma)
mgc_sp = np.apply_along_axis(SPTK.mgc2sp, 1, mgc, alpha, gamma, frame_length).real

mgc_sp_test = np.hstack([mgc_sp, mgc_sp[:, ::-1][:, 1:-1]])