def __init__(self, datapath, framelen, overlap, start=0, stop=None, spkrid=None):
    """
    Build a frame dataset from the TIMIT corpus.

    Every row of X holds the first ``framelen - 1`` samples of a frame and
    the matching row of y holds the last sample of that same frame.

    datapath: path to TIMIT raw data (using WAV format)
    framelen: length of the acoustic frames
    overlap: amount of acoustic samples to overlap
    start: index of the first utterance to use
    stop: index one past the last utterance to use (None: up to the end)
    spkrid: when truthy, only utterances of this speaker are considered

    NOTE: start and stop slice the utterance list *after* the spkrid
    filter has been applied, so with spkrid they are relative to that
    speaker's utterances, not to the whole corpus.

    Raises ValueError when the selection contains no utterance.
    """
    data = TimitFullCorpusReader(datapath)
    # Progress trace; formatted so the output is the same text under
    # Python 2 and Python 3.
    print("%s %s" % (start, stop))

    if spkrid:
        utterances = data.utteranceids(spkrid=spkrid)
    else:
        utterances = data.utteranceids()
    # A slice bound of None means "to the end", so a single slice covers
    # both the bounded and the unbounded case.
    utterances = utterances[start:stop]
    print("%s %s" % ("# of utterances: ", len(utterances)))

    if len(utterances) == 0:
        # Fail early with a readable message instead of an opaque
        # tuple-unpacking error further down.
        raise ValueError(
            "no utterances selected (start=%r, stop=%r, spkrid=%r)"
            % (start, stop, spkrid))

    # data.frames() results are indexed as (frames, phones); the phone
    # labels are not used by this dataset, so only item 0 is stacked.
    per_utterance = [data.frames(u, framelen, overlap)[0] for u in utterances]
    # Scale by 2**-15 (presumably 16-bit PCM to [-1, 1) -- TODO confirm).
    frames = np.vstack(per_utterance) * 2**-15

    X = frames[:, 0:framelen - 1]
    y = np.asmatrix([frames[:, framelen - 1]]).T  # y.ndim has to be 2

    super(TimitFrameData, self).__init__(X=X, y=y)
def __init__(self, datapath, framelen, overlap, start=0, stop=None):
    """
    Build a frame dataset whose inputs also carry the phone label.

    Every row of X is the first ``framelen - 1`` samples of a frame
    followed by a one-hot encoding of that frame's phone (one column per
    entry of ``data.phonelist``); the matching row of y holds the last
    sample of the frame.

    datapath: path to TIMIT raw data (using WAV format)
    framelen: length of the acoustic frames
    overlap: amount of acoustic samples to overlap
    start: index of first TIMIT file to be used
    stop: index one past the last TIMIT file to be used (None: up to the end)

    Raises ValueError when no utterance is selected or when a frame is
    labelled with a phone that is not in ``data.phonelist``.
    """
    data = TimitFullCorpusReader(datapath)

    # A slice bound of None means "to the end", so no branching is needed.
    utterances = data.utteranceids()[start:stop]
    if len(utterances) == 0:
        # Fail early with a readable message instead of an opaque
        # tuple-unpacking error further down.
        raise ValueError(
            "no utterances selected (start=%r, stop=%r)" % (start, stop))

    # data.frames() results are indexed as (frames, phones).
    per_utterance = [data.frames(u, framelen, overlap) for u in utterances]
    # Scale by 2**-15 (presumably 16-bit PCM to [-1, 1) -- TODO confirm).
    framedata = np.vstack([item[0] for item in per_utterance]) * 2**-15
    ph = list(itertools.chain.from_iterable(item[1] for item in per_utterance))

    # Map each phone to its column once instead of scanning the list for
    # every frame. setdefault keeps the first occurrence, which is the
    # position list.index() would have returned.
    phonelist = data.phonelist
    column_of = {}
    for column, phone in enumerate(phonelist):
        column_of.setdefault(phone, column)
    try:
        idx = np.array([column_of[p] for p in ph], dtype=np.intp)
    except KeyError as err:
        # list.index() raised ValueError for unknown labels; keep that type.
        raise ValueError(
            "phone label %r is not in data.phonelist" % (err.args[0],))

    # making y a one-hot output: set one cell per row in a single
    # integer-array assignment.
    one_hot = np.zeros((len(ph), len(phonelist)), dtype='float32')
    one_hot[np.arange(len(ph)), idx] = 1.

    X = np.hstack([framedata[:, 0:framelen - 1], one_hot])
    y = np.asmatrix(framedata[:, framelen - 1]).T

    super(TimitFramePhoneData, self).__init__(X=X, y=y)
def __init__(self, spkrid, phone, framelen, overlap, start, stop,
             datapath='/home/jfsantos/data/TIMIT/'):
    """
    Build a frame dataset from one speaker's frames of a single phone.

    All frames of the speaker's utterances are collected, those labelled
    with ``phone`` are kept, and rows ``start:stop`` of that selection are
    used. Every row of X holds the first ``framelen - 1`` samples of a
    frame and the matching row of y holds the last sample of that frame.

    spkrid: speaker whose utterances are read
    phone: phone label to keep (e.g. 'iy')
    framelen: length of the acoustic frames
    overlap: amount of acoustic samples to overlap
    start: index of the first selected frame to use
    stop: index one past the last selected frame to use
    datapath: path to TIMIT raw data; the default is the location this
        loader has always read from

    Raises ValueError when the speaker has no utterances.
    """
    data = TimitFullCorpusReader(datapath)

    utterances = data.utteranceids(spkrid=spkrid)
    if len(utterances) == 0:
        # Fail early with a readable message instead of an opaque
        # tuple-unpacking error further down.
        raise ValueError("no utterances found for spkrid=%r" % (spkrid,))

    # data.frames() results are indexed as (frames, phones).
    per_utterance = [data.frames(u, framelen, overlap) for u in utterances]
    # Scale by 2**-15 (presumably 16-bit PCM to [-1, 1) -- TODO confirm).
    fr = np.vstack([item[0] for item in per_utterance]) * 2**-15
    ph = list(itertools.chain.from_iterable(item[1] for item in per_utterance))

    # Keep only the frames labelled with the requested phone. An empty
    # match yields arrays with zero rows rather than an error.
    rows = [i for i, label in enumerate(ph) if label == phone]
    selected = fr[rows]

    X = selected[:, 0:framelen - 1]
    y = np.array([selected[:, framelen - 1]]).T  # y.ndim has to be 2

    super(TimitPhoneData, self).__init__(X=X[start:stop], y=y[start:stop])