示例#1
0
    def __init__(self, datapath, framelen, overlap, start=0, stop=None, spkrid=None):
        """
        Load TIMIT acoustic frames and split each frame into inputs/target.

        For every frame, the first `framelen - 1` samples become a row of X
        and the last sample becomes the matching row of y.

        datapath: path to TIMIT raw data (using WAV format)
        framelen: length of the acoustic frames
        overlap: amount of acoustic samples to overlap
        start: index of the first utterance to use
        stop: index one past the last utterance to use (None means "up to
              the end of the utterance list")
        spkrid: optional speaker id; when given (and truthy) only the
                utterance ids returned for that speaker are used

        Raises ValueError when the start/stop/spkrid selection is empty.

        FIXME: start and stop here are kind of hackish if used with
        spkrid, because they index the already-filtered utterance list.
        """
        data = TimitFullCorpusReader(datapath)
        # Single-argument print() emits the same text on Python 2 and 3.
        print("%s %s" % (start, stop))
        if spkrid:
            utterances = data.utteranceids(spkrid=spkrid)
        else:
            utterances = data.utteranceids()
        # A slice bound of None already means "to the end".
        utterances = utterances[start:stop]
        n_utterances = len(utterances)
        # Two spaces after the colon reproduce the historical output.
        print("# of utterances:  %d" % n_utterances)
        if n_utterances == 0:
            raise ValueError("no utterances selected (start=%r, stop=%r, "
                             "spkrid=%r)" % (start, stop, spkrid))

        # data.frames(...) is indexed as (frames, phones); only the frames
        # are needed here.
        # NOTE(review): the 2**-15 factor presumably maps 16-bit PCM to
        # [-1, 1) -- confirm against the corpus reader.
        fr = np.vstack([data.frames(utt, framelen, overlap)[0]
                        for utt in utterances]) * 2**-15

        X = fr[:, 0:framelen - 1]
        y = np.asmatrix([fr[:, framelen - 1]]).T  # y.ndim has to be 2

        super(TimitFrameData, self).__init__(X=X, y=y)
示例#2
0
    def __init__(self, datapath, framelen, overlap, start=0, stop=None):
        """
        Load TIMIT frames and pair them with a one-hot phone encoding.

        X holds, per frame, the first `framelen - 1` samples followed by a
        one-hot vector (one column per entry of the reader's `phonelist`)
        marking the frame's phone. y holds the last sample of each frame.

        datapath: path to TIMIT raw data (using WAV format)
        framelen: length of the acoustic frames
        overlap: amount of acoustic samples to overlap
        start: index of first TIMIT utterance to be used
        stop: index one past the last TIMIT utterance to be used (None
              means "up to the end of the utterance list")

        Raises ValueError when no utterance is selected or when a frame is
        labelled with a phone that is missing from `phonelist`.
        """
        data = TimitFullCorpusReader(datapath)
        # A slice bound of None already means "to the end".
        utterances = data.utteranceids()[start:stop]
        if len(utterances) == 0:
            raise ValueError("no utterances selected (start=%r, stop=%r)"
                             % (start, stop))

        # data.frames(...) is indexed as (frames, phones) per utterance.
        per_utt = [data.frames(utt, framelen, overlap) for utt in utterances]
        # NOTE(review): the 2**-15 factor presumably maps 16-bit PCM to
        # [-1, 1) -- confirm against the corpus reader.
        framedata = np.vstack([item[0] for item in per_utt]) * 2**-15
        ph = list(itertools.chain.from_iterable(item[1] for item in per_utt))

        # making y a one-hot output
        # Build the phone -> column map once instead of calling
        # list.index() for every frame; setdefault keeps the first
        # occurrence, matching what list.index() would return.
        phonelist = data.phonelist
        phone_to_col = {}
        for col, label in enumerate(phonelist):
            phone_to_col.setdefault(label, col)
        try:
            idx = np.array([phone_to_col[p] for p in ph], dtype=int)
        except KeyError as err:
            # Keep the exception type list.index() raised for unknown phones.
            raise ValueError("phone %r is not in the corpus phone list"
                             % (err.args[0],))

        one_hot = np.zeros((len(ph), len(phonelist)), dtype='float32')
        one_hot[np.arange(len(ph)), idx] = 1.

        X = np.hstack([framedata[:, 0:framelen - 1], one_hot])
        y = np.asmatrix(framedata[:, framelen - 1]).T

        super(TimitFramePhoneData, self).__init__(X=X, y=y)
示例#3
0
    def __init__(self, spkrid, phone, framelen, overlap, start, stop,
                 datapath='/home/jfsantos/data/TIMIT/'):
        """
        Load the frames of one speaker that are labelled with one phone.

        For every selected frame, the first `framelen - 1` samples become a
        row of X and the last sample becomes the matching row of y. Only
        rows start:stop of the selected frames are kept.

        spkrid: speaker id passed to the reader's utteranceids()
        phone: phone label to keep (frames with any other label are dropped)
        framelen: length of the acoustic frames
        overlap: amount of acoustic samples to overlap
        start: index of the first selected frame to use
        stop: index one past the last selected frame to use
        datapath: path to TIMIT raw data; defaults to the location that was
                  previously hard-coded here

        NOTE: earlier versions ignored `phone`, `framelen` and `overlap`
        and always used 'iy', 160 and 159. Passing those values reproduces
        the old behaviour.

        Raises ValueError when the speaker has no utterances.
        """
        data = TimitFullCorpusReader(datapath)
        # data.frames(...) is indexed as (frames, phones) per utterance.
        per_utt = [data.frames(utt, framelen, overlap)
                   for utt in data.utteranceids(spkrid=spkrid)]
        if len(per_utt) == 0:
            raise ValueError("no utterances found for speaker %r" % (spkrid,))

        # NOTE(review): the 2**-15 factor presumably maps 16-bit PCM to
        # [-1, 1) -- confirm against the corpus reader.
        fr = np.vstack([item[0] for item in per_utt]) * 2**-15
        ph = list(itertools.chain.from_iterable(item[1] for item in per_utt))

        # Row indices of the frames labelled with the requested phone. An
        # explicit integer dtype keeps an empty selection valid as an index.
        wanted = np.array([i for i, label in enumerate(ph) if label == phone],
                          dtype=int)
        fr_phone = fr[wanted]

        X = fr_phone[:, 0:framelen - 1]
        y = np.array([fr_phone[:, framelen - 1]]).T  # y.ndim has to be 2

        super(TimitPhoneData, self).__init__(X=X[start:stop], y=y[start:stop])