示例#1
0
 def _set_nose_radii(self, values):
     '''Store the nasal section radii in the wrapped parameter struct.

     values must be a sized sequence holding exactly
     gnuspeech.TOTAL_NASAL_SECTIONS - 1 numbers. Element 0 of the array
     handed to gnuspeech is always 0.0; values fill elements 1 onwards.

     Raises ValueError if values does not have the expected length.
     '''
     expected = gnuspeech.TOTAL_NASAL_SECTIONS - 1
     if len(values) != expected:
         # Explicit check rather than assert: asserts are removed under
         # python -O, and a too-long sequence would then be written past
         # the end of the C array allocated below.
         raise ValueError('expected %d nose radii, got %d'
                          % (expected, len(values)))

     radii = gnuspeech.new_double_array(gnuspeech.TOTAL_NASAL_SECTIONS)
     try:
         gnuspeech.double_array_setitem(radii, 0, 0.)
         for i, v in enumerate(values):
             gnuspeech.double_array_setitem(radii, i + 1, v)
         # NOTE(review): the temporary is freed right after this
         # assignment, so the setter presumably copies the elements into
         # the struct -- confirm against the generated wrapper.
         self._params.noseRadius = radii
     finally:
         # Release the temporary even if storing a value failed.
         gnuspeech.delete_double_array(radii)
示例#2
0
    def synthesize(self, *controls):
        '''Synthesize a sound from the given control variables.

        Each element of controls is expected to be a list or numpy array
        containing controls for each frame of the sound synthesis. If it is a
        numpy array, frames are read from the 0 axis (the "rows") of the array.
        All elements of controls are chained together, in order, into a single
        sequence of frames.

        The variables for each frame, in order, are:

        glotPitch - glottal pitch, 0 == middle C
        glotVol - glottal volume, dB
        aspVol - aspirate volume, dB
        fricVol - fricative volume, dB
        fricPos - fricative position, cm
        fricCF - fricative filter center frequency, Hz
        fricBW - fricative filter bandwidth, Hz
        radius[0] - radius of vocal tract, region 0, cm
        ...
        radius[7] - radius of vocal tract, region 7, cm
        velum - radius of velar opening, cm

        Returns an array.array of type 'd' filled from the contents of the
        sample rate converter's temporary file.

        A frame with fewer than 16 values raises ValueError or IndexError
        while it is being unpacked.
        '''
        # convert control frames into TRM linked list structure
        data = gnuspeech.TRMData()
        data.inputParameters = self.parameters._params

        # One scratch C array is reused for the radii of every frame.
        # NOTE(review): this relies on addInput copying the radii rather than
        # keeping the pointer -- confirm against the gnuspeech wrapper.
        radii = gnuspeech.new_double_array(gnuspeech.TOTAL_REGIONS)
        try:
            for frame in itertools.chain.from_iterable(controls):
                (glot_pitch, glot_vol, asp_vol,
                 fric_vol, fric_pos, fric_cf, fric_bw) = frame[:7]
                for i, v in enumerate(frame[7:15]):
                    gnuspeech.double_array_setitem(radii, i, v)
                velum = frame[15]
                gnuspeech.addInput(data, glot_pitch, glot_vol, asp_vol,
                                   fric_vol, fric_pos, fric_cf, fric_bw,
                                   radii, velum)
        finally:
            # Free the scratch array even when a malformed frame aborts the
            # loop part-way through.
            gnuspeech.delete_double_array(radii)

        # run the synthesizer
        gnuspeech.synthesize(self._model, data)

        # now return the synthesized sound data as an array of doubles
        converter = self._model.sampleRateConverter
        converter.tempFilePtr.seek(0)
        logging.debug('number of samples: %d', converter.numberSamples)
        logging.debug('maximum sample value: %.4f', converter.maximumSampleValue)
        raw = converter.tempFilePtr.read()
        arr = array.array('d')
        # array.fromstring was removed in Python 3.9; frombytes is its
        # replacement. Fall back to fromstring on interpreters without it.
        if hasattr(arr, 'frombytes'):
            arr.frombytes(raw)
        else:
            arr.fromstring(raw)
        return arr