Example #1
import prep  # project-local pre-processing module (name as used below; import path assumed)
def batch_preprocess(fundamental_frequency_in_blocks, voiced_samples, rms):
    """
    batch_preprocess(fundamental_frequency_in_blocks,voiced_samples,rms)

                    This is the pre-process or pre-synthesis stage. This module computes the
                    samples for the begining of utterances and finally computes the selected_inflect_block
            Parameters: fundamental_frequency_in_blocks-This is a fundamental frequency(or pitch)
                                    for the blocks in Chunk_Size
                                    voiced_samples-This are samples that contain the voiced samples.
                                    rms-is the root mean square computation
            Returns:	selected_inflect_block- are the blocks that are important for the synthesis process
    """

    # Locate where each utterance begins, split the voiced samples into
    # per-utterance chunks, and pre-process them into candidate
    # inflection samples.
    voice_sample_begin = prep.utterance_region_begin_samples(voiced_samples)
    voice_chunk_sample = prep.utterance_chunk(
        voiced_samples, voice_sample_begin[1])
    inflection_voice_samples = prep.pre_process(voice_chunk_sample)
    #frequency_of_voiced_samples = fundamental_frequency_in_blocks[voiced_samples]
    #frequency_for_inflection = prep.potential_inflection_fundamental_frequency(frequency_of_voiced_samples)
    # Map the candidate samples to block numbers and select the blocks
    # that matter for inflection synthesis.
    inflection_sample_numbers = prep.matrix_of_sample_numbers(
        rms[voice_sample_begin[0]], inflection_voice_samples)
    selected_inflect_block = prep.selected_inflect_block_new(
        inflection_sample_numbers)
    return selected_inflect_block
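

# A minimal driver sketch (assumptions): `x` is a mono float signal at sample
# rate `fs`, and alysis is the project-local analysis module used in
# Example #2 below; the placeholder audio stands in for a real recording.
if __name__ == "__main__":
    import numpy as np
    import alysis  # assumed project-local analysis module

    fs = 16000
    x = np.random.randn(fs * 2)  # two seconds of placeholder audio
    CHUNK_SIZE = 1024
    f0 = alysis.pitch_detect(x, fs, CHUNK_SIZE)
    info = alysis.starting_info(x, f0, fs, CHUNK_SIZE)
    rms = prep.root_mean_square(x, CHUNK_SIZE, fs)[0]
    print(batch_preprocess(f0, info['VSamp'], rms))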
Example #2
import numpy as np

import alysis  # project-local modules (names as used below; import paths assumed)
import prep
import synth
def emotive_speech(x, fs, typeOfEmotion):
    """Analyze signal x (sample rate fs) and apply the synthesis patch for
    typeOfEmotion: "Happy", "HappyTensed", "Sad", or "Afraid"."""
    CHUNK_SIZE = 1024  # samples per analysis block
    NUM_BLOCKS = int(np.ceil(len(x) / float(CHUNK_SIZE)))
    SAMPLE_PERIOD = CHUNK_SIZE / float(fs)  # seconds per block
    TIME_STAMPS = np.arange(NUM_BLOCKS - 1) * SAMPLE_PERIOD  # start times (s) for all but the last block
    QFACTOR = 1
    #---------------------Analysis---------------------------------------#
    data_in_blocks = alysis.data_blocks(x, CHUNK_SIZE)
    fundamental_frequency_in_blocks = alysis.pitch_detect(x, fs, CHUNK_SIZE)
    voiced_unvoiced_starting_info_object = alysis.starting_info(
        x, fundamental_frequency_in_blocks, fs, CHUNK_SIZE)
    voiced_samples = voiced_unvoiced_starting_info_object['VSamp']
    voiced_regions = alysis.voiced_regions(
        x, fundamental_frequency_in_blocks,
        voiced_unvoiced_starting_info_object, CHUNK_SIZE)
    # number of consecutive analysis blocks spanning roughly 0.5 s of audio
    consecutive_blocks = 1 + int(0.5 / SAMPLE_PERIOD)
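    # Worked example (illustrative): at fs = 44100 Hz and CHUNK_SIZE = 1024,
    # SAMPLE_PERIOD = 1024 / 44100 ≈ 0.0232 s, so
    # consecutive_blocks = 1 + int(0.5 / 0.0232) = 22 blocks per half second.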

    #---------------------preprocess-------------------------------------#
    # Candidate inflection samples, their pitch values, and per-block RMS.
    inflection_voice_samples = prep.pre_process(voiced_samples)
    frequency_of_voiced_samples = fundamental_frequency_in_blocks[
        inflection_voice_samples]
    rms = prep.root_mean_square(x, CHUNK_SIZE, fs)[0]
    frequency_for_inflection = prep.potential_inflection_fundamental_frequency(
        frequency_of_voiced_samples)
    inflection_sample_numbers = prep.matrix_of_sample_numbers(
        rms, inflection_voice_samples)
    inflect_blocks = prep.consecutive_blocks_for_inflection(
        inflection_sample_numbers, consecutive_blocks)
    selected_inflect_block = prep.alteration_of_discrete_data(
        inflection_sample_numbers, consecutive_blocks, inflect_blocks)
    n = prep.consecutive_blocks_in_selected_blocks(selected_inflect_block,
                                                   consecutive_blocks)
    reshaped_inflect_blocks = prep.reshaped_inflection_blocks(
        n, selected_inflect_block, consecutive_blocks)
    difference_arrays = prep.difference_arrays(NUM_BLOCKS,
                                               reshaped_inflect_blocks)

    #----------------------synthesis-------------------------------------#

    if typeOfEmotion == "Happy":
        consecutive_blocks = 1 + int(0.5 / SAMPLE_PERIOD)
        selected_inflect_block = prep.alteration_of_discrete_data(
            inflection_sample_numbers, consecutive_blocks, inflect_blocks)
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]

        gain = 3.0
        semitones = 0.5
        synth.happy_patch(fs, semitones, QFACTOR, gain, utterance_time_stamps)

    elif typeOfEmotion == "HappyTensed":
        consecutive_blocks = int(0.5 / SAMPLE_PERIOD)
        inflection_sample_numbers = prep.matrix_of_sample_numbers(
            rms, inflection_voice_samples)
        inflect_blocks = prep.consecutive_blocks_for_inflection(
            inflection_sample_numbers, consecutive_blocks)
        selected_inflect_block = prep.alteration_of_discrete_data(
            inflection_sample_numbers, consecutive_blocks, inflect_blocks)
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]

        gain = 3.0
        semitones = 1.0
        synth.happy_tensed_patch(fs, semitones, QFACTOR, gain,
                                 utterance_time_stamps)

    elif typeOfEmotion == "Sad":
        gain = 0.25
        semitones = -0.5
        synth.sad_patch(fs, semitones, QFACTOR, gain)

    elif typeOfEmotion == "Afraid":
        speed = 8.5
        depth = 50
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]
        synth.afraid_patch(fs, speed, depth, utterance_time_stamps)
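

# A minimal driver sketch (assumptions): the input is a mono WAV file read
# with scipy.io.wavfile, and the synth.*_patch calls above produce the output
# themselves, since emotive_speech returns nothing; the file name is
# illustrative only.
if __name__ == "__main__":
    from scipy.io import wavfile

    fs, x = wavfile.read("speech.wav")  # hypothetical input recording
    x = x.astype(np.float64)            # analysis helpers assume float samples
    emotive_speech(x, fs, "Happy")      # or "HappyTensed", "Sad", "Afraid"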