Example #1

import alysis  # project-local analysis module, name inferred from the calls below

def batch_analysis(x, fs, chunk_size):
    """
    batch_analysis(x,fs,chunk_size)

                    computes the fundamental frequency/pitch of blocks/,voiced_samples and the rms values
                    that are important for analysis and will be used for pre-process
            Parameters:  x-discrete data from the wavefile
                                     fs-sampling frequency
                                     Chunk_Size- The size of block containing datas

            Returns:	fundamental_frequency_in_blocks- This is a fundamental frequency(or pitch)
                                    for the blocks in Chunk_Size

                                    voiced_samples-This are samples that contain the voiced samples.Will be used
                                for the entire process and is important for the synthesis process as well.

                                    rms- is the root mean square computation that will be important for
                                    categorizing inflecion/pitch bending samples.
    """

    fundamental_frequency_in_blocks = alysis.pitch_detect(x, fs, chunk_size)
    rms = alysis.root_mean_square(x, chunk_size, fs)
    voiced_unvoiced_starting_info_object = alysis.starting_info(
        x, fundamental_frequency_in_blocks, fs, chunk_size)
    voiced_samples = voiced_unvoiced_starting_info_object['VSamp']
    return fundamental_frequency_in_blocks, voiced_samples, rms
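
A minimal usage sketch for batch_analysis, assuming the audio is loaded with
scipy.io.wavfile; the file name and the 1024-sample chunk size are
illustrative, not taken from the source:

from scipy.io import wavfile

fs, x = wavfile.read("speech.wav")  # hypothetical input file
f0_blocks, voiced_samples, rms = batch_analysis(x, fs, 1024)
print("blocks analysed:", len(f0_blocks))
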
Example #2

import numpy as np

import alysis  # project-local modules, names inferred from the calls below
import prep
import synth

def emotive_speech(x, fs, typeOfEmotion):
    CHUNK_SIZE = 1024
    NUM_BLOCKS = int(np.ceil(len(x) / CHUNK_SIZE))
    SAMPLE_PERIOD = 1 / float(fs) * CHUNK_SIZE  # duration of one block, in seconds
    TIME_STAMPS = np.arange(0, NUM_BLOCKS - 1) * (CHUNK_SIZE / float(fs))  # block start times, in seconds (final block excluded)
    QFACTOR = 1
    #---------------------Analysis---------------------------------------#
    data_in_blocks = alysis.data_blocks(x, CHUNK_SIZE)  # blocked view of the signal (not used below)
    fundamental_frequency_in_blocks = alysis.pitch_detect(x, fs, CHUNK_SIZE)
    voiced_unvoiced_starting_info_object = alysis.starting_info(
        x, fundamental_frequency_in_blocks, fs, CHUNK_SIZE)
    voiced_samples = voiced_unvoiced_starting_info_object['VSamp']
    voiced_regions = alysis.voiced_regions(
        x, fundamental_frequency_in_blocks,
        voiced_unvoiced_starting_info_object, CHUNK_SIZE)
    consecutive_blocks = 1 + int(0.5 / SAMPLE_PERIOD)  # number of blocks spanning roughly 0.5 s

    #---------------------Pre-process------------------------------------#
    inflection_voice_samples = prep.pre_process(voiced_samples)
    frequency_of_voiced_samples = fundamental_frequency_in_blocks[
        inflection_voice_samples]
    rms = prep.root_mean_square(x, CHUNK_SIZE, fs)[0]
    frequency_for_inflection = prep.potential_inflection_fundamental_frequency(
        frequency_of_voiced_samples)
    inflection_sample_numbers = prep.matrix_of_sample_numbers(
        rms, inflection_voice_samples)
    inflect_blocks = prep.consecutive_blocks_for_inflection(
        inflection_sample_numbers, consecutive_blocks)
    selected_inflect_block = prep.alteration_of_discrete_data(
        inflection_sample_numbers, consecutive_blocks, inflect_blocks)
    n = prep.consecutive_blocks_in_selected_blocks(selected_inflect_block,
                                                   consecutive_blocks)
    reshaped_inflect_blocks = prep.reshaped_inflection_blocks(
        n, selected_inflect_block, consecutive_blocks)
    difference_arrays = prep.difference_arrays(NUM_BLOCKS,
                                               reshaped_inflect_blocks)

    #---------------------Synthesis--------------------------------------#

    if typeOfEmotion == "Happy":
        consecutive_blocks = 1 + int(0.5 / SAMPLE_PERIOD)
        selected_inflect_block = prep.alteration_of_discrete_data(
            inflection_sample_numbers, consecutive_blocks, inflect_blocks)
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]

        gain = 3.0
        semitones = 0.5
        synth.happy_patch(fs, semitones, QFACTOR, gain, utterance_time_stamps)

    elif typeOfEmotion == "HappyTensed":
        consecutive_blocks = int(0.5 / SAMPLE_PERIOD)
        inflection_sample_numbers = prep.matrix_of_sample_numbers(
            rms, inflection_voice_samples)
        inflect_blocks = prep.consecutive_blocks_for_inflection(
            inflection_sample_numbers, consecutive_blocks)
        selected_inflect_block = prep.alteration_of_discrete_data(
            inflection_sample_numbers, consecutive_blocks, inflect_blocks)
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]

        gain = 3.0
        semitones = 1.0
        synth.happy_tensed_patch(fs, semitones, QFACTOR, gain,
                                 utterance_time_stamps)

    elif typeOfEmotion == "Sad":
        gain = 0.25
        semitones = -0.5
        synth.sad_patch(fs, semitones, QFACTOR, gain)

    elif typeOfEmotion == "Afraid":
        speed = 8.5
        depth = 50
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]
        synth.afraid_patch(fs, speed, depth, utterance_time_stamps)
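
A minimal driver sketch for emotive_speech, assuming the same scipy.io.wavfile
loader; the file name is illustrative, and the emotion strings are the four
handled above ("Happy", "HappyTensed", "Sad", "Afraid"):

from scipy.io import wavfile

fs, x = wavfile.read("speech.wav")  # hypothetical input file
emotive_speech(x, fs, "Happy")      # route the recording through the happy patch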