def batch_preprocess(fundamental_frequency_in_blocks, voiced_samples, rms):
    """
    batch_preprocess(fundamental_frequency_in_blocks, voiced_samples, rms)
            Pre-process (pre-synthesis) stage: locate the beginning of the
            utterance regions inside the voiced samples and derive the
            selected inflection blocks from them.
    Parameters: fundamental_frequency_in_blocks - fundamental frequency
                                (pitch) of the blocks. Accepted for interface
                                compatibility; the current body does not
                                read it.
                voiced_samples - the samples flagged as voiced.
                rms - root-mean-square values; indexed here with the first
                      item returned by
                      prep.utterance_region_begin_samples.
    Returns: selected_inflect_block - the result of
                                prep.selected_inflect_block_new, i.e. the
                                blocks handed on to the synthesis stage.
    """
    # Item [1] of the region info drives the chunking of the voiced samples;
    # item [0] is used further down to index into ``rms``.
    region_begin_info = prep.utterance_region_begin_samples(voiced_samples)
    chunked_voiced = prep.utterance_chunk(voiced_samples,
                                          region_begin_info[1])
    inflection_samples = prep.pre_process(chunked_voiced)

    rms_at_region_begin = rms[region_begin_info[0]]
    sample_number_matrix = prep.matrix_of_sample_numbers(
        rms_at_region_begin, inflection_samples)
    return prep.selected_inflect_block_new(sample_number_matrix)
def emotive_speech(x, fs, typeOfEmotion):
    """
    emotive_speech(x, fs, typeOfEmotion)
            Run the analysis and pre-process stages on a signal and then
            trigger the synthesis patch that matches the requested emotion.
    Parameters: x - the input signal (presumably a 1-D array of audio
                    samples - TODO confirm against the caller).
                fs - divisor used to turn the 1024-sample chunk size into a
                     per-block duration (presumably the sampling rate in Hz
                     - TODO confirm).
                typeOfEmotion - one of "Happy", "HappyTensed", "Sad" or
                                "Afraid". For any other value the analysis
                                still runs but no synth patch is called.
    Returns: None - the function has no return statement; its effect is
             the call into the ``synth`` module.
    """
    chunk_size = 1024
    # NOTE(review): on Python 2 ``/`` between two ints floors before np.ceil
    # is applied; on Python 3 it is true division. Confirm the interpreter.
    num_blocks = int(np.ceil(len(x) / chunk_size))
    sample_period = 1 / float(fs) * chunk_size
    # One time stamp per block index 0 .. num_blocks - 2.
    time_stamps = np.arange(0, num_blocks - 1) * (chunk_size / float(fs))
    q_factor = 1

    # ----------------------------- analysis ----------------------------- #
    # The results of data_blocks and voiced_regions are not referenced
    # again in this function.
    blocks_of_data = alysis.data_blocks(x, chunk_size)
    pitch_per_block = alysis.pitch_detect(x, fs, chunk_size)
    starting_info = alysis.starting_info(x, pitch_per_block, fs, chunk_size)
    voiced = starting_info['VSamp']
    regions = alysis.voiced_regions(x, pitch_per_block, starting_info,
                                    chunk_size)
    # Number of whole sample periods that fit in 0.5; the default run
    # length is one more than that.
    periods_in_half = int(0.5 / sample_period)
    run_length = 1 + periods_in_half

    # ---------------------------- preprocess ---------------------------- #
    inflection_samples = prep.pre_process(voiced)
    pitch_of_voiced = pitch_per_block[inflection_samples]
    rms = prep.root_mean_square(x, chunk_size, fs)[0]
    # Result not referenced again in this function.
    inflection_pitch = prep.potential_inflection_fundamental_frequency(
        pitch_of_voiced)
    sample_numbers = prep.matrix_of_sample_numbers(rms, inflection_samples)
    candidate_blocks = prep.consecutive_blocks_for_inflection(
        sample_numbers, run_length)
    selected_blocks = prep.alteration_of_discrete_data(
        sample_numbers, run_length, candidate_blocks)
    consecutive_info = prep.consecutive_blocks_in_selected_blocks(
        selected_blocks, run_length)
    reshaped_blocks = prep.reshaped_inflection_blocks(
        consecutive_info, selected_blocks, run_length)
    # Result not referenced again in this function.
    block_differences = prep.difference_arrays(num_blocks, reshaped_blocks)

    # ----------------------------- synthesis ---------------------------- #
    if typeOfEmotion == "Happy":
        # Repeats the selection with the same run length as above.
        run_length = 1 + periods_in_half
        selected_blocks = prep.alteration_of_discrete_data(
            sample_numbers, run_length, candidate_blocks)
        utterance_stamps = time_stamps[selected_blocks]
        gain, semitones = 3.0, 0.5
        synth.happy_patch(fs, semitones, q_factor, gain, utterance_stamps)
    elif typeOfEmotion == "HappyTensed":
        # This emotion uses a run length one shorter than the default, so
        # the block selection is rebuilt from the sample-number matrix.
        run_length = periods_in_half
        sample_numbers = prep.matrix_of_sample_numbers(
            rms, inflection_samples)
        candidate_blocks = prep.consecutive_blocks_for_inflection(
            sample_numbers, run_length)
        selected_blocks = prep.alteration_of_discrete_data(
            sample_numbers, run_length, candidate_blocks)
        utterance_stamps = time_stamps[selected_blocks]
        gain, semitones = 3.0, 1.0
        synth.happy_tensed_patch(fs, semitones, q_factor, gain,
                                 utterance_stamps)
    elif typeOfEmotion == "Sad":
        # The sad patch takes no time stamps.
        gain, semitones = 0.25, -0.5
        synth.sad_patch(fs, semitones, q_factor, gain)
    elif typeOfEmotion == "Afraid":
        speed, depth = 8.5, 50
        utterance_stamps = time_stamps[selected_blocks]
        synth.afraid_patch(fs, speed, depth, utterance_stamps)