import numpy as np

# Project-local modules, inferred from the calls below: alysis (analysis
# helpers), prep (pre-processing), synth (synthesis patches).
import alysis
import prep
import synth


def batch_analysis(x, fs, chunk_size):
    """Compute per-block fundamental frequency, voiced samples, and RMS values.

    These values drive the analysis stage and are reused during pre-processing.

    Parameters:
        x: discrete data read from the wave file
        fs: sampling frequency
        chunk_size: number of samples per block

    Returns:
        fundamental_frequency_in_blocks: fundamental frequency (pitch) of each
            block of chunk_size samples
        voiced_samples: the samples classified as voiced; used throughout the
            pipeline and essential for the synthesis stage
        rms: root-mean-square values, used to locate candidate samples for
            inflection (pitch bending)
    """
    fundamental_frequency_in_blocks = alysis.pitch_detect(x, fs, chunk_size)
    rms = alysis.root_mean_square(x, chunk_size, fs)
    voiced_unvoiced_starting_info_object = alysis.starting_info(
        x, fundamental_frequency_in_blocks, fs, chunk_size)
    voiced_samples = voiced_unvoiced_starting_info_object['VSamp']
    return fundamental_frequency_in_blocks, voiced_samples, rms
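# A minimal usage sketch for batch_analysis, assuming the wave file is read
# with scipy.io.wavfile (an illustrative choice; this module does not mandate
# a particular reader) and a hypothetical input path "speech.wav":
#
#     from scipy.io import wavfile
#     fs, x = wavfile.read("speech.wav")
#     f0_blocks, voiced_samples, rms = batch_analysis(x, fs, 1024)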
def emotive_speech(x, fs, typeOfEmotion):
    """Apply an emotion-specific synthesis patch to the speech signal.

    Parameters:
        x: discrete data read from the wave file
        fs: sampling frequency
        typeOfEmotion: one of "Happy", "HappyTensed", "Sad", or "Afraid"
    """
    CHUNK_SIZE = 1024
    NUM_BLOCKS = int(np.ceil(len(x) / float(CHUNK_SIZE)))
    SAMPLE_PERIOD = 1 / float(fs) * CHUNK_SIZE  # duration of one block, in seconds
    TIME_STAMPS = np.arange(0, NUM_BLOCKS - 1) * (CHUNK_SIZE / float(fs))
    QFACTOR = 1
    #---------------------Analysis---------------------------------------#
    data_in_blocks = alysis.data_blocks(x, CHUNK_SIZE)
    fundamental_frequency_in_blocks = alysis.pitch_detect(x, fs, CHUNK_SIZE)
    voiced_unvoiced_starting_info_object = alysis.starting_info(
        x, fundamental_frequency_in_blocks, fs, CHUNK_SIZE)
    voiced_samples = voiced_unvoiced_starting_info_object['VSamp']
    voiced_regions = alysis.voiced_regions(
        x, fundamental_frequency_in_blocks,
        voiced_unvoiced_starting_info_object, CHUNK_SIZE)
    # Number of blocks spanning roughly half a second of audio.
    consecutive_blocks = 1 + int(0.5 / SAMPLE_PERIOD)
    #---------------------Pre-process------------------------------------#
    inflection_voice_samples = prep.pre_process(voiced_samples)
    frequency_of_voiced_samples = fundamental_frequency_in_blocks[
        inflection_voice_samples]
    rms = prep.root_mean_square(x, CHUNK_SIZE, fs)[0]
    frequency_for_inflection = prep.potential_inflection_fundamental_frequency(
        frequency_of_voiced_samples)
    inflection_sample_numbers = prep.matrix_of_sample_numbers(
        rms, inflection_voice_samples)
    inflect_blocks = prep.consecutive_blocks_for_inflection(
        inflection_sample_numbers, consecutive_blocks)
    selected_inflect_block = prep.alteration_of_discrete_data(
        inflection_sample_numbers, consecutive_blocks, inflect_blocks)
    n = prep.consecutive_blocks_in_selected_blocks(selected_inflect_block,
                                                   consecutive_blocks)
    reshaped_inflect_blocks = prep.reshaped_inflection_blocks(
        n, selected_inflect_block, consecutive_blocks)
    difference_arrays = prep.difference_arrays(NUM_BLOCKS,
                                               reshaped_inflect_blocks)
    #---------------------Synthesis--------------------------------------#
    if typeOfEmotion == "Happy":
        consecutive_blocks = 1 + int(0.5 / SAMPLE_PERIOD)
        selected_inflect_block = prep.alteration_of_discrete_data(
            inflection_sample_numbers, consecutive_blocks, inflect_blocks)
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]
        gain = 3.0
        semitones = 0.5
        synth.happy_patch(fs, semitones, QFACTOR, gain, utterance_time_stamps)
    elif typeOfEmotion == "HappyTensed":
        consecutive_blocks = int(0.5 / SAMPLE_PERIOD)
        inflection_sample_numbers = prep.matrix_of_sample_numbers(
            rms, inflection_voice_samples)
        inflect_blocks = prep.consecutive_blocks_for_inflection(
            inflection_sample_numbers, consecutive_blocks)
        selected_inflect_block = prep.alteration_of_discrete_data(
            inflection_sample_numbers, consecutive_blocks, inflect_blocks)
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]
        gain = 3.0
        semitones = 1.0
        synth.happy_tensed_patch(fs, semitones, QFACTOR, gain,
                                 utterance_time_stamps)
    elif typeOfEmotion == "Sad":
        gain = 0.25
        semitones = -0.5
        synth.sad_patch(fs, semitones, QFACTOR, gain)
    elif typeOfEmotion == "Afraid":
        speed = 8.5
        depth = 50
        utterance_time_stamps = TIME_STAMPS[selected_inflect_block]
        synth.afraid_patch(fs, speed, depth, utterance_time_stamps)
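# A minimal driver sketch, assuming this module is run directly and that the
# input is read with scipy.io.wavfile (an illustrative choice; the project may
# use a different reader). "speech.wav" is a hypothetical input path; the
# emotion labels are exactly the ones emotive_speech checks above.
if __name__ == "__main__":
    from scipy.io import wavfile

    fs, x = wavfile.read("speech.wav")  # hypothetical mono input file
    emotive_speech(x, fs, "Happy")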