Example #1
0
    def interpolate_f0(self, f0_file):
        """Fill unvoiced (<= 0.0) frames of an F0 track by interpolation.

        :param f0_file: path to a binary float file with one F0 value per
            frame (read via BinaryIOCollection).
        :return: the F0 sequence with unvoiced gaps filled in.
        """
        io_funcs = BinaryIOCollection()
        data = io_funcs.load_float_file(f0_file)
        # NOTE(review): ip_data aliases data (no copy), as in the original;
        # this is safe because only unvoiced entries are overwritten and all
        # reads below are of voiced entries, which are never modified.
        ip_data = data

        frame_number = len(data)
        last_value = 0.0
        for i in range(frame_number):  # range: Python 3 compatible (was xrange)
            if data[i] <= 0.0:
                # Find the next voiced frame j (if any).
                j = i + 1
                for j in range(i + 1, frame_number):
                    if data[j] > 0.0:
                        break
                if j < frame_number - 1:
                    if last_value > 0.0:
                        # Voiced on both sides: linear interpolation between
                        # the previous voiced frame and frame j.
                        step = (data[j] - data[i - 1]) / float(j - i)
                        for k in range(i, j):
                            ip_data[k] = data[i - 1] + step * (k - i + 1)
                    else:
                        # Leading unvoiced region: back-fill with the first
                        # voiced value.
                        for k in range(i, j):
                            ip_data[k] = data[j]
                else:
                    # Trailing unvoiced region: hold the last voiced value.
                    for k in range(i, frame_number):
                        ip_data[k] = last_value
            else:
                ip_data[i] = data[i]
                last_value = data[i]

        return ip_data
Example #2
0
    def make_equal_frames(self, in_file_list, ref_file_list, in_dimension_dict):
        """Truncate or zero-pad each input file so its frame count matches the
        corresponding reference file, overwriting the input file in place.

        :param in_file_list: files to adjust (overwritten in place).
        :param ref_file_list: parallel list of reference files.
        :param in_dimension_dict: maps a file's extension (data stream name)
            to its feature dimensionality.
        """
        logger = logging.getLogger("acoustic_comp")

        logger.info('making equal number of lines...')

        io_funcs = BinaryIOCollection()

        utt_number = len(in_file_list)

        for i in range(utt_number):  # range: Python 3 compatible (was xrange)
            in_file_name = in_file_list[i]
            in_data_stream_name = in_file_name.split('.')[-1]
            in_feature_dim = in_dimension_dict[in_data_stream_name]
            in_features, in_frame_number = io_funcs.load_binary_file_frame(in_file_name, in_feature_dim)

            ref_file_name = ref_file_list[i]
            ref_data_stream_name = ref_file_name.split('.')[-1]
            ref_feature_dim = in_dimension_dict[ref_data_stream_name]
            ref_features, ref_frame_number = io_funcs.load_binary_file_frame(ref_file_name, ref_feature_dim)

            # Already aligned: leave the file untouched.
            if in_frame_number == ref_frame_number:
                continue

            # Copy the frames that fit; any remainder stays zero (padding).
            # This single path covers both the truncation and padding cases
            # that were previously duplicated branches.
            target_features = numpy.zeros((ref_frame_number, in_feature_dim))
            common = min(in_frame_number, ref_frame_number)
            target_features[0:common, ] = in_features[0:common, ]
            io_funcs.array_to_binary_file(target_features, in_file_name)

        logger.info('Finished: made equal rows in data stream %s with reference to data stream %s ' %(in_data_stream_name, ref_data_stream_name))
Example #3
0
    def produce_nn_cmp(self, in_file_list, out_file_list):
        """Build composite acoustic (.cmp) files: MGC + interpolated log-F0
        (plus its delta and acceleration) + V/UV flag + BAP, concatenated
        frame-wise and written to the matching entry of ``out_file_list``.

        :param in_file_list: source .cmp files to decompose and rebuild.
        :param out_file_list: parallel list of output file paths.
        """
        logger = logging.getLogger("acoustic_norm")

        delta_win = [-0.5, 0.0, 0.5]
        acc_win = [1.0, -2.0, 1.0]

        file_number = len(in_file_list)
        logger.info('starting creation of %d files' % file_number)

        # Hoisted out of the loop: one IO helper serves all files.
        io_funcs = BinaryIOCollection()

        for i in range(file_number):  # range: Python 3 compatible (was xrange)
            mgc_data, bap_data, lf0_data = self.load_cmp_file(in_file_list[i])
            ip_lf0, vuv_vector = self.interpolate_f0(lf0_data)

            delta_lf0 = self.compute_delta(ip_lf0, delta_win)
            acc_lf0 = self.compute_delta(ip_lf0, acc_win)

            cmp_data = numpy.concatenate((mgc_data, ip_lf0, delta_lf0, acc_lf0, vuv_vector, bap_data), axis=1)

            io_funcs.array_to_binary_file(cmp_data, out_file_list[i])

        logger.info('finished creation of %d binary files' % file_number)
Example #4
0
    def process_utterance(self, utt):
        """Synthesise one utterance: generate acoustic streams from its label
        file, dump each stream to a binary file under the voice's output/cmp
        directory, then resynthesise a waveform with WORLD."""
        if not self.trained:
            print('WARNING: Cannot apply processor %s till model is trained' %
                  (self.processor_name))
            return

        label_fname = utt.get_filename(self.input_label_filetype)
        wave_fname = utt.get_filename(self.output_filetype)

        streams = self.model.generate(
            label_fname,
            variance_expansion=self.variance_expansion,
            fill_unvoiced_gaps=self.fill_unvoiced_gaps)

        # Streams come back as a dict: {bap, lf0, mgc, vuv}.
        out_dir = os.path.join(self.voice_resources.path['voice'], 'output',
                               'cmp')
        if not os.path.exists(out_dir):
            os.makedirs(out_dir)

        writer = BinaryIOCollection()
        utt_name = utt.data.attrib['utterance_name']
        for stream_name, stream_data in streams.items():
            target = os.path.join(out_dir, utt_name + '.' + stream_name)
            writer.array_to_binary_file(stream_data, target)

        self.world_resynth(streams, wave_fname)
Example #5
0
    def compute_global_variance(self, file_list, feat_dim, save_dir):
        """Compute per-utterance feature variances and save their global
        mean and variance (GV statistics) as raw binary files.

        :param file_list: list of binary feature files.
        :param feat_dim: dimensionality of each feature vector.
        :param save_dir: directory to write 'gv_mean' and 'gv_var' into.
        """
        # BUGFIX: the original body mixed tabs and spaces (a TabError under
        # Python 3), used Python 2 print statements, passed no argument for
        # the %d in the log message, and left file handles to close manually.
        logger = logging.getLogger("compute gv")
        logger.info('computing global variance over %d files' % len(file_list))

        all_var_matrix = numpy.zeros((len(file_list), feat_dim))

        io_funcs = BinaryIOCollection()
        for filenum, file_name in enumerate(file_list):
            features = io_funcs.load_binary_file(file_name, feat_dim)
            # Per-dimension variance over the frames of this utterance.
            all_var_matrix[filenum, :] = numpy.var(features, axis=0)

        # Mean and variance of the per-utterance variances.
        print(all_var_matrix.shape)
        global_mean = numpy.mean(all_var_matrix, axis=0)
        global_var = numpy.var(all_var_matrix, axis=0)

        gv_mean_name = os.path.join(save_dir, 'gv_mean')
        with open(gv_mean_name, 'wb') as fid:
            global_mean.tofile(fid)

        gv_var_name = os.path.join(save_dir, 'gv_var')
        with open(gv_var_name, 'wb') as fid:
            global_var.tofile(fid)

        print(global_mean)
        print(global_var)
Example #6
0
    def compute_std(self, file_list, mean_vector, start_index, end_index):
        """Return the per-dimension standard deviation of feature columns
        [start_index:end_index) over every frame of every file.

        :param file_list: list of binary feature files.
        :param mean_vector: precomputed mean of the same column slice.
        :param start_index: first column (inclusive) of the slice.
        :param end_index: last column (exclusive) of the slice.
        :return: array of shape (1, end_index - start_index).
        """
        local_feature_dimension = end_index - start_index

        # BUGFIX: the accumulator must match the sliced width. The original
        # allocated (1, self.feature_dimension), which makes the += below
        # fail with a shape mismatch whenever the slice is partial.
        std_vector = numpy.zeros((1, local_feature_dimension))
        all_frame_number = 0

        io_funcs = BinaryIOCollection()
        for file_name in file_list:
            features, current_frame_number = io_funcs.load_binary_file_frame(file_name, self.feature_dimension)

            mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1))

            # Accumulate squared deviations from the global mean.
            std_vector += numpy.reshape(numpy.sum((features[:, start_index:end_index] - mean_matrix) ** 2, axis=0), (1, local_feature_dimension))
            all_frame_number += current_frame_number

        std_vector /= float(all_frame_number)

        std_vector = std_vector ** 0.5

        self.logger.info('computed  std vector of length %d' % std_vector.shape[1])
        self.logger.info('  std: %s' % std_vector)

        return std_vector
Example #7
0
    def find_min_max_values(self, in_file_list, start_index, end_index):
        """Find the per-dimension min/max of feature columns
        [start_index:end_index) across all files, caching the results in
        self.min_vector / self.max_vector (each shaped (1, width)).

        :param in_file_list: list of binary feature files.
        :param start_index: first column (inclusive) of the slice.
        :param end_index: last column (exclusive) of the slice.
        """
        local_feature_dimension = end_index - start_index

        file_number = len(in_file_list)
        min_value_matrix = numpy.zeros((file_number, local_feature_dimension))
        max_value_matrix = numpy.zeros((file_number, local_feature_dimension))
        io_funcs = BinaryIOCollection()
        for i in range(file_number):  # range: Python 3 compatible (was xrange)
            features = io_funcs.load_binary_file(in_file_list[i], self.feature_dimension)

            # Per-file extrema of the selected column band.
            min_value_matrix[i, ] = numpy.amin(features[:, start_index:end_index], axis=0)
            max_value_matrix[i, ] = numpy.amax(features[:, start_index:end_index], axis=0)

        self.min_vector = numpy.reshape(numpy.amin(min_value_matrix, axis=0), (1, local_feature_dimension))
        self.max_vector = numpy.reshape(numpy.amax(max_value_matrix, axis=0), (1, local_feature_dimension))

        self.logger.info('found min/max values of length %d:' % local_feature_dimension)
        self.logger.info('  min: %s' % self.min_vector)
        self.logger.info('  max: %s' % self.max_vector)
Example #8
0
    def load_next_utterance_S2SML(self):
        """Load the data for one utterance. This function will be called when utterance-by-utterance loading is required (e.g., sequential training).

        Splits the multi-level-unit (MLU) linguistic input into phone-,
        syllable- and word-level streams using the column ranges stored in
        ``self.MLU_div``, and splits the duration stream into per-phone,
        per-syllable and per-word durations.

        :return: ``(shared_set_xyd, temp_set_x, temp_set_y, temp_set_d,
            temp_set_af)`` where ``shared_set_xyd`` holds Theano shared
            variables for word-level input, acoustic output and durations.
        """

        io_fun = BinaryIOCollection()

        in_features, lab_frame_number = io_fun.load_binary_file_frame(self.x_files_list[self.file_index], self.n_ins)
        out_features, out_frame_number = io_fun.load_binary_file_frame(self.y_files_list[self.file_index], self.n_outs)
        dur_features, dur_frame_number = io_fun.load_binary_file_frame(self.dur_files_list[self.file_index], 1)

        ### MLU features sub-division ###
        # Each level occupies two column bands in the label matrix,
        # [div[0]:div[1]] and [div[2]:div[3]]; concatenate them per level.
        temp_set_MLU = in_features[0:lab_frame_number, ]
        temp_set_y   = out_features[0:out_frame_number, ]

        temp_set_phone = numpy.concatenate([temp_set_MLU[:, self.MLU_div['phone'][0]: self.MLU_div['phone'][1]], temp_set_MLU[:, self.MLU_div['phone'][2]: self.MLU_div['phone'][3]]], axis = 1)
        temp_set_syl   = numpy.concatenate([temp_set_MLU[:, self.MLU_div['syl'][0]: self.MLU_div['syl'][1]], temp_set_MLU[:, self.MLU_div['syl'][2]: self.MLU_div['syl'][3]]], axis = 1)
        temp_set_word  = numpy.concatenate([temp_set_MLU[:, self.MLU_div['word'][0]: self.MLU_div['word'][1]], temp_set_MLU[:, self.MLU_div['word'][2]: self.MLU_div['word'][3] ]], axis = 1)

        ### duration array sub-division ###
        # Assumed duration-file layout: word and syllable durations first,
        # then one entry per phone at the end -- TODO confirm against the
        # code that writes these files.
        dur_features = numpy.reshape(dur_features, (-1, ))
        temp_set_d   = dur_features.astype(int)
        dur_word_syl = temp_set_d[0: -lab_frame_number]

        num_ph    = lab_frame_number
        # Number of syllables: length of the shortest suffix of the
        # word/syllable durations whose sum equals the phone count.
        num_syl   = (numpy.where(numpy.cumsum(dur_word_syl[::-1])==lab_frame_number)[0][0] + 1)
        num_words = len(dur_word_syl) - num_syl

        temp_set_dur_phone = temp_set_d[-num_ph:]
        temp_set_dur_word  = dur_word_syl[0: num_words]
        temp_set_dur_syl   = dur_word_syl[num_words: ]

        ### additional feature matrix (syllable+phone+frame=432) ###
        num_frames = sum(temp_set_dur_phone)
        temp_set_af = numpy.empty((num_frames, self.MLU_div['length'][-1]))

        # Syllable features are sampled at each syllable's final phone;
        # phone features occupy their own column band.
        temp_set_af[0: num_syl, self.MLU_div['length'][0]: self.MLU_div['length'][1] ] = temp_set_syl[numpy.cumsum(temp_set_dur_syl)-1]
        temp_set_af[0: num_ph, self.MLU_div['length'][1]: self.MLU_div['length'][2]] = temp_set_phone

        ### input word feature matrix ###
        # Word-level input = word features sampled at each word's final
        # syllable boundary, converted to phone-frame positions.
        temp_set_dur_word_segments = numpy.zeros(num_words, dtype='int32')
        syl_bound = numpy.cumsum(temp_set_dur_word)
        for indx in range(num_words):
            temp_set_dur_word_segments[indx] = int(sum(temp_set_dur_syl[0: syl_bound[indx]]))
        temp_set_x = temp_set_word[temp_set_dur_word_segments-1]

        ### rest of the code similar to S2S ###
        self.file_index += 1

        # Wrap around at the end of the list and flag the end of the epoch.
        if  self.file_index >= self.list_size:
            self.end_reading = True
            self.file_index = 0

        shared_set_x  = self.make_shared(temp_set_x, 'x')
        shared_set_y  = self.make_shared(temp_set_y, 'y')
        shared_set_d  = theano.shared(numpy.asarray(temp_set_d, dtype='int32'), name='d', borrow=True)

        shared_set_xyd = (shared_set_x, shared_set_y, shared_set_d)

        return shared_set_xyd, temp_set_x, temp_set_y, temp_set_d, temp_set_af
Example #9
0
def read_and_transform_data_from_file_list(in_file_list,
                                           dim,
                                           seq_length=200,
                                           merge_size=1):
    """Read binary feature files into one buffer and cut it into fixed-length
    sequences.

    :param in_file_list: list of binary feature file paths.
    :param dim: feature dimensionality per frame.
    :param seq_length: frames per output sequence row.
    :param merge_size: after every ``merge_size`` files, advance the write
        index to the next multiple of ``seq_length`` (leaving zero padding
        between file groups).
    :return: array reshaped to (num_of_samples, seq_length).

    NOTE(review): the final ``reshape(num_of_samples, seq_length)`` is only
    valid when ``dim == 1`` -- confirm callers never pass dim > 1.
    """
    io_funcs = BinaryIOCollection()

    num_of_utt = len(in_file_list)

    temp_set = np.zeros((FRAME_BUFFER_SIZE, dim))

    ### read file by file ###
    current_index = 0
    for i in range(num_of_utt):
        in_file_name = in_file_list[i]
        in_features, frame_number = io_funcs.load_binary_file_frame(
            in_file_name, dim)
        base_file_name = os.path.basename(in_file_name).split(".")[0]

        temp_set[current_index:current_index + frame_number, ] = in_features
        current_index += frame_number

        # Pad up to a sequence boundary at the end of each merge group.
        if (i + 1) % merge_size == 0:
            current_index = seq_length * (int(
                np.ceil(float(current_index) / float(seq_length))))

        drawProgressBar(i + 1, num_of_utt)

    sys.stdout.write("\n")

    num_of_samples = int(np.ceil(float(current_index) / float(seq_length)))

    # Trim the over-allocated buffer, then cut it into sequence rows.
    temp_set = temp_set[0:num_of_samples * seq_length, ]
    temp_set = temp_set.reshape(num_of_samples, seq_length)

    return temp_set
Example #10
0
    def load_next_utterance_CTC(self):
        """Load one utterance for CTC training.

        The target sequence is built by interleaving the blank symbol
        (``self.n_outs``) with the per-frame argmax class index:
        blank, c1, blank, c2, blank, ...

        :return: ``(shared_set_xy, temp_set_x, temp_set_y)``.
        """
        # BUGFIX: removed the two numpy.empty preallocations of temp_set_x /
        # temp_set_y -- both were dead, immediately overwritten below.
        io_fun = BinaryIOCollection()

        in_features, lab_frame_number = io_fun.load_binary_file_frame(
            self.x_files_list[self.file_index], self.n_ins)
        out_features, out_frame_number = io_fun.load_binary_file_frame(
            self.y_files_list[self.file_index], self.n_outs)

        frame_number = lab_frame_number
        temp_set_x = in_features[0:frame_number, ]

        # Interleave blanks with per-frame argmax classes.
        temp_set_y = numpy.array([self.n_outs])
        for il in numpy.argmax(out_features, axis=1):
            temp_set_y = numpy.concatenate((temp_set_y, [il, self.n_outs]),
                                           axis=0)

        self.file_index += 1

        # Wrap around at the end of the list and flag the end of the epoch.
        if self.file_index >= self.list_size:
            self.end_reading = True
            self.file_index = 0

        shared_set_x = self.make_shared(temp_set_x, 'x')
        shared_set_y = theano.shared(numpy.asarray(temp_set_y, dtype='int32'),
                                     name='y',
                                     borrow=True)

        shared_set_xy = (shared_set_x, shared_set_y)

        return shared_set_xy, temp_set_x, temp_set_y
Example #11
0
    def normal_standardization(self, in_file_list, out_file_list,
                               feature_dimension):
        """Z-score normalise every input file with the global mean and std
        computed over the whole list, writing each result to the matching
        entry of ``out_file_list``.

        :return: the (mean_vector, std_vector) pair that was applied.
        """
        self.feature_dimension = feature_dimension

        mean_vector = self.compute_mean(in_file_list, 0, feature_dimension)
        std_vector = self.compute_std(in_file_list, mean_vector, 0,
                                      feature_dimension)

        io_funcs = BinaryIOCollection()

        for src_path, dst_path in zip(in_file_list, out_file_list):
            features, frame_count = io_funcs.load_binary_file_frame(
                src_path, self.feature_dimension)

            # Broadcast the global statistics over this file's frames.
            mean_block = numpy.tile(mean_vector, (frame_count, 1))
            std_block = numpy.tile(std_vector, (frame_count, 1))

            io_funcs.array_to_binary_file(
                (features - mean_block) / std_block, dst_path)

        return mean_vector, std_vector
Example #12
0
    def find_min_max_values(self, in_file_list, start_index, end_index):
        """Scan every file and cache the per-dimension minimum and maximum of
        feature columns [start_index:end_index) in self.min_vector /
        self.max_vector, each shaped (1, end_index - start_index)."""
        sub_dim = end_index - start_index

        per_file_min = numpy.zeros((len(in_file_list), sub_dim))
        per_file_max = numpy.zeros((len(in_file_list), sub_dim))

        io_funcs = BinaryIOCollection()
        for idx, file_name in enumerate(in_file_list):
            features = io_funcs.load_binary_file(file_name,
                                                 self.feature_dimension)
            band = features[:, start_index:end_index]
            per_file_min[idx, ] = numpy.amin(band, axis=0)
            per_file_max[idx, ] = numpy.amax(band, axis=0)

        # Reduce the per-file extrema to the global ones.
        self.min_vector = numpy.reshape(numpy.amin(per_file_min, axis=0),
                                        (1, sub_dim))
        self.max_vector = numpy.reshape(numpy.amax(per_file_max, axis=0),
                                        (1, sub_dim))

        self.logger.info('found min/max values of length %d:' % sub_dim)
        self.logger.info('  min: %s' % self.min_vector)
        self.logger.info('  max: %s' % self.max_vector)
Example #13
0
    def compute_mean(self, file_list, start_index, end_index):
        """Return the mean vector, shape (1, end_index - start_index), of
        feature columns [start_index:end_index) accumulated over every frame
        of every file in ``file_list``."""
        sub_dim = end_index - start_index

        running_sum = numpy.zeros((1, sub_dim))
        total_frames = 0

        reader = BinaryIOCollection()
        for file_name in file_list:
            features, frame_count = reader.load_binary_file_frame(
                file_name, self.feature_dimension)

            # Accumulate the column sums of the selected band.
            column_sum = numpy.sum(features[:, start_index:end_index], axis=0)
            running_sum += numpy.reshape(column_sum, (1, sub_dim))
            total_frames += frame_count

        mean_vector = running_sum / float(total_frames)

        self.logger.info('computed mean vector of length %d :' %
                         mean_vector.shape[1])
        self.logger.info(' mean: %s' % mean_vector)

        return mean_vector
Example #14
0
    def compute_std(self, file_list, mean_vector, start_index, end_index):
        """Return the per-dimension standard deviation of feature columns
        [start_index:end_index) over every frame of every file.

        :param file_list: list of binary feature files.
        :param mean_vector: precomputed mean of the same column slice.
        :param start_index: first column (inclusive) of the slice.
        :param end_index: last column (exclusive) of the slice.
        :return: array of shape (1, end_index - start_index).
        """
        local_feature_dimension = end_index - start_index

        # BUGFIX: the accumulator must match the sliced width. The original
        # allocated (1, self.feature_dimension), so the += below failed with
        # a shape mismatch whenever the slice did not cover every column.
        std_vector = numpy.zeros((1, local_feature_dimension))
        all_frame_number = 0

        io_funcs = BinaryIOCollection()
        for file_name in file_list:
            features, current_frame_number = io_funcs.load_binary_file_frame(
                file_name, self.feature_dimension)

            mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1))

            # Accumulate squared deviations from the global mean.
            std_vector += numpy.reshape(
                numpy.sum(
                    (features[:, start_index:end_index] - mean_matrix)**2,
                    axis=0), (1, local_feature_dimension))
            all_frame_number += current_frame_number

        std_vector /= float(all_frame_number)

        std_vector = std_vector**0.5

        self.logger.info('computed  std vector of length %d' %
                         std_vector.shape[1])
        self.logger.info('  std: %s' % std_vector)

        return std_vector
    def feature_denormalisation(self, in_file_list, out_file_list, mean_vector,
                                std_vector):
        """Invert z-score normalisation (features * std + mean), file by file.

        :param in_file_list: normalised input files.
        :param out_file_list: parallel list of output file paths.
        :param mean_vector: mean used for the original normalisation.
        :param std_vector: std used for the original normalisation.
        :raises AssertionError: if the list lengths differ or a vector's size
            does not match self.feature_dimension.
        """
        # BUGFIX: the original referenced an undefined name `logger`
        # (NameError on the error paths); obtain one explicitly.
        logger = logging.getLogger('feature_normalisation')

        io_funcs = BinaryIOCollection()
        file_number = len(in_file_list)
        try:
            assert len(in_file_list) == len(out_file_list)
        except AssertionError:
            logger.critical(
                'The input and output file numbers are not the same! %d vs %d'
                % (len(in_file_list), len(out_file_list)))
            raise

        try:
            assert mean_vector.size == self.feature_dimension and std_vector.size == self.feature_dimension
        except AssertionError:
            logger.critical(
                'the dimensionalities of the mean and standard derivation vectors are not the same as the dimensionality of the feature'
            )
            raise

        for i in range(file_number):  # range: Python 3 compatible (was xrange)
            features, current_frame_number = io_funcs.load_binary_file_frame(
                in_file_list[i], self.feature_dimension)

            mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1))
            std_matrix = numpy.tile(std_vector, (current_frame_number, 1))

            denorm_features = features * std_matrix + mean_matrix

            io_funcs.array_to_binary_file(denorm_features, out_file_list[i])
Example #16
0
    def load_next_utterance(self):
        """Load the data for one utterance. This function will be called when utterance-by-utterance loading is required (e.g., sequential training).

        :return: ``(temp_set_x, temp_set_y)`` input/output matrices trimmed
            to a common frame count.
        :raises ValueError: if label and acoustic frame counts differ by 5 or
            more frames.
        """
        # BUGFIX: removed the two dead np.empty preallocations -- both were
        # immediately overwritten below.
        io_fun = BinaryIOCollection()

        in_features, lab_frame_number = io_fun.load_binary_file_frame(self.x_files_list[self.file_index], self.n_ins)
        out_features, out_frame_number = io_fun.load_binary_file_frame(self.y_files_list[self.file_index], self.n_outs)

        frame_number = lab_frame_number
        if abs(lab_frame_number - out_frame_number) < 5:  ## we allow small difference here. may not be correct, but sometimes, there is one/two frames difference
            if lab_frame_number > out_frame_number:
                frame_number = out_frame_number
        else:
            base_file_name = self.x_files_list[self.file_index].split('/')[-1].split('.')[0]
            message = "the number of frames in label and acoustic features are different: %d vs %d (%s)" % (
                lab_frame_number, out_frame_number, base_file_name)
            logging.info(message)
            # BUGFIX: a bare `raise` with no active exception raises
            # RuntimeError("No active exception to re-raise"); raise an
            # explicit, meaningful exception instead.
            raise ValueError(message)

        temp_set_y = out_features[0:frame_number, ]
        temp_set_x = in_features[0:frame_number, ]

        self.file_index += 1

        # Wrap around at the end of the list and flag the end of the epoch.
        if self.file_index >= self.list_size:
            self.end_reading = True
            self.file_index = 0

        return temp_set_x, temp_set_y
Example #17
0
    def find_min_max_values(self, in_file_list):
        """Compute the per-dimension min/max over all files and cache them in
        self.min_vector / self.max_vector, each shaped
        (1, self.feature_dimension)."""
        logger = logging.getLogger("acoustic_norm")

        n_files = len(in_file_list)
        per_file_min = numpy.zeros((n_files, self.feature_dimension))
        per_file_max = numpy.zeros((n_files, self.feature_dimension))

        io_funcs = BinaryIOCollection()
        for idx, fname in enumerate(in_file_list):
            features = io_funcs.load_binary_file(fname, self.feature_dimension)
            per_file_min[idx, ] = numpy.amin(features, axis=0)
            per_file_max[idx, ] = numpy.amax(features, axis=0)

        # Reduce the per-file extrema to the global ones.
        self.min_vector = numpy.reshape(numpy.amin(per_file_min, axis=0),
                                        (1, self.feature_dimension))
        self.max_vector = numpy.reshape(numpy.amax(per_file_max, axis=0),
                                        (1, self.feature_dimension))

        logger.info('across %d files found min/max values of length %d:' % (n_files, self.feature_dimension))
        logger.info('  min: %s' % self.min_vector)
        logger.info('  max: %s' % self.max_vector)
Example #18
0
def read_data_from_file_list(inp_file_list,
                             out_file_list,
                             inp_dim,
                             out_dim,
                             sequential_training=True):
    """Load parallel input/output binary feature files.

    :param inp_file_list: input feature files.
    :param out_file_list: parallel list of output feature files.
    :param inp_dim: input feature dimensionality.
    :param out_dim: output feature dimensionality.
    :param sequential_training: if True, return dicts keyed by base filename;
        otherwise concatenate everything into two big matrices.
    :return: ``(temp_set_x, temp_set_y, file_length_dict)`` where
        ``file_length_dict`` maps a frame count to the base filenames of the
        files with that length.
    """
    io_funcs = BinaryIOCollection()

    utt_len = len(inp_file_list)

    file_length_dict = {}

    if sequential_training:
        temp_set_x = {}
        temp_set_y = {}
    else:
        temp_set_x = np.empty((BUFFER_SIZE, inp_dim))
        temp_set_y = np.empty((BUFFER_SIZE, out_dim))

    ### read file by file ###
    current_index = 0
    for i in range(utt_len):
        inp_file_name = inp_file_list[i]
        out_file_name = out_file_list[i]
        inp_features, inp_frame_number = io_funcs.load_binary_file_frame(
            inp_file_name, inp_dim)
        out_features, out_frame_number = io_funcs.load_binary_file_frame(
            out_file_name, out_dim)
        base_file_name = os.path.basename(inp_file_name).split(".")[0]

        if abs(inp_frame_number - out_frame_number) > 5:
            # BUGFIX: Python 3 print function (was a py2 print statement,
            # a SyntaxError under Python 3) and a non-zero exit code for
            # this fatal mismatch (was sys.exit(0), which signals success).
            print('the number of frames in input and output features are different: %d vs %d (%s)' % (
                inp_frame_number, out_frame_number, base_file_name))
            sys.exit(1)
        else:
            frame_number = min(inp_frame_number, out_frame_number)

        if sequential_training:
            temp_set_x[base_file_name] = inp_features
            temp_set_y[base_file_name] = out_features
        else:
            temp_set_x[current_index:current_index +
                       frame_number, ] = inp_features
            temp_set_y[current_index:current_index +
                       frame_number, ] = out_features
            current_index += frame_number

        # Group filenames by their frame count.
        if frame_number not in file_length_dict:
            file_length_dict[frame_number] = [base_file_name]
        else:
            file_length_dict[frame_number].append(base_file_name)

        print_status(i, utt_len)

    sys.stdout.write("\n")

    if not sequential_training:
        # Trim the over-allocated buffers to the frames actually read.
        temp_set_x = temp_set_x[0:current_index, ]
        temp_set_y = temp_set_y[0:current_index, ]

    return temp_set_x, temp_set_y, file_length_dict
Example #19
0
    def compute_std(self, file_list, mean_vector):
        """Return the per-dimension standard deviation, shape
        (1, self.feature_dimension), over every frame of every file,
        measured against the given precomputed ``mean_vector``."""
        logger = logging.getLogger("acoustic_norm")

        sq_dev_sum = numpy.zeros((1, self.feature_dimension))
        total_frames = 0

        reader = BinaryIOCollection()
        for fname in file_list:
            features = reader.load_binary_file(fname, self.feature_dimension)
            n_frames = features.size // self.feature_dimension

            # Squared deviations from the global mean, summed per dimension.
            centered = features - numpy.tile(mean_vector, (n_frames, 1))
            sq_dev_sum += numpy.reshape(numpy.sum(centered**2, axis=0),
                                        (1, self.feature_dimension))
            total_frames += n_frames

        std_vector = (sq_dev_sum / float(total_frames))**0.5

        logger.info('computed  std vector of length %d' % std_vector.shape[1])
        logger.info('  std: %s' % std_vector)

        return std_vector
Example #20
0
    def compute_std(self, file_list, mean_vector):
        """Compute the global per-dimension standard deviation over all
        files, given the precomputed mean; the result has shape
        (1, self.feature_dimension)."""
        logger = logging.getLogger("acoustic_norm")

        io_funcs = BinaryIOCollection()
        accumulated = numpy.zeros((1, self.feature_dimension))
        frame_total = 0

        for file_name in file_list:
            features = io_funcs.load_binary_file(file_name, self.feature_dimension)
            frames_here = features.size // self.feature_dimension

            # Sum of squared deviations from the global mean for this file.
            diff = features - numpy.tile(mean_vector, (frames_here, 1))
            accumulated += numpy.sum(diff ** 2, axis=0).reshape(1, self.feature_dimension)
            frame_total += frames_here

        variance = accumulated / float(frame_total)
        std_vector = numpy.sqrt(variance)

        logger.info('computed  std vector of length %d' % std_vector.shape[1])
        logger.info('  std: %s' % std_vector)

        return std_vector
Example #21
0
    def denormalise_data(self, in_file_list, out_file_list):
        """Map features back from the target [min, max] range to the original
        feature range, using the cached self.min_vector / self.max_vector."""
        logger = logging.getLogger("acoustic_norm")

        file_number = len(in_file_list)
        logger.info('MinMaxNormalisation.denormalise_data for %d files' % file_number)

        feature_range = numpy.reshape(self.max_vector - self.min_vector,
                                      (1, self.feature_dimension))

        target_range = numpy.zeros((1, self.feature_dimension))
        target_range.fill(self.target_max_value - self.target_min_value)

        # Guard degenerate (constant) dimensions against divide-by-zero.
        degenerate = feature_range <= 0.0
        target_range[degenerate] = 1.0
        feature_range[degenerate] = 1.0

        io_funcs = BinaryIOCollection()
        for src_path, dst_path in zip(in_file_list, out_file_list):
            features = io_funcs.load_binary_file(src_path, self.feature_dimension)
            frame_number = features.size // self.feature_dimension

            min_block = numpy.tile(self.min_vector, (frame_number, 1))
            target_min_block = numpy.tile(self.target_min_value,
                                          (frame_number, self.feature_dimension))

            # Per-dimension scale from target range back to feature range.
            scale = numpy.tile(feature_range, (frame_number, 1)) / \
                numpy.tile(target_range, (frame_number, 1))

            denormed = scale * (features - target_min_block) + min_block
            io_funcs.array_to_binary_file(denormed, dst_path)
Example #22
0
    def extract_dur_features(self, orig_file, output_file):
        """Split off the last five columns (the duration features) of a label
        matrix file and write them to ``output_file``; the remaining column
        count is cached in self.label_dimension."""
        io_funcs = BinaryIOCollection()
        full_matrix = io_funcs.file2matrix(orig_file)
        # Everything except the trailing 5 columns is label features.
        self.label_dimension = full_matrix.shape[1] - 5
        io_funcs.array_to_binary_file(full_matrix[:, -5:], output_file)
Example #23
0
    def compute_std(self, file_list, mean_vector, start_index, end_index):
        """Return (and cache in self.std_vector) the per-dimension standard
        deviation of feature columns [start_index:end_index) over all frames
        of all files in ``file_list``.

        :param mean_vector: precomputed mean of the same column slice.
        :param start_index: first column (inclusive) of the slice.
        :param end_index: last column (exclusive) of the slice.
        :return: array of shape (1, end_index - start_index).
        """
        logger = logging.getLogger('feature_normalisation')

        local_feature_dimension = end_index - start_index

        # BUGFIX: the accumulator width must equal the sliced width; the
        # original used self.feature_dimension, which broke the += below
        # with a shape mismatch whenever the slice was partial.
        std_vector = numpy.zeros((1, local_feature_dimension))
        all_frame_number = 0

        io_funcs = BinaryIOCollection()
        for file_name in file_list:
            features, current_frame_number = io_funcs.load_binary_file_frame(file_name, self.feature_dimension)

            mean_matrix = numpy.tile(mean_vector, (current_frame_number, 1))

            # Accumulate squared deviations from the global mean.
            std_vector += numpy.reshape(numpy.sum((features[:, start_index:end_index] - mean_matrix) ** 2, axis=0), (1, local_feature_dimension))
            all_frame_number += current_frame_number

        std_vector /= float(all_frame_number)

        std_vector = std_vector ** 0.5

        logger.info('computed  std vector of length %d' % std_vector.shape[1])
        logger.info('  std: %s' % std_vector)

        self.std_vector = std_vector

        return std_vector
Example #24
0
    def predict(self,
                test_x,
                out_scaler,
                gen_test_file_list,
                sequential_training=False,
                stateful=False):
        """Generate acoustic features for held-out test utterances and write
        each prediction to its entry in ``gen_test_file_list``.

        :param test_x: dict mapping utterance id -> input feature matrix.
        :param out_scaler: scaler passed to ``data_utils.denorm_data``.
        :param gen_test_file_list: output file path per utterance; the
            utterance id is the basename without extension.
        :param sequential_training: reshape input/output for sequence models.
        :param stateful: use stateful batched input instead.
        """
        #### compute predictions ####
        io_funcs = BinaryIOCollection()

        test_file_number = len(gen_test_file_list)
        print("generating features on held-out test data...")
        for utt_index in range(test_file_number):
            gen_test_file_name = gen_test_file_list[utt_index]
            # Utterance id = basename without its extension.
            test_id = os.path.splitext(os.path.basename(gen_test_file_name))[0]
            temp_test_x = test_x[test_id]
            num_of_rows = temp_test_x.shape[0]

            if stateful:
                temp_test_x = data_utils.get_stateful_input(
                    temp_test_x, self.seq_length, self.batch_size)
            elif sequential_training:
                # Sequence models expect a (1, time, features) batch.
                temp_test_x = np.reshape(temp_test_x,
                                         (1, num_of_rows, self.n_in))

            predictions = self.model.predict(temp_test_x)
            if sequential_training:
                predictions = np.reshape(predictions,
                                         (num_of_rows, self.n_out))

            # NOTE(review): the return value of denorm_data is discarded --
            # presumably it denormalises `predictions` in place; confirm,
            # otherwise the saved features are still normalised.
            data_utils.denorm_data(predictions, out_scaler)

            io_funcs.array_to_binary_file(predictions, gen_test_file_name)
            data_utils.drawProgressBar(utt_index + 1, test_file_number)

        sys.stdout.write("\n")
    def compute_mean(self, file_list, start_index, end_index):
        """Compute the mean of feature columns [start_index, end_index)
        over all frames of all files in ``file_list``.

        The result, shape (1, end_index - start_index), is cached on
        ``self.mean_vector`` and returned.
        """
        logger = logging.getLogger('feature_normalisation')

        sub_dim = end_index - start_index
        running_sum = numpy.zeros((1, sub_dim))
        frames_seen = 0

        io_funcs = BinaryIOCollection()
        for file_name in file_list:
            features, frame_count = io_funcs.load_binary_file_frame(
                file_name, self.feature_dimension)
            column_totals = numpy.sum(features[:, start_index:end_index], axis=0)
            running_sum += numpy.reshape(column_totals, (1, sub_dim))
            frames_seen += frame_count

        mean_vector = running_sum / float(frames_seen)

        # numpy.set_printoptions was found to corrupt later printing of
        # numpy float32 values, so the vector is logged with default
        # formatting instead.
        logger.info('computed mean vector of length %d :' %
                    mean_vector.shape[1])
        logger.info(' mean: %s' % mean_vector)

        self.mean_vector = mean_vector

        return mean_vector
Пример #26
0
    def compute_mean(self, file_list):
        """Compute the global per-dimension mean over every frame of every
        file in ``file_list`` and return it, shape (1, feature_dimension).
        """
        logger = logging.getLogger("acoustic_norm")

        running_sum = numpy.zeros((1, self.feature_dimension))
        frames_seen = 0

        io_funcs = BinaryIOCollection()
        for file_name in file_list:
            features = io_funcs.load_binary_file(file_name,
                                                 self.feature_dimension)
            # frame count is implied by the flat file size
            frames_seen += features.size // self.feature_dimension
            running_sum += numpy.reshape(numpy.sum(features, axis=0),
                                         (1, self.feature_dimension))

        mean_vector = running_sum / float(frames_seen)

        # numpy.set_printoptions was found to corrupt later float32
        # printing, so the vector is logged with default formatting.
        logger.info('computed mean vector of length %d :' %
                    mean_vector.shape[1])
        logger.info(' mean: %s' % mean_vector)

        return mean_vector
Пример #27
0
    def load_next_utterance_S2SML(self):
        """Load one utterance for sequence-to-sequence multi-level (MLU)
        training, splitting the linguistic input into word / syllable /
        phone streams.  Called when utterance-by-utterance loading is
        required (e.g., sequential training).

        Returns
        -------
        tuple
            ``(shared_set_xyd, temp_set_x, temp_set_y, temp_set_d,
            temp_set_af)`` where ``shared_set_xyd`` are Theano shared
            variables for (x, y, durations), ``temp_set_x`` are word-level
            input features, ``temp_set_y`` the acoustic targets,
            ``temp_set_d`` the integer duration stream and ``temp_set_af``
            the additional syllable+phone feature matrix.
        """
        
        io_fun = BinaryIOCollection()

        # durations are stored one value per row (dim 1)
        in_features, lab_frame_number = io_fun.load_binary_file_frame(self.x_files_list[self.file_index], self.n_ins)
        out_features, out_frame_number = io_fun.load_binary_file_frame(self.y_files_list[self.file_index], self.n_outs)
        dur_features, dur_frame_number = io_fun.load_binary_file_frame(self.dur_files_list[self.file_index], 1)
      
        ### MLU features sub-division ###
        temp_set_MLU = in_features[0:lab_frame_number, ]
        temp_set_y   = out_features[0:out_frame_number, ]
      
        # Each linguistic level occupies two column ranges in MLU_div
        # ((start1, end1, start2, end2)); concatenate both slices per level.
        temp_set_phone = numpy.concatenate([temp_set_MLU[:, self.MLU_div['phone'][0]: self.MLU_div['phone'][1]], temp_set_MLU[:, self.MLU_div['phone'][2]: self.MLU_div['phone'][3]]], axis = 1)
        temp_set_syl   = numpy.concatenate([temp_set_MLU[:, self.MLU_div['syl'][0]: self.MLU_div['syl'][1]], temp_set_MLU[:, self.MLU_div['syl'][2]: self.MLU_div['syl'][3]]], axis = 1)
        temp_set_word  = numpy.concatenate([temp_set_MLU[:, self.MLU_div['word'][0]: self.MLU_div['word'][1]], temp_set_MLU[:, self.MLU_div['word'][2]: self.MLU_div['word'][3] ]], axis = 1)
        
        ### duration array sub-division ###
        # Layout of the duration stream: [word durations (in syllables),
        # syllable durations (in phones), phone durations (in frames)] --
        # the last lab_frame_number entries are the phone durations.
        dur_features = numpy.reshape(dur_features, (-1, ))
        temp_set_d   = dur_features.astype(int)   
        dur_word_syl = temp_set_d[0: -lab_frame_number]    
        
        # Walk the syllable durations backwards until they account for all
        # phones; that prefix length is the syllable count, the remainder
        # of dur_word_syl are word durations.
        num_ph    = lab_frame_number
        num_syl   = (numpy.where(numpy.cumsum(dur_word_syl[::-1])==lab_frame_number)[0][0] + 1)
        num_words = len(dur_word_syl) - num_syl 
        
        temp_set_dur_phone = temp_set_d[-num_ph:] 
        temp_set_dur_word  = dur_word_syl[0: num_words]
        temp_set_dur_syl   = dur_word_syl[num_words: ]
        
        ### additional feature matrix (syllable+phone+frame=432) ###
        # One syllable feature row per syllable (taken at each syllable's
        # last phone), followed column-wise by the phone features.
        num_frames = sum(temp_set_dur_phone)
        temp_set_af = numpy.empty((num_frames, self.MLU_div['length'][-1]))
        
        temp_set_af[0: num_syl, self.MLU_div['length'][0]: self.MLU_div['length'][1] ] = temp_set_syl[numpy.cumsum(temp_set_dur_syl)-1]
        temp_set_af[0: num_ph, self.MLU_div['length'][1]: self.MLU_div['length'][2]] = temp_set_phone
        
        ### input word feature matrix ###
        # For each word, index the phone row of its final syllable so the
        # word-level input has one row per word.
        temp_set_dur_word_segments = numpy.zeros(num_words, dtype='int32')
        syl_bound = numpy.cumsum(temp_set_dur_word)
        for indx in xrange(num_words):
            temp_set_dur_word_segments[indx] = int(sum(temp_set_dur_syl[0: syl_bound[indx]]))
        temp_set_x = temp_set_word[temp_set_dur_word_segments-1]
        
        ### rest of the code similar to S2S ###
        self.file_index += 1

        # wrap around and signal the end of the epoch
        if  self.file_index >= self.list_size:
            self.end_reading = True
            self.file_index = 0

        shared_set_x  = self.make_shared(temp_set_x, 'x')
        shared_set_y  = self.make_shared(temp_set_y, 'y')
        shared_set_d  = theano.shared(numpy.asarray(temp_set_d, dtype='int32'), name='d', borrow=True)

        shared_set_xyd = (shared_set_x, shared_set_y, shared_set_d)
        
        return shared_set_xyd, temp_set_x, temp_set_y, temp_set_d, temp_set_af
Пример #28
0
    def load_next_utterance_CTC(self):
        """Load one utterance for CTC training.

        The target sequence is built from the per-frame argmax of the
        output features, interleaved with the blank symbol
        ``self.n_outs``: [blank, l1, blank, l2, blank, ...].

        Returns
        -------
        tuple
            ``(shared_set_xy, temp_set_x, temp_set_y)`` where
            ``shared_set_xy`` are Theano shared variables and the temp
            arrays are the raw numpy inputs/targets.
        """
        # NOTE: the original preallocated two buffer_size-shaped arrays
        # here and immediately discarded them; those dead allocations have
        # been removed.
        io_fun = BinaryIOCollection()

        in_features, lab_frame_number = io_fun.load_binary_file_frame(self.x_files_list[self.file_index], self.n_ins)
        out_features, out_frame_number = io_fun.load_binary_file_frame(self.y_files_list[self.file_index], self.n_outs)

        frame_number = lab_frame_number
        temp_set_x = in_features[0:frame_number, ]

        # start with a blank, then append (label, blank) per frame
        temp_set_y = numpy.array([self.n_outs])
        for il in numpy.argmax(out_features, axis=1):
            temp_set_y = numpy.concatenate((temp_set_y, [il, self.n_outs]), axis=0)

        self.file_index += 1

        # wrap around and signal the end of the epoch
        if self.file_index >= self.list_size:
            self.end_reading = True
            self.file_index = 0

        shared_set_x = self.make_shared(temp_set_x, 'x')
        shared_set_y = theano.shared(numpy.asarray(temp_set_y, dtype='int32'), name='y', borrow=True)

        shared_set_xy = (shared_set_x, shared_set_y)

        return shared_set_xy, temp_set_x, temp_set_y
Пример #29
0
    def extract_durational_features(self, dur_file_name=None, dur_data=None):
        """Expand phone-level durations into a frame-level feature matrix.

        Durations come either from a binary file (``dur_file_name``) or
        directly as ``dur_data``.  With ``subphone_feats ==
        "coarse_coding"`` each frame row gets the three coarse-coding
        values for its position within the phone plus the phone's total
        frame count; for any other setting the matrix is returned
        all-zero.
        """
        if dur_file_name:
            io_funcs = BinaryIOCollection()
            dur_dim = 1  ## hard coded for now
            dur_data = io_funcs.load_binary_file(dur_file_name, dur_dim)

        num_phones = len(dur_data)
        total_frames = int(sum(dur_data))

        duration_feature_array = numpy.zeros(
            (total_frames, self.frame_feature_size))

        frame_index = 0
        for ph_idx in range(num_phones):
            frames_in_phone = int(dur_data[ph_idx])
            if self.subphone_feats != "coarse_coding":
                continue

            cc_feat_matrix = self.extract_coarse_coding_features_relative(
                frames_in_phone)
            for frame_in_ph in range(frames_in_phone):
                # three coarse-coding position features ...
                duration_feature_array[frame_index, 0:3] = cc_feat_matrix[frame_in_ph, 0:3]
                # ... plus the phone duration itself
                duration_feature_array[frame_index, 3] = float(frames_in_phone)
                frame_index += 1

        return duration_feature_array
Пример #30
0
    def get_file_lengths(self):
        """Scan every (label, acoustic) file pair and build the lookup
        tables ``framenum2utt``, ``utt2framenum`` and ``utt2index`` on
        ``self.file_length_dict``.

        Raises
        ------
        RuntimeError
            If a pair differs by 5 or more frames.
        """
        io_funcs = BinaryIOCollection()

        self.file_length_dict = {'framenum2utt':{}, 'utt2framenum':{}, 'utt2index':{}}

        ### read file by file ###
        while True:
            if  self.file_index >= self.list_size:
                self.end_reading = True
                self.file_index = 0
                break

            in_features, lab_frame_number = io_funcs.load_binary_file_frame(self.x_files_list[self.file_index], self.n_ins)
            out_features, out_frame_number = io_funcs.load_binary_file_frame(self.y_files_list[self.file_index], self.n_outs)

            base_file_name = os.path.basename(self.x_files_list[self.file_index]).split('.')[0]
            if abs(lab_frame_number - out_frame_number) < 5:    ## we allow small difference here. may not be correct, but sometimes, there is one/two frames difference
                frame_number = min(lab_frame_number, out_frame_number)
            else:
                msg = "the number of frames in label and acoustic features are different: %d vs %d (%s)" % (lab_frame_number, out_frame_number, base_file_name)
                self.logger.critical(msg)
                # A bare `raise` with no active exception raises an opaque
                # RuntimeError; raise one explicitly carrying the message.
                raise RuntimeError(msg)

            self.file_length_dict['framenum2utt'].setdefault(frame_number, []).append(base_file_name)
            self.file_length_dict['utt2framenum'][base_file_name] = frame_number
            self.file_length_dict['utt2index'][base_file_name] = self.file_index
            self.file_index += 1

        self.reset()
Пример #31
0
    def compute_distortion(self, file_id_list, reference_dir, generation_dir, file_ext, feature_dim):
        """Compute an objective distortion between reference and generated
        feature files.

        Depending on ``file_ext``:
          - '.lf0': returns (F0 RMSE over voiced frames, F0 correlation,
            V/UV error rate over all frames)
          - '.dur': returns (duration RMSE, duration correlation), where
            per-phone durations are summed across dimensions
          - '.mgc': MSE excluding the 0th (energy) coefficient
          - otherwise: plain MSE over all dimensions

        Raises
        ------
        RuntimeError
            If a reference/generated pair has mismatched frame counts.
        """
        total_voiced_frame_number = 0

        distortion = 0.0
        vuv_error = 0
        total_frame_number = 0

        io_funcs = BinaryIOCollection()

        ref_all_files_data = numpy.reshape(numpy.array([]), (-1,1))
        gen_all_files_data = numpy.reshape(numpy.array([]), (-1,1))
        for file_id in file_id_list:
            ref_file_name  = reference_dir + '/' + file_id + file_ext
            gen_file_name  = generation_dir + '/' + file_id + file_ext

            ref_data, ref_frame_number = io_funcs.load_binary_file_frame(ref_file_name, feature_dim)
            gen_data, gen_frame_number = io_funcs.load_binary_file_frame(gen_file_name, feature_dim)

            if ref_frame_number != gen_frame_number:
                msg = "The number of frames is not the same: %d vs %d. Error in compute_distortion.py\n." % (ref_frame_number, gen_frame_number)
                self.logger.critical(msg)
                # A bare `raise` outside an except block has no exception
                # to re-raise; raise an explicit error with the message.
                raise RuntimeError(msg)

            if file_ext == '.lf0':
                ref_all_files_data = numpy.concatenate((ref_all_files_data, ref_data), axis=0)
                gen_all_files_data = numpy.concatenate((gen_all_files_data, gen_data), axis=0)
                temp_distortion, temp_vuv_error, voiced_frame_number = self.compute_f0_mse(ref_data, gen_data)
                vuv_error += temp_vuv_error
                total_voiced_frame_number += voiced_frame_number
            elif file_ext == '.dur':
                # collapse multi-dimensional durations to one total per phone
                ref_data = numpy.reshape(numpy.sum(ref_data, axis=1), (-1, 1))
                gen_data = numpy.reshape(numpy.sum(gen_data, axis=1), (-1, 1))
                ref_all_files_data = numpy.concatenate((ref_all_files_data, ref_data), axis=0)
                gen_all_files_data = numpy.concatenate((gen_all_files_data, gen_data), axis=0)
                continue
            elif file_ext == '.mgc':
                # skip dimension 0 (energy) for spectral distortion
                temp_distortion = self.compute_mse(ref_data[:, 1:feature_dim], gen_data[:, 1:feature_dim])
            else:
                temp_distortion = self.compute_mse(ref_data, gen_data)

            distortion += temp_distortion

            total_frame_number += ref_frame_number

        if file_ext == '.dur':
            dur_rmse = self.compute_rmse(ref_all_files_data, gen_all_files_data)
            dur_corr = self.compute_corr(ref_all_files_data, gen_all_files_data)

            return dur_rmse, dur_corr
        elif file_ext == '.lf0':
            distortion /= float(total_voiced_frame_number)
            vuv_error  /= float(total_frame_number)

            distortion = numpy.sqrt(distortion)
            f0_corr = self.compute_f0_corr(ref_all_files_data, gen_all_files_data)

            return  distortion, f0_corr, vuv_error
        else:
            distortion /= float(total_frame_number)

            return  distortion
Пример #32
0
def read_and_transform_data_from_file_list(in_file_list, dim, seq_length=200, merge_size=1):
    """Read binary feature files into one buffer and fold it into
    fixed-length sequences of ``seq_length`` frames.

    NOTE(review): the final reshape to (num_of_samples, seq_length) only
    works when dim == 1 -- confirm callers never pass dim > 1.
    """
    io_funcs = BinaryIOCollection()

    utt_count = len(in_file_list)

    buffer_matrix = np.zeros((FRAME_BUFFER_SIZE, dim))

    ### read file by file ###
    write_pos = 0
    for utt_idx, file_name in enumerate(in_file_list):
        features, frame_count = io_funcs.load_binary_file_frame(file_name, dim)

        buffer_matrix[write_pos:write_pos + frame_count, ] = features
        write_pos += frame_count

        # After every `merge_size` utterances, round the write position up
        # to the next multiple of seq_length so sequences never straddle a
        # merge boundary.
        if (utt_idx + 1) % merge_size == 0:
            write_pos = seq_length * int(np.ceil(float(write_pos) / float(seq_length)))

        drawProgressBar(utt_idx + 1, utt_count)

    sys.stdout.write("\n")

    num_of_samples = int(np.ceil(float(write_pos) / float(seq_length)))

    trimmed = buffer_matrix[0: num_of_samples * seq_length, ]
    return trimmed.reshape(num_of_samples, seq_length)
Пример #33
0
    def normalise_data(self, in_file_list, out_file_list):
        """Min-max normalise each input file into the range
        [target_min_value, target_max_value] using the previously found
        ``self.min_vector`` / ``self.max_vector``, and write the result to
        the matching output path.  Columns listed in
        ``self.exclude_columns`` are passed through unchanged.
        """
        value_range = numpy.reshape(self.max_vector - self.min_vector,
                                    (1, self.feature_dimension))
        target_range = numpy.zeros((1, self.feature_dimension))
        target_range.fill(self.target_max_value - self.target_min_value)

        # Constant (zero-range) dimensions would divide by zero; force both
        # ranges to 1 so those columns simply map to the target minimum.
        constant_dims = value_range <= 0.0
        target_range[constant_dims] = 1.0
        value_range[constant_dims] = 1.0

        io_funcs = BinaryIOCollection()
        for in_name, out_name in zip(in_file_list, out_file_list):
            features = io_funcs.load_binary_file(in_name, self.feature_dimension)

            frame_count = features.size // self.feature_dimension
            min_matrix = numpy.tile(self.min_vector, (frame_count, 1))
            target_min_matrix = numpy.tile(self.target_min_value,
                                           (frame_count, self.feature_dimension))

            scale_matrix = numpy.tile(target_range, (frame_count, 1)) / numpy.tile(value_range, (frame_count, 1))

            norm_features = scale_matrix * (features - min_matrix) + target_min_matrix

            ## If we are to keep some columns unnormalised, use advanced
            ## indexing to reinstate the original values:
            m, n = numpy.shape(features)
            for col in self.exclude_columns:
                norm_features[list(range(m)), [col] * m] = features[list(range(m)), [col] * m]

            io_funcs.array_to_binary_file(norm_features, out_name)
    def feature_normalisation(self, in_file_list, out_file_list):
        """Mean/variance normalise every file in ``in_file_list`` and write
        the result to the matching path in ``out_file_list``.

        The mean and std are computed (and cached on the instance) over the
        full feature dimension on first use.  Returns
        ``(mean_vector, std_vector)``.
        """
        logger = logging.getLogger('feature_normalisation')

        #        self.feature_dimension = feature_dimension
        try:
            assert len(in_file_list) == len(out_file_list)
        except AssertionError:
            logger.critical(
                'The input and output file numbers are not the same! %d vs %d'
                % (len(in_file_list), len(out_file_list)))
            raise

        # `== None` on a numpy array compares element-wise and makes the
        # `if` ambiguous once the vector has been set; identity comparison
        # is the correct test (matching the other variants in this file).
        if self.mean_vector is None:
            self.mean_vector = self.compute_mean(in_file_list, 0,
                                                 self.feature_dimension)
        if self.std_vector is None:
            self.std_vector = self.compute_std(in_file_list, self.mean_vector,
                                               0, self.feature_dimension)

        io_funcs = BinaryIOCollection()
        file_number = len(in_file_list)
        # range() replaces the Python-2-only xrange()
        for i in range(file_number):
            features, current_frame_number = io_funcs.load_binary_file_frame(
                in_file_list[i], self.feature_dimension)

            mean_matrix = numpy.tile(self.mean_vector,
                                     (current_frame_number, 1))
            std_matrix = numpy.tile(self.std_vector, (current_frame_number, 1))

            norm_features = (features - mean_matrix) / std_matrix

            io_funcs.array_to_binary_file(norm_features, out_file_list[i])

        return self.mean_vector, self.std_vector
Пример #35
0
    def merge_data(self, binary_label_file_list, new_feat_file_list, out_feat_file_list):
        '''
        Merge new features with normalised label features.

        Each output file has lab_dim label columns followed by feat_dim new
        feature columns.  A frame-count mismatch of up to 5 is tolerated;
        larger mismatches raise RuntimeError.
        '''
        utt_number = len(new_feat_file_list)
        if utt_number != len(binary_label_file_list):
            print("the number of new feature input files and label files should be the same!\n")
            sys.exit(1)

        new_feat_ext = new_feat_file_list[0].split('/')[-1].split('.')[1]

        io_funcs = BinaryIOCollection()
        for i in range(utt_number):
            lab_file_name = binary_label_file_list[i]
            new_feat_file_name = new_feat_file_list[i]
            out_feat_file_name = out_feat_file_list[i]

            lab_features, lab_frame_number  = io_funcs.load_binary_file_frame(lab_file_name, self.lab_dim)
            new_features, feat_frame_number = io_funcs.load_binary_file_frame(new_feat_file_name, self.feat_dim)

            if (lab_frame_number - feat_frame_number) > 5:
                base_file_name = new_feat_file_list[i].split('/')[-1].split('.')[0]
                msg = "the number of frames in label and new features are different: %d vs %d (%s)" % (lab_frame_number, feat_frame_number, base_file_name)
                self.logger.critical(msg)
                # A bare `raise` with no active exception is itself an
                # error; raise an explicit exception with the diagnostic.
                raise RuntimeError(msg)

            merged_features = numpy.zeros((lab_frame_number, self.lab_dim + self.feat_dim))

            # Copy only the frames both streams actually share: the original
            # code mixed the two frame counts on the two sides of the
            # assignment, which crashed with a shape mismatch whenever the
            # label stream was 1-5 frames longer than the feature stream.
            common_frame_number = min(lab_frame_number, feat_frame_number)
            merged_features[0:lab_frame_number, 0:self.lab_dim] = lab_features
            merged_features[0:common_frame_number, self.lab_dim:self.lab_dim + self.feat_dim] = new_features[0:common_frame_number, ]

            io_funcs.array_to_binary_file(merged_features, out_feat_file_name)
            self.logger.debug('merged new feature %s of %d frames with %d label features' % (new_feat_ext, feat_frame_number, lab_frame_number))
Пример #36
0
    def predict(self, test_x, out_scaler, gen_test_file_list, sequential_training=False, stateful=False):
        """Run the trained model over held-out inputs and write the
        denormalised predictions to the paths in ``gen_test_file_list``.
        Inputs are looked up in ``test_x`` by file basename.
        """
        writer = BinaryIOCollection()

        n_files = len(gen_test_file_list)
        print("generating features on held-out test data...")
        for file_index in range(n_files):
            out_path = gen_test_file_list[file_index]
            utt_key = os.path.splitext(os.path.basename(out_path))[0]
            net_input = test_x[utt_key]
            frame_count = net_input.shape[0]

            if stateful:
                net_input = data_utils.get_stateful_input(net_input, self.seq_length, self.batch_size)
            elif sequential_training:
                net_input = np.reshape(net_input, (1, frame_count, self.n_in))

            net_output = self.model.predict(net_input)
            if sequential_training:
                net_output = np.reshape(net_output, (frame_count, self.n_out))

            data_utils.denorm_data(net_output, out_scaler)

            writer.array_to_binary_file(net_output, out_path)
            data_utils.drawProgressBar(file_index + 1, n_files)

        sys.stdout.write("\n")
Пример #37
0
    def find_min_max_values(self, in_file_list):
        """Scan every file and record the global per-dimension minimum and
        maximum in ``self.min_vector`` / ``self.max_vector``, each shaped
        (1, feature_dimension).
        """
        logger = logging.getLogger("acoustic_norm")

        file_number = len(in_file_list)
        per_file_min = numpy.zeros((file_number, self.feature_dimension))
        per_file_max = numpy.zeros((file_number, self.feature_dimension))
        io_funcs = BinaryIOCollection()
        for idx, file_name in enumerate(in_file_list):
            features = io_funcs.load_binary_file(file_name,
                                                 self.feature_dimension)
            per_file_min[idx, ] = numpy.amin(features, axis=0)
            per_file_max[idx, ] = numpy.amax(features, axis=0)

        # reduce the per-file extremes to global row vectors
        self.min_vector = numpy.reshape(numpy.amin(per_file_min, axis=0),
                                        (1, self.feature_dimension))
        self.max_vector = numpy.reshape(numpy.amax(per_file_max, axis=0),
                                        (1, self.feature_dimension))

        logger.info('across %d files found min/max values of length %d:' %
                    (file_number, self.feature_dimension))
        logger.info('  min: %s' % self.min_vector)
        logger.info('  max: %s' % self.max_vector)
Пример #38
0
    def feature_normalisation(self, in_file_list, out_file_list):
        """Mean/variance normalise every input file, but leave the final
        two feature columns at their raw values, then write each result to
        the matching output path.  Returns (mean_vector, std_vector).
        """
        logger = logging.getLogger('feature_normalisation')

        try:
            assert len(in_file_list) == len(out_file_list)
        except AssertionError:
            logger.critical('The input and output file numbers are not the same! %d vs %d' % (len(in_file_list), len(out_file_list)))
            raise

        if self.mean_vector is None:
            self.mean_vector = self.compute_mean(in_file_list, 0, self.feature_dimension)
        if self.std_vector is None:
            self.std_vector = self.compute_std(in_file_list, self.mean_vector, 0, self.feature_dimension)

        io_funcs = BinaryIOCollection()
        for in_name, out_name in zip(in_file_list, out_file_list):
            features, current_frame_number = io_funcs.load_binary_file_frame(in_name, self.feature_dimension)

            mean_matrix = numpy.tile(self.mean_vector, (current_frame_number, 1))
            std_matrix = numpy.tile(self.std_vector, (current_frame_number, 1))

            norm_features = (features - mean_matrix) / std_matrix
            print(current_frame_number, in_name)
            # Restore the last two columns (conditioning vectors such as
            # xvector / one-hot inputs) to their unnormalised values.
            norm_features = numpy.concatenate([norm_features[:, :self.feature_dimension - 2], features[:, self.feature_dimension - 2:]], axis=-1)

            print(' normalized vector :{}'.format(norm_features[1, :]))

            io_funcs.array_to_binary_file(norm_features, out_name)

        return self.mean_vector, self.std_vector
Пример #39
0
    def get_file_lengths(self):
        """Build the frame-count lookup tables for every (label, acoustic)
        file pair: ``framenum2utt`` (frame count -> utterance names),
        ``utt2framenum`` and ``utt2index``, stored on
        ``self.file_length_dict``.

        Raises
        ------
        RuntimeError
            If a pair differs by 5 or more frames.
        """
        io_funcs = BinaryIOCollection()

        self.file_length_dict = {'framenum2utt': {}, 'utt2framenum': {}, 'utt2index': {}}

        ### read file by file ###
        while self.file_index < self.list_size:
            in_features, lab_frame_number = io_funcs.load_binary_file_frame(self.x_files_list[self.file_index], self.n_ins)
            out_features, out_frame_number = io_funcs.load_binary_file_frame(self.y_files_list[self.file_index], self.n_outs)

            base_file_name = os.path.basename(self.x_files_list[self.file_index]).split('.')[0]
            ## we allow a small difference; sometimes there is a one/two frame mismatch
            if abs(lab_frame_number - out_frame_number) < 5:
                frame_number = min(lab_frame_number, out_frame_number)
            else:
                msg = "the number of frames in label and acoustic features are different: %d vs %d (%s)" % (lab_frame_number, out_frame_number, base_file_name)
                self.logger.critical(msg)
                # A bare `raise` with no active exception raises an opaque
                # RuntimeError; raise one explicitly carrying the message.
                raise RuntimeError(msg)

            self.file_length_dict['framenum2utt'].setdefault(frame_number, []).append(base_file_name)
            self.file_length_dict['utt2framenum'][base_file_name] = frame_number
            self.file_length_dict['utt2index'][base_file_name] = self.file_index
            self.file_index += 1

        # mark the epoch finished and rewind, as the original loop did
        self.end_reading = True
        self.file_index = 0

        self.reset()
Пример #40
0
    def feature_normalisation(self, in_file_list, out_file_list):
        logger = logging.getLogger('feature_normalisation')
        
#        self.feature_dimension = feature_dimension
        try:
            assert len(in_file_list) == len(out_file_list)
        except  AssertionError:
            logger.critical('The input and output file numbers are not the same! %d vs %d' %(len(in_file_list), len(out_file_list)))
            raise

        if self.mean_vector == None:
            self.mean_vector = self.compute_mean(in_file_list, 0, self.feature_dimension)
        if self.std_vector  == None:
            self.std_vector = self.compute_std(in_file_list, self.mean_vector, 0, self.feature_dimension)
        
        io_funcs = BinaryIOCollection()
        file_number = len(in_file_list)
        for i in xrange(file_number):
            features, current_frame_number = io_funcs.load_binary_file_frame(in_file_list[i], self.feature_dimension)

            mean_matrix = numpy.tile(self.mean_vector, (current_frame_number, 1))
            std_matrix = numpy.tile(self.std_vector, (current_frame_number, 1))
            
            norm_features = (features - mean_matrix) / std_matrix
            
            io_funcs.array_to_binary_file(norm_features, out_file_list[i])

        return  self.mean_vector, self.std_vector
Пример #41
0
    def extract_label_features(self, orig_file, output_file):
        """Strip the trailing 5 (duration) columns from a label matrix and
        write the remaining label context to ``output_file``; the label
        width is recorded on ``self.label_dimension``.
        """
        io_funcs = BinaryIOCollection()
        full_matrix = io_funcs.file2matrix(orig_file)
        # everything except the last five columns is label context
        self.label_dimension = full_matrix.shape[1] - 5
        io_funcs.array_to_binary_file(full_matrix[:, :-5], output_file)
def load_covariance(var_file_dict, out_dimension_dict):
    """Load per-stream variance files into a dict of column vectors.

    Each file named in ``var_file_dict`` is read as a single-column binary
    file and reshaped to (out_dimension_dict[stream], 1).
    """
    io_funcs = BinaryIOCollection()
    var = {}
    for feature_name in var_file_dict:
        raw_values, _ = io_funcs.load_binary_file_frame(var_file_dict[feature_name], 1)
        var[feature_name] = numpy.reshape(raw_values, (out_dimension_dict[feature_name], 1))
    return var
Пример #43
0
    def load_covariance(self, var_file_dict, out_dimension_dict):
        """Read each stream's variance file (single-column binary), reshape
        it to (out_dimension_dict[stream], 1) and cache it in ``self.var``.
        """
        io_funcs = BinaryIOCollection()
        for feature_name in list(var_file_dict.keys()):
            raw_values, _ = io_funcs.load_binary_file_frame(var_file_dict[feature_name], 1)
            self.var[feature_name] = numpy.reshape(raw_values, (out_dimension_dict[feature_name], 1))
Пример #44
0
    def load_next_utterance(self):
        """Load the (input, output) features for one utterance. Called when
        utterance-by-utterance loading is required (e.g., sequential
        training).

        Returns
        -------
        tuple
            ``(shared_set_xy, temp_set_x, temp_set_y)`` -- the shared
            variables and the raw numpy arrays, truncated to the common
            frame count of the pair.

        Raises
        ------
        RuntimeError
            If the label/acoustic frame counts differ by 5 or more.
        """
        # NOTE: the original preallocated two buffer_size-shaped arrays
        # here and immediately discarded them; removed as dead code.
        io_fun = BinaryIOCollection()

        in_features, lab_frame_number = io_fun.load_binary_file_frame(
            self.x_files_list[self.file_index], self.n_ins)
        out_features, out_frame_number = io_fun.load_binary_file_frame(
            self.y_files_list[self.file_index], self.n_outs)

        # Compute the basename up front: the original defined it only inside
        # the error branch, so the debug print below raised NameError on
        # every call.
        base_file_name = os.path.basename(
            self.x_files_list[self.file_index]).split('.')[0]

        frame_number = lab_frame_number
        print(' %%%%%  {} : {} /  {}   '.format(base_file_name, self.n_ins,
                                                self.n_outs))

        ## we allow a small difference; sometimes there is a one/two frame
        ## mismatch between label and acoustic streams
        if abs(lab_frame_number - out_frame_number) < 5:
            if lab_frame_number > out_frame_number:
                frame_number = out_frame_number
        else:
            msg = ("the number of frames in label and acoustic features are different: %d vs %d (%s)"
                   % (lab_frame_number, out_frame_number, base_file_name))
            self.logger.critical(msg)
            # A bare `raise` with no active exception raises an opaque
            # RuntimeError; raise one explicitly carrying the message.
            raise RuntimeError(msg)

        temp_set_y = out_features[0:frame_number, ]
        temp_set_x = in_features[0:frame_number, ]

        self.file_index += 1

        # wrap around and signal the end of the epoch
        if self.file_index >= self.list_size:
            self.end_reading = True
            self.file_index = 0

        # reshape input-output to (1, T, dim) when requested
        if self.reshape_io:
            temp_set_x = numpy.reshape(temp_set_x,
                                       (1, temp_set_x.shape[0], self.n_ins))
            temp_set_y = numpy.reshape(temp_set_y,
                                       (1, temp_set_y.shape[0], self.n_outs))

            temp_set_x = numpy.array(temp_set_x, 'float32')
            temp_set_y = numpy.array(temp_set_y, 'float32')

        shared_set_x = self.make_shared(temp_set_x, 'x')
        shared_set_y = self.make_shared(temp_set_y, 'y')

        shared_set_xy = (shared_set_x, shared_set_y)

        return shared_set_xy, temp_set_x, temp_set_y
Пример #45
0
    def modify_dur_from_phone_alignment_labels(self, label_file_name,
                                               gen_dur_file_name,
                                               gen_lab_file_name):
        """Rewrite a phone-alignment label file using predicted durations.

        Labels matching the silence pattern (``self.check_silence_pattern``)
        keep their original durations; every other phone takes the next
        predicted duration from ``gen_dur_file_name``.  Start/end times are
        re-accumulated so the output timeline is contiguous, and written to
        ``gen_lab_file_name``.

        Parameters
        ----------
        label_file_name : str
            Input alignment label file ("start end full_label" per line).
        gen_dur_file_name : str
            Binary file with one predicted duration (in frames) per
            non-silence phone.
        gen_lab_file_name : str
            Output label file path.
        """
        logger = logging.getLogger("dur")

        # predicted durations are one value (frame count) per phone
        dur_dim = 1

        io_funcs = BinaryIOCollection()
        dur_features, frame_number = io_funcs.load_binary_file_frame(
            gen_dur_file_name, dur_dim)

        fid = open(label_file_name)
        utt_labels = fid.readlines()
        fid.close()

        label_number = len(utt_labels)
        logger.info('loaded %s, %3d labels' % (label_file_name, label_number))

        out_fid = open(gen_lab_file_name, 'w')

        # current_index walks the predicted durations (non-silence only);
        # prev_end_time accumulates the rewritten timeline
        current_index = 0
        prev_end_time = 0
        for line in utt_labels:
            line = line.strip()

            # skip blank lines
            if len(line) < 1:
                continue
            temp_list = re.split('\s+', line)
            start_time = int(temp_list[0])
            end_time = int(temp_list[1])

            full_label = temp_list[2]

            # 1 when the label matches a silence pattern
            label_binary_flag = self.check_silence_pattern(full_label)

            if label_binary_flag == 1:
                # silence: keep the original duration, shifted onto the
                # accumulated timeline; consumes no predicted duration
                current_phone_dur = end_time - start_time
                out_fid.write(
                    str(prev_end_time) + ' ' +
                    str(prev_end_time + current_phone_dur) + ' ' + full_label +
                    '\n')
                prev_end_time = prev_end_time + current_phone_dur
                continue
            else:
                phone_dur = dur_features[current_index]
                # frames -> HTK 100 ns time units; presumably a 5 ms frame
                # shift (frames * 5 * 10000) -- TODO confirm
                phone_dur = int(phone_dur) * 5 * 10000
                out_fid.write(
                    str(prev_end_time) + ' ' + str(prev_end_time + phone_dur) +
                    ' ' + full_label + '\n')
                prev_end_time = prev_end_time + phone_dur

            current_index += 1

        logger.debug(
            'modifed label with predicted duration of %d frames x %d features'
            % dur_features.shape)
Пример #46
0
def read_data_from_file_list(inp_file_list, out_file_list, inp_dim, out_dim, sequential_training=True):
    """Load paired input/output binary feature files.

    :param inp_file_list: list of input feature file paths
    :param out_file_list: parallel list of output feature file paths
    :param inp_dim: input feature dimension per frame
    :param out_dim: output feature dimension per frame
    :param sequential_training: if True, return dicts keyed by utterance base
        name; otherwise return two frame-concatenated matrices
    :returns: (set_x, set_y, file_length_dict) where file_length_dict maps
        frame counts to utterance names ('framenum2utt') and back
        ('utt2framenum')
    """
    io_funcs = BinaryIOCollection()

    num_of_utt = len(inp_file_list)

    file_length_dict = {'framenum2utt': {}, 'utt2framenum': {}}

    if sequential_training:
        temp_set_x = {}
        temp_set_y = {}
    else:
        # flat frame buffers, trimmed to the actual frame count at the end
        temp_set_x = np.empty((FRAME_BUFFER_SIZE, inp_dim))
        temp_set_y = np.empty((FRAME_BUFFER_SIZE, out_dim))

    ### read file by file ###
    current_index = 0
    for i in range(num_of_utt):
        inp_file_name = inp_file_list[i]
        out_file_name = out_file_list[i]
        inp_features, inp_frame_number = io_funcs.load_binary_file_frame(inp_file_name, inp_dim)
        out_features, out_frame_number = io_funcs.load_binary_file_frame(out_file_name, out_dim)

        base_file_name = os.path.basename(inp_file_name).split(".")[0]

        # small misalignments (<=5 frames) are tolerated; larger ones are fatal
        if abs(inp_frame_number-out_frame_number)>5:
            print('the number of frames in input and output features are different: %d vs %d (%s)' %(inp_frame_number, out_frame_number, base_file_name))
            # fix: exit with a non-zero status on error (was sys.exit(0),
            # which reports success to the caller/shell)
            sys.exit(1)
        else:
            frame_number = min(inp_frame_number, out_frame_number)

        if sequential_training:
            temp_set_x[base_file_name] = inp_features[0:frame_number]
            temp_set_y[base_file_name] = out_features[0:frame_number]
        else:
            temp_set_x[current_index:current_index+frame_number, ] = inp_features[0:frame_number]
            temp_set_y[current_index:current_index+frame_number, ] = out_features[0:frame_number]
            current_index += frame_number

        # group utterances by frame count (used for bucketing elsewhere)
        file_length_dict['framenum2utt'].setdefault(frame_number, []).append(base_file_name)

        file_length_dict['utt2framenum'][base_file_name] = frame_number

        drawProgressBar(i+1, num_of_utt)

    sys.stdout.write("\n")

    if not sequential_training:
        temp_set_x = temp_set_x[0:current_index, ]
        temp_set_y = temp_set_y[0:current_index, ]

    return temp_set_x, temp_set_y, file_length_dict
Пример #47
0
    def load_min_max_values(self, label_norm_file):
        """Restore stored min/max normalisation vectors from a binary file.

        The file holds the min vector followed by the max vector; the two
        halves are assigned to ``self.min_vector`` and ``self.max_vector``.
        """
        logger = logging.getLogger("acoustic_norm")

        io_funcs = BinaryIOCollection()
        min_max_vector, frame_number = io_funcs.load_binary_file_frame(label_norm_file, 1)
        flat = numpy.reshape(min_max_vector, (-1, ))
        half = frame_number // 2
        self.min_vector = flat[:half]
        self.max_vector = flat[half:]

        logger.info('Loaded min max values from the trained data for feature dimension of %d' % self.feature_dimension)
Пример #48
0
def read_data_from_file_list(inp_file_list, out_file_list, inp_dim, out_dim, sequential_training=True):
    """Load paired input/output binary feature files.

    :param inp_file_list: list of input feature file paths
    :param out_file_list: parallel list of output feature file paths
    :param inp_dim: input feature dimension per frame
    :param out_dim: output feature dimension per frame
    :param sequential_training: if True, return dicts keyed by utterance base
        name; otherwise return two frame-concatenated matrices
    :returns: (set_x, set_y, file_length_dict) where file_length_dict maps
        frame counts to utterance names ('framenum2utt') and back
        ('utt2framenum')
    """
    io_funcs = BinaryIOCollection()

    num_of_utt = len(inp_file_list)

    file_length_dict = {'framenum2utt': {}, 'utt2framenum': {}}

    if sequential_training:
        temp_set_x = {}
        temp_set_y = {}
    else:
        # flat frame buffers, trimmed to the actual frame count at the end
        temp_set_x = np.empty((FRAME_BUFFER_SIZE, inp_dim))
        temp_set_y = np.empty((FRAME_BUFFER_SIZE, out_dim))

    ### read file by file ###
    current_index = 0
    for i in range(num_of_utt):
        inp_file_name = inp_file_list[i]
        out_file_name = out_file_list[i]
        inp_features, inp_frame_number = io_funcs.load_binary_file_frame(inp_file_name, inp_dim)
        out_features, out_frame_number = io_funcs.load_binary_file_frame(out_file_name, out_dim)

        base_file_name = os.path.basename(inp_file_name).split(".")[0]

        # small misalignments (<=5 frames) are tolerated; larger ones are fatal
        if abs(inp_frame_number-out_frame_number)>5:
            print('the number of frames in input and output features are different: %d vs %d (%s)' %(inp_frame_number, out_frame_number, base_file_name))
            # fix: exit with a non-zero status on error (was sys.exit(0),
            # which reports success to the caller/shell)
            sys.exit(1)
        else:
            frame_number = min(inp_frame_number, out_frame_number)

        if sequential_training:
            temp_set_x[base_file_name] = inp_features[0:frame_number]
            temp_set_y[base_file_name] = out_features[0:frame_number]
        else:
            temp_set_x[current_index:current_index+frame_number, ] = inp_features[0:frame_number]
            temp_set_y[current_index:current_index+frame_number, ] = out_features[0:frame_number]
            current_index += frame_number

        # group utterances by frame count (used for bucketing elsewhere)
        file_length_dict['framenum2utt'].setdefault(frame_number, []).append(base_file_name)

        file_length_dict['utt2framenum'][base_file_name] = frame_number

        drawProgressBar(i+1, num_of_utt)

    sys.stdout.write("\n")

    if not sequential_training:
        temp_set_x = temp_set_x[0:current_index, ]
        temp_set_y = temp_set_y[0:current_index, ]

    return temp_set_x, temp_set_y, file_length_dict
Пример #49
0
def simple_scale_variance_CONTINUUM(indir, outdir, var_file_dict, out_dimension_dict, file_id_list):
    """Variance scaling sweep over a continuum of global-variance weights.

    For each utterance, produces one output per gv_weight in 0.0..1.0 (step
    0.1); streams listed in ``streams_to_scale`` are rescaled towards the
    global variance, all other streams are copied through unchanged.

    :returns: list of extended utterance names (``<utt>_gv<weight>``)
    """
    ## Try range of interpolation weights for combining global & local variance
    all_streams = ['cmp','HNR','F0','LSF','Gain','LSFsource']
    streams_to_scale = ['LSF']

    static_variances = {}

    static_dimension_dict = {}
    for (feature_name,size) in list(out_dimension_dict.items()):
        # fix: integer division -- size/3 yields a float on Python 3, which
        # breaks load_binary_file_frame and the slice below
        # (3 = static + delta + delta-delta)
        static_dimension_dict[feature_name] = size//3

    io_funcs = BinaryIOCollection()
    for feature_name in list(var_file_dict.keys()):
        var_values, dimension = io_funcs.load_binary_file_frame(var_file_dict[feature_name], 1)
        static_var_values = var_values[:static_dimension_dict[feature_name], :]
        static_variances[feature_name] = static_var_values

    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    file_id_list_out = []
    for uttname in file_id_list:
        for gv_weight in [0.0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1.0]:
            local_weight = 1.0 - gv_weight
            for stream in all_streams:
                infile = os.path.join(indir, uttname + '.' + stream)
                extended_uttname = uttname + '_gv' + str(gv_weight)
                print(extended_uttname)
                outfile = os.path.join(outdir, extended_uttname + '.' + stream)
                if not os.path.isfile(infile):
                    sys.exit(infile + ' does not exist')
                if stream in streams_to_scale:
                    speech, dimension = io_funcs.load_binary_file_frame(infile, static_dimension_dict[stream])
                    utt_mean = numpy.mean(speech, axis=0)
                    utt_std =  numpy.std(speech, axis=0)

                    global_std = numpy.transpose((static_variances[stream]))

                    # interpolate between global and per-utterance std
                    weighted_global_std = (gv_weight * global_std) + (local_weight * utt_std)

                    std_ratio = weighted_global_std / utt_std

                    nframes, ndim = numpy.shape(speech)
                    utt_mean_matrix = numpy.tile(utt_mean, (nframes,1))
                    std_ratio_matrix = numpy.tile(std_ratio, (nframes,1))

                    # scale around the utterance mean
                    scaled_speech = ((speech - utt_mean_matrix) * std_ratio_matrix) + utt_mean_matrix
                    io_funcs.array_to_binary_file(scaled_speech, outfile)


                else:
                    # NOTE(review): shelling out to `cp` is Unix-only; shutil.copy
                    # would be portable
                    os.system('cp %s %s'%(infile, outfile))
            file_id_list_out.append(extended_uttname)
    return file_id_list_out
Пример #50
0
    def load_mean_std_values(self, acoustic_norm_file):
        """Restore stored mean/std normalisation vectors from a binary file.

        The file holds the mean vector followed by the std vector; the two
        halves are assigned to ``self.mean_vector`` / ``self.std_vector``
        and also returned.
        """
        logger = logging.getLogger('feature_normalisation')

        io_funcs = BinaryIOCollection()
        mean_std_vector, frame_number = io_funcs.load_binary_file_frame(acoustic_norm_file, 1)
        flat = numpy.reshape(mean_std_vector, (-1, ))
        half = frame_number // 2
        self.mean_vector = flat[:half]
        self.std_vector = flat[half:]

        logger.info('Loaded mean std values from the trained data for feature dimension of %d' % self.feature_dimension)
        return self.mean_vector, self.std_vector
Пример #51
0
    def load_mean_std_values(self, acoustic_norm_file):
        """Load the saved mean and std vectors used for feature normalisation.

        The binary file stores mean values in its first half and std values
        in its second half.
        """
        logger = logging.getLogger('feature_normalisation')

        io_funcs = BinaryIOCollection()
        mean_std_vector, frame_number = io_funcs.load_binary_file_frame(acoustic_norm_file, 1)
        mean_std_vector = numpy.reshape(mean_std_vector, (-1, ))
        midpoint = frame_number // 2
        self.mean_vector, self.std_vector = (
            mean_std_vector[0:midpoint],
            mean_std_vector[midpoint:],
        )

        logger.info('Loaded mean std values from the trained data for feature dimension of %d' % self.feature_dimension)
        return self.mean_vector, self.std_vector
Пример #52
0
    def extract_linguistic_features(self, in_file_name, out_file_name=None, label_type="state_align", dur_file_name=None):
        """Extract linguistic features from an alignment label file.

        :param in_file_name: input label file
        :param out_file_name: if given, write the feature matrix to this
            binary file; otherwise return the matrix
        :param label_type: "state_align" or "phone_align"
        :param dur_file_name: duration file, used only for phone alignment
        """
        if label_type=="phone_align":
            A = self.load_labels_with_phone_alignment(in_file_name, dur_file_name)
        elif label_type=="state_align":
            A = self.load_labels_with_state_alignment(in_file_name)
        else:
            # `logger` is presumably module-level -- TODO confirm
            logger.critical("we don't support %s labels as of now!!" % (label_type))
            # fix: bail out instead of falling through to a NameError on
            # undefined A (matches sibling extract_dur_features)
            sys.exit(1)

        if out_file_name:
            io_funcs = BinaryIOCollection()
            io_funcs.array_to_binary_file(A, out_file_name)
        else:
            return A
Пример #53
0
def simple_scale_variance(indir, outdir, var_file_dict, out_dimension_dict, file_id_list, gv_weight=1.0):
    """Simple variance scaling (Silen et al. 2012, paragraph 3.1).

    Streams in ``streams_to_scale`` have their per-utterance std interpolated
    towards the global std with weight ``gv_weight``; other streams are
    copied through unchanged.
    """
    ## TODO: Lots of things like stream names hardcoded here; 3 for delta + delta-delta; ...
#     all_streams = ['cmp','HNR','F0','LSF','Gain','LSFsource']
#     streams_to_scale = ['LSF']
    all_streams = ['cmp','mgc','lf0','bap']
    streams_to_scale = ['mgc']

    static_variances = {}

    static_dimension_dict = {}
    for (feature_name,size) in out_dimension_dict.items():
        # fix: integer division -- size/3 yields a float on Python 3, which
        # breaks load_binary_file_frame and the slice below
        # (3 = static + delta + delta-delta)
        static_dimension_dict[feature_name] = size//3

    io_funcs = BinaryIOCollection()
    for feature_name in var_file_dict.keys():
        var_values, dimension = io_funcs.load_binary_file_frame(var_file_dict[feature_name], 1)
        static_var_values = var_values[:static_dimension_dict[feature_name], :]
        static_variances[feature_name] = static_var_values

    if not os.path.isdir(outdir):
        os.makedirs(outdir)

    assert gv_weight <= 1.0 and gv_weight >= 0.0
    local_weight = 1.0 - gv_weight

    for uttname in file_id_list:
        for stream in all_streams:
            infile = os.path.join(indir, uttname + '.' + stream)
            outfile = os.path.join(outdir, uttname + '.' + stream)
            if not os.path.isfile(infile):
                sys.exit(infile + ' does not exist')
            if stream in streams_to_scale:
                speech, dimension = io_funcs.load_binary_file_frame(infile, static_dimension_dict[stream])
                utt_mean = numpy.mean(speech, axis=0)
                utt_std =  numpy.std(speech, axis=0)

                # interpolate between global and per-utterance std
                global_std = numpy.transpose((static_variances[stream]))
                weighted_global_std = (gv_weight * global_std) + (local_weight * utt_std)
                std_ratio = weighted_global_std / utt_std

                nframes, ndim = numpy.shape(speech)
                utt_mean_matrix = numpy.tile(utt_mean, (nframes,1))
                std_ratio_matrix = numpy.tile(std_ratio, (nframes,1))

                # scale around the utterance mean
                scaled_speech = ((speech - utt_mean_matrix) * std_ratio_matrix) + utt_mean_matrix
                io_funcs.array_to_binary_file(scaled_speech, outfile)


            else:
                # NOTE(review): shelling out to `cp` is Unix-only; shutil.copy
                # would be portable
                os.system('cp %s %s'%(infile, outfile))
Пример #54
0
    def modify_dur_from_state_alignment_labels(self, label_file_name, gen_dur_file_name, gen_lab_file_name):
        """Retime a state-alignment label file using predicted state durations.

        ``gen_dur_file_name`` holds one row per phone with one predicted
        duration per state. Silence labels keep their original aligned
        durations; other labels get predicted ones. The retimed labels are
        written to ``gen_lab_file_name`` (times in HTK 100ns units).
        """
        logger = logging.getLogger("dur")

        state_number = self.state_number
        dur_dim = state_number

        io_funcs = BinaryIOCollection()
        dur_features, frame_number = io_funcs.load_binary_file_frame(gen_dur_file_name, dur_dim)

        # fix: use context managers so file handles are always closed
        # (the original leaked out_fid)
        with open(label_file_name) as fid:
            utt_labels = fid.readlines()

        label_number = len(utt_labels)
        logger.info('loaded %s, %3d labels' % (label_file_name, label_number))

        with open(gen_lab_file_name, 'w') as out_fid:
            current_index = 0
            prev_end_time = 0
            for line in utt_labels:
                line = line.strip()

                if len(line) < 1:
                    continue
                temp_list = re.split(r'\s+', line)  # raw string: avoid invalid-escape warning
                start_time = int(temp_list[0])
                end_time = int(temp_list[1])

                full_label = temp_list[2]
                full_label_length = len(full_label) - 3  # remove state information [k]
                state_index = full_label[full_label_length + 1]
                state_index = int(state_index) - 1

                label_binary_flag = self.check_silence_pattern(full_label)

                if label_binary_flag == 1:
                    # silence: keep the aligned duration; current_index is NOT
                    # advanced (predicted durations presumably cover
                    # non-silence phones only -- TODO confirm)
                    current_state_dur = end_time - start_time
                    out_fid.write(str(prev_end_time)+' '+str(prev_end_time+current_state_dur)+' '+full_label+'\n')
                    prev_end_time = prev_end_time+current_state_dur
                    continue
                else:
                    # label state markers appear to start at [2], so after the
                    # "- 1" above state_index runs 1..state_number and the column
                    # lookup needs a further -1 -- verify against label format
                    state_dur = dur_features[current_index, state_index-1]
                    # frames -> 100ns HTK units, assuming a 5ms frame shift
                    state_dur = int(state_dur)*5*10000
                    out_fid.write(str(prev_end_time)+' '+str(prev_end_time+state_dur)+' '+full_label+'\n')
                    prev_end_time = prev_end_time+state_dur

                if state_index == state_number:
                    # last state of this phone: advance to the next duration row
                    current_index += 1

        logger.debug('modifed label with predicted duration of %d frames x %d features' % dur_features.shape )
Пример #55
0
    def predict(self, test_x, out_scaler, gen_test_file_list, sequential_training=False, stateful=False):
        """Generate acoustic features for held-out test data.

        Restores a TF1 graph checkpoint from ``self.ckpt_dir``, runs the
        network on each utterance in ``test_x`` (dict keyed by utterance id),
        denormalises the predictions with ``out_scaler`` and writes each
        result to the corresponding path in ``gen_test_file_list``.

        NOTE(review): this is Python 2 + TensorFlow 1.x code (`print`
        statements, `xrange`, in-place `dict.keys().sort()`).
        ``stateful`` is accepted but never read here -- TODO confirm intent.
        """
        #### compute predictions ####

        io_funcs = BinaryIOCollection()

        # sorted utterance ids give a deterministic output order
        test_id_list = test_x.keys()
        test_id_list.sort()

        test_file_number = len(test_id_list)

        print("generating features on held-out test data...")
        with tf.Session() as sess:
           # rebuild the graph from the .meta file, then restore weights
           new_saver=tf.train.import_meta_graph(os.path.join(self.ckpt_dir,"mymodel.ckpt.meta"))
           print "loading the model parameters..."
           # placeholders/ops published to collections at training time
           output_layer=tf.get_collection("output_layer")[0]
           input_layer=tf.get_collection("input_layer")[0]
           new_saver.restore(sess,os.path.join(self.ckpt_dir,"mymodel.ckpt"))
           print "The model parameters are successfully restored"
           for utt_index in xrange(test_file_number):
               gen_test_file_name = gen_test_file_list[utt_index]
               temp_test_x        = test_x[test_id_list[utt_index]]
               num_of_rows        = temp_test_x.shape[0]
               if not sequential_training:
                   # feed-forward network: feed frames directly, disable
                   # batch-norm training mode (and dropout, if present)
                   is_training_batch=tf.get_collection("is_training_batch")[0]
                   if self.dropout_rate!=0.0:
                        is_training_drop=tf.get_collection("is_training_drop")[0]
                        y_predict=sess.run(output_layer,feed_dict={input_layer:temp_test_x,is_training_drop:False,is_training_batch:False})
                   else:
                        y_predict=sess.run(output_layer,feed_dict={input_layer:temp_test_x,is_training_batch:False})
               else:
                        # recurrent network: one utterance = one sequence of
                        # shape (1, frames, n_in), with its true length fed in
                        temp_test_x=np.reshape(temp_test_x,[1,num_of_rows,self.n_in])
                        hybrid=0
                        utt_length_placeholder=tf.get_collection("utt_length")[0]
                        # "hybrid" = recurrent stack mixed with tanh layers,
                        # which adds a batch-norm training flag to feed
                        if "tanh" in self.hidden_layer_type:
                            hybrid=1
                            is_training_batch=tf.get_collection("is_training_batch")[0]
                        if self.dropout_rate!=0.0:
                           is_training_drop=tf.get_collection("is_training_drop")[0]
                           if hybrid:
                              y_predict=sess.run(output_layer,feed_dict={input_layer:temp_test_x,utt_length_placeholder:[num_of_rows],is_training_drop:False,is_training_batch:False})
                           else:
                              y_predict=sess.run(output_layer,feed_dict={input_layer:temp_test_x,utt_length_placeholder:[num_of_rows],is_training_drop:False})
                        elif hybrid:
                              y_predict=sess.run(output_layer,feed_dict={input_layer:temp_test_x,utt_length_placeholder:[num_of_rows],is_training_batch:False})
                        else:
                              y_predict=sess.run(output_layer,feed_dict={input_layer:temp_test_x,utt_length_placeholder:[num_of_rows]})
               # undo output normalisation and write the utterance to disk
               data_utils.denorm_data(y_predict, out_scaler)
               io_funcs.array_to_binary_file(y_predict, gen_test_file_name)
               data_utils.drawProgressBar(utt_index+1, test_file_number)
Пример #56
0
    def extract_dur_features(self, in_file_name, out_file_name=None, label_type="state_align", feature_type=None, unit_size=None, feat_size=None):
        """Extract duration features from an alignment label file.

        Dispatches on ``label_type`` ("state_align" or "phone_align");
        anything else is fatal. Writes the result to ``out_file_name``
        when given, otherwise returns the feature matrix.
        """
        logger = logging.getLogger("dur")

        if label_type == "state_align":
            A = self.extract_dur_from_state_alignment_labels(in_file_name, feature_type, unit_size, feat_size)
        elif label_type == "phone_align":
            A = self.extract_dur_from_phone_alignment_labels(in_file_name, feature_type, unit_size, feat_size)
        else:
            logger.critical("we don't support %s labels as of now!!" % (label_type))
            sys.exit(1)

        if not out_file_name:
            return A
        BinaryIOCollection().array_to_binary_file(A, out_file_name)
Пример #57
0
def compute_norm_stats(data, stats_file, method="MVN"):
    """Fit a normalisation scaler on training data and persist its stats.

    :param data: 2-D array of training frames (frames x dims)
    :param stats_file: binary file to write the 2 x dims stats matrix to
    :param method: "MVN" (mean/variance) or "MINMAX" (range scaling)
    :returns: the fitted sklearn scaler
    :raises ValueError: if ``method`` is not supported
    """
    #### normalize training data ####
    io_funcs = BinaryIOCollection()

    if method=="MVN":
        # row 0 = mean, row 1 = scale
        scaler = preprocessing.StandardScaler().fit(data)
        norm_matrix = np.vstack((scaler.mean_, scaler.scale_))
    elif method=="MINMAX":
        # row 0 = min offset, row 1 = scale
        scaler = preprocessing.MinMaxScaler(feature_range=(0.01, 0.99)).fit(data)
        norm_matrix = np.vstack((scaler.min_, scaler.scale_))
    else:
        # fix: was falling through to a confusing NameError on norm_matrix
        raise ValueError("unsupported normalisation method: %s" % method)

    print(norm_matrix.shape)
    io_funcs.array_to_binary_file(norm_matrix, stats_file)

    return scaler
Пример #58
0
    def extract_durational_features(self, dur_file_name=None, dur_data=None):
        """Expand per-phone (or per-state) durations into frame-level features.

        Either ``dur_file_name`` (binary, loaded with dur_dim=1) or
        ``dur_data`` must be supplied; the file takes precedence.
        Returns an array of shape (total_frames, self.frame_feature_size)
        whose layout depends on ``self.subphone_feats``:
        "coarse_coding" -> 4 columns, "full" -> 9 columns.

        NOTE(review): the "full" branch indexes dur_data[i, :], i.e. it
        assumes one row of 5 state durations per phone, while the frame
        total above sums dur_data as a flat sequence -- confirm the caller
        passes the matching layout for each mode.
        """
        if dur_file_name:
            io_funcs = BinaryIOCollection()
            dur_dim = 1 ## hard coded for now
            dur_data = io_funcs.load_binary_file(dur_file_name, dur_dim)

        ph_count = len(dur_data)
        total_num_of_frames = int(sum(dur_data))

        duration_feature_array = numpy.zeros((total_num_of_frames, self.frame_feature_size))

        frame_index=0
        for i in range(ph_count):
            frame_number = int(dur_data[i])
            if self.subphone_feats == "coarse_coding":
                # 3 coarse-coding values per frame plus the phone length
                cc_feat_matrix = self.extract_coarse_coding_features_relative(frame_number)

                for j in range(frame_number):
                    duration_feature_array[frame_index, 0] = cc_feat_matrix[j, 0]
                    duration_feature_array[frame_index, 1] = cc_feat_matrix[j, 1]
                    duration_feature_array[frame_index, 2] = cc_feat_matrix[j, 2]
                    duration_feature_array[frame_index, 3] = float(frame_number)
                    frame_index+=1

            elif self.subphone_feats == 'full':
                state_number = 5 # hard coded here 
                phone_duration = sum(dur_data[i, :])
                # frames consumed by states preceding the current one
                state_duration_base = 0
                for state_index in xrange(1, state_number+1):
                    state_index_backward = (state_number - state_index) + 1
                    frame_number = int(dur_data[i][state_index-1])
                    for j in xrange(frame_number):
                        duration_feature_array[frame_index, 0] = float(j+1) / float(frame_number)   ## fraction through state (forwards)
                        duration_feature_array[frame_index, 1] = float(frame_number - j) / float(frame_number)  ## fraction through state (backwards)
                        duration_feature_array[frame_index, 2] = float(frame_number)  ## length of state in frames
                        duration_feature_array[frame_index, 3] = float(state_index)   ## state index (counting forwards)
                        duration_feature_array[frame_index, 4] = float(state_index_backward) ## state index (counting backwards)
    
                        duration_feature_array[frame_index, 5] = float(phone_duration)   ## length of phone in frames
                        duration_feature_array[frame_index, 6] = float(frame_number) / float(phone_duration)   ## fraction of the phone made up by current state
                        duration_feature_array[frame_index, 7] = float(phone_duration - j - state_duration_base) / float(phone_duration) ## fraction through phone (forwards)
                        duration_feature_array[frame_index, 8] = float(state_duration_base + j + 1) / float(phone_duration)  ## fraction through phone (backwards)
                        frame_index+=1
                    
                    state_duration_base += frame_number

        return duration_feature_array
Пример #59
0
def read_data_from_file_list(in_file_list, dim, frame_buffer_size=500000):
    """Concatenate frames from a list of binary feature files.

    :param in_file_list: list of binary feature file paths
    :param dim: feature dimension per frame
    :param frame_buffer_size: capacity of the preallocated frame buffer
        (generalises the previously hard-coded 500000; total frames across
        all files must not exceed it)
    :returns: (total_frames x dim) array of all frames, in file order
    """
    io_funcs = BinaryIOCollection()

    temp_set = np.empty((frame_buffer_size, dim))

    ### read file by file ###
    current_index = 0
    for i in tqdm.tqdm(range(len(in_file_list))):
        in_file_name = in_file_list[i]
        in_features, frame_number = io_funcs.load_binary_file_frame(in_file_name, dim)
        temp_set[current_index:current_index+frame_number, ] = in_features
        current_index += frame_number

    # trim the buffer to the frames actually read
    temp_set = temp_set[0:current_index, ]

    return temp_set