def batch_sequence(stop_event, queue, data, feature_list, features2spk, batch_size=128, min_len=200, max_len=400, shuffle=True, seed=0):
    """Read the listed features batch by batch and push them onto a queue.

    Used in KaldiDataSeqQueue. Only full batches are produced: the number of
    batches is ``int(len(feature_list) / batch_size)``, so trailing entries
    that do not fill a whole batch are never read. Once every batch has been
    queued, ``stop_event`` is set.

    Args:
        stop_event: An event that is set when the reading is finished.
        queue: The queue receiving ``(features, labels)`` tuples.
        data: The kaldi data directory (handed to ``FeatureReader``).
        feature_list: The features this process should read, in order.
        features2spk: A dict mapping each feature entry to its speaker index.
        batch_size: The number of features per batch.
        min_len: The minimum length of the features.
        max_len: The maximum length of the features.
        shuffle: Load the feature from the 0-th frame or a random frame.
        seed: The seed of the random generator that draws the batch lengths.
    """
    # Read the comment in batch_random
    rd = random.Random(os.urandom(4))
    rd.seed(seed)

    reader = FeatureReader(data)
    total_batches = int(len(feature_list) / batch_size)

    for batch_idx in range(total_batches):
        offset = batch_idx * batch_size
        batch_feats = [feature_list[offset + k] for k in range(batch_size)]

        # Draw a length, then shrink it to the shortest utterance of the batch
        # so that every utterance can supply a segment of that length.
        seg_len = rd.randint(min_len, max_len)
        for feat in batch_feats:
            # The first space-separated token is the key into utt2num_frames.
            n_frames = reader.utt2num_frames[feat.split(' ')[0]]
            if n_frames < seg_len:
                seg_len = n_frames

        features = np.zeros((batch_size, seg_len, reader.dim), dtype=np.float32)
        labels = np.zeros(batch_size, dtype=np.int32)
        for row, feat in enumerate(batch_feats):
            segment, _ = reader.read_segment(feat, seg_len, shuffle=shuffle)
            features[row, :, :] = segment
            labels[row] = features2spk[feat]

        queue.put((features, labels))

    # Tell the consumer that no more batches will arrive.
    stop_event.set()
    print("The process {} is about to exit.".format(os.getpid()))
def batch_random(stop_event, queue, data, spk2features, num_total_speakers, num_speakers=10, num_segments=10, min_len=200, max_len=400, shuffle=True, seed=0):
    """Load features and fill a queue. Used in KaldiDataRandomQueue

    Until ``stop_event`` is set, repeatedly builds a batch of
    ``num_speakers * num_segments`` segments that all share one randomly
    drawn length, and puts ``(features, labels)`` on ``queue``. Once the
    event is set, the function waits 3 seconds, discards whatever is left in
    the queue and returns.

    Args:
        stop_event: An event to tell the process to stop.
        queue: A queue to put the data.
        data: The kaldi data directory.
        spk2features: A dict from speaker index to the segments.
        num_total_speakers: The total number of speakers.
        num_speakers: The number of speakers in the batch.
        num_segments: The number of segments per speaker.
        min_len: The minimum length of the features.
        max_len: The maximum length of the features.
        shuffle: Load the feature from the 0-th frame or a random frame.
        seed: The value used to generate the random seed.

    Raises:
        ValueError: If, for the drawn batch length, no speaker has any
            utterance with more frames than that length. Previously this
            situation looped forever or failed with an opaque IndexError.
    """
    # TODO: If you use numpy.random in the sub-process, it is better to use:
    #   local_state = np.random.RandomState(seed)
    #   print local_state.uniform(0, 1, 5)
    #
    # The re-seed is necessary if numpy.random is used
    # You can use os.urandom to generate the `random` seed.
    rd = random.Random(os.urandom(4))
    # The explicit seed replaces the urandom initialisation above, so the
    # stream of random draws depends only on `seed`.
    rd.seed(seed)

    feature_reader = FeatureReader(data)

    def eligible_segments(spk_id, length):
        # Segments of `spk_id` whose utterance has strictly more than `length`
        # frames. The first space-separated token of an entry is the key into
        # utt2num_frames.
        return [
            feat for feat in spk2features[spk_id]
            if feature_reader.utt2num_frames[feat.split(' ')[0]] > length
        ]

    speakers = list(spk2features.keys())
    if num_total_speakers < num_speakers:
        print(
            "[Warning] The number of available speakers are less than the required speaker. Some speakers will be duplicated."
        )
        speakers = speakers * (num_speakers // num_total_speakers + 1)
    # Now we have enough speakers

    # Built once: the speaker pool never changes after this point.
    distinct_speakers = set(speakers)

    while not stop_event.is_set():
        # The speaker ids selected for this batch.
        batch_speakers = rd.sample(speakers, num_speakers)
        # One segment length, drawn from [min_len, max_len], for the whole batch.
        batch_length = rd.randint(min_len, max_len)
        # (batch_size, frame_length, feat_dim)
        features = np.zeros(
            (num_speakers * num_segments, batch_length, feature_reader.dim),
            dtype=np.float32)
        # (batch_size)
        labels = np.zeros(num_speakers * num_segments, dtype=np.int32)

        # Speakers found to have no long-enough utterance for this batch length.
        unsuitable = set()

        for i, speaker in enumerate(batch_speakers):
            # The length may be larger than the utterance length. A check should be applied first.
            spk = speaker
            feature_list = eligible_segments(spk, batch_length)
            while not feature_list:
                # The speaker is not appropriate for this batch. Resample the speaker
                unsuitable.add(spk)
                if unsuitable >= distinct_speakers:
                    raise ValueError(
                        "No speaker has an utterance longer than {} frames; "
                        "cannot build a batch.".format(batch_length))
                candidates = list(distinct_speakers - set(batch_speakers))
                if not candidates:
                    # Every distinct speaker is already in the batch (possible
                    # when speakers are duplicated); reuse any speaker that
                    # has not been rejected for this batch length.
                    candidates = list(distinct_speakers - unsuitable)
                spk = rd.choice(candidates)
                batch_speakers[i] = spk
                feature_list = eligible_segments(spk, batch_length)

            labels[i * num_segments:(i + 1) * num_segments] = spk

            # If the number is not enough, replicate the existing list.
            if len(feature_list) < num_segments:
                feature_list *= num_segments // len(feature_list) + 1
            # Now the length of the list must be greater than the sample size.
            # Pick num_segments entries of this speaker as the speaker features.
            speaker_features = rd.sample(feature_list, num_segments)
            for j, feat in enumerate(speaker_features):
                features[i * num_segments + j, :, :], _ = feature_reader.read_segment(
                    feat, batch_length, shuffle=shuffle)

        queue.put((features, labels))

    # NOTE(review): presumably the pause lets the consumer finish before the
    # leftover batches are discarded so that this process can exit - confirm.
    time.sleep(3)
    while not queue.empty():
        try:
            queue.get(block=False)
        except Exception:
            # Best-effort drain: the queue may have been emptied between the
            # empty() check and get(). Unlike a bare `except:`, this does not
            # swallow KeyboardInterrupt or SystemExit.
            pass
    print("The process {} is about to exit.".format(os.getpid()))