def batch_generator(self, full_sequences, rnd):
    """Yield (input, target) one-hot encoded batch pairs indefinitely.

    Targets are the input fragments shifted forward by one timestep.
    Fragment indices may be shuffled once up front; batches are then
    cycled forever, skipping any ragged final batch.
    """
    fragment_list = list(self.fragment_indices(full_sequences))
    if self.randomize_batch_order:
        rnd.shuffle(fragment_list)

    all_batches = list(partition_all(self.batch_size, fragment_list))
    # NOTE(review): each rank takes a *suffix* of the batch list, so the
    # slices of different ranks overlap rather than forming disjoint
    # shards — confirm this staggering is intended.
    offset = len(all_batches) // self.num_gpus * self.train_rank
    for batch in cycle(all_batches[offset:]):
        if len(batch) < self.batch_size:
            # Drop the ragged final batch.
            continue
        inputs = np.array(
            [self.one_hot(
                full_sequences[frag[0]][frag[1]:frag[1] + self.fragment_length])
             for frag in batch],
            dtype='uint8')
        targets = np.array(
            [self.one_hot(
                full_sequences[frag[0]][
                    frag[1] + 1:frag[1] + self.fragment_length + 1])
             for frag in batch],
            dtype='uint8')
        yield inputs, targets
def batch_generator(full_sequences, fragment_length, batch_size,
                    fragment_stride, nb_output_bins, randomize_batch_order,
                    _rnd):
    """Yield (input, target) one-hot encoded batch pairs indefinitely.

    Targets are the input fragments shifted forward by one timestep.
    When ``randomize_batch_order`` is true, ``_rnd`` shuffles the fragment
    indices once before batching; batches are then cycled forever and any
    ragged final batch is skipped.
    """
    fragment_list = list(
        fragment_indices(full_sequences, fragment_length, batch_size,
                         fragment_stride, nb_output_bins))
    if randomize_batch_order:
        _rnd.shuffle(fragment_list)

    for batch in cycle(partition_all(batch_size, fragment_list)):
        if len(batch) < batch_size:
            # Drop the ragged final batch.
            continue
        inputs = np.array(
            [one_hot(full_sequences[frag[0]][frag[1]:frag[1] + fragment_length])
             for frag in batch],
            dtype='uint8')
        targets = np.array(
            [one_hot(full_sequences[frag[0]][
                frag[1] + 1:frag[1] + fragment_length + 1])
             for frag in batch],
            dtype='uint8')
        yield inputs, targets
def batch_generator(full_sequences, fragment_length, batch_size,
                    fragment_stride, nb_output_bins, learn_all_outputs,
                    randomize_batch_order=False, _rnd=None):
    """Yield (input, target) one-hot encoded batch pairs indefinitely.

    Each index entry produced by ``fragment_indices`` is a pair of
    ``(sequence, start, end)`` triples: ``e[0]`` addresses the input
    fragment, ``e[1]`` the (shifted) target fragment. Batches are cycled
    forever; any ragged final batch is skipped.

    Args:
        full_sequences: indexable collection of sequences to slice from.
        fragment_length: length of each fragment (forwarded to
            ``fragment_indices``).
        batch_size: number of fragments per yielded batch.
        fragment_stride: stride between fragments (forwarded to
            ``fragment_indices``).
        nb_output_bins: number of output bins (forwarded to
            ``fragment_indices``).
        learn_all_outputs: accepted for interface compatibility; not used
            in this function body.
        randomize_batch_order: when True, shuffle the fragment indices once
            before batching (implements the former ``TODO: shuffle``; the
            default False preserves the original behavior).
        _rnd: ``random.Random``-like source used for shuffling; required
            when ``randomize_batch_order`` is True.
    """
    indices = list(
        fragment_indices(full_sequences, fragment_length, batch_size,
                         fragment_stride, nb_output_bins))
    if randomize_batch_order:
        _rnd.shuffle(indices)

    for batch in cycle(partition_all(batch_size, indices)):
        if len(batch) < batch_size:
            # Drop the ragged final batch.
            continue
        yield np.array(
            [one_hot(full_sequences[e[0][0]][e[0][1]:e[0][2]])
             for e in batch],
            dtype='uint8'), np.array(
            [one_hot(full_sequences[e[1][0]][e[1][1]:e[1][2]])
             for e in batch],
            dtype='uint8')
def batch_generator(full_sequences, fragment_length, batch_size,
                    fragment_stride, nb_output_bins, randomize_batch_order,
                    _rnd):
    """Yield (input, target) one-hot encoded batch pairs indefinitely.

    Targets are the input fragments shifted forward by one timestep.
    Under multi-GPU training (``g_multi_gpu``) each Horovod rank starts at
    its own offset into the batch list; batches are cycled forever and any
    ragged final batch is skipped.
    """
    index_list = list(
        fragment_indices(full_sequences, fragment_length, batch_size,
                         fragment_stride, nb_output_bins))

    global g_multi_gpu
    if g_multi_gpu:
        # Horovod reports how many workers exist and which one we are.
        import horovod.keras as hvd
        gpu_count, current_gpu = hvd.size(), hvd.rank()
    else:
        gpu_count, current_gpu = 1, 0

    if randomize_batch_order:
        _rnd.shuffle(index_list)

    all_batches = list(partition_all(batch_size, index_list))
    # NOTE(review): each rank takes a *suffix* of the batch list, so the
    # slices of different ranks overlap rather than forming disjoint
    # shards — confirm this staggering is intended.
    offset = len(all_batches) // gpu_count * current_gpu
    for batch in cycle(all_batches[offset:]):
        if len(batch) < batch_size:
            # Drop the ragged final batch.
            continue
        inputs = np.array(
            [one_hot(full_sequences[frag[0]][frag[1]:frag[1] + fragment_length])
             for frag in batch],
            dtype='uint8')
        targets = np.array(
            [one_hot(full_sequences[frag[0]][
                frag[1] + 1:frag[1] + fragment_length + 1])
             for frag in batch],
            dtype='uint8')
        yield inputs, targets
def get_epoch_iterator(self, **kwargs):
    """Return an iterator of merged labeled/unlabeled example dicts.

    The labeled stream is cycled so it never runs dry; the epoch length is
    therefore governed entirely by the unlabeled stream.
    """
    unlabeled_stream = self.ds_unlabeled.get_epoch_iterator(**kwargs)
    labeled_stream = self.ds_labeled.get_epoch_iterator(**kwargs)
    # Both datasets must produce the same iterator type to merge cleanly.
    assert type(labeled_stream) == type(unlabeled_stream)
    return imap(self.mergedicts, cycle(labeled_stream), unlabeled_stream)
def get_request_iterator(self):
    """Return an endless iterator of batch-sized index lists.

    Careful: this is indeed infinite — the indices are cycled forever, so
    callers must bound their own consumption.
    """
    endless_indices = cycle(self.indices)
    batched = partition_all(self.batch_size, endless_indices)
    return imap(list, batched)
def get_epoch_iterator(self, **kwargs):
    """Return an iterator of merged labeled/unlabeled example dicts.

    The labeled stream is cycled so it never runs dry; the epoch length is
    therefore governed entirely by the unlabeled stream.
    """
    unlabeled_stream = self.ds_unlabeled.get_epoch_iterator(**kwargs)
    labeled_stream = self.ds_labeled.get_epoch_iterator(**kwargs)
    # Both datasets must produce the same iterator type to merge cleanly.
    assert type(labeled_stream) == type(unlabeled_stream)
    return map(self.mergedicts, cycle(labeled_stream), unlabeled_stream)