def call(self, audio, forward=True):
    """Split ``audio`` into a retained part and an early-output chunk.

    Cropping1D only trims along axis 1, so the tensor is permuted to put
    axis 2 there, cropped, and permuted back.
    NOTE(review): presumably ``audio`` is (batch, time, channels) -- confirm
    against the caller.

    :param audio: rank-3 tensor to split.
    :param forward: must be exactly ``True``; the reverse direction is not
        implemented.
    :return: tuple ``(audio, output_chunk)``. ``audio`` has its first
        ``self.n_early_size`` entries along axis 2 removed; ``output_chunk``
        is the input with its last ``self.n_remaining_channels`` entries
        along axis 2 removed, flattened to ``(batch, dim1 * dim2, 1)``.
    :raises NotImplementedError: if ``forward`` is not ``True``.
    """
    if forward is not True:
        raise NotImplementedError(
            'The false forward boolean for this layer is not working yet')

    # Bring axis 2 onto axis 1 so that Cropping1D can act on it.
    swapped = layers.Permute(dims=(2, 1), dtype=self.dtype)(audio)

    # Early chunk: drop the trailing n_remaining_channels entries.
    early = layers.Cropping1D(cropping=(0, self.n_remaining_channels),
                              dtype=self.dtype)(swapped)
    # Retained part: drop the leading n_early_size entries.
    retained = layers.Cropping1D(cropping=(self.n_early_size, 0),
                                 dtype=self.dtype)(swapped)

    # Restore the original axis order for both pieces.
    retained = layers.Permute(dims=(2, 1), dtype=self.dtype)(retained)
    early = layers.Permute(dims=(2, 1), dtype=self.dtype)(early)

    # Collapse the two trailing axes of the early chunk into one, keeping a
    # singleton last axis. Uses the static shape, so all dims must be known.
    flat_shape = [early.shape[0], early.shape[1] * early.shape[2], 1]
    early = tf.reshape(early, flat_shape)

    return retained, early
def get_last_time_step_aux(self, sequence):
    """
    Extract the aux features of the final time step of `sequence`.

    The model predicts fundamental features only, so to forecast further
    ahead each prediction is paired with the last available aux features.
    This method builds the keras ops that pull those aux features out.

    :param sequence: keras tensor; the reshape below requires its last time
        step to hold `self.n_inputs` values
    :return: keras tensor of shape (batch, 1, n_inputs - n_outputs)
    """
    # keep only the final time step
    final_step = layers.Cropping1D(cropping=(self.seq_len - 1, 0),
                                   name='last_time_step_aux')(sequence)

    # Cropping1D trims along axis 1 only, so lay the features out along that
    # axis, drop the first n_outputs of them and restore the layout.
    as_column = layers.Reshape(target_shape=(self.n_inputs, 1))(final_step)
    aux_column = layers.Cropping1D(cropping=(self.n_outputs, 0),
                                   name='aux_features')(as_column)

    n_aux = self.n_inputs - self.n_outputs
    return layers.Reshape(target_shape=(1, n_aux))(aux_column)
def model_builder(hp, input_length=None, target_size=512):
    """Build and compile a small 1D CNN for hyperparameter search.

    The network takes 3-channel sequences, optionally center-crops them to
    ``target_size`` steps, rescales by 1/255, applies three strided Conv1D
    layers, global average pooling and a 16-unit Dense layer, and ends in a
    3-unit sigmoid output. A model summary is printed as a side effect.

    Args:
        hp: Tuner hyperparameter object; only ``hp.Choice`` is used, to pick
            the Adam learning rate from 1e-2, 1e-3 and 1e-4.
        input_length: Number of steps per input sequence, or ``None`` (the
            default) for variable-length input.
        target_size: Length to center-crop to when ``input_length`` is known
            and larger than this value.

    Returns:
        The compiled ``keras.Model``.
    """
    import math

    from tensorflow.keras import layers

    num_classes = 3

    # Sequences with 3 channels; the length may be left unspecified.
    inputs = keras.Input(shape=(input_length, 3))
    x = inputs

    # Cropping1D needs concrete crop amounts, so a center crop can only be
    # built when the sequence length is known statically. The previous code
    # computed `inputs.shape[1] - targetsize` unconditionally, which raises
    # TypeError for a `None` length. Without a known length the crop is
    # skipped; the rest of the network is length-agnostic thanks to the
    # global pooling layer.
    if input_length is not None and input_length > target_size:
        surplus = input_length - target_size
        x = layers.Cropping1D(
            cropping=(math.floor(surplus / 2), math.ceil(surplus / 2)))(x)

    # Rescale values to [0, 1]
    x = Rescaling(scale=1. / 255)(x)

    # Three identical strided convolutions, each halving the length
    for _ in range(3):
        x = layers.Conv1D(filters=32, kernel_size=3, strides=2,
                          padding='SAME', activation='relu')(x)

    # Global average pooling yields a flat feature vector
    x = layers.GlobalAveragePooling1D()(x)
    x = layers.Dense(16, activation='relu')(x)

    # Classifier head
    outputs = layers.Dense(num_classes, activation='sigmoid')(x)
    model = keras.Model(inputs=inputs, outputs=outputs)
    model.summary()

    # Tune the learning rate for the optimizer:
    # choose an optimal value from 0.01, 0.001, or 0.0001
    hp_learning_rate = hp.Choice('learning_rate', values=[1e-2, 1e-3, 1e-4])

    # The output layer already applies a sigmoid, so the loss must treat the
    # predictions as probabilities; from_logits=True would apply the sigmoid
    # a second time.
    # NOTE(review): SparseCategoricalAccuracy expects integer class labels
    # while BinaryCrossentropy expects per-class targets -- confirm which
    # label format the training data uses.
    model.compile(
        optimizer=keras.optimizers.Adam(learning_rate=hp_learning_rate),
        loss=keras.losses.BinaryCrossentropy(from_logits=False),
        metrics=[keras.metrics.SparseCategoricalAccuracy(name='acc')])
    return model
def create_vae(self):
    """Assemble the encoder, the decoder and the end-to-end VAE model.

    Encoder: four stride-2 Conv1D / BatchNormalization / ReLU stages over an
    input of shape ``(window_samples, 1)``, flattened into the parameters of
    an ``IndependentNormal`` over ``latent_size`` dimensions, with a KL
    regularizer (weight 2.0) against ``self.prior``.

    Decoder: a Dense seed of length 1, four UpSampling1D / Conv1D /
    BatchNormalization / ReLU stages, a crop towards ``window_samples``, a
    1x1 convolution, and a Dense layer producing the parameters of an
    ``IndependentNormal`` with event shape ``(window_samples, 1)``.

    :return: tuple ``(vae, encoder, decoder)`` of keras models, where ``vae``
        feeds the encoder's first output into the decoder.
    """
    hparams = self.hparams
    window = hparams['window_samples']
    width = hparams['model_scale']
    depth = 4

    # ---- encoder ----
    enc_input = tfkl.Input(shape=(window, 1))
    h = enc_input
    # Channel count grows as width * 2, 3, ..., depth + 1.
    for factor in range(2, depth + 2):
        h = tfkl.Conv1D(width * factor, kernel_size=2, strides=2,
                        padding='same')(h)
        h = tfkl.BatchNormalization(axis=1)(h)
        h = tfkl.ReLU()(h)
    h = tfkl.Flatten()(h)

    n_latent_params = tfpl.IndependentNormal.params_size(
        hparams['latent_size'])
    h = tfkl.Dense(n_latent_params, activation=None)(h)
    kl_penalty = tfpl.KLDivergenceRegularizer(self.prior, weight=2.0)
    h = tfpl.IndependentNormal(hparams['latent_size'],
                               activity_regularizer=kl_penalty)(h)
    encoder = tfk.Model(inputs=enc_input, outputs=h)

    # ---- decoder ----
    dec_input = tfkl.Input(shape=(hparams['latent_size'], ))
    seed_width = width * (depth + 1)
    d = tfkl.Dense(seed_width, activation='relu')(dec_input)
    d = tfkl.Reshape(target_shape=(1, seed_width))(d)
    # Channel count shrinks as width * depth, ..., width * 1 while each
    # stage doubles the length.
    for factor in range(depth, 0, -1):
        d = tfkl.UpSampling1D(size=2)(d)
        d = tfkl.Conv1D(width * factor, kernel_size=2, strides=1,
                        padding='same')(d)
        d = tfkl.BatchNormalization(axis=1)(d)
        d = tfkl.ReLU()(d)

    # Trim whatever the upsampling produced beyond the window length.
    overshoot = d.shape[1] - window
    d = tfkl.Cropping1D((0, overshoot))(d)
    d = tfkl.Conv1D(1, kernel_size=1, strides=1, padding='same')(d)
    d = tfkl.Flatten()(d)

    event_shape = (hparams['window_samples'], 1)
    d = tfkl.Dense(tfpl.IndependentNormal.params_size(event_shape),
                   activation='tanh')(d)
    d = tfpl.IndependentNormal(event_shape)(d)
    decoder = tfk.Model(inputs=dec_input, outputs=d)

    # ---- full model ----
    reconstruction = decoder(encoder.outputs[0])
    vae = tfk.Model(inputs=encoder.inputs, outputs=reconstruction)
    return vae, encoder, decoder
def get_last_time_step(self, sequence, count):
    """
    Keep only the final time step of `sequence`, with all its features.

    Implemented with a keras.layers.Cropping1D that removes the first
    `self.seq_len - 1` steps.

    :param sequence: keras tensor holding the sequence
    :param count: output counter, used to give the layer a unique name
    :return: keras tensor containing the last time step of the sequence
    """
    layer_name = 'last_time_step_%i' % count
    keep_last = layers.Cropping1D(cropping=(self.seq_len - 1, 0),
                                  name=layer_name)
    return keep_last(sequence)
def infer(self, spect, sigma=1.0):
    """
    Run the network in the reverse direction to produce audio.

    The spectrogram is upsampled, trimmed to the segment length and grouped;
    audio starts as Gaussian noise scaled by ``sigma`` and is passed through
    the flows from last to first with ``training=False``. At every
    ``n_early_every``-th flow (except flow 0) fresh scaled noise with
    ``n_early_size`` channels is concatenated in front of the audio.

    :param spect: spectrogram tensor; reshaped to (63, mel_channels) per
        example, so the frame count is fixed at 63.
    :param sigma: scale applied to every noise sample.
    :return: tensor reshaped to ``segment_length`` values per example.
    """
    segment_length = self.hparams['segment_length']
    n_frames = segment_length // self.n_group
    noise_dtype = self.hparams['ftype']

    # Condition: fix the rank, upsample, trim the overshoot, then group.
    spect = layers.Reshape(
        target_shape=[63, self.hparams['mel_channels']])(spect)
    spect = self.upsampling(spect)
    overshoot = spect.shape[1] - segment_length
    spect = layers.Cropping1D(cropping=(0, overshoot))(spect)
    spect = layers.Reshape(
        [n_frames, self.mel_channels * self.n_group])(spect)

    # Static batch size; every noise tensor below shares it.
    batch = spect.shape[0]

    audio = tf.random.normal(
        shape=[batch, n_frames, self.n_remaining_channels],
        dtype=noise_dtype)
    audio = audio * sigma

    # Walk the flows backwards, undoing each one.
    for index in range(self.n_flows - 1, -1, -1):
        audio = self.waveNetAffineBlocks[index]((audio, spect),
                                                training=False)
        audio = self.weightNormInv1x1ConvLayers[index](audio,
                                                       training=False)

        needs_early_noise = (index % self.n_early_every == 0) and (index > 0)
        if needs_early_noise:
            z = tf.random.normal(
                shape=[batch, n_frames, self.n_early_size],
                dtype=noise_dtype)
            audio = layers.Concatenate(axis=2)([z * sigma, audio])

    return layers.Reshape(target_shape=[segment_length])(audio)
def get_deepripe_models(strand):
    """Load the DeepRiPe model files and wrap them in one string-input model.

    Ten eCLIP and three PAR-CLIP ``.h5`` models are loaded from
    ``data/deepripe_models/``. The wrapper one-hot encodes a DNA string of
    ``seqlen + 4`` positions, evaluates every model on four windows shifted
    by 0..3 positions, concatenates the predictions per shift and averages
    over the shifts. The PAR-CLIP models see the same windows with a further
    25 positions cropped from each end. A model summary is printed.

    :param strand: ``'plus'`` encodes the sequence as given; any other value
        encodes its reverse complement (minus strand).
    :return: tuple ``(model, clabels)``; ``clabels`` lists the label names in
        the order HepG2 eCLIP, K562 eCLIP, PAR-CLIP.
    """
    basedir = 'data/deepripe_models/'
    seqlen = 200
    n_shifts = 4
    padded_len = seqlen + n_shifts

    # dummy functions to avoid import errors when deserializing the models
    def precision(y_true, y_pred):
        return K.mean(y_true)

    def recall(y_true, y_pred):
        return K.mean(y_true)

    custom_objects = {'precision': precision, 'recall': recall}

    # ENCODE HepG2, K562
    eclip_dirs = [basedir + 'eclip_model_encodeHepG2_high1_seq.h5',
                  basedir + 'eclip_model_encodeHepG2_high2_seq.h5',
                  basedir + 'eclip_model_encodeHepG2_mid1_seq.h5',
                  basedir + 'eclip_model_encodeHepG2_mid2_seq.h5',
                  basedir + 'eclip_model_encodeHepG2_low_seq.h5',
                  basedir + 'eclip_model_encodeK562_high1_seq.h5',
                  basedir + 'eclip_model_encodeK562_high2_seq.h5',
                  basedir + 'eclip_model_encodeK562_mid1_seq.h5',
                  basedir + 'eclip_model_encodeK562_mid2_seq.h5',
                  basedir + 'eclip_model_encodeK562_low_seq.h5']

    # parclip
    parclip_dirs = [basedir + 'parclip_model_high_seq.h5',
                    basedir + 'parclip_model_med_seq.h5',
                    basedir + 'parclip_model_low_seq.h5']

    eclip_models = [load_model(m, custom_objects=custom_objects)
                    for m in eclip_dirs]
    parclip_models = [load_model(m, custom_objects=custom_objects)
                      for m in parclip_dirs]

    # rename to avoid conflicts later
    for i, m in enumerate(eclip_models):
        m._name = 'eclip_{}'.format(i)
    for i, m in enumerate(parclip_models):
        m._name = 'parclip_{}'.format(i)

    in_string = layers.Input((1,), name='dna_string', dtype=tf.string)

    # The two strands differ only in how the input string is encoded; the
    # rest of the graph is shared.
    if strand == 'plus':
        # the forward model (plus strand)
        print('using plus-stand')
        in_seq = DnaOneHot(padded_len)(in_string)
    else:
        # the reverse-complement model (minus strand)
        print('using minus-stand')
        in_seq = DnaOneHot(padded_len, reverse=True,
                           complement=True)(in_string)

    # eclip input
    seq_squeeze = layers.Reshape((padded_len, 4), name='rna_reshape')(in_seq)
    # parclip input
    seq_squeeze_crp = layers.Cropping1D(cropping=(25, 25))(seq_squeeze)

    in_sq = []      # shifted
    in_sq_crp = []  # shifted, cropped
    for i in range(n_shifts):
        # Window i drops i positions at the start and n_shifts - i at the
        # end, so every window has the same length.
        in_sq.append(layers.Cropping1D(
            cropping=(i, n_shifts - i),
            name='in_sq_{}'.format(i))(seq_squeeze))
        in_sq_crp.append(layers.Cropping1D(
            cropping=(i, n_shifts - i),
            name='in_sq_cr_{}'.format(i))(seq_squeeze_crp))

    # out[i] collects the predictions of every model for shift i
    out = [[] for _ in range(n_shifts)]
    for m in eclip_models:
        for i in range(n_shifts):
            out[i].append(m(in_sq[i]))
    for m in parclip_models:
        for i in range(n_shifts):
            out[i].append(m(in_sq_crp[i]))

    out_concat = [layers.Concatenate(name='concat_{}'.format(i))(out[i])
                  for i in range(n_shifts)]
    out_avg = layers.Average(name='avg_pred')(out_concat)

    # Both strands have always used the name 'forward_model'; kept as is so
    # anything that looks the model up by name keeps working.
    model = Model([in_string], [out_avg], name='forward_model')

    # labels
    hepg_names = ['DDX3X', 'PCBP2', 'FAM120A', 'HNRNPL', 'RBFOX2', 'PTBP1',
                  'MATR3', 'EFTUD2', 'PRPF4', 'UPF1', 'GRWD1', 'PRPF8',
                  'PPIG', 'CSTF2T', 'QKI', 'U2AF2', 'SUGP2', 'HNRNPM', 'AQR',
                  'BCLAF1', 'LSM11', 'NKRF', 'SUB1', 'NCBP2', 'UCHL5',
                  'LIN28B', 'IGF2BP3', 'SF3A3', 'AGGF1', 'DROSHA', 'DDX59',
                  'CSTF2', 'DKC1', 'EIF3H', 'FUBP3', 'SFPQ', 'HNRNPC', 'ILF3',
                  'TIAL1', 'HLTF', 'ZNF800', 'PABPN1', 'YBX3', 'FXR2',
                  'GTF2F1', 'IGF2BP1', 'HNRNPK', 'XPO5', 'RPS3', 'SF3B4',
                  'LARP4', 'BUD13', 'SND1', 'G3BP1', 'AKAP1', 'KHSRP',
                  'RBM22', 'GRSF1', 'CDC40', 'NOLC1', 'FKBP4', 'DGCR8',
                  'ZC3H11A', 'XRN2', 'SLTM', 'DDX55', 'TIA1', 'SRSF1',
                  'U2AF1', 'RBM15']
    hepg_names = [n + '_hepg2' for n in hepg_names]

    k562_names = ['BUD13', 'PTBP1', 'DDX24', 'EWSR1', 'RBM15', 'SF3B4',
                  'YBX3', 'UCHL5', 'KHSRP', 'ZNF622', 'NONO', 'EXOSC5',
                  'PRPF8', 'CSTF2T', 'AQR', 'UPF1', 'U2AF2', 'AKAP8L',
                  'METAP2', 'SMNDC1', 'GEMIN5', 'HNRNPK', 'SLTM', 'SRSF1',
                  'FMR1', 'SAFB2', 'DROSHA', 'RPS3', 'IGF2BP2', 'ILF3',
                  'RBFOX2', 'QKI', 'PCBP1', 'ZNF800', 'PUM1', 'EFTUD2',
                  'LIN28B', 'AGGF1', 'HNRNPL', 'SND1', 'GTF2F1', 'EIF4G2',
                  'TIA1', 'TARDBP', 'FXR2', 'HNRNPM', 'IGF2BP1', 'PUM2',
                  'FAM120A', 'DDX3X', 'MATR3', 'FUS', 'GRWD1', 'PABPC4',
                  'MTPAP', 'RBM22', 'DHX30', 'DDX6', 'DDX55', 'TRA2A', 'XRN2',
                  'U2AF1', 'LSM11', 'ZC3H11A', 'NOLC1', 'KHDRBS1', 'GPKOW',
                  'DGCR8', 'AKAP1', 'FXR1', 'DDX52', 'AATF']
    k562_names = [n + '_k562' for n in k562_names]

    parclip_names = ['DND1', 'CPSF7', 'CPSF6', 'CPSF1', 'CSTF2', 'CSTF2T',
                     'ZC3H7B', 'FMR1iso1', 'RBM10', 'MOV10', 'ELAVL1',
                     'TARDBP', 'ELAVL2', 'ELAVL3', 'ELAVL4', 'RBM20',
                     'IGF2BP1', 'IGF2BP2', 'IGF2BP3', 'EWSR1', 'HNRNPD',
                     'RBPMS', 'SRRM4', 'AGO2', 'NUDT21', 'FIP1L1', 'CAPRIN1',
                     'FMR1iso7', 'FXR2', 'AGO1', 'L1RE1', 'ORF1', 'MBNL1',
                     'P53_NONO', 'PUM2', 'QKI', 'AGO3', 'FUS', 'TAF15',
                     'ZFP36', 'DICER1', 'EIF3A', 'EIF3D', 'EIF3G', 'SSB',
                     'PAPD5', 'CPSF4', 'CPSF3', 'RTCB', 'FXR1', 'NOP58',
                     'NOP56', 'FBL', 'LIN28A', 'LIN28B', 'UPF1', 'G35', 'G45',
                     'XPO5']

    clabels = hepg_names + k562_names + parclip_names

    # summary() prints the table itself and returns None, so wrapping it in
    # print() only added a stray "None" line.
    model.summary()
    sys.stdout.flush()
    return model, clabels
def _build_model(self):
    """
    Build the multi-step RNN forecast model with uncertainty outputs.

    A stack of `self.n_layers` recurrent blocks (LSTM or GRU, chosen by
    `self.config.rnn_cell`) feeds a pair of Dense heads: one for the target
    values and one, passed through SoftPlus, for their variances. The
    recurrent and dropout layers are called with `training=True`.

    Architecture logic for each further forecast step:
    1. Take the last time step of the previous target output and concatenate
       it with the last-time-step aux features, since outputs only contain
       the target fields.
    2. Append that step to the previous input sequence and drop the first
       element so the sequence length stays the same.
    3. Run one more recurrent block and a new pair of heads on the updated
       sequence.

    :return: keras model (not compiled here) whose outputs are
        (target_1, variance_1, target_2, variance_2, ...) where _1 refers to
        the first forecast step, _2 to the second and so on
    :raises NotImplementedError: if `self.config.rnn_cell` is neither 'lstm'
        nor 'gru'
    """
    outputs = []

    # `x` runs through the recurrent stack, while `prev_input` keeps the raw
    # input sequence for the cropping / concatenation steps further down.
    inputs = x = keras.Input(shape=(self.seq_len, self.n_inputs),
                             name='input_financials')
    prev_input = inputs
    last_time_step_aux = self.get_last_time_step_aux(x)

    # lstm_count and output_count only number the layers for naming.
    lstm_count = 0
    output_count = 0
    initializer = self.initializer.get_initializer()

    def recurrent_block(tensor, index):
        # One recurrent layer followed by batch norm and always-on dropout.
        if self.config.rnn_cell == 'lstm':
            rnn_class, layer_name = layers.LSTM, 'lstm_%i' % index
        elif self.config.rnn_cell == 'gru':
            rnn_class, layer_name = layers.GRU, 'gru_%i' % index
        else:
            raise NotImplementedError
        hidden = rnn_class(
            self.n_hidden_units,
            kernel_initializer=initializer,
            kernel_regularizer=tf.keras.regularizers.l2(
                self.config.l2_alpha),
            recurrent_regularizer=tf.keras.regularizers.l2(
                self.config.recurrent_l2_alpha),
            return_sequences=True,
            kernel_constraint=MaxNorm(self.config.max_norm),
            recurrent_dropout=self.config.recurrent_dropout,
            name=layer_name)(tensor, training=True)
        hidden = layers.BatchNormalization()(hidden)
        return layers.Dropout(rate=self.config.dropout)(hidden,
                                                        training=True)

    def add_output_heads(features, index):
        # Target head first, then the SoftPlus-ed variance head.
        target = layers.Dense(self.n_outputs,
                              name='OUTPUT_TARGET_%i' % index)(features)
        variance = layers.Dense(self.n_outputs,
                                name='OUTPUT_VARIANCE_%i' % index)(features)
        variance = SoftPlus()(variance)
        outputs.append(target)
        outputs.append(variance)

    # First forecast step: the stacked recurrent layers.
    for _ in range(self.n_layers):
        lstm_count += 1
        x = recurrent_block(x, lstm_count)

    output_count += 1
    add_output_heads(x, output_count)

    # Remaining forecast steps. output_count and fcst_step are tracked
    # separately because one forecast step has more than one output.
    for fcst_step in range(1, self.forecast_steps):
        output_count += 1

        latest_target = outputs[-2]  # last target output
        last_time_step_fin = self.get_last_time_step(latest_target,
                                                     output_count)

        # Pair the latest prediction with the last available aux features
        # so that it has the same width as an input time step.
        last_time_step = layers.concatenate(
            [last_time_step_fin, last_time_step_aux],
            axis=2,
            name='concat_fin_aux_%i' % fcst_step)

        # Append it to the running input and drop the oldest step.
        cur_input = layers.concatenate(
            [prev_input, last_time_step],
            axis=1,
            name='combine_input_w_last_pred_%i' % fcst_step)
        cur_input = layers.Cropping1D(
            cropping=(1, 0),
            name='updated_input_w_last_pred_%i' % fcst_step)(cur_input)
        prev_input = cur_input

        # Recurrent block for the intermediary prediction.
        lstm_count += 1
        intm = recurrent_block(cur_input, lstm_count)
        add_output_heads(intm, output_count)

    model = keras.Model(inputs=inputs, outputs=outputs)
    return model
def call(self, inputs, training=None):
    """
    Evaluate the model against ``inputs``.

    When ``training`` is falsy the result of :meth:`infer` is returned, which
    runs the layers backwards and inverts them. Otherwise the network runs in
    the training direction: audio and spectrogram are grouped, then at every
    ``n_early_every``-th flow (except flow 0) ``n_early_size`` channels are
    split off as an early output before the flow is applied.

    :param inputs: when training, a mapping with keys ``'wav'`` (audio) and
        ``'mel'`` (spectrogram); otherwise passed to :meth:`infer` unchanged.
    :param training: truthy to run the training direction.
    :return: in training, the early outputs and the final audio concatenated
        along axis 2; otherwise whatever :meth:`infer` returns.
    """
    if not training:
        return self.infer(inputs)

    # Configuration is read from the instance throughout; this method used
    # to mix `self.hparams` with a bare module-level `hparams`.
    segment_length = self.hparams["segment_length"]
    n_frames = segment_length // self.n_group

    audio, spect = inputs['wav'], inputs['mel']

    audio = layers.Reshape(target_shape=[n_frames, self.n_group],
                           dtype=self.dtype)(audio)

    # No reshape happening here, but enforce well defined rank
    # for spect tensor which is required for upsampling layer
    spect = layers.Reshape(target_shape=[63, self.mel_channels],
                           dtype=self.dtype)(spect)

    spect = self.upsampling(spect)
    # Trim whatever the upsampling produced beyond the segment length.
    spect = layers.Cropping1D(
        cropping=(0, spect.shape[1] - segment_length),
        dtype=self.dtype)(spect)

    spect = layers.Reshape(
        [n_frames, self.mel_channels * self.n_group],
        dtype=self.dtype)(spect)

    output_audio = []
    n_remaining_channels = self.n_group

    for index in range(self.n_flows):
        if (index % self.n_early_every == 0) and (index > 0):
            n_remaining_channels -= self.n_early_size

            # Split along axis 2 using layers: Cropping1D only trims axis 1,
            # hence the Permute round-trips. Presumably equivalent to
            # audio[:, :, :n_early_size] (early output) and
            # audio[:, :, n_early_size:] (kept for the remaining flows).
            audio = layers.Permute(dims=(2, 1), dtype=self.dtype)(audio)
            output_chunk = layers.Cropping1D(
                cropping=(0, n_remaining_channels),
                dtype=self.dtype)(audio)
            audio = layers.Cropping1D(cropping=(self.n_early_size, 0),
                                      dtype=self.dtype)(audio)

            audio = layers.Permute(dims=(2, 1), dtype=self.dtype)(audio)
            output_chunk = layers.Permute(dims=(2, 1),
                                          dtype=self.dtype)(output_chunk)
            output_audio.append(output_chunk)

        # No need to output log_det_W or log_s as added as loss in custom
        # layers
        audio = self.weightNormInv1x1ConvLayers[index](audio)
        audio = self.waveNetAffineBlocks[index]((audio, spect),
                                                training=True)

    output_audio.append(audio)

    self.custom_logging()

    return layers.Concatenate(axis=2, dtype=self.dtype)(output_audio)
def __call__(self, p, ip):
    """Adjust ``p`` so that it can be combined with ``ip``.

    Three cases, checked in order:

    * ``p`` is ``None``: ``ip`` is returned.
    * the second-to-last dimensions differ: ``p`` is reduced through two
      stride-2 pooling paths (the second one shifted by one step), each
      projected to ``self.filters // 2`` channels, concatenated and
      batch-normalized.
    * the last dimension differs from ``self.filters``: ``p`` is projected
      with a 1x1 convolution and batch-normalized.

    Otherwise ``p`` is returned unchanged.

    :param p: tensor to adjust, or ``None``.
    :param ip: reference tensor whose shape ``p`` is compared against.
    :return: the adjusted tensor.
    """
    block_id = self.block_id
    ip_shape = ip.shape
    p_shape = None if p is None else p.shape

    def pooled_half(tensor, pool_name, conv_name):
        # Stride-2 subsampling followed by a 1x1 conv to half the filters.
        pooled = layers.AveragePooling1D(1, strides=2, padding='valid',
                                         name=pool_name)(tensor)
        return layers.Conv1D(self.filters // 2, 1, padding='same',
                             use_bias=False, name=conv_name,
                             kernel_initializer='he_normal')(pooled)

    with backend.name_scope('adjust_block'):
        if p is None:
            p = ip

        elif p_shape[-2] != ip_shape[-2]:
            with backend.name_scope('adjust_reduction_block_%s' % block_id):
                p = layers.Activation(
                    'relu', name='adjust_relu_1_%s' % block_id)(p)

                p1 = pooled_half(p,
                                 'adjust_avg_pool_1_%s' % block_id,
                                 'adjust_conv_1_%s' % block_id)

                # Pad one step at the end and drop one at the start: the
                # second path samples the positions the first one skips.
                shifted = layers.ZeroPadding1D((0, 1))(p)
                shifted = layers.Cropping1D((1, 0))(shifted)
                p2 = pooled_half(shifted,
                                 'adjust_avg_pool_2_%s' % block_id,
                                 'adjust_conv_2_%s' % block_id)

                p = layers.concatenate([p1, p2], axis=-1)
                p = layers.BatchNormalization(
                    momentum=0.9997, epsilon=1e-3,
                    name='adjust_bn_%s' % block_id)(p)

        elif p_shape[-1] != self.filters:
            with backend.name_scope('adjust_projection_block_%s' % block_id):
                p = layers.Activation('relu')(p)
                p = layers.Conv1D(
                    self.filters, 1, strides=1, padding='same',
                    name='adjust_conv_projection_%s' % block_id,
                    use_bias=False, kernel_initializer='he_normal')(p)
                p = layers.BatchNormalization(
                    momentum=0.9997, epsilon=1e-3,
                    name='adjust_bn_%s' % block_id)(p)

    return p