def get_batch(sentences, word_padding_idx, pos_padding_idx, chunk_padding_idx,
              character_padding_idx, tag_padding_idx):
    """Assemble one batch of tensors from a list of sentence objects.

    Each sentence is read through its ``word_indexes``, ``pos_indexes``,
    ``chunk_indexes``, ``tag_indexes``, ``character_indexes`` and ``length``
    attributes.  The per-sentence index lists are handed to
    ``utils.zero_padding`` with the matching fill value (presumably this
    makes ragged lists rectangular -- confirm against ``utils``).

    Returns:
        A tuple ``((words, pos, chunks, characters), tags, lengths,
        word_lengths)`` of ``torch.long`` tensors placed on ``const.DEVICE``.
        ``characters`` and ``word_lengths`` are flattened over all sentences
        of the batch (one entry per word slot).
    """

    def as_long_tensor(values):
        return torch.tensor(values, dtype=torch.long, device=const.DEVICE)

    padded_words = utils.zero_padding(
        [s.word_indexes for s in sentences], fill_value=word_padding_idx)
    padded_pos = utils.zero_padding(
        [s.pos_indexes for s in sentences], fill_value=pos_padding_idx)
    padded_chunks = utils.zero_padding(
        [s.chunk_indexes for s in sentences], fill_value=chunk_padding_idx)
    padded_tags = utils.zero_padding(
        [s.tag_indexes for s in sentences], fill_value=tag_padding_idx)

    lengths = [s.length for s in sentences]
    longest = max(lengths)

    # Flatten the per-word character lists of all sentences into one list.
    # Sentences with fewer entries than the longest sentence are topped up
    # with single-element ``[character_padding_idx]`` placeholder words, so
    # every sentence contributes at least ``longest`` word slots.
    flat_characters = []
    for s in sentences:
        flat_characters.extend(s.character_indexes)
        missing = longest - len(s.character_indexes)
        flat_characters.extend([[character_padding_idx]] * missing)

    # Word lengths are measured before the character-level padding below;
    # placeholder words therefore count as length 1.
    word_lengths = [len(characters) for characters in flat_characters]
    padded_characters = utils.zero_padding(
        flat_characters, fill_value=character_padding_idx)

    features = (
        as_long_tensor(padded_words),
        as_long_tensor(padded_pos),
        as_long_tensor(padded_chunks),
        as_long_tensor(padded_characters),
    )
    return (
        features,
        as_long_tensor(padded_tags),
        as_long_tensor(lengths),
        as_long_tensor(word_lengths),
    )
def occlusion_seqs(seq, df, radius, window):
    """Build occluded copies of ``seq`` for 3D (distance) and 1D (window) occlusion.

    Args:
        seq: 2-D array; rows are indexed by sequence position.
        df: DataFrame whose first column holds positions (treated as 1-based,
            the first value is shifted by -1 to get an array index) and whose
            remaining columns hold values compared against ``radius``
            (presumably a pairwise distance matrix -- confirm with caller).
        radius: Entries strictly below this value mark positions as neighbours.
        window: Number of consecutive positions occluded in the 1D variant.

    Returns:
        ``(mod_list_seq_1D, mod_list_seq_3D, m)`` where ``m`` is the boolean
        neighbour mask of shape ``(len(seq), len(seq))`` and both lists hold
        occluded sequences passed through ``zero_padding(..., PADDING)``.
    """
    n_positions = len(seq)
    table = df.values
    first = int(table[0, 0] - 1)  # -1 to get from numeric index to data index
    last = int(table[-1, 0])

    m = np.zeros([n_positions, n_positions], dtype=bool)
    m[first:last, first:last] = table[:, 1:] < radius

    def occlude(mask):
        # Rows selected by the mask are overwritten with ALANINE, the others
        # are copied from the input; the result is padded to PADDING so it
        # fits the model input.
        occluded = np.zeros([seq.shape[0], seq.shape[1]])
        occluded[mask, :] = ALANINE
        occluded[~mask, :] = seq[~mask, :]
        return zero_padding(occluded, PADDING)

    # 3D occlusion: one variant per position, masking every position that
    # row ``i`` of ``m`` flags as within the radius.
    mod_list_seq_3D = [occlude(m[i, :]) for i in range(m.shape[0])]

    # 1D occlusion: slide a window of ``window`` consecutive positions.
    mod_list_seq_1D = []
    for offset in range(n_positions - window + 1):
        in_window = np.zeros(seq.shape[0], dtype=bool)
        in_window[offset:offset + window] = True
        mod_list_seq_1D.append(occlude(in_window))

    return (mod_list_seq_1D, mod_list_seq_3D, m)
def forward(self, background, instance, box):
    """Fuse per-instance features into the background features.

    Weights are produced by ``self.background_conv`` and
    ``self.instance_conv`` (entries of ``instance`` that are ``None`` are
    kept as ``None``).  ``zero_padding`` returns, per batch element, the
    padded instance features and a weight map (presumably aligned to the
    background via ``box`` -- confirm against ``zero_padding``).

    Returns:
        The fused tensor: for batch elements without instances the
        background is passed through unchanged, otherwise the
        softmax-weighted instance features summed over dim 0 are added to
        the softmax-weighted background.
    """
    background_weight = self.background_conv(background)

    instance_weight = []
    for inst in instance:
        instance_weight.append(None if inst is None else self.instance_conv(inst))

    instance_padded, weight_map = zero_padding(
        background, instance, instance_weight, box)

    fused = self.softmax(background_weight) * background
    for n in range(fused.shape[0]):
        padded = instance_padded[n]
        if padded is None:
            # Nothing to blend in: fall back to the raw background.
            fused[n] = background[n]
            continue
        fused[n] += torch.sum(self.softmax(weight_map[n]) * padded, dim=0)
    return fused
def main():
    """Run 1D and 3D occlusion predictions for every record of the FASTA file.

    Reads ``args.seq_file`` and ``args.model``, loads one distance-matrix TSV
    per sequence from ``args.outfolder`` and hands the predictions to
    ``plot_occlusion`` (writing into ``args.imgfolder``).  Sequences whose id
    is missing from the loaded sequence data are reported and skipped.
    """
    radius = 6  # 6 Å
    window = 3  # 3 amino acids

    seq_dict = load_sequens_data(args.seq_file)
    print('Loaded sequence data')
    model = load_model(args.model, v=0)
    print('Loaded Model')

    def predict_one(encoded):
        # The model is fed a batch of one: (1, rows, cols).
        batch = encoded.reshape([1, encoded.shape[0], encoded.shape[1]])
        return model.predict(batch)[0]

    def predict_each(encoded_seqs):
        scores = np.zeros(len(encoded_seqs))
        for i, encoded in enumerate(encoded_seqs):
            scores[i] = predict_one(encoded)
        return scores

    seq_ids = [rec.id for rec in SeqIO.parse(args.seq_file, 'fasta')]

    for seq in seq_ids:
        try:
            position = seq_dict['id'].index(seq)
            sequence = seq_dict['seq_bin'][position]
            prop_val = seq_dict['prop'][position]
        except ValueError:
            print(seq[:-4] + ' is not in sequence data')
            continue

        original_seq = zero_padding(sequence, PADDING)
        predict_prop_val = predict_one(original_seq)

        dist_path = os.path.join(args.outfolder,
                                 'pdb' + seq.lower() + '_dist_mat.tsv')
        df = pd.read_csv(dist_path, sep='\t')

        mod_list_seq_1D, mod_list_seq_3D, m = occlusion_seqs(
            sequence, df, radius, window)
        print('Done calculationg occlusion sequences')

        # Predicting on all the 1D occluded sequences
        prediction_arr_1D = predict_each(mod_list_seq_1D)
        print('Done predicting 1D occlusion')

        # Predicting on all the 3D occluded sequences
        prediction_arr_3D = predict_each(mod_list_seq_3D)
        print('Done predicting 3D occlusion')

        # NOTE(review): the result is never used afterwards; kept so the
        # call (and its failure on empty input) stays as before.
        amax = np.argmax(prediction_arr_3D)

        plot_occlusion(prediction_arr_1D,
                       prediction_arr_3D,
                       m,
                       prop_val,
                       predict_prop_val,
                       args.imgfolder,
                       fname=seq)
def cascade_feature_fusion_module(f1, f2, c3, is_training, names):
    """
    Perform cascade feature fusion between f1 and f2.

    f1 is bilinearly resized to f2's spatial size, zero-padded and passed
    through a dilated 3x3 convolution; f2 is projected with a 1x1
    convolution.  Both branches are batch-normalized, summed and passed
    through ReLU.

    `names` argument is only needed to match names of pretrained weights;
    it must provide the keys "f1_conv", "f1_bn", "f2_conv", "f2_bn", "out".

    Returns the resized f1 together with the fused feature map.
    """
    bn_settings = dict(momentum=0.95, epsilon=1e-5, training=is_training)

    # f2 height and width should always be double that of f1's
    target_shape = tf.shape(f2)
    f1_interp = tf.image.resize_bilinear(
        f1, target_shape[1:-1], align_corners=True)

    # Low-resolution branch: explicit padding of 2 followed by a 3x3 conv
    # with dilation rate 2.
    low_res = zero_padding(f1_interp, paddings=2)
    low_res = tf.layers.conv2d(
        low_res,
        kernel_size=3,
        strides=1,
        filters=c3,
        dilation_rate=2,
        use_bias=False,
        name=names["f1_conv"])
    low_res = tf.layers.batch_normalization(
        low_res, name=names["f1_bn"], **bn_settings)

    # High-resolution branch: 1x1 projection to the same channel count.
    high_res = tf.layers.conv2d(
        f2,
        filters=c3,
        kernel_size=1,
        strides=1,
        use_bias=False,
        name=names["f2_conv"])
    high_res = tf.layers.batch_normalization(
        high_res, name=names["f2_bn"], **bn_settings)

    fused = tf.add_n([high_res, low_res], name=names["out"])

    # We need a hook into f1_interp to create sum{4,24}_outs;
    # return it as well
    return f1_interp, tf.nn.relu(fused)
def __init__(self, sentence, word_vocab):
    """Set up the index containers for one sentence and their tensor forms.

    The index lists start out empty and ``self.get_value`` is then called
    with the sentence and vocabulary (presumably it fills the lists --
    confirm against ``get_value``).  Afterwards the character indexes are
    passed through ``utils.zero_padding`` using the ``'<PAD>'`` character
    index and stored, together with the word lengths, as ``torch.long``
    tensors on ``const.DEVICE``.
    """
    self.length = len(sentence)

    # Per-token index containers.
    self.word_indexes = []
    self.character_indexes = []
    self.pos_indexes = []
    self.chunk_indexes = []
    self.tag_indexes = []
    self.word_lengths = []

    self.get_value(sentence, word_vocab)

    character_pad = const.CHARACTER2INDEX['<PAD>']
    padded_characters = utils.zero_padding(
        self.character_indexes, fill_value=character_pad)
    self.padded_character_indexes_tensor = torch.tensor(
        padded_characters, dtype=torch.long, device=const.DEVICE)

    self.word_lengths_tensor = torch.tensor(
        self.word_lengths, dtype=torch.long, device=const.DEVICE)
def build_dilated_residual_network(input_layer):
    """Construct a 34-layer variant dilated residual network.

    The network consists of a three-convolution stem, a padded 3x3 max
    pool and a sequence of bottleneck modules (levels 2_1 .. 5_3).  The
    output of level 3_1 is halved in resolution with a bilinear resize
    before the remaining levels are applied.

    Returns:
        ``(conv3_1_block, conv5_3_block)``: the level 3_1 output (before the
        resize) and the output of the final bottleneck module.
    """
    # NOTE(review): ``self`` is not a parameter of this function; it has to
    # come from an enclosing scope -- confirm where this is defined.
    is_training = self.placeholders["is_training"]

    def conv_bn_relu(x, filters, strides, name):
        # 3x3 "same" convolution -> batch norm (named ``<name>_bn``) -> ReLU.
        x = tf.layers.conv2d(x,
                             filters=filters,
                             kernel_size=3,
                             strides=strides,
                             padding="same",
                             use_bias=False,
                             name=name)
        x = tf.layers.batch_normalization(x,
                                          momentum=0.95,
                                          epsilon=1e-5,
                                          training=is_training,
                                          name=name + "_bn")
        return tf.nn.relu(x)

    def bottleneck_stack(x, levels, pad, filters):
        # Chain stride-1 bottleneck modules, one per level name.
        for lvl in levels:
            x = bottleneck_module(x,
                                  lvl=lvl,
                                  pad=pad,
                                  is_training=is_training,
                                  filters=filters,
                                  strides=1)
        return x

    # Stem.
    net = conv_bn_relu(input_layer, 32, 2, "conv1_1_3x3_s2")
    net = conv_bn_relu(net, 32, 1, "conv1_2_3x3")
    net = conv_bn_relu(net, 64, 1, "conv1_3_3x3")

    net = zero_padding(net, paddings=1)
    net = tf.layers.max_pooling2d(net,
                                  pool_size=3,
                                  strides=2,
                                  padding='valid',
                                  name="pool1")

    # Level 2.
    net = bottleneck_stack(net, ("2_1", "2_2", "2_3"), pad=1, filters=128)

    # Level 3 starts with a strided module.
    conv3_1_block = bottleneck_module(net,
                                      lvl="3_1",
                                      pad=1,
                                      is_training=is_training,
                                      filters=256,
                                      strides=2)

    # We share weights for the low and med resolution levels;
    # conv3_1_sub4 is a hook into the end of med resolution level
    conv3_1_sub4 = tf.image.resize_bilinear(
        conv3_1_block,
        tf.shape(conv3_1_block)[1:-1] // 2,
        align_corners=True,
        name="conv3_1_sub4")

    net = bottleneck_stack(conv3_1_sub4, ("3_2", "3_3", "3_4"),
                           pad=1, filters=256)

    # Pad is used as dilation rate internally in bottleneck module
    net = bottleneck_stack(net,
                           ("4_1", "4_2", "4_3", "4_4", "4_5", "4_6"),
                           pad=2, filters=512)
    conv5_3_block = bottleneck_stack(net, ("5_1", "5_2", "5_3"),
                                     pad=4, filters=1024)

    return conv3_1_block, conv5_3_block
def bottleneck_module(inputs, lvl, pad, is_training, filters, strides,
                      data_format='channels_last', bottleneck_factor=4):
    """
    Implement the bottleneck module proposed in ResNet.

    1x1 conv -> 3x3 conv -> 1x1 conv, each followed by batch normalization,
    with a residual shortcut added before the final ReLU.

    Args:
        inputs: 4-D input tensor laid out according to `data_format`.
        lvl: Level label used to build the layer names ("conv{lvl}_...").
        pad: Padding applied before the 3x3 conv; also used as its dilation
            rate.
        is_training: Passed to batch normalization as `training`.
        filters: Number of output channels; the two inner convolutions use
            `filters // bottleneck_factor`.
        strides: Stride of the 1x1 reduce conv (and of the projection
            shortcut when one is needed).
        data_format: "channels_last" (default); any other value is treated
            as a channels-first layout.
        bottleneck_factor: Channel reduction factor of the inner convs.

    Returns:
        The ReLU-activated sum of the residual branch and the shortcut.
    """
    channels_last = data_format == "channels_last"
    # Batch norm has to normalize over the channel axis, which moves with
    # the layout.  For the default layout this equals the library default.
    bn_axis = -1 if channels_last else 1
    inner_filters = filters // bottleneck_factor

    def batch_norm(tensor, name):
        return tf.layers.batch_normalization(tensor,
                                             axis=bn_axis,
                                             momentum=0.95,
                                             epsilon=1e-5,
                                             training=is_training,
                                             name=name)

    # 1x1 reduce component
    x = tf.layers.conv2d(inputs,
                         filters=inner_filters,
                         kernel_size=1,
                         strides=strides,
                         data_format=data_format,
                         use_bias=False,
                         name="conv{}_1x1_reduce".format(lvl))
    x = batch_norm(x, "conv{}_1x1_reduce_bn".format(lvl))
    x = tf.nn.relu(x)

    # 3x3 component
    # NOTE(review): zero_padding receives no layout information; verify
    # that it pads the spatial axes when data_format is not channels_last.
    x = zero_padding(x, pad)
    x = tf.layers.conv2d(x,
                         filters=inner_filters,
                         kernel_size=3,
                         strides=1,
                         dilation_rate=pad,
                         data_format=data_format,
                         use_bias=False,
                         name="conv{}_3x3".format(lvl))
    x = batch_norm(x, "conv{}_3x3_bn".format(lvl))
    x = tf.nn.relu(x)

    # 1x1 increase component
    x = tf.layers.conv2d(x,
                         filters=filters,
                         kernel_size=1,
                         strides=1,
                         data_format=data_format,
                         use_bias=False,
                         name="conv{}_1x1_increase".format(lvl))
    x = batch_norm(x, "conv{}_1x1_increase_bn".format(lvl))

    # 1x1 project (if needed): compare static height and channel count of
    # the input with those of the residual branch.
    if channels_last:
        _, h, _, d = inputs.get_shape().as_list()
        _, hh, _, dd = x.get_shape().as_list()
    else:
        _, d, h, _ = inputs.get_shape().as_list()
        _, dd, hh, _ = x.get_shape().as_list()

    if h != hh or d != dd:
        # The shortcut conv must use the same layout as the residual
        # branch; previously data_format was not forwarded here.
        conv_proj = tf.layers.conv2d(inputs,
                                     filters,
                                     kernel_size=1,
                                     strides=strides,
                                     data_format=data_format,
                                     use_bias=False,
                                     name="conv{}_1x1_proj".format(lvl))
        shortcut = batch_norm(conv_proj, "conv{}_1x1_proj_bn".format(lvl))
    else:
        shortcut = inputs

    return tf.nn.relu(x + shortcut)