Пример #1
0
    def get_batch(sentences,
                  word_padding_idx,
                  pos_padding_idx,
                  chunk_padding_idx,
                  character_padding_idx,
                  tag_padding_idx):
        """Collate sentence objects into padded ``torch.long`` tensors.

        Every element of ``sentences`` must expose ``word_indexes``,
        ``pos_indexes``, ``chunk_indexes``, ``tag_indexes``,
        ``character_indexes`` (one character-index list per word) and
        ``length``.

        Returns a 4-tuple ``(features, tags, sentence_lengths, word_lengths)``:

        * ``features`` is ``(words, pos, chunks, characters)``.
        * ``characters`` holds the character rows of all sentences
          concatenated; each sentence is first extended with
          ``[character_padding_idx]`` placeholder words up to the largest
          ``sentence.length`` in the batch.
        * ``word_lengths`` is the length of every character row before the
          rows themselves are padded (placeholder words count as length 1).

        All tensors are created on ``const.DEVICE``.
        """

        def pad(rows, fill):
            # Thin wrapper so every padding call reads the same way.
            return utils.zero_padding(rows, fill_value=fill)

        def to_long(values):
            return torch.tensor(values, dtype=torch.long, device=const.DEVICE)

        word_rows = pad([s.word_indexes for s in sentences], word_padding_idx)
        pos_rows = pad([s.pos_indexes for s in sentences], pos_padding_idx)
        chunk_rows = pad([s.chunk_indexes for s in sentences], chunk_padding_idx)
        tag_rows = pad([s.tag_indexes for s in sentences], tag_padding_idx)

        batch_lengths = [s.length for s in sentences]
        padded_length = max(batch_lengths)

        # Flatten the batch into one list of per-word character rows, topping
        # up short sentences with single-pad placeholder words.
        character_rows = []
        for s in sentences:
            missing_words = padded_length - len(s.character_indexes)
            character_rows.extend(s.character_indexes)
            character_rows.extend([[character_padding_idx]] * missing_words)

        # Lengths are taken before the rows are padded to a common width.
        batch_word_lengths = [len(row) for row in character_rows]
        character_rows = pad(character_rows, character_padding_idx)

        features = (to_long(word_rows),
                    to_long(pos_rows),
                    to_long(chunk_rows),
                    to_long(character_rows))
        return (features,
                to_long(tag_rows),
                to_long(batch_lengths),
                to_long(batch_word_lengths))
Пример #2
0
def occlusion_seqs(seq, df, radius, window):
    """Build occluded copies of an encoded sequence.

    Two families of variants are produced, each padded with
    ``zero_padding(..., PADDING)``:

    * 3D occlusion: one variant per row of the contact mask ``m``; rows of
      ``seq`` flagged in that mask row are overwritten with ``ALANINE``.
    * 1D occlusion: one variant per start offset of a sliding window of
      ``window`` consecutive rows, overwritten with ``ALANINE``.

    Args:
        seq: 2-D array-like with ``shape``; indexed by row.
        df: DataFrame whose first column gives 1-based positions and whose
            remaining columns are compared against ``radius``
            (presumably a pairwise distance matrix -- confirm with caller).
        radius: Threshold; entries strictly below it count as contacts.
        window: Width of the 1D occlusion window, in rows.

    Returns:
        ``(mod_list_seq_1D, mod_list_seq_3D, m)`` where ``m`` is the boolean
        ``len(seq) x len(seq)`` contact mask.
    """
    n_rows = len(seq)
    table = df.values

    # First column holds 1-based positions; shift the start to a 0-based index.
    first = int(table[0, 0] - 1)
    last = int(table[-1, 0])

    m = np.zeros([n_rows, n_rows], dtype=bool)
    m[first:last, first:last] = table[:, 1:] < radius

    def occlude(mask):
        # Masked rows become ALANINE, the rest are copied from seq, then the
        # result is padded to the constant length the model expects.
        occluded = np.zeros([seq.shape[0], seq.shape[1]])
        occluded[mask, :] = ALANINE
        untouched = ~mask
        occluded[untouched, :] = seq[untouched, :]
        return zero_padding(occluded, PADDING)

    mod_list_seq_3D = [occlude(contact_row) for contact_row in m]

    mod_list_seq_1D = []
    for offset in range(n_rows - window + 1):
        window_mask = np.zeros(seq.shape[0], dtype=bool)
        window_mask[offset:offset + window] = True
        mod_list_seq_1D.append(occlude(window_mask))

    return (mod_list_seq_1D, mod_list_seq_3D, m)
Пример #3
0
 def forward(self, background, instance, box):
     """Fuse per-sample instance features into the background features.

     The background is scaled by ``softmax(background_conv(background))``.
     For each batch index, the weighted instance features returned by
     ``zero_padding`` are summed over dim 0 and added in place; when a
     sample has no padded instance (``None``), that sample is replaced by
     the raw ``background`` slice instead.

     ``instance`` is an iterable whose entries may be ``None``; those are
     passed through as ``None`` weights.
     """
     bg_logits = self.background_conv(background)
     inst_logits = [None if item is None else self.instance_conv(item)
                    for item in instance]
     instance_padded, weight_map = zero_padding(background, instance, inst_logits, box)

     fused = self.softmax(bg_logits) * background
     for n in range(fused.shape[0]):
         padded = instance_padded[n]
         if padded is None:
             # Nothing to blend in: keep the unweighted background.
             fused[n] = background[n]
             continue
         fused[n] += torch.sum(self.softmax(weight_map[n]) * padded, dim=0)
     return fused
Пример #4
0
def main():
    """Run 1D and 3D occlusion predictions for every record in the FASTA file.

    Reads ``args.seq_file`` (sequence data and FASTA ids), ``args.model``
    and, per record, the tab-separated file
    ``pdb<id lower-cased>_dist_mat.tsv`` in ``args.outfolder``. For each id
    found in the sequence data it predicts on the padded original sequence
    and on every occluded variant from ``occlusion_seqs``, then hands the
    results to ``plot_occlusion`` (output folder ``args.imgfolder``).
    Ids missing from the sequence data are reported and skipped.
    """
    radius = 6  # 6 Å
    window = 3  # 3 amino acids

    seq_dict = load_sequens_data(args.seq_file)
    print('Loaded sequence data')
    model = load_model(args.model, v=0)
    print('Loaded Model')

    def predict_single(encoded):
        # The model is fed a batch of one: add a leading batch axis.
        batched = encoded.reshape([1, encoded.shape[0], encoded.shape[1]])
        return model.predict(batched)[0]

    def predict_all(encoded_list):
        scores = np.zeros(len(encoded_list))
        for pos, encoded in enumerate(encoded_list):
            scores[pos] = predict_single(encoded)
        return scores

    seq_ids = [rec.id for rec in SeqIO.parse(args.seq_file, 'fasta')]

    for seq in seq_ids:
        try:
            sequence = seq_dict['seq_bin'][seq_dict['id'].index(seq)]
            prop_val = seq_dict['prop'][seq_dict['id'].index(seq)]
        except ValueError:
            print(seq[:-4] + ' is not in sequence data')
            continue

        original_seq = zero_padding(sequence, PADDING)
        predict_prop_val = predict_single(original_seq)

        dist_path = os.path.join(args.outfolder,
                                 'pdb' + seq.lower() + '_dist_mat.tsv')
        df = pd.read_csv(dist_path, sep='\t')

        mod_list_seq_1D, mod_list_seq_3D, m = occlusion_seqs(
            sequence, df, radius, window)
        print('Done calculationg occlusion sequences')

        # Predict on all the 1D occluded sequences.
        prediction_arr_1D = predict_all(mod_list_seq_1D)
        print('Done predicting 1D occlusion')

        # Predict on all the 3D occluded sequences.
        prediction_arr_3D = predict_all(mod_list_seq_3D)
        print('Done predicting 3D occlusion')

        # NOTE(review): amax is computed but never used below.
        amax = np.argmax(prediction_arr_3D)

        plot_occlusion(prediction_arr_1D,
                       prediction_arr_3D,
                       m,
                       prop_val,
                       predict_prop_val,
                       args.imgfolder,
                       fname=seq)
Пример #5
0
def cascade_feature_fusion_module(f1, f2, c3, is_training, names):
    """
    Perform cascade feature fusion between f1 and f2.

    f1 is bilinearly resized to f2's spatial size, zero-padded by 2 and
    passed through a 3x3 convolution with dilation rate 2 plus batch norm.
    f2 goes through a 1x1 projection plus batch norm. Both branches have
    `c3` filters and are summed, then ReLU is applied.

    `names` maps the keys "f1_conv", "f1_bn", "f2_conv", "f2_bn" and "out"
    to layer names; it is only needed to match names of pretrained weights.

    Returns `(f1_interp, fused)`: the resized f1 (exposed as a hook for
    auxiliary outputs) and the activated fusion result.
    """

    def batch_norm(tensor, name):
        # Both branches use the same batch-norm hyper-parameters.
        return tf.layers.batch_normalization(tensor,
                                             momentum=0.95,
                                             epsilon=1e-5,
                                             training=is_training,
                                             name=name)

    # Resize f1 to f2's height/width (f2 is expected to be twice f1's size).
    # Assumes NHWC tensors, so [1:-1] selects (height, width).
    target_size = tf.shape(f2)[1:-1]
    f1_interp = tf.image.resize_bilinear(f1, target_size, align_corners=True)

    # Low-resolution branch: explicit padding of 2 matches the dilated 3x3
    # kernel so the spatial size is preserved.
    low_branch = tf.layers.conv2d(zero_padding(f1_interp, paddings=2),
                                  kernel_size=3,
                                  strides=1,
                                  filters=c3,
                                  dilation_rate=2,
                                  use_bias=False,
                                  name=names["f1_conv"])
    low_branch = batch_norm(low_branch, names["f1_bn"])

    # High-resolution branch: 1x1 projection to the shared channel count.
    high_branch = tf.layers.conv2d(f2,
                                   filters=c3,
                                   kernel_size=1,
                                   strides=1,
                                   use_bias=False,
                                   name=names["f2_conv"])
    high_branch = batch_norm(high_branch, names["f2_bn"])

    fused = tf.add_n([high_branch, low_branch], name=names["out"])

    # f1_interp is returned as well so callers can attach sum{4,24}_outs.
    return f1_interp, tf.nn.relu(fused)
Пример #6
0
    def __init__(self, sentence, word_vocab):
        """Index a sentence and pre-build its character-level tensors.

        Initializes empty index lists, delegates to
        ``self.get_value(sentence, word_vocab)`` (presumably the method that
        fills them -- it is not shown here), then materializes the padded
        character indexes and the word lengths as ``torch.long`` tensors on
        ``const.DEVICE``.
        """
        self.length = len(sentence)

        # Start from empty containers before get_value() runs.
        self.word_indexes = []
        self.character_indexes = []
        self.pos_indexes = []
        self.chunk_indexes = []
        self.tag_indexes = []
        self.word_lengths = []

        self.get_value(sentence, word_vocab)

        # Pad every word's character list with the '<PAD>' character index.
        pad_index = const.CHARACTER2INDEX['<PAD>']
        padded_characters = utils.zero_padding(self.character_indexes,
                                               fill_value=pad_index)
        self.padded_character_indexes_tensor = torch.tensor(
            padded_characters, dtype=torch.long, device=const.DEVICE)

        self.word_lengths_tensor = torch.tensor(
            self.word_lengths, dtype=torch.long, device=const.DEVICE)
Пример #7
0
        def build_dilated_residual_network(input_layer):
            """Construct the dilated residual backbone.

            Layout: three 3x3 conv/bn/relu stem layers, a padded 3x3
            max-pool, then bottleneck stages conv2 (3 blocks), conv3
            (4 blocks), conv4 (6 blocks, dilation 2) and conv5 (3 blocks,
            dilation 4). The output of conv3_1 is halved in resolution
            before the rest of the network runs on it.

            Returns `(conv3_1_block, conv5_3_block)`: the full-size conv3_1
            output and the final conv5_3 output.
            """
            is_training = self.placeholders["is_training"]

            def stem_layer(x, filters, strides, name):
                # 3x3 conv -> batch norm -> ReLU; the bn layer is `name` + "_bn".
                x = tf.layers.conv2d(x,
                                     filters=filters,
                                     kernel_size=3,
                                     strides=strides,
                                     padding="same",
                                     use_bias=False,
                                     name=name)
                x = tf.layers.batch_normalization(x,
                                                  momentum=0.95,
                                                  epsilon=1e-5,
                                                  training=is_training,
                                                  name=name + "_bn")
                return tf.nn.relu(x)

            def bottleneck_stack(x, stage, first, last, dilation, filters):
                # Chain stride-1 bottleneck blocks "<stage>_<first>" .. "<stage>_<last>".
                for index in range(first, last + 1):
                    x = bottleneck_module(x,
                                          lvl="{}_{}".format(stage, index),
                                          pad=dilation,
                                          is_training=is_training,
                                          filters=filters,
                                          strides=1)
                return x

            # Stem.
            net = stem_layer(input_layer, 32, 2, "conv1_1_3x3_s2")
            net = stem_layer(net, 32, 1, "conv1_2_3x3")
            net = stem_layer(net, 64, 1, "conv1_3_3x3")

            net = zero_padding(net, paddings=1)
            net = tf.layers.max_pooling2d(net,
                                          pool_size=3,
                                          strides=2,
                                          padding='valid',
                                          name="pool1")

            # conv2_1 .. conv2_3
            net = bottleneck_stack(net, 2, 1, 3, dilation=1, filters=128)

            # conv3_1 is the only strided bottleneck block.
            conv3_1_block = bottleneck_module(net,
                                              lvl="3_1",
                                              pad=1,
                                              is_training=is_training,
                                              filters=256,
                                              strides=2)

            # We share weights for the low and med resolution levels;
            # conv3_1_sub4 is a hook into the end of med resolution level
            half_size = tf.shape(conv3_1_block)[1:-1] // 2
            conv3_1_sub4 = tf.image.resize_bilinear(conv3_1_block,
                                                    half_size,
                                                    align_corners=True,
                                                    name="conv3_1_sub4")

            # conv3_2 .. conv3_4
            net = bottleneck_stack(conv3_1_sub4, 3, 2, 4,
                                   dilation=1, filters=256)

            # Pad is used as dilation rate internally in bottleneck module
            # conv4_1 .. conv4_6
            net = bottleneck_stack(net, 4, 1, 6, dilation=2, filters=512)

            # conv5_1 .. conv5_3
            conv5_3_block = bottleneck_stack(net, 5, 1, 3,
                                             dilation=4, filters=1024)

            return conv3_1_block, conv5_3_block
Пример #8
0
        def bottleneck_module(inputs,
                              lvl,
                              pad,
                              is_training,
                              filters,
                              strides,
                              data_format='channels_last',
                              bottleneck_factor=4):
            """
            Implement the bottleneck module proposed in ResNet.
            1x1 conv -> 3x3 conv -> 1x1 conv, added to a shortcut and
            passed through ReLU.

            Args:
                inputs: 4-D input tensor laid out according to `data_format`.
                lvl: Suffix used to build every layer name
                    ("conv{lvl}_1x1_reduce", "conv{lvl}_3x3", ...).
                pad: Zero padding applied before the 3x3 convolution; the
                    same value is used as that convolution's dilation rate.
                is_training: Forwarded to batch normalization as `training`.
                filters: Number of output channels; the two inner
                    convolutions use `filters // bottleneck_factor`.
                strides: Stride of the 1x1 reduce convolution and of the
                    projection shortcut.
                data_format: 'channels_last' (default); any other value is
                    treated as a channels-first layout.
                bottleneck_factor: Channel reduction factor of the inner
                    convolutions.

            Returns:
                ReLU of the residual branch plus the shortcut. The shortcut
                is `inputs` itself, or a 1x1 projection of it when height
                or channel count differ from the residual branch.
            """
            channels_last = data_format == "channels_last"
            # Batch norm must normalize the channel axis, which depends on
            # the layout; -1 is the library default, so the channels_last
            # path is unchanged.
            bn_axis = -1 if channels_last else 1
            inner_filters = filters // bottleneck_factor

            # 1x1 reduce component
            x = tf.layers.conv2d(inputs,
                                 filters=inner_filters,
                                 kernel_size=1,
                                 strides=strides,
                                 data_format=data_format,
                                 use_bias=False,
                                 name="conv{}_1x1_reduce".format(lvl))
            x = tf.layers.batch_normalization(
                x,
                axis=bn_axis,
                momentum=0.95,
                epsilon=1e-5,
                training=is_training,
                name="conv{}_1x1_reduce_bn".format(lvl))
            x = tf.nn.relu(x)

            # 3x3 component
            # NOTE(review): zero_padding receives no data_format; confirm it
            # pads the spatial axes when a channels-first layout is used.
            x = zero_padding(x, pad)
            x = tf.layers.conv2d(x,
                                 filters=inner_filters,
                                 kernel_size=3,
                                 strides=1,
                                 dilation_rate=pad,
                                 data_format=data_format,
                                 use_bias=False,
                                 name="conv{}_3x3".format(lvl))
            x = tf.layers.batch_normalization(x,
                                              axis=bn_axis,
                                              momentum=0.95,
                                              epsilon=1e-5,
                                              training=is_training,
                                              name="conv{}_3x3_bn".format(lvl))
            x = tf.nn.relu(x)

            # 1x1 increase component
            x = tf.layers.conv2d(x,
                                 filters=filters,
                                 kernel_size=1,
                                 strides=1,
                                 data_format=data_format,
                                 use_bias=False,
                                 name="conv{}_1x1_increase".format(lvl))
            x = tf.layers.batch_normalization(
                x,
                axis=bn_axis,
                momentum=0.95,
                epsilon=1e-5,
                training=is_training,
                name="conv{}_1x1_increase_bn".format(lvl))

            # 1x1 project (if needed): compare static height and channel
            # count of the input against the residual branch.
            if channels_last:
                _, h, _, d = inputs.get_shape().as_list()
                _, hh, _, dd = x.get_shape().as_list()
            else:
                _, d, h, _ = inputs.get_shape().as_list()
                _, dd, hh, _ = x.get_shape().as_list()

            if h != hh or d != dd:
                # The shortcut must use the same layout as the main branch;
                # without data_format it would treat channels-first input
                # as channels-last.
                conv_proj = tf.layers.conv2d(
                    inputs,
                    filters,
                    kernel_size=1,
                    strides=strides,
                    data_format=data_format,
                    use_bias=False,
                    name="conv{}_1x1_proj".format(lvl))
                conv_proj_bn = tf.layers.batch_normalization(
                    conv_proj,
                    axis=bn_axis,
                    momentum=0.95,
                    epsilon=1e-5,
                    training=is_training,
                    name="conv{}_1x1_proj_bn".format(lvl))
                out = x + conv_proj_bn
            else:
                out = x + inputs

            return tf.nn.relu(out)