Пример #1
0
def main():
    """Load a trained model and a TSV protein dataset, then run motif
    extraction, writing results under
    ``<output_dir>/motifs/<dataset_key>/<model_key>``.

    Exits with status 1 (after printing the error to stderr) if either the
    model or the dataset cannot be loaded.
    """
    try:
        model = load_model(FLAGS.model_file, custom_objects=custom_layers)
    except Exception as e:
        # CLI boundary: report and abort rather than propagate a traceback.
        print('Unable to load model.', file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # Key = file name stripped of directories and of everything after the
    # first dot, e.g. 'models/coder.v2.h5' -> 'coder'.  os.path.basename is
    # used instead of split('/') so bare filenames and platform separators
    # are handled correctly.
    model_key = os.path.basename(FLAGS.model_file).split('.')[0]

    try:
        proteins = pd.read_csv(FLAGS.infile, sep='\t')
    except Exception as e:
        print('Unable to read input protein dataset.', file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    dataset_key = os.path.basename(FLAGS.infile).split('.')[0]

    output_folder = os.path.join(FLAGS.output_dir, 'motifs', dataset_key,
                                 model_key)

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    extract_motifs(model, proteins, output_folder)
Пример #2
0
def main():
    """Load a trained model, score a TSV protein dataset with it, and save
    rows scoring above 0.5 as putative homologs under
    ``<data_dir>/homologs/<dataset_key>/<model_key>/<output_file>``.

    Exits with status 1 (after printing the error to stderr) if either the
    model or the dataset cannot be loaded.
    """
    try:
        model = load_model(FLAGS.model_file, custom_objects=custom_layers)
    except Exception as e:
        # CLI boundary: report and abort rather than propagate a traceback.
        print('Unable to load model.', file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    # Key = file name stripped of directories and of everything after the
    # first dot; basename handles bare filenames and platform separators.
    model_key = os.path.basename(FLAGS.model_file).split('.')[0]

    try:
        proteins = pd.read_csv(FLAGS.infile, sep='\t')
    except Exception as e:
        print('Unable to read input protein dataset.', file=sys.stderr)
        print(e, file=sys.stderr)
        sys.exit(1)

    dataset_key = os.path.basename(FLAGS.infile).split('.')[0]

    # Batch size 100; sequence length taken from the model's input shape.
    data_gen = UniProtSequence(proteins, 100, model.input_shape[1])

    preds = model.predict_generator(data_gen,
                                    use_multiprocessing=True,
                                    workers=4)

    proteins['comet_predictions'] = preds

    # Keep only confidently-predicted homologs.
    output = proteins[(proteins['comet_predictions'] > 0.5)]

    # BUGFIX: previously used `'.csv' not in FLAGS.output_file`, which
    # matched '.csv' anywhere in the name (e.g. 'my.csvfile') instead of
    # checking the actual extension.
    if not FLAGS.output_file.endswith('.csv'):
        FLAGS.output_file += '.csv'

    output_folder = os.path.join(FLAGS.data_dir, 'homologs', dataset_key,
                                 model_key)

    # exist_ok avoids the check-then-create race of isdir() + makedirs().
    os.makedirs(output_folder, exist_ok=True)

    output.to_csv(os.path.join(output_folder, FLAGS.output_file))
Пример #3
0
def build_coder_model(input_shape=None, saved_model=None):
    """Build (or load) and compile the 'CoDER' convolutional autoencoder.

    Args:
        input_shape: ``(seq_length, alphabet_size)`` tuple; required when
            ``saved_model`` is None. ``alphabet_size`` must be 20, 4 or 22.
        saved_model: path to a previously saved model. When given, the
            architecture construction below is skipped entirely and the
            loaded model is only (re)compiled.

    Returns:
        A compiled model. NOTE(review): ``Model`` here accepts a
        ``classification`` kwarg and ``compile`` accepts ``lr`` — neither is
        standard Keras, so this presumably relies on a project-custom Model
        subclass; confirm against the project's layer/model definitions.
    """
    if saved_model:
        # Load without compiling; compilation happens at the bottom of this
        # function using the current FLAGS settings.
        model = load_model(saved_model,
                           custom_objects=custom_layers,
                           compile=False)
    else:
        # Check input parameters
        assert len(input_shape) == 2, 'Unrecognizable input dimensions'
        assert K.image_dim_ordering(
        ) == 'tf', 'Theano dimension ordering not supported yet'
        assert input_shape[1] in [20, 4,
                                  22], 'Input dimensions error, check order'

        seq_length, alphabet = input_shape

        # Input LayerRO
        inp = Input(shape=input_shape, name='aa_seq')
        feature_layer = inp
        # Convolutional Layers. Each conv output tensor is kept in `convs`
        # before batch-norm/activation so the matching decoder layers below
        # can tie to the raw convolution layers.
        convs = []
        for i, (filter_number, filter_size, filter_stride) in enumerate(
                zip(FLAGS.filters, FLAGS.filter_length, FLAGS.filter_stride)):
            feature_layer = Convolution1D(filters=filter_number,
                                          kernel_size=filter_size,
                                          strides=filter_stride,
                                          padding='same',
                                          use_bias=False,
                                          kernel_initializer='glorot_uniform',
                                          activation='linear',
                                          name='Conv%d' %
                                          (i + 1))(feature_layer)
            convs.append(feature_layer)
            feature_layer = BatchNormalization()(feature_layer)
            feature_layer = Activation(activation='relu')(feature_layer)

        # Global Max-pooling
        pool = GlobalMaxPooling1D()(feature_layer)

        # Fully-Connected encoding layers
        fc_enc = [
            Dense(FLAGS.filters[-1],
                  kernel_initializer='glorot_uniform',
                  activation='sigmoid',
                  name='FCEnc1')(pool)
        ]

        for d in range(1, FLAGS.n_fc_layers):
            fc_enc.append(
                Dense(FLAGS.filters[-1],
                      kernel_initializer='glorot_uniform',
                      activation='sigmoid',
                      name='FCEnc{}'.format(d + 1))(fc_enc[-1]))

        encoded = fc_enc[-1]  # To access if model for encoding needed

        # Fully-Connected decoding layers.
        # NOTE(review): `tensor._keras_history[0]` is the Keras-internal
        # reference to the layer that produced the tensor; the custom
        # `Dedense` layer apparently takes the corresponding forward Dense
        # layer as its first argument (presumably for weight tying) —
        # confirm against the custom layer implementation.
        fc_dec = [
            Dedense(encoded._keras_history[0],
                    activation='linear',
                    name='FCDec{}'.format(FLAGS.n_fc_layers))(encoded)
        ]

        # Mirror the encoder: decode through the FC stack in reverse order.
        for d in range(FLAGS.n_fc_layers - 2, -1, -1):
            fc_dec.append(
                Dedense(fc_enc[d]._keras_history[0],
                        activation='linear',
                        name='FCDec{}'.format(d + 1))(fc_dec[-1]))

        # Reshaping and unpooling: restore a length-1 sequence axis, then
        # upsample back to the pre-pooling sequence length.
        unflat = Reshape((1, fc_dec[-1]._keras_shape[-1]))(fc_dec[-1])
        deconvs = [
            Upsampling1D(pool._keras_history[0].input_shape[1],
                         name='Upsampling')(unflat)
        ]
        # Deconvolution: tied to the forward conv layers, applied in
        # reverse order (last conv first).
        for c in range(FLAGS.n_conv_layers - 1, 0, -1):
            deconvs.append(
                Deconvolution1D(convs[c]._keras_history[0],
                                activation='relu',
                                name='Deconv{}'.format(c + 1))(
                                    deconvs[-1]))  # maybe add L1 regularizer

        # Final reconstruction layer: sigmoid output, mask not applied.
        decoded = Deconvolution1D(convs[0]._keras_history[0],
                                  apply_mask=False,
                                  activation='sigmoid',
                                  name='Deconv1')(deconvs[-1])

        model = Model(inputs=inp,
                      outputs=decoded,
                      name='CoDER',
                      classification=False)

    # Loss: masked mean-squared-error (project-custom, presumably ignores
    # padded positions — confirm).
    losses = [masked_mse]

    # Metrics
    metrics = [mean_cat_acc]

    # Compilation

    model.compile(optimizer=FLAGS.optimizer,
                  loss=losses,
                  metrics=metrics,
                  lr=FLAGS.learning_rate)
    return model
Пример #4
0
def build_cohst_model(input_shape=None, saved_model=None):
    """Build (or load) and compile the 'CoHST' binary sequence classifier.

    Args:
        input_shape: ``(seq_length, alphabet_size)`` tuple; required when
            ``saved_model`` is None. ``alphabet_size`` must be 20, 4 or 22,
            and ``seq_length`` must be known (non-falsy) — variable-length
            input raises NotImplementedError.
        saved_model: path to a previously saved model. When given, the
            architecture construction below is skipped and the loaded model
            is only marked as a classifier and (re)compiled.

    Returns:
        A compiled model with a single sigmoid output.
        NOTE(review): ``Model`` here accepts a ``classification`` kwarg and
        ``compile`` accepts ``lr`` — neither is standard Keras, so this
        presumably relies on a project-custom Model subclass; confirm.
    """
    if saved_model:
        # Load without compiling; compilation happens at the bottom of this
        # function using the current FLAGS settings.
        model = load_model(saved_model,
                           custom_objects=custom_layers,
                           compile=False)
        model.classification = True
    else:
        # Check input parameters
        assert len(input_shape) == 2, 'Unrecognizable input dimensions'
        assert K.image_dim_ordering(
        ) == 'tf', 'Theano dimension ordering not supported yet'
        assert input_shape[1] in [20, 4,
                                  22], 'Input dimensions error, check order'

        seq_length, alphabet = input_shape

        # Model Architecture

        # Input LayerRO
        inp = Input(shape=input_shape, name='aa_seq')
        feature_layer = inp
        # Convolutional Layers: one Conv1D -> BatchNorm -> ReLU stage per
        # entry in the FLAGS.filters / filter_length / filter_stride lists.
        for i, (filter_number, filter_size, filter_stride) in enumerate(
                zip(FLAGS.filters, FLAGS.filter_length, FLAGS.filter_stride)):
            feature_layer = Convolution1D(filters=filter_number,
                                          kernel_size=filter_size,
                                          strides=filter_stride,
                                          padding='same',
                                          use_bias=False,
                                          kernel_initializer='glorot_uniform',
                                          activation='linear',
                                          name='Conv%d' %
                                          (i + 1))(feature_layer)
            feature_layer = BatchNormalization()(feature_layer)
            feature_layer = Activation(activation='relu')(feature_layer)

        # Max-pooling over the full sequence length, then flatten for the
        # dense layers. Requires a fixed, known seq_length.
        if seq_length:
            max_pool = MaxPooling1D(pool_size=seq_length)(feature_layer)
            flat = Flatten()(max_pool)
        else:
            # max_pool = GlobalMaxPooling1D()(convs[-1])
            # flat = max_pool
            raise NotImplementedError(
                'Sequence length must be known at this point. Pad and use mask.'
            )

        # Fully-Connected encoding layers (L2 activity regularization).
        fc_enc = [
            Dense(FLAGS.filters[-1],
                  kernel_initializer='glorot_uniform',
                  activation='relu',
                  activity_regularizer=regularizers.l2(0.02),
                  name='FCEnc1')(flat)
        ]

        for d in range(1, FLAGS.n_fc_layers):
            fc_enc.append(
                Dense(FLAGS.filters[-1],
                      kernel_initializer='glorot_uniform',
                      activation='relu',
                      activity_regularizer=regularizers.l2(0.02),
                      name='FCEnc{}'.format(d + 1))(fc_enc[-1]))

        encoded = fc_enc[-1]  # To access if model for encoding needed

        # Single-unit sigmoid head for binary classification.
        classifier = Dense(1, activation='sigmoid', name='Classifier')(encoded)

        model = Model(inputs=inp,
                      outputs=classifier,
                      name='CoHST',
                      classification=True)

    # Loss Functions
    losses = [binary_crossentropy]

    # Metrics
    metrics = [binary_accuracy]

    # Compilation

    model.compile(optimizer=FLAGS.optimizer,
                  loss=losses,
                  metrics=metrics,
                  lr=FLAGS.learning_rate)
    return model