示例#1
0
def meme_generate(W, output_file='meme.txt', prefix='filter', factor=None):
    """Write a collection of filters to a text file in MEME version 4 format.

    Args:
        W: iterable of 2-D arrays. Each entry is read column by column
            (``pwm[:, i]``) and must have exactly 4 rows, one per letter of
            the ``ACGT`` alphabet declared in the header.
        output_file: path of the file to create; an existing file is
            overwritten.
        prefix: text placed before the motif index on every ``MOTIF`` line.
        factor: when truthy, each filter is first passed through
            ``utils.normalize_pwm(filter, factor=factor)``; otherwise the
            filter is written as given.
    """
    # uniform background frequency over the four nucleotides
    nt_freqs = [1. / 4 for _ in range(4)]

    # the context manager guarantees the handle is closed (and buffered
    # output flushed) even when a malformed filter raises part-way through
    with open(output_file, 'w') as f:
        # intro material
        f.write('MEME version 4\n')
        f.write('\n')
        f.write('ALPHABET= ACGT\n')
        f.write('\n')
        f.write('Background letter frequencies:\n')
        f.write('A %.4f C %.4f G %.4f T %.4f \n' % tuple(nt_freqs))
        f.write('\n')

        for j, raw in enumerate(W):
            pwm = utils.normalize_pwm(raw, factor=factor) if factor else raw
            width = pwm.shape[1]

            f.write('MOTIF %s%d \n' % (prefix, j))
            # nsites is reported as the motif width, as no site count is known
            f.write('letter-probability matrix: alength= 4 w= %d nsites= %d \n' %
                    (width, width))
            # one output row per motif position, four letter values per row
            for i in range(width):
                f.write('%.4f %.4f %.4f %.4f \n' % tuple(pwm[:, i]))
            f.write('\n')
def fom_saliency_mul(X, layer, alphabet, nntrainer, sess, ax, title='notitle'):
    """Plot a first-order-mutagenesis saliency map for the sequence in X.

    Requires a deepomics trainer whose architecture is already built and
    whose session has been initialised with the parameters to evaluate.

    Args:
        X: input array; ``X.shape[1]`` and ``X.shape[3]`` are forwarded to
            ``mutate`` (presumably sequence length and alphabet size -
            TODO confirm against ``mutate``).
        layer: name (string) of the activation layer to read.
        alphabet: forwarded to ``visualize.plot_seq_pos_saliency``.
        nntrainer: object exposing ``get_activations(sess, data, layer=...)``.
        sess: session handed through to ``nntrainer``.
        ax: accepted for interface compatibility; not used here.
        title: accepted for interface compatibility; not used here.

    The function draws via ``visualize`` and returns nothing.
    """
    # activations of every mutant produced by ``mutate``
    mutants = mutate(X, X.shape[1], X.shape[3])
    mutant_feed = dict(inputs=mutants,
                       targets=np.ones((mutants.shape[0], 1)))
    mutant_out = nntrainer.get_activations(sess, mutant_feed, layer=layer)

    # activations of the unmodified (wild-type) input
    wt_feed = dict(inputs=X, targets=np.ones((X.shape[0], 1)))
    wt_out = nntrainer.get_activations(sess, wt_feed, layer=layer)

    # lay the mutant scores out as a 4 x length heat map and express each
    # one relative to the first wild-type prediction
    delta = mutant_out.reshape(X.shape[1], 4).T - wt_out[0]
    saliency = utils.normalize_pwm(delta, factor=4)

    visualize.plot_seq_pos_saliency(np.squeeze(X).T,
                                    saliency,
                                    alphabet=alphabet,
                                    nt_width=400)
示例#3
0
def fom_convsal(X,
                layer,
                alphabet,
                convidx,
                nntrainer,
                sess,
                title='notitle',
                figsize=(15, 2),
                fig=None,
                pos=None,
                idx=None):
    """Plot first-order-mutagenesis saliency for one neuron of a layer.

    Args:
        X: input array; ``X.shape[1]`` and ``X.shape[3]`` are forwarded to
            ``mutate`` (presumably sequence length and alphabet size -
            TODO confirm against ``mutate``).
        layer: name of the layer whose activations are read.
        alphabet: forwarded to ``visualize.plot_seq_pos_saliency``.
        convidx: three indices selecting the neuron on axes 1-3 of the
            layer's activation array.
        nntrainer: object exposing ``get_activations(sess, data, layer=...)``.
        sess: session handed through to ``nntrainer``.
        title: plot title; the sentinel ``'notitle'`` suppresses it.
        figsize: size of the new figure, used only when ``fig`` is falsy.
        fig: existing figure to add a subplot to; when falsy a new
            ``plt.figure`` is created instead.
        pos: ``(rows, cols)`` grid for ``fig.add_subplot`` (with ``fig``).
        idx: subplot index for ``fig.add_subplot`` (with ``fig``).
    """
    eps = 1e-7

    # neuron coordinates inside the layer output, all samples kept
    d1, d2, d3 = convidx
    neuron = (slice(None), d1, d2, d3)

    # score of the chosen neuron for every mutant produced by ``mutate``
    mutants = mutate(X, X.shape[1], X.shape[3])
    mutant_feed = dict(inputs=mutants,
                       targets=np.ones((mutants.shape[0], 1)))
    mutant_scores = nntrainer.get_activations(sess, mutant_feed,
                                              layer=layer)[neuron]

    # score of the same neuron for the wild-type input
    wt_feed = dict(inputs=X, targets=np.ones((X.shape[0], 1)))
    wt_score = nntrainer.get_activations(sess, wt_feed, layer=layer)[neuron]

    # 4 x length heat map of score changes relative to wild type, nudged
    # by eps before normalisation
    delta = (mutant_scores.reshape(X.shape[1], 4).T - wt_score) + eps
    saliency = utils.normalize_pwm(delta, factor=4)

    # pick the drawing target and the matching way to title it
    if fig:
        n_rows, n_cols = pos
        axis = fig.add_subplot(n_rows, n_cols, idx)
        apply_title = axis.set_title
    else:
        plt.figure(figsize=figsize)
        apply_title = plt.title

    if title != 'notitle':
        apply_title(title)

    visualize.plot_seq_pos_saliency(np.squeeze(X).T,
                                    saliency,
                                    alphabet=alphabet,
                                    nt_width=400)
示例#4
0
def clip_filters(W, threshold=0.5, pad=3):
    """Trim the low-information flanks from each filter.

    Args:
        W: 3-D array unpacked as ``(num_filters, _, filter_length)``.
        threshold: a position counts as informative when its information
            content, ``log2(4) + sum(w * log2(w))`` over the normalised
            filter column, exceeds this value.
        pad: number of extra positions kept on each side of the
            informative span (clamped to the filter boundaries).

    Returns:
        A list with one 2-D array per filter: the slice of the ORIGINAL
        (un-normalised) filter covering the informative span plus padding,
        or the whole filter when no position is informative.
    """
    _, _, filter_length = W.shape

    W_clipped = []
    for filt in W:
        w = utils.normalize_pwm(filt, factor=3)
        info = np.log2(4) + np.sum(w * np.log2(w + 1e-7), axis=0)
        index = np.where(info > threshold)[0]
        # Test emptiness by size, not by value: ``index.any()`` is False
        # for ``[0]``, which would skip clipping when column 0 is the only
        # informative position.
        if index.size:
            start = np.maximum(np.min(index) - pad, 0)
            end = np.minimum(np.max(index) + pad + 1, filter_length)
            W_clipped.append(filt[:, start:end])
        else:
            W_clipped.append(filt[:, :])

    return W_clipped
def entropy_weighted_cosine_distance(X_saliency, X_model):
    """Compare a saliency map with a model, weighting columns by information.

    The saliency map is normalised with ``utils.normalize_pwm(..., factor=3)``
    and compared column by column (axis 0 is reduced) with ``X_model``.
    Despite the name, the per-column measure is the normalised dot product
    (a cosine similarity).

    Returns:
        ``(tpr, fpr)`` where ``tpr`` is the mean of the per-column cosine
        weighted by the model's per-column information content, and
        ``fpr`` is the mean of ``1 - cosine`` weighted by
        ``2 - information``.
    """
    def _column_cosine(a, b):
        # dot product of matching columns divided by both column lengths
        len_a = np.sqrt(np.sum(a**2, axis=0))
        len_b = np.sqrt(np.sum(b**2, axis=0))
        return np.sum(a * b, axis=0) / len_a / len_b

    def _information(p):
        # log2(4) minus the per-column entropy; 1e-10 guards log2(0)
        return np.log2(4) - np.sum(-p * np.log2(p + 1e-10), axis=0)

    saliency_norm = utils.normalize_pwm(X_saliency, factor=3)
    cosine = _column_cosine(saliency_norm, X_model)
    info = _information(X_model)

    # information-weighted agreement
    tpr = np.sum(info * cosine) / np.sum(info)

    # complementary weights and complementary score
    low_info = -(info - 2)
    mismatch = -(cosine - 1)
    fpr = np.sum(mismatch * low_info) / np.sum(low_info)

    return tpr, fpr
示例#6
0
def fom_heatmap(X, layer, alphabet, nntrainer, sess, eps=0):
    """Return the normalised first-order-mutagenesis heat map for X.

    Args:
        X: input array; ``X.shape[1]`` and ``X.shape[3]`` are forwarded to
            ``mutate`` (presumably sequence length and alphabet size -
            TODO confirm against ``mutate``).
        layer: name of the layer whose activations are read.
        alphabet: accepted for interface compatibility; not used here.
        nntrainer: object exposing ``get_activations(sess, data, layer=...)``.
        sess: session handed through to ``nntrainer``.
        eps: constant added to the score differences before normalisation.

    Returns:
        The result of ``utils.normalize_pwm(..., factor=4)`` applied to the
        4 x ``X.shape[1]`` array of mutant-minus-wild-type scores.
    """
    # activations of every mutant produced by ``mutate``
    mutants = mutate(X, X.shape[1], X.shape[3])
    mutant_feed = dict(inputs=mutants,
                       targets=np.ones((mutants.shape[0], 1)))
    mutant_out = nntrainer.get_activations(sess, mutant_feed, layer=layer)

    # activations of the wild-type input
    wt_feed = dict(inputs=X, targets=np.ones((X.shape[0], 1)))
    wt_out = nntrainer.get_activations(sess, wt_feed, layer=layer)

    # heat-map layout, expressed relative to the first wild-type prediction
    grid = mutant_out.reshape(X.shape[1], 4).T
    shifted = grid - wt_out[0] + eps

    return utils.normalize_pwm(shifted, factor=4)