Пример #1
0
def compute_pwcca(acts1, acts2, epsilon=0.):
    """Compute the projection-weighted mean of the CCA coefficients (PWCCA).

    CCA is run through ``cca_core.get_cca_similarity`` with coefficients
    enabled and directions disabled. One of the two views is then chosen as
    the reference: the first view when ``sum(x_idxs) <= sum(y_idxs)``, the
    second view otherwise. The CCA directions of the reference view are
    rebuilt from its own activations, orthonormalised with a QR
    decomposition, and each direction is weighted by the summed absolute
    projection of the reference activations onto it.

    Args:
        acts1: 2d numpy array, shaped (neurons, num_datapoints)
        acts2: 2d numpy array, shaped (neurons, num_datapoints)
        epsilon: forwarded unchanged to ``cca_core.get_cca_similarity``.

    Returns:
        A tuple ``(weighted_mean, weights, coefs)`` where ``coefs`` are the
        CCA coefficients of the reference view, ``weights`` are the
        projection weights normalised to sum to 1, and ``weighted_mean`` is
        ``sum(weights * coefs)``.
    """
    sresults = cca_core.get_cca_similarity(acts1,
                                           acts2,
                                           epsilon=epsilon,
                                           compute_dirns=False,
                                           compute_coefs=True,
                                           verbose=False)

    # Pick the reference view and gather everything that belongs to it.
    if np.sum(sresults["x_idxs"]) <= np.sum(sresults["y_idxs"]):
        acts = acts1
        idxs = sresults["x_idxs"]
        projection = sresults["coef_x"]
        means = sresults["neuron_means1"][idxs]
        coefs = sresults["cca_coef1"]
    else:
        # The directions of the second view must be built from acts2; the
        # previous code indexed acts1 here, mixing the two views.
        acts = acts2
        idxs = sresults["y_idxs"]
        projection = sresults["coef_y"]
        means = sresults["neuron_means2"][idxs]
        coefs = sresults["cca_coef2"]

    kept_acts = acts[idxs]
    # Centre, project onto the CCA basis, then add the means back.
    dirns = np.dot(projection, kept_acts - means) + means

    # Orthonormal basis spanning the CCA directions (one column each).
    P, _ = np.linalg.qr(dirns.T)
    weights = np.sum(np.abs(np.dot(P.T, kept_acts.T)), axis=1)
    weights = weights / np.sum(weights)

    return np.sum(weights * coefs), weights, coefs
Пример #2
0
def SVCCA(activations1, activations2, layer_number, num_dims=60):
    """Return the mean SVCCA correlation coefficient for one layer.

    Two CSV files are read with ``np.genfromtxt``; their paths are
    ``activations1 + str(layer_number) + '.csv'`` and the same for
    ``activations2``. Each matrix is decomposed with an SVD, the top
    ``num_dims`` singular directions are kept, and the reduced matrices are
    compared with ``cca_core.get_cca_similarity`` (``epsilon=1e-10``).

    Args:
        activations1: path prefix of the first activation CSV file.
        activations2: path prefix of the second activation CSV file.
        layer_number: value appended (via ``str``) to both prefixes.
        num_dims: number of leading singular directions to keep. Defaults to
            60, the value that used to be hard-coded.

    Returns:
        ``np.mean`` of the ``"cca_coef1"`` entry of the CCA result.

    Raises:
        ValueError: if either matrix has fewer than ``num_dims`` singular
            values.
    """
    acts1 = np.genfromtxt(activations1 + str(layer_number) + '.csv',
                          delimiter=',')
    acts2 = np.genfromtxt(activations2 + str(layer_number) + '.csv',
                          delimiter=',')

    # NOTE(review): the activations are used exactly as loaded, without mean
    # subtraction -- confirm that the CSV files are already centred.
    _, s1, V1 = np.linalg.svd(acts1, full_matrices=False)
    _, s2, V2 = np.linalg.svd(acts2, full_matrices=False)

    available = min(len(s1), len(s2))
    if available < num_dims:
        raise ValueError(
            "num_dims=%d exceeds the %d singular values available"
            % (num_dims, available))

    # Scale the leading right-singular rows by their singular values.
    svacts1 = np.dot(s1[:num_dims] * np.eye(num_dims), V1[:num_dims])
    svacts2 = np.dot(s2[:num_dims] * np.eye(num_dims), V2[:num_dims])

    svcca_results = cca_core.get_cca_similarity(svacts1,
                                                svacts2,
                                                epsilon=1e-10,
                                                verbose=False)
    return np.mean(svcca_results["cca_coef1"])
Пример #3
0
def get_similarity(acts1, acts2, verbose=False, epsilon=1e-10, method='mean'):
    """Compute a CCA-based similarity between two activation matrices.

    Args:
        acts1: first activation matrix, forwarded to the selected backend.
        acts2: second activation matrix, forwarded to the selected backend.
        verbose: forwarded to the CCA / SVCCA backends.
        epsilon: forwarded to the selected backend.
        method: one of
            ``'all'``   -- return the ``'cca_coef1'`` entry of the CCA result;
            ``'mean'``  -- return the first element of its ``'mean'`` entry;
            ``'svcca'`` -- same as ``'mean'`` but computed by
                           ``get_svcca_similarity`` with ``K=10``;
            ``'pwcca'`` -- return the first value of ``pwcca.compute_pwcca``.

    Returns:
        The similarity value(s) described above for the chosen ``method``.

    Raises:
        NotImplementedError: if ``method`` is not one of the names above.
    """
    if method in ('all', 'mean'):
        similarity_dict = cca_core.get_cca_similarity(
            acts1, acts2, verbose=verbose, epsilon=epsilon)
        if method == 'all':
            return similarity_dict['cca_coef1']
        return similarity_dict['mean'][0]  # contains two times the same value.

    if method == 'svcca':
        similarity_dict = get_svcca_similarity(
            acts1, acts2, K=10, verbose=verbose, epsilon=epsilon)
        return similarity_dict['mean'][0]

    if method == 'pwcca':
        # Imported lazily so the other methods (and the error path below) do
        # not require the pwcca module to be importable.
        import pwcca

        pwcca_mean, _, _ = pwcca.compute_pwcca(acts1, acts2, epsilon=epsilon)
        return pwcca_mean

    raise NotImplementedError(method)
Пример #4
0
def get_svcca_similarity(acts1, acts2, K=20, verbose=False, epsilon=None):
    """Run CCA on the top-K singular directions of two activation matrices.

    Adapted from the tutorial at
    https://github.com/google/svcca/tree/master/tutorials.

    Each input has its per-row mean (``axis=1``) subtracted, is decomposed
    with ``np.linalg.svd(full_matrices=False)``, and is replaced by its first
    ``K`` right-singular rows scaled by the matching singular values. The two
    reduced matrices are handed to ``cca_core.get_cca_similarity`` and its
    result is returned unchanged.

    NOTE(review): ``epsilon`` defaults to ``None`` and is forwarded as-is --
    confirm that ``cca_core.get_cca_similarity`` accepts ``None``.
    """

    def _reduce(acts):
        # Centre every row, then keep the K strongest singular directions.
        centered = acts - np.mean(acts, axis=1, keepdims=True)
        _, sing_vals, right_vecs = np.linalg.svd(centered,
                                                 full_matrices=False)
        return np.dot(sing_vals[:K] * np.eye(K), right_vecs[:K])

    reduced1 = _reduce(acts1)
    reduced2 = _reduce(acts2)
    return cca_core.get_cca_similarity(reduced1,
                                       reduced2,
                                       epsilon=epsilon,
                                       verbose=verbose)
Пример #5
0
def fourier_ccas(conv_acts1,
                 conv_acts2,
                 return_coefs=False,
                 compute_dirns=False,
                 verbose=False):
    """Computes cca similarity between two conv layers with DFT.

    This function takes in two sets of convolutional activations,
    conv_acts1 and conv_acts2. After resizing the spatial dimensions to be
    the same, it applies fft (via ``fft_resize``) and then computes the ccas
    independently for every spatial location.

    Finally, it applies the inverse fourier transform to get the CCA
    directions and neuron coefficients, when these are requested.

    Args:
        conv_acts1: numpy array with shape
                    [batch_size, height1, width1, num_channels1]
        conv_acts2: numpy array with shape
                    [batch_size, height2, width2, num_channels2]
        return_coefs: boolean. If True, the "neuron_coeffs1" and
                      "neuron_coeffs2" entries are inverse-transformed and
                      kept; otherwise they are deleted from each result.
        compute_dirns: boolean, used to determine whether results also
                       contain actual cca directions.
        verbose: boolean, forwarded to ``cca_core.get_cca_similarity``.

    Returns:
        all_results: a pandas dataframe with one row of cca results per
                     spatial location. Each row holds the entries returned
                     by ``cca_core.get_cca_similarity`` (minus the neuron
                     coefficients unless return_coefs=True) plus a
                     "location" entry with the ``(i, j)`` position.
    """
    height1, width1 = conv_acts1.shape[1], conv_acts1.shape[2]
    height2, width2 = conv_acts2.shape[1], conv_acts2.shape[2]

    # When the spatial sizes differ, both inputs are resized to the smaller
    # height and the smaller width.
    resize = height1 != height2 or width1 != width2
    height = min(height1, height2)
    width = min(width1, width2)
    new_size = [height, width] if resize else None

    # resize and preprocess with fft
    fft_acts1 = fft_resize(conv_acts1, resize=resize, new_size=new_size)
    fft_acts2 = fft_resize(conv_acts2, resize=resize, new_size=new_size)

    # Rows are collected in a list and turned into a DataFrame once at the
    # end: DataFrame.append was removed in pandas 2.0 and copied the whole
    # frame on every call.
    rows = []
    for i in range(height):
        for j in range(width):
            # compute_dirns is passed by keyword so it cannot be bound to a
            # different positional parameter of get_cca_similarity.
            results_dict = cca_core.get_cca_similarity(
                fft_acts1[:, i, j, :].T,
                fft_acts2[:, i, j, :].T,
                compute_dirns=compute_dirns,
                verbose=verbose)

            # apply inverse FFT to get coefficients and directions if specified
            if return_coefs:
                results_dict["neuron_coeffs1"] = np.fft.ifft2(
                    results_dict["neuron_coeffs1"])
                results_dict["neuron_coeffs2"] = np.fft.ifft2(
                    results_dict["neuron_coeffs2"])
            else:
                del results_dict["neuron_coeffs1"]
                del results_dict["neuron_coeffs2"]

            if compute_dirns:
                results_dict["cca_dirns1"] = np.fft.ifft2(
                    results_dict["cca_dirns1"])
                results_dict["cca_dirns2"] = np.fft.ifft2(
                    results_dict["cca_dirns2"])

            results_dict["location"] = (i, j)
            rows.append(results_dict)

    return pd.DataFrame(rows)
Пример #6
0
                   n_comp1=300,
                   feat_new=['pca' + str(i) for i in range(300)]):
    pca = PCA(n_components=n_comp1, random_state=42)
    df_pca = pd.DataFrame(pca.fit_transform(df), columns=feat_new)
    return (df_pca)


# Reduce both feature tables with pca_preprocess before comparing them.
df_L1_pc_x = pca_preprocess(df_L1000_x)
df_cp_pc_x = pca_preprocess(df_cp_x)
# The bare triple-quoted strings in this script are no-op expression
# statements; they appear to be markdown headings from a notebook export.
"""#### - CCA on CP & L1000 train data


"""

# Run CCA on the transposed value matrices of the two reduced tables.
# NOTE(review): the transpose presumably yields (features, samples), the
# layout cca_core expects -- verify against cca_core.get_cca_similarity.
cca_results = cca_core.get_cca_similarity(df_cp_pc_x.values.T,
                                          df_L1_pc_x.values.T,
                                          epsilon=1e-10,
                                          verbose=False)

# Line plot of every entry of "cca_coef1" against its index.
plt.figure(figsize=(12, 8))
sns.set_context('talk', font_scale=0.85)
sns.lineplot(x=range(len(cca_results["cca_coef1"])),
             y=cca_results["cca_coef1"])
plt.title(
    "CCA correlation coefficients between CP and L1000 canonical variables (300) after PCA"
)

# Summarise the same coefficients with their arithmetic mean.
print(
    "Mean Canonical Correlation co-efficient between CP and L1000 canonical variables (300):",
    np.mean(cca_results["cca_coef1"]))
"""#### - (Singular Vectors)CCA as a method to analyze the correlation between Cell painting & L1000"""
Пример #7
0
# Build the two models to compare: one CNN and one dense network.
model1 = cc.get_cnn_model(input_shape, num_classes)
model2 = cc.get_dense_model(input_shape, num_classes)

# Train both models with identical settings.
history1 = model1.fit(x_train,
                      y_train,
                      epochs=60,
                      batch_size=256,
                      validation_split=0.1)
history2 = model2.fit(x_train,
                      y_train,
                      epochs=60,
                      batch_size=256,
                      validation_split=0.1)

# Write one performance CSV per training history.
model_performance = cc.output_model(history1)
model_performance.to_csv('performance1.csv')
model_performance = cc.output_model(history2)
model_performance.to_csv('performance2.csv')

# Activations of both models on the first 600 training samples.
act1 = cc.get_acts_from_model(model1, x_train[range(600)])
act2 = cc.get_acts_from_model(model2, x_train[range(600)])

# The CCA directions are read from the result just below, so they have to be
# requested here; the previous compute_dirns=False contradicted that use.
cca = cc.get_cca_similarity(act1, act2, compute_dirns=True)
pd.DataFrame(cca['cca_dirns1']).round(3).to_csv('cca1.csv')
pd.DataFrame(cca['cca_dirns2']).round(3).to_csv('cca2.csv')

#pd.DataFrame(act1).to_csv('acts1.csv')
# history = model4.fit(x_train, y_train,epochs=12,batch_size=128,validation_data=(x_test,y_test))
# model_info = cc.output_model(history)
# model_info.to_csv('model_history.csv')
Пример #8
0
def compute_cca(embeddings, descriptors):
    """Print the mean of the "cca_coef1" CCA coefficients of the two inputs.

    Both arguments are forwarded to ``cca_core.get_cca_similarity`` with
    ``epsilon=1e-20`` and ``verbose=False``. Nothing is returned; the mean is
    only written to standard output.
    """
    cca_output = cca_core.get_cca_similarity(embeddings,
                                             descriptors,
                                             epsilon=1e-20,
                                             verbose=False)
    mean_coefficient = np.mean(cca_output["cca_coef1"])
    print(mean_coefficient)
Пример #9
0
        if subspace is None:
            assert (len(subspace.shape) == 2)
            subspace = next_batch
        else:
            subspace = np.concatenate([subspace, next_batch])
    fh.close()
    return subspace.transpose()  # return dimensions num_neurons1 x data_points


# The models must be run on the same data, with the same sequence length and batch size.
if __name__ == "__main__":
    parser = argparse.ArgumentParser(
        description='SVCCA on npy files generated by appending each batch.')
    parser.add_argument('--subspace-file1',
                        type=str,
                        help='location of first subspace saved as .npy')
    parser.add_argument('--subspace-file2',
                        type=str,
                        help='location of second subspace saved as .npy')
    parser.add_argument('--results-file',
                        type=str,
                        help='location to save mean cca scores')
    args = parser.parse_args()

    # Append the "mean" entry of the CCA result to the results file. The
    # context manager guarantees the file is flushed and closed, even when
    # loading a subspace or the CCA computation raises.
    with open(args.results_file, 'a') as results_file:
        similarity = cca_core.get_cca_similarity(
            load_subspace(args.subspace_file1),
            load_subspace(args.subspace_file2))
        print(similarity["mean"], file=results_file)