def permute_ltee(k=5, n_blocks=2, analysis='PCA', iterations=10000):
    """Build a permutation null distribution for LTEE ordination statistics.

    The first time point's gene-by-population matrix is randomized, then each
    observed between-time-point increment is randomized and accumulated so
    every time point receives a permuted matrix.  Each permuted data set is
    ordinated ('PCA' or 'cMDS') and per-time-point statistics are appended to
    data/Good_et_al/permute_<analysis>.txt.

    Parameters
    ----------
    k : int
        Number of ordination axes used by the distance/angle statistics.
    n_blocks : int
        Unused; retained for backward compatibility.  # TODO confirm intent
    analysis : str
        Ordination method, 'PCA' or 'cMDS'; other values skip the iteration.
        (Was an undefined global name in the original.)
    iterations : int
        Number of permutations to run.  (Was an undefined name `iter`.)
    """
    df_path = os.path.expanduser(
        "~/GitHub/ParEvol") + '/data/Good_et_al/gene_by_pop.txt'
    df = pd.read_csv(df_path, sep='\t', header='infer', index_col=0)
    to_include = pt.complete_nonmutator_lines()
    # p5 is dropped from the non-mutator set (matches ltee_convergence below);
    # the original called .remove() on an undefined name `to_keep`
    to_include.remove('p5')
    df_nonmut = df[df.index.str.contains('|'.join(to_include))]
    # remove columns with all zeros
    df_nonmut = df_nonmut.loc[:, (df_nonmut != 0).any(axis=0)]
    # sample index format is assumed to be '<pop>_<generation>' — TODO confirm
    time_points = [int(x.split('_')[1]) for x in df_nonmut.index.values]
    time_points_set = sorted(set(time_points))
    time_points_positions = {}
    for x in time_points_set:
        time_points_positions[x] = [
            i for i, j in enumerate(time_points) if j == x
        ]

    # header now matches the six columns written per row (the original header
    # listed only three names)
    column_headers = ['Iteration', 'Generation', 'MCD', 'mean_angle',
                      'mean_length', 'mean_dist']
    out_path = pt.get_path() + '/data/Good_et_al/permute_' + analysis + '.txt'
    with open(out_path, 'w') as df_out:
        df_out.write('\t'.join(column_headers) + '\n')
        for i in range(iterations):
            # the original had a stray `continue` here that made the whole
            # loop body dead code
            print("Iteration " + str(i))
            matrix_0 = df_nonmut.iloc[
                time_points_positions[time_points_set[0]]]
            matrix_0_rndm = pt.random_matrix(matrix_0.values)
            df_rndm_list = [
                pd.DataFrame(data=matrix_0_rndm,
                             index=matrix_0.index,
                             columns=matrix_0.columns)
            ]
            # accumulate randomized increments, skipping the first time step
            for j in range(1, len(time_points_set)):
                tp = time_points_set[j]
                # '$' anchors the pattern so e.g. '_5000' cannot match '_50000'
                df_tp_minus1 = df_nonmut[df_nonmut.index.str.contains(
                    '_' + str(time_points_set[j - 1]) + '$')]
                df_tp = df_nonmut[df_nonmut.index.str.contains(
                    '_' + str(tp) + '$')]
                matrix_diff = df_tp.values - df_tp_minus1.values
                matrix_0_rndm = matrix_0_rndm + pt.random_matrix(matrix_diff)
                df_rndm_list.append(
                    pd.DataFrame(data=matrix_0_rndm,
                                 index=df_tp.index,
                                 columns=df_tp.columns))

            df_rndm = pd.concat(df_rndm_list)
            df_rndm_delta = pt.likelihood_matrix(
                df_rndm, 'Good_et_al').get_likelihood_matrix()
            if analysis == 'PCA':
                X = pt.hellinger_transform(df_rndm_delta)
                pca = PCA()
                matrix_rndm_delta_out = pca.fit_transform(X)
            elif analysis == 'cMDS':
                matrix_rndm_delta_bc = np.sqrt(
                    pt.get_bray_curtis(df_rndm_delta.values))
                matrix_rndm_delta_out = pt.cmdscale(matrix_rndm_delta_bc)[0]
            else:
                print("Analysis argument not accepted")
                continue

            df_rndm_delta_out = pd.DataFrame(data=matrix_rndm_delta_out,
                                             index=df_rndm_delta.index)
            for tp in time_points_set:
                df_rndm_delta_out_tp = df_rndm_delta_out[
                    df_rndm_delta_out.index.str.contains(
                        '_' + str(tp) + '$')]
                tp_matrix = df_rndm_delta_out_tp.values
                mean_angle = pt.get_mean_angle(tp_matrix, k=k)
                mcd = pt.get_mean_centroid_distance(tp_matrix, k=k)
                mean_length = pt.get_euc_magnitude_diff(tp_matrix, k=k)
                mean_dist = pt.get_mean_pairwise_euc_distance(tp_matrix, k=k)
                df_out.write('\t'.join([
                    str(i),
                    str(tp),
                    str(mcd),
                    str(mean_angle),
                    str(mean_length),
                    str(mean_dist)
                ]) + '\n')
def time_partition_ltee(k=5, iter=100):
    """Write z-scores contrasting divergence before vs. after each time point.

    For every time point t (up to generation 50000) the gene-by-population
    counts are partitioned into mutations accumulated by t and mutations
    gained between t and the final sample.  Both partitions are
    eigen-decomposed together, the mean pairwise Euclidean distance of each
    partition is computed on k axes, and each distance is standardized
    against `iter` randomized matrices.  Results go to
    data/Good_et_al/time_partition_z_scores.txt.

    Parameters
    ----------
    k : int
        Number of axes used by the pairwise-distance statistic.
    iter : int
        Number of random matrices in the null distribution.
        (Name shadows the builtin; kept for interface compatibility.)
    """
    df_path = os.path.expanduser(
        "~/GitHub/ParEvol") + '/data/Good_et_al/gene_by_pop.txt'
    df = pd.read_csv(df_path, sep='\t', header='infer', index_col=0)
    to_include = pt.complete_nonmutator_lines()
    df_nonmut = df[df.index.str.contains('|'.join(to_include))]
    # remove columns with all zeros
    df_nonmut = df_nonmut.loc[:, (df_nonmut != 0).any(axis=0)]
    # make sure it's sorted
    df_nonmut.sort_index(inplace=True)

    time_points = [int(x.split('_')[1]) for x in df_nonmut.index.values]
    time_points_set = sorted(set(time_points))
    time_points_positions = {}
    for x in time_points_set:
        time_points_positions[x] = [
            i for i, j in enumerate(time_points) if j == x
        ]
    t_final_df = df_nonmut.iloc[time_points_positions[max(time_points_set)]]
    t_final_np = t_final_df.values
    gene_names = df_nonmut.columns.tolist()

    out_path = (os.path.expanduser("~/GitHub/ParEvol") +
                '/data/Good_et_al/time_partition_z_scores.txt')
    with open(out_path, 'w') as df_out:
        df_out.write(
            '\t'.join(['Time', 'Time_less_z_score', 'Time_greater_z_score']) +
            '\n')
        for time_point in time_points_set:
            # very few mutations after generation 50000
            if time_point > 50000:
                continue
            print("Time point " + str(time_point))
            t_i_df = df_nonmut.iloc[time_points_positions[time_point]]
            t_i_np = t_i_df.values
            # Take the difference BEFORE dropping all-zero rows so the two
            # matrices stay row-aligned.  The original deleted rows from
            # t_i_np first, which breaks the subtraction (shape mismatch)
            # whenever a population had no mutations by time t.
            t_i_to_final_np = t_final_np - t_i_np
            # drop populations with no mutations in a given partition
            t_i_np = t_i_np[t_i_np.any(axis=1)]
            t_i_to_final_np = t_i_to_final_np[t_i_to_final_np.any(axis=1)]
            # boundary between the 'up to t' rows and the 'after t' rows of
            # the concatenated matrix; the original hard-coded 5 and mixed up
            # the two zero-row counts when slicing the partitions
            n_less = t_i_np.shape[0]

            t_concat = np.concatenate((t_i_np, t_i_to_final_np), axis=0)
            t_norm = cd.likelihood_matrix_array(
                t_concat, gene_names, 'Good_et_al').get_likelihood_matrix()
            t_norm_rel = t_norm / t_norm.sum(axis=1, keepdims=True)

            # first five axes
            e_vals, e_vecs = pt.pca_np(t_norm_rel)
            # The column v[:, i] is the normalized eigenvector corresponding
            # to the eigenvalue w[i].
            # NOTE(review): this slice keeps columns -1-k..-2, i.e. it skips
            # the last column; it assumes pt.pca_np orders eigenvalues so the
            # wanted axes sit there — confirm against pt.pca_np.
            e_vecs_k5 = e_vecs[:, -1 - k:-1]
            e_vec_t_less = e_vecs_k5[:n_less, :]
            e_vec_t_greater = e_vecs_k5[n_less:, :]

            dist_t_less = pt.get_mean_pairwise_euc_distance(e_vec_t_less, k=k)
            dist_t_greater = pt.get_mean_pairwise_euc_distance(
                e_vec_t_greater, k=k)

            dist_t_less_list = []
            dist_t_greater_list = []
            for _ in range(iter):
                t_i_np_rndm = pt.get_random_matrix(t_i_np)
                t_i_to_final_np_rndm = pt.get_random_matrix(t_i_to_final_np)
                t_rndm_concat = np.concatenate(
                    (t_i_np_rndm, t_i_to_final_np_rndm), axis=0)

                t_rndm_norm = cd.likelihood_matrix_array(
                    t_rndm_concat, gene_names,
                    'Good_et_al').get_likelihood_matrix()
                t_rndm_norm_rel = t_rndm_norm / t_rndm_norm.sum(axis=1,
                                                                keepdims=True)
                e_vals_rndm, e_vecs_rndm = pt.pca_np(t_rndm_norm_rel)
                e_vecs_rndm_k5 = e_vecs_rndm[:, -1 - k:-1]
                # use the same partition boundary as the observed data; the
                # original hard-coded [:5]/[5:] here, which is wrong whenever
                # zero rows were removed above
                e_vec_t_less_rndm = e_vecs_rndm_k5[:n_less, :]
                e_vec_t_greater_rndm = e_vecs_rndm_k5[n_less:, :]

                dist_t_less_list.append(pt.get_mean_pairwise_euc_distance(
                    e_vec_t_less_rndm, k=k))
                dist_t_greater_list.append(pt.get_mean_pairwise_euc_distance(
                    e_vec_t_greater_rndm, k=k))

            z_score_less = (dist_t_less - np.mean(dist_t_less_list)) \
                / np.std(dist_t_less_list)
            z_score_greater = (dist_t_greater - np.mean(dist_t_greater_list)) \
                / np.std(dist_t_greater_list)

            df_out.write('\t'.join(
                [str(time_point),
                 str(z_score_less),
                 str(z_score_greater)]) + '\n')
# ---- Example 3 ----
def plot_permutation(dataset = 'good', analysis = 'PCA', alpha = 0.05):
    """Plot observed vs. permuted mean pairwise distances for the LTEE data.

    Reads the permutation output written by permute_ltee
    (data/Good_et_al/permute_<analysis>.txt), ordinates the observed
    gene-by-population matrix the same way, and saves a two-panel figure
    (raw distances with permutation CIs on top, z-scores below) to
    figs/permutation_scatter_good.png.

    Parameters
    ----------
    dataset : str
        Unused; retained for backward compatibility.
    analysis : str
        'PCA' or 'cMDS'; anything else raises ValueError.
    alpha : float
        Tail probability used for the permutation confidence bounds.
    """
    df_path = pt.get_path() + '/data/Good_et_al/gene_by_pop.txt'
    df = pd.read_csv(df_path, sep = '\t', header = 'infer', index_col = 0)
    # NOTE(review): the original called this list 'to_exclude', but it selects
    # the rows that are KEPT.  It also keeps p5, whereas permute_ltee drops it
    # — confirm the permuted file was built from the same populations.
    to_keep = pt.complete_nonmutator_lines()
    to_keep.append('p5')
    df_nonmut = df[df.index.str.contains('|'.join(to_keep))]
    # remove columns with all zeros
    df_nonmut = df_nonmut.loc[:, (df_nonmut != 0).any(axis=0)]
    df_delta = pt.likelihood_matrix(df_nonmut, 'Good_et_al').get_likelihood_matrix()
    if analysis == 'PCA':
        X = pt.hellinger_transform(df_delta)
        pca = PCA()
        df_out = pca.fit_transform(X)
    elif analysis == 'cMDS':
        df_delta_bc = np.sqrt(pt.get_scipy_bray_curtis(df_delta.values))
        df_out = pt.cmdscale(df_delta_bc)[0]
    else:
        # the original fell through here and later crashed with a NameError
        raise ValueError("analysis must be 'PCA' or 'cMDS', got %r" % (analysis,))

    time_points_set = sorted(set(int(x.split('_')[1]) for x in df_nonmut.index.values))

    # observed per-time-point mean pairwise distances on the first 3 axes
    df_rndm_delta_out = pd.DataFrame(data=df_out, index=df_delta.index)
    mcds = []
    for tp in time_points_set:
        # '$' anchors the pattern so e.g. '_5000' cannot also match '_50000'
        df_rndm_delta_out_tp = df_rndm_delta_out[
            df_rndm_delta_out.index.str.contains('_' + str(tp) + '$')]
        mcds.append(pt.get_mean_pairwise_euc_distance(df_rndm_delta_out_tp.values, k=3))

    # permutation null distribution written by permute_ltee
    mcd_perm_path = pt.get_path() + '/data/Good_et_al/permute_' + analysis + '.txt'
    mcd_perm = pd.read_csv(mcd_perm_path, sep = '\t', header = 'infer', index_col = 0)
    mcd_perm_x = np.sort(list(set(mcd_perm.Generation.tolist())))
    lower_ci = []
    upper_ci = []
    mean_mcds = []
    std_mcds = []
    lower_z_ci = []
    upper_z_ci = []
    for x in mcd_perm_x:
        mcd_perm_y = mcd_perm.loc[mcd_perm['Generation'] == x]
        mcd_perm_y_sort = np.sort(mcd_perm_y.mean_dist.tolist())
        mean_mcd_perm_y = np.mean(mcd_perm_y_sort)
        std_mcd_perm_y = np.std(mcd_perm_y_sort)
        mean_mcds.append(mean_mcd_perm_y)
        std_mcds.append(std_mcd_perm_y)
        # empirical alpha / (1 - alpha) quantiles as asymmetric error bars
        lower_ci.append(mean_mcd_perm_y - mcd_perm_y_sort[int(len(mcd_perm_y_sort) * alpha)])
        upper_ci.append(abs(mean_mcd_perm_y - mcd_perm_y_sort[int(len(mcd_perm_y_sort) * (1 - alpha))]))
        # same bounds expressed as z-scores
        mcd_perm_y_sort_z = [ ((i - mean_mcd_perm_y) /  std_mcd_perm_y) for i in mcd_perm_y_sort]
        lower_z_ci.append(abs(mcd_perm_y_sort_z[int(len(mcd_perm_y_sort_z) * alpha)]))
        upper_z_ci.append(abs(mcd_perm_y_sort_z[int(len(mcd_perm_y_sort_z) * (1 - alpha))]))

    fig = plt.figure()

    # top panel: raw distances against the permutation envelope
    plt.figure(1)
    plt.subplot(211)
    plt.errorbar(mcd_perm_x, mean_mcds, yerr = [lower_ci, upper_ci], fmt = 'o', alpha = 0.5, \
        barsabove = True, marker = '.', mfc = 'k', mec = 'k', c = 'k', zorder=1)
    plt.scatter(time_points_set, mcds, c='#175ac6', marker = 'o', s = 70, \
        edgecolors='#244162', linewidth = 0.6, alpha = 0.5, zorder=2)
    plt.ylabel("Mean pair-wise \n Euclidean \n distance, " + r'$   \left \langle   d \right  \rangle$', fontsize = 14)

    # bottom panel: standardized (z-score) view of the same comparison
    plt.figure(1)
    plt.subplot(212)
    plt.errorbar(mcd_perm_x, [0] * len(mcd_perm_x), yerr = [lower_z_ci, upper_z_ci], fmt = 'o', alpha = 0.5, \
        barsabove = True, marker = '.', mfc = 'k', mec = 'k', c = 'k', zorder=1)
    # zip mean, std, and measured values to make z-scores
    zip_list = list(zip(mean_mcds, std_mcds, mcds))
    z_scores = [((i[2] - i[0]) / i[1]) for i in zip_list ]
    plt.scatter(time_points_set, z_scores, c='#175ac6', marker = 'o', s = 70, \
        edgecolors='#244162', linewidth = 0.6, alpha = 0.5, zorder=2)
    plt.ylim(-2.2, 2.2)
    plt.xlabel("Time (generations)", fontsize = 16)
    plt.ylabel("Standardized mean \n pair-wise Euclidean \n distance, " + r'$   z_{\left \langle   d \right  \rangle}$', fontsize = 14)

    fig.tight_layout()
    fig.savefig(pt.get_path() + '/figs/permutation_scatter_good.png', bbox_inches = "tight", pad_inches = 0.4, dpi = 600)
    plt.close()
def time_partition_ltee(k=5, iter=1000):
    """Write before/after-time-point distance statistics with permutation CIs.

    For every time point t (up to generation 50000) the gene-by-population
    counts are partitioned into mutations accumulated by t and mutations
    gained from t to the final sample.  Both partitions are PCA-ordinated
    together; the mean pairwise Euclidean distance of each partition, their
    difference, and 95% permutation confidence bounds are written to
    data/Good_et_al/time_partition_z_scores.txt.

    Parameters
    ----------
    k : int
        Number of principal axes used by the distance statistic.
    iter : int
        Number of randomizations for the confidence intervals.
        (Name shadows the builtin; kept for interface compatibility.)
    """
    df_path = mydir + '/data/Good_et_al/gene_by_pop.txt'
    df = pd.read_csv(df_path, sep='\t', header='infer', index_col=0)
    to_include = pt.complete_nonmutator_lines()
    df_nonmut = df[df.index.str.contains('|'.join(to_include))]
    # remove columns with all zeros
    df_nonmut = df_nonmut.loc[:, (df_nonmut != 0).any(axis=0)]
    # make sure it's sorted
    df_nonmut.sort_index(inplace=True)

    time_points = [int(x.split('_')[1]) for x in df_nonmut.index.values]
    time_points_set = sorted(set(time_points))
    time_points_positions = {}
    for x in time_points_set:
        time_points_positions[x] = [
            i for i, j in enumerate(time_points) if j == x
        ]
    t_final_np = df_nonmut.iloc[
        time_points_positions[max(time_points_set)]].values
    gene_names = df_nonmut.columns.tolist()

    with open(mydir + '/data/Good_et_al/time_partition_z_scores.txt',
              'w') as df_out:
        # NOTE(review): 'mbd' in two headers looks like a typo for 'mpd', but
        # the strings are kept as written — downstream readers may rely on them.
        df_out.write('\t'.join([
            'Time', 'less_mbd', 'greater_mpd', 'delta_mpd', 'less_mbd_025',
            'less_mbd_975', 'greater_mpd_025', 'greater_mpd_975', 'delta_mpd_025',
            'delta_mpd_975'
        ]) + '\n')
        pca = PCA()
        for time_point in time_points_set:
            # very few mutations after generation 50000
            if time_point > 50000:
                continue
            print("Time point " + str(time_point))
            t_i_np = df_nonmut.iloc[time_points_positions[time_point]].values
            # Take the difference BEFORE dropping all-zero rows so the two
            # matrices stay row-aligned.  The original deleted rows from
            # t_i_np first, which breaks the subtraction (shape mismatch)
            # whenever a population had no mutations by time t.
            t_i_to_final_np = t_final_np - t_i_np
            # drop populations with no mutations in a given partition
            t_i_np = t_i_np[t_i_np.any(axis=1)]
            t_i_to_final_np = t_i_to_final_np[t_i_to_final_np.any(axis=1)]
            # boundary between the 'up to t' rows and the 'after t' rows;
            # the original hard-coded 5 and mixed the two zero-row counts
            n_less = t_i_np.shape[0]

            t_concat = np.concatenate((t_i_np, t_i_to_final_np), axis=0)
            t_norm = cd.likelihood_matrix_array(
                t_concat, gene_names, 'Good_et_al').get_likelihood_matrix()
            t_norm_rel = t_norm / t_norm.sum(axis=1, keepdims=True)
            t_norm_rel -= np.mean(t_norm_rel, axis=0)
            t_norm_rel_pca = pca.fit_transform(t_norm_rel)
            # First k axes: sklearn's PCA orders components by DECREASING
            # explained variance, so the leading columns are the informative
            # ones.  The original sliced [-1-k:-1], i.e. the trailing,
            # least-informative axes.
            t_norm_rel_pca_k = t_norm_rel_pca[:, :k]
            dist_t_less = pt.get_mean_pairwise_euc_distance(
                t_norm_rel_pca_k[:n_less, :], k=k)
            dist_t_greater = pt.get_mean_pairwise_euc_distance(
                t_norm_rel_pca_k[n_less:, :], k=k)
            dist_t_change = dist_t_greater - dist_t_less

            dist_t_less_list = []
            dist_t_greater_list = []
            dist_t_change_list = []
            for i in range(iter):
                if i % 1000 == 0:
                    print("Iteration " + str(i))
                t_i_np_rndm = pt.get_random_matrix(t_i_np)
                t_i_to_final_np_rndm = pt.get_random_matrix(t_i_to_final_np)
                t_rndm_concat = np.concatenate(
                    (t_i_np_rndm, t_i_to_final_np_rndm), axis=0)
                t_rndm_norm = cd.likelihood_matrix_array(
                    t_rndm_concat, gene_names,
                    'Good_et_al').get_likelihood_matrix()
                t_rndm_norm_rel = t_rndm_norm / t_rndm_norm.sum(axis=1,
                                                                keepdims=True)
                t_rndm_norm_rel -= np.mean(t_rndm_norm_rel, axis=0)
                t_rndm_norm_rel_pca = pca.fit_transform(t_rndm_norm_rel)
                # first k axes, same convention as above
                t_rndm_norm_rel_pca_k = t_rndm_norm_rel_pca[:, :k]
                dist_t_less_rndm = pt.get_mean_pairwise_euc_distance(
                    t_rndm_norm_rel_pca_k[:n_less, :], k=k)
                dist_t_greater_rndm = pt.get_mean_pairwise_euc_distance(
                    t_rndm_norm_rel_pca_k[n_less:, :], k=k)
                dist_t_change_list.append(dist_t_greater_rndm - dist_t_less_rndm)
                dist_t_less_list.append(dist_t_less_rndm)
                dist_t_greater_list.append(dist_t_greater_rndm)

            dist_t_change_list.sort()
            dist_t_greater_list.sort()
            dist_t_less_list.sort()
            # empirical 95% CIs from the sorted null distributions
            dist_t_change_025 = dist_t_change_list[int(iter * 0.025)]
            dist_t_change_975 = dist_t_change_list[int(iter * 0.975)]
            dist_t_greater_025 = dist_t_greater_list[int(iter * 0.025)]
            dist_t_greater_975 = dist_t_greater_list[int(iter * 0.975)]
            dist_t_less_025 = dist_t_less_list[int(iter * 0.025)]
            dist_t_less_975 = dist_t_less_list[int(iter * 0.975)]
            df_out.write('\t'.join([str(time_point), str(dist_t_less), str(dist_t_greater), \
                                    str(dist_t_change), str(dist_t_less_025), str(dist_t_less_975), \
                                    str(dist_t_greater_025), str(dist_t_greater_975), \
                                    str(dist_t_change_025), str(dist_t_change_975)]) + '\n')
# ---- Example 5 ----
def ltee_convergence(alpha = 0.05, k = 5):
    """Plot mean pairwise PCA distance per LTEE time point.

    PCA-ordinates the non-mutator gene-by-population matrix and saves a
    scatter of mean pairwise Euclidean distance (on k axes) against time to
    figs/ltee_convergence.png.

    Parameters
    ----------
    alpha : float
        Unused; retained for backward compatibility.
    k : int
        Number of PCA axes used by the distance statistic.
    """
    df_path = os.path.expanduser("~/GitHub/ParEvol") + '/data/Good_et_al/gene_by_pop.txt'
    df = pd.read_csv(df_path, sep = '\t', header = 'infer', index_col = 0)
    to_keep = pt.complete_nonmutator_lines()
    # p5 is excluded from the non-mutator set
    to_keep.remove('p5')
    df_nonmut = df[df.index.str.contains('|'.join( to_keep))]
    # remove columns with all zeros
    df_nonmut = df_nonmut.loc[:, (df_nonmut != 0).any(axis=0)]
    gene_names = df_nonmut.columns.tolist()
    sample_names = df_nonmut.index.tolist()
    df_delta = cd.likelihood_matrix_array(df_nonmut, gene_names, 'Good_et_al').get_likelihood_matrix()
    # row-normalize to relative contributions before mean-centering
    df_delta = df_delta/df_delta.sum(axis=1)[:,None]
    X = pt.get_mean_center(df_delta)

    pca = PCA()
    df_out = pca.fit_transform(X)

    time_points_set = sorted(set(int(x.split('_')[1]) for x in df_nonmut.index.values))

    df_pca = pd.DataFrame(data=df_out, index=sample_names)
    mean_dist = []
    for tp in time_points_set:
        # '$' anchors the pattern so e.g. '_5000' cannot also match '_50000'
        df_pca_tp = df_pca[df_pca.index.str.contains('_' + str(tp) + '$')]
        mean_dist.append(pt.get_mean_pairwise_euc_distance(df_pca_tp.values, k = k))

    # The original referenced `fig` in tight_layout/savefig without ever
    # creating it (the plt.figure() call was commented out), so the function
    # always raised a NameError; the scatter itself was also commented out.
    fig = plt.figure()
    plt.scatter(time_points_set, mean_dist, marker = "o", edgecolors='#244162', c = '#175ac6', alpha = 0.4, s = 60, zorder=4)
    plt.xlabel("Time", fontsize = 14)
    plt.ylabel("Mean euclidean distance", fontsize = 12)
    fig.tight_layout()
    fig.savefig(os.path.expanduser("~/GitHub/ParEvol") + '/figs/ltee_convergence.png', bbox_inches = "tight", pad_inches = 0.4, dpi = 600)
    plt.close()