Пример #1
0
def load_peptide_data(filename, max_PEP=0.01, max_missed_cleavages=3):
    """Read a tab-separated peptide table and return its filtered intensities.

    Rows are kept only when ``PEP`` is strictly below ``max_PEP`` and
    ``Missed cleavages`` is strictly below ``max_missed_cleavages``.  The
    surviving rows are indexed by their identification columns, reduced to
    the corrected reporter-intensity columns, de-duplicated, and then passed
    through ``drop_zero_row`` and ``intensities_to_midx_df``.

    Args:
        filename: Path of the tab-separated peptide file to read.
        max_PEP: Exclusive upper bound on the ``PEP`` column.
        max_missed_cleavages: Exclusive upper bound on the
            ``Missed cleavages`` column.

    Returns:
        The result of ``intensities_to_midx_df`` applied to the filtered
        intensity table (presumably a frame with MultiIndex columns --
        TODO confirm against the ``midx`` module).
    """
    peptides = pd.read_csv(filename, sep="\t")
    intensity_columns = get_all_reporter_intensity_correct()

    # Apply the two quality filters one after the other; both are strict "<".
    confident = peptides.loc[peptides.PEP < max_PEP]
    filtered = confident.loc[
        confident["Missed cleavages"] < max_missed_cleavages
    ]

    # Identification metadata moves into the index so that only the
    # intensity columns remain as data.
    index_columns = [
        "Proteins",
        "Leading razor protein",
        "Unique (Proteins)",
        "Unique (Groups)",
        "PEP",
        "Score",
    ]
    intensities = filtered.set_index(index_columns)[intensity_columns]

    # drop_duplicates compares column values only (the index is ignored), so
    # rows with identical intensities collapse into one.  The original author
    # expected these to be mostly all-zero rows, with drop_zero_row removing
    # the one that is left (NOTE(review): confirm in pd_functions).
    intensities = drop_zero_row(intensities.drop_duplicates())

    return intensities_to_midx_df(intensities)
from get_variables import get_cell_line_states_replicates_from_reporter_intensity_cols
from midx import col_to_mIdx, intensities_to_midx_df, diffacto_col_to_mIdx, diffacto_to_midx_df
from pd_functions import drop_zero_row
from transform import aitchison_transform_part, aitchison_transform, norm_SL, calcNormFactors, irs_norm
from plot import plot_kde, kde_matrix_plot_all_channels, plot_kde_batch, kde_matrix_plot_batch, plot_intensity_boxplot, plot_diffacto_pca, plot_diffacto_pca_cell_line, pca_plot_ax, get_significant_proteins, volcano_plot, kde_matrix_all_samples
from top3 import top3, protSum_col_to_mIdx, protSum_intensities_to_midx_df, aggregate_protein_quantity, get_p_matrix
from de_analysis import get_log2FC_regulation, get_log2FC
from q_value import qvalues

# Turn off pandas' chained-assignment (SettingWithCopy) check for the session.
pd.options.mode.chained_assignment = None
# NOTE: machine-specific absolute path; the relative file name below is
# resolved against this directory.
os.chdir("/home/ptruong/git/lifeAndDeath/data/amirata")
df_raw = pd.read_csv("peptides tryptic.csv", sep="\t")

base_cols = get_base_cols_peptide()

reporter_intensity_corrected_cols = get_all_reporter_intensity_correct()
cell_lines, states, replicates = get_cell_line_states_replicates_from_reporter_intensity_cols(
    reporter_intensity_corrected_cols)

# Reuse the column list fetched above instead of calling the helper again.
df_base = df_raw[base_cols]

# filter df
df = df_raw[df_raw.PEP < 0.05]  # 5% PEP removed 9578 peptides
df = df[df["Missed cleavages"] < 3]  # 0 removed
df = df.set_index("Leading razor protein")

# Sort by reassignment rather than inplace=True so the result does not rely
# on mutating a frame that was derived from another one.
df = df.sort_values(by="Score", ascending=False)

# Reporter intensities plus the score, ordered from best to worst score.
df_int = df[reporter_intensity_corrected_cols + ["Score"]]
df_int = df_int.sort_values(by="Score", ascending=False)