Exemplo n.º 1
0
def add_curated(interactions: pd.DataFrame,
                interaction_curated: pd.DataFrame) -> pd.DataFrame:
    """
    Merge the curated interactions into an existing interaction table.

    The partner columns of ``interactions`` (``uniprot_1``/``uniprot_2`` and
    ``protein_1``/``protein_2``) are renamed to ``partner_a``/``partner_b``
    and its index labels are converted with ``str``. Both renames happen in
    place, so the caller's frame is modified.

    Curated pairs that occur more than once after ``normalize_interactions``
    are only reported on stdout (as CSV); they are not removed here.

    :param interactions: interactions to extend; modified in place.
    :param interaction_curated: curated interactions with ``partner_a`` and
        ``partner_b`` columns.
    :return: whatever
        ``merge_interactions.merge_iuphar_other_and_curated_interactions``
        returns for the renamed interactions and the curated table.
    """
    pair_columns = ['partner_a', 'partner_b']

    uniprot_to_partner = {'uniprot_1': 'partner_a', 'uniprot_2': 'partner_b'}
    interactions.rename(index=str, columns=uniprot_to_partner, inplace=True)

    normalized_curated = normalize_interactions(interaction_curated,
                                                'partner_a', 'partner_b')
    # keep=False flags every member of a repeated pair, not just the extras.
    is_repeated = normalized_curated.duplicated(pair_columns, keep=False)
    repeated_curated = normalized_curated[is_repeated]

    if not repeated_curated.empty:
        print('WARNING - Some curated interactions are duplicated.')
        # NOTE(review): index labels are used as positions for .iloc; this
        # presumably relies on interaction_curated having a default integer
        # index that normalize_interactions preserves - confirm.
        ordered_labels = repeated_curated.sort_values(pair_columns).index.values
        print(interaction_curated.iloc[ordered_labels].to_csv(index=False))

    protein_to_partner = {'protein_1': 'partner_a', 'protein_2': 'partner_b'}
    interactions.rename(index=str, columns=protein_to_partner, inplace=True)

    return merge_interactions.merge_iuphar_other_and_curated_interactions(
        interactions, interaction_curated)
def _drop_duplicates(iuphar_filtered_cellphone_format: pd.DataFrame) -> pd.DataFrame:
    """
    Remove repeated IUPHAR interactions.

    The table is passed through ``normalize_interactions`` on the
    ``uniprot_1``/``uniprot_2`` columns, and then only the first row of each
    (``uniprot_1``, ``uniprot_2``) pair is kept.

    :param iuphar_filtered_cellphone_format: interactions with ``uniprot_1``
        and ``uniprot_2`` columns.
    :return: the normalized interactions without repeated pairs.
    """
    interactions_normalized = normalize_interactions(
        iuphar_filtered_cellphone_format, 'uniprot_1', 'uniprot_2')
    return interactions_normalized.drop_duplicates(['uniprot_1', 'uniprot_2'])
Exemplo n.º 3
0
def merge_iuphar_other_and_curated_interactions(
        iuphar_other_interactions: pd.DataFrame,
        curated_interactions: pd.DataFrame) -> pd.DataFrame:
    """
    Combine IUPHAR/other interactions with curated ones, one row per pair.

    Both tables are stacked and passed through ``normalize_interactions`` on
    ``partner_a``/``partner_b``. Pairs that occur only once are kept as they
    are. For a pair that occurs several times a single row is kept:

    * the first row whose ``annotation_strategy`` is ``'curated'`` if there is
      one, otherwise the first occurrence of the pair;
    * its ``iuphar`` value is set to ``True`` when any occurrence of the pair
      has a truthy ``iuphar`` value.

    Missing ``iuphar`` values in the result are filled with ``False`` and the
    column is cast to ``bool``.

    :param iuphar_other_interactions: non-curated interactions.
    :param curated_interactions: curated interactions.
    :return: merged interactions with a fresh integer index.
    """
    pair_columns = ['partner_a', 'partner_b']

    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    all_interactions = pd.concat(
        [iuphar_other_interactions, curated_interactions], ignore_index=True)
    normalized_interactions = normalize_interactions(all_interactions,
                                                     'partner_a', 'partner_b')

    # keep=False selects every row of a repeated pair.
    duplicated_interactions = normalized_interactions[
        normalized_interactions.duplicated(pair_columns, keep=False)]

    # One representative row (the first occurrence) per repeated pair.
    unique_interactions = duplicated_interactions.drop_duplicates(pair_columns)

    def merge_values(interaction: pd.Series) -> pd.Series:
        """Collapse all occurrences of one repeated pair into a single row."""
        same_pair = duplicated_interactions[
            (duplicated_interactions['partner_a'] == interaction['partner_a'])
            &
            (duplicated_interactions['partner_b'] == interaction['partner_b'])]

        curated = same_pair[same_pair['annotation_strategy'] == 'curated']
        # Copy so that setting 'iuphar' below never writes into a slice of
        # duplicated_interactions.
        merged = interaction.copy() if curated.empty else curated.iloc[0].copy()

        if same_pair['iuphar'].any():
            merged['iuphar'] = True

        return merged

    merged_duplicated_interactions = unique_interactions.apply(merge_values,
                                                               axis=1)

    non_repeated_interactions = normalized_interactions.drop_duplicates(
        pair_columns, keep=False)
    interactions_merged = pd.concat(
        [non_repeated_interactions, merged_duplicated_interactions],
        sort=True, ignore_index=True)
    interactions_merged.fillna({'iuphar': False}, inplace=True)
    interactions_merged = interactions_merged.astype({'iuphar': bool})

    return interactions_merged
Exemplo n.º 4
0
def generate_interactions(
    proteins: str,
    genes: str,
    complex: str,
    user_interactions: Optional[str],
    user_interactions_only: bool,
    result_path: str,
    fetch_imex: bool,
    fetch_iuphar: bool,
    project_name: str,
) -> None:
    """
    Build ``interaction_input.csv`` in the project output directory.

    Unless ``user_interactions_only`` is set, the interactions come from the
    IMEx and IUPHAR sources: they are merged, filtered against the complexes
    table and the excluded-interaction file, and extended with the curated
    interactions. If a user interaction file is given, its rows are added
    last. The result is de-duplicated on (``partner_a``, ``partner_b``) with
    ``keep='last'``, sorted by those columns and written as CSV.

    :param proteins: path of the protein table.
    :param genes: path of the gene table.
    :param complex: path of the complex table.
    :param user_interactions: optional path of a user interaction file; it
        must contain ``partner_a`` and ``partner_b`` columns.
    :param user_interactions_only: use only the user interaction file.
    :param result_path: base path passed to ``utils.set_paths``.
    :param fetch_imex: forwarded to ``get_imex.call``.
    :param fetch_iuphar: forwarded to ``get_iuphar.call``.
    :param project_name: project name passed to ``utils.set_paths``.
    :raises Exception: if ``user_interactions_only`` is set but no
        ``user_interactions`` file was given.
    """
    if user_interactions_only and not user_interactions:
        raise Exception('You need to set --user-interactions parameter')

    output_path = utils.set_paths(result_path, project_name)
    downloads_path = utils.set_paths(
        utils.set_paths(result_path, project_name), 'downloads')

    proteins_table = utils.read_data_table_from_file(proteins)
    genes_table = utils.read_data_table_from_file(genes)
    complexes = utils.read_data_table_from_file(complex)

    if not user_interactions_only:
        raw_imex = get_imex.call(genes_table, downloads_path, fetch_imex)

        interactions_to_remove = utils.read_data_table_from_file(
            os.path.join(data_dir, 'sources/excluded_interaction.csv'))
        interaction_curated = utils.read_data_table_from_file(
            os.path.join(data_dir, 'sources/interaction_curated.csv'))

    # Stays None when no user file was supplied.
    user_table = None
    if user_interactions:
        separator = _get_separator(os.path.splitext(user_interactions)[-1])
        user_table = pd.read_csv(user_interactions, sep=separator)
        for partner_column in ('partner_a', 'partner_b'):
            user_table[partner_column] = user_table[partner_column].apply(
                lambda x: str(x).strip())
        user_table['annotation_strategy'] = 'user_curated'

        # The protein name columns are optional in the user file.
        for name_column in ('protein_name_a', 'protein_name_b'):
            if name_column not in user_table.columns:
                user_table[name_column] = ''

    result_columns = [
        'partner_a', 'partner_b', 'protein_name_a', 'protein_name_b',
        'annotation_strategy', 'source'
    ]
    if not user_interactions_only:
        print('Parsing IMEX file')
        imex_interactions = parse_interactions_imex(raw_imex, proteins_table,
                                                    genes_table)

        print('Getting iuphar data')
        raw_iuphar = get_iuphar.call(downloads_path, fetch_iuphar)

        print('Generating iuphar interactions')
        iuphar_interactions = parse_iuphar_guidetopharmacology.call(
            raw_iuphar, genes_table, proteins_table)

        print('Merging iuphar/imex')
        merged_interactions = merge_iuphar_imex_interactions(
            iuphar_interactions, imex_interactions)

        print('Removing complex interactions')
        no_complex_interactions = only_noncomplex_interactions(
            merged_interactions, complexes)

        print('Removing selected interactions')
        clean_interactions = remove_interactions_in_file(
            no_complex_interactions, interactions_to_remove)

        print('Adding curated interaction')
        interactions_with_curated = add_curated(clean_interactions,
                                                interaction_curated)

        # DataFrame.append was removed in pandas 2.0; pd.concat replaces it.
        # User rows go last and keep='last' is used below, so - assuming
        # normalize_interactions preserves row order - a user-supplied pair
        # overrides the same pair coming from the other sources.
        frames = [interactions_with_curated]
        if user_table is not None:
            frames.append(user_table)
        all_interactions = pd.concat(frames, ignore_index=True, sort=False)

        result = tools_helper.normalize_interactions(
            all_interactions, 'partner_a',
            'partner_b').drop_duplicates(['partner_a', 'partner_b'],
                                         keep='last')

    else:
        result = tools_helper.normalize_interactions(user_table, 'partner_a', 'partner_b') \
            .drop_duplicates(['partner_a', 'partner_b'], keep='last')

    result[result_columns].sort_values(['partner_a', 'partner_b']).to_csv(
        '{}/interaction_input.csv'.format(output_path), index=False)