def call(gene_expanded, output_columns):
    """
    Prepare an expanded gene table for output.

    The frame is passed to ``filters.remove_not_defined_columns`` together with
    ``output_columns`` (helper not visible here; presumably it removes, in place,
    every column that is not listed). Afterwards the ``id_gene`` and
    ``protein_id`` columns are dropped and ``name`` is renamed to ``uniprot``.

    The pandas operations run in place, so the frame passed by the caller is
    modified; that same object is returned.
    """
    filters.remove_not_defined_columns(gene_expanded, output_columns)

    internal_id_columns = ['id_gene', 'protein_id']
    gene_expanded.drop(columns=internal_id_columns, inplace=True)

    # index=str also converts every index label to its string form.
    column_renames = {'name': 'uniprot'}
    gene_expanded.rename(index=str, columns=column_renames, inplace=True)

    return gene_expanded
示例#2
0
    def add(self, interactions):
        """
        Append interactions to ``interaction_table``.

        The ``partner_a`` / ``partner_b`` columns are blended with the name/id
        pairs of the ``multidata`` repository through ``self.blend_dataframes``,
        the result is filtered against the column names reported for
        ``interaction_table`` and then written to the database.
        """
        multidata_name_ids = self.database_manager.get_repository('multidata').get_all_name_id()
        partner_columns = ['partner_a', 'partner_b']
        interaction_df = self.blend_dataframes(interactions, partner_columns, multidata_name_ids,
                                               'name', 'multidata')

        table_columns = self.database_manager.get_column_table_names('interaction_table')
        # Return value is ignored: the helper is relied on to modify interaction_df in place.
        filters.remove_not_defined_columns(interaction_df, table_columns)

        engine = self.database_manager.database.engine
        interaction_df.to_sql(name='interaction_table', if_exists='append', con=engine,
                              index=False, chunksize=50)
示例#3
0
def call(interactions_expanded: pd.DataFrame) -> pd.DataFrame:
    """
    Format an expanded interactions table for export.

    ``name_1`` / ``name_2`` are renamed (in place) to ``multidata_name_1`` /
    ``multidata_name_2``, the frame is filtered against a fixed list of export
    columns, and the four name columns are moved to the front through
    ``dataframe_format.bring_columns_to_start``, whose result is returned.
    """
    multidata_renames = {'name_1': 'multidata_name_1', 'name_2': 'multidata_name_2'}
    # index=str also converts every index label to its string form.
    interactions_expanded.rename(index=str, columns=multidata_renames, inplace=True)

    export_columns = [
        'comments_interaction', 'entry_name_1', 'entry_name_2', 'multidata_name_1',
        'multidata_name_2', 'score_1', 'score_2', 'source',
    ]
    # Return value is ignored: the helper is relied on to modify the frame in place.
    filters.remove_not_defined_columns(interactions_expanded, export_columns)

    leading_columns = ['multidata_name_1', 'entry_name_1', 'multidata_name_2', 'entry_name_2']
    return dataframe_format.bring_columns_to_start(leading_columns, interactions_expanded)
示例#4
0
def _only_uniprots_in_df(uniprots_df, inweb_interactions):
    """
    Keep only the interactions whose two proteins both appear in ``uniprots_df``.

    ``inweb_interactions`` is inner-joined with ``uniprots_df`` once on
    ``protein_1`` and once on ``protein_2`` (matching the ``uniprot`` column).
    After each join the frame is passed to ``remove_not_defined_columns`` with
    the original interaction columns, and fully duplicated rows are removed
    before the final result is returned.
    """
    matched = inweb_interactions
    for protein_column in ('protein_1', 'protein_2'):
        matched = pd.merge(matched,
                           uniprots_df,
                           left_on=[protein_column],
                           right_on=['uniprot'],
                           how='inner')
        # Return value is ignored: the helper is relied on to modify `matched` in place.
        remove_not_defined_columns(matched, inweb_interactions.columns.values)

    # Prevents duplicated interactions if any uniprot is duplicated in uniprots_df or interactions
    first_occurrence = ~matched.duplicated()
    matched = matched[first_occurrence]

    return remove_not_defined_columns(matched, inweb_interactions.columns.values)
示例#5
0
    def add(self, genes: pd.DataFrame):
        """
        Append genes to the ``gene`` table.

        Protein ids and multidata names are read with a joined query, blended
        into ``genes`` on the ``name`` column through ``self._blend_multidata``,
        and ``id_protein`` is renamed to ``protein_id``. The frame is then
        filtered against the column names reported for the ``gene`` table and
        written to the database.
        """
        database = self.database_manager.database

        protein_names_query = database.session.query(Protein.id_protein, Multidata.name).join(Multidata)
        multidatas = pd.read_sql(protein_names_query.statement, database.session.bind)

        genes = self._blend_multidata(genes, ['name'], multidatas)

        # index=str also converts every index label to its string form.
        foreign_key_rename = {'id_protein': 'protein_id'}
        genes.rename(index=str, columns=foreign_key_rename, inplace=True)

        gene_table_columns = self.database_manager.get_column_table_names('gene')
        genes = filters.remove_not_defined_columns(genes, gene_table_columns)

        genes.to_sql(name='gene', if_exists='append', con=database.engine,
                     index=False, chunksize=50)
示例#6
0
    def add(self, complexes):
        """
        Uploads complex data from csv.

        - Creates new complexes in Multidata table
        - Creates reference in Complex table
        - Creates complex composition to define complexes.

        Rows whose ``protein_1`` .. ``protein_4`` entries cannot all be resolved
        to a known protein are reported through ``core_logger`` and skipped.
        Complexes whose name is already present in the multidata table are not
        inserted into ``multidata_table`` / ``complex_table`` again.

        Note that the ``protein_*`` / ``Name_*`` / ``Unnamed*`` columns are dropped
        from the passed frame in place. Returns ``None``; nothing is written when
        ``complexes`` is empty.
        """

        if complexes.empty:
            return

        database = self.database_manager.database

        # A set gives constant-time membership tests when filtering below.
        existing_complexes = {c[0] for c in database.session.query(Multidata.name).all()}
        proteins = {
            p[0]: p[1]
            for p in database.session.query(Multidata.name, Multidata.id_multidata).join(Protein).all()
        }

        # Get complex composition info.
        # Row *positions* (not index labels) are recorded because they are used
        # with the positional ``.iloc`` indexer further down.
        complete_positions = []
        incomplete_positions = []
        missing_proteins = []
        complex_map = {}
        protein_columns = ['protein_1', 'protein_2', 'protein_3', 'protein_4']
        for position, (_, row) in enumerate(complexes.iterrows()):
            missing = False
            protein_id_list = []
            for protein in protein_columns:
                if pd.isnull(row[protein]):
                    continue
                protein_id = proteins.get(row[protein])
                if protein_id is None:
                    missing = True
                    missing_proteins.append(row[protein])
                else:
                    protein_id_list.append(protein_id)
            if missing:
                incomplete_positions.append(position)
            else:
                complex_map[row['name']] = protein_id_list
                complete_positions.append(position)

        if incomplete_positions:
            core_logger.warning('MISSING PROTEINS:')
            for protein in missing_proteins:
                # Log the protein itself; calling the result of warning() would fail.
                core_logger.warning(protein)

            core_logger.warning('COMEPLEXES WITH MISSING PROTEINS:')
            core_logger.warning(complexes.iloc[incomplete_positions, :]['name'])

        # Insert complexes
        # Remove unwanted columns (in place, so the caller's frame is affected too).
        removal_columns = [
            x for x in complexes.columns if 'protein_' in x or 'Name_' in x or 'Unnamed' in x]
        complexes.drop(removal_columns, axis=1, inplace=True)

        # Remove rows with missing proteins; copy so the assignments below work
        # on an independent frame instead of a slice.
        complexes = complexes.iloc[complete_positions, :].copy()

        # Convert ints to bool
        bools = ['receptor', 'other', 'secreted_highlight', 'transmembrane', 'secreted',
                 'peripheral']
        complexes[bools] = complexes[bools].astype(bool)

        # Drop existing complexes. ``isin`` always yields a boolean mask, even
        # when no rows are left.
        complexes = complexes[~complexes['name'].isin(existing_complexes)]

        multidata_df = filters.remove_not_defined_columns(
            complexes.copy(),
            self.database_manager.get_column_table_names('multidata_table'))

        multidata_df = self._add_complex_optimitzations(multidata_df)
        multidata_df.to_sql(name='multidata_table', if_exists='append', con=database.engine,
                            index=False, chunksize=50)

        # Now find id's of new complex rows
        new_complexes = {
            c[0]: c[1]
            for c in database.session.query(Multidata.name, Multidata.id_multidata).all()
        }

        # Build set of complexes
        # NOTE(review): complex_map still contains complete complexes whose name
        # already existed before this call, so their composition rows are
        # appended again below - confirm this is intended.
        complex_set = []
        complex_table = []
        for complex_name, protein_ids in complex_map.items():
            complex_id = new_complexes[complex_name]
            for protein_id in protein_ids:
                complex_set.append((complex_id, protein_id, len(protein_ids)))
            complex_table.append({'complex_multidata_id': complex_id, 'name': complex_name})

        # Insert complex composition
        complex_set_df = pd.DataFrame(
            complex_set,
            columns=['complex_multidata_id', 'protein_multidata_id', 'total_protein'])

        # Explicit columns keep the merge key available when no complex was complete.
        complex_table_df = pd.DataFrame(complex_table, columns=['complex_multidata_id', 'name'])
        complex_table_df = pd.merge(complex_table_df, complexes, on='name')

        # Return value is ignored: the helper is relied on to modify the frame in place.
        filters.remove_not_defined_columns(
            complex_table_df,
            self.database_manager.get_column_table_names('complex_table'))

        complex_table_df.to_sql(
            name='complex_table', if_exists='append',
            con=database.engine, index=False, chunksize=50)

        complex_set_df.to_sql(
            name='complex_composition_table', if_exists='append',
            con=database.engine, index=False, chunksize=50)
def process_protein_data(proteins: pd.DataFrame,
                         protein_columns: list) -> pd.DataFrame:
    """
    Build the protein-table view of ``proteins``.

    Works on a copy, so the input frame is left untouched. The copy is passed to
    ``filters.remove_not_defined_columns`` with ``protein_columns`` plus
    ``'name'``, and the helper's result is returned.
    """
    protein_frame = proteins.copy()
    wanted_columns = protein_columns + ['name']
    return filters.remove_not_defined_columns(protein_frame, wanted_columns)
def process_multidata_data(proteins: pd.DataFrame,
                           multidata_columns: list) -> pd.DataFrame:
    """
    Build the multidata-table view of ``proteins``.

    A copy of ``proteins`` is passed to ``filters.remove_not_defined_columns``
    with ``multidata_columns``; the helper's result is returned and the input
    frame is left untouched.
    """
    protein_copy = proteins.copy()
    filtered = filters.remove_not_defined_columns(protein_copy, multidata_columns)
    return filtered
def call(genes: pd.DataFrame, gene_columns: list) -> pd.DataFrame:
    """
    Prepare a gene table for insertion.

    Renames ``uniprot`` to ``name`` in place, then passes the frame to
    ``filters.remove_not_defined_columns`` with ``gene_columns`` plus ``'name'``
    (the helper's return value is ignored, so it is relied on to work in
    place). The frame passed by the caller is modified and returned.
    """
    uniprot_rename = {'uniprot': 'name'}
    # index=str also converts every index label to its string form.
    genes.rename(index=str, columns=uniprot_rename, inplace=True)

    allowed_columns = gene_columns + ['name']
    filters.remove_not_defined_columns(genes, allowed_columns)

    return genes