def get_ppis(examples=10,
             path_swissprot=config.PATH_SWISSPROT,
             file_swissprot_proteins=config.FILE_SWISSPROT_PROTEINS,
             url_swissprot=config.URL_SWISSPROT_PROTEINS,
             path_reactome=config.PATH_REACTOME,
             file_reactome_internal_edges=config.REACTOME_INTERACTIONS,
             file_reactome_ppis=config.REACTOME_PPIS,
             path_pathwaymatcher=config.PATH_TOOLS,
             file_pathwaymatcher=config.FILE_PATHWAYMATCHER,
             url_pathwaymatcher=config.URL_PATHWAYMATCHER):
    """Return a subset of the Reactome interactions as accession --> accessions set.

    The full interaction dictionary is loaded from the cached file
    ``path_reactome + file_reactome_ppis`` when it exists. Otherwise
    ``create_pathwaymatcher_files`` is called, the interactions are read from
    ``file_reactome_internal_edges`` (columns 0 and 1, header ignored, pairs
    ordered) and the result is written to the cache file.

    Args:
        examples: Number of interactions requested. Above 8000 the first
            ``examples`` (key, value) pairs are taken in iteration order;
            otherwise ``int(examples)`` keys are drawn at random (seed 77)
            and one partner is drawn for each of them.
        path_swissprot, file_swissprot_proteins, url_swissprot,
        path_pathwaymatcher, file_pathwaymatcher, url_pathwaymatcher:
            Forwarded unchanged to ``create_pathwaymatcher_files``.
        path_reactome: Directory prefix of the Reactome files.
        file_reactome_internal_edges: File the interactions are read from
            when the cache file is missing.
        file_reactome_ppis: Cache file with the interaction dictionary.

    Returns:
        dict mapping stripped accession --> set of stripped accessions. It
        holds fewer than ``examples`` interactions when fewer are available.
    """
    if not os.path.exists(path_reactome + file_reactome_ppis):
        create_pathwaymatcher_files(path_swissprot, file_swissprot_proteins,
                                    url_swissprot, path_reactome,
                                    file_reactome_internal_edges,
                                    path_pathwaymatcher, file_pathwaymatcher,
                                    url_pathwaymatcher)

        print("Reading Reactome interactions...")
        ppis = dictionaries.read_dictionary_one_to_set(
            path_reactome,
            file_reactome_internal_edges,
            order_pairs=True,
            col_indices=(0, 1),
            ignore_header=True)
        dictionaries.write_dictionary_one_to_set(ppis, path_reactome,
                                                 file_reactome_ppis)
    else:
        print("Reading Reactome unique interactions...")
        ppis = dictionaries.read_dictionary_one_to_set(path_reactome,
                                                       file_reactome_ppis)

    ppi_subset = {}

    # NOTE(review): 8000 is the original cut-off between sequential and random
    # selection; its rationale is not visible here -- confirm before changing.
    if examples > 8000:
        taken = 0
        for key, values in ppis.items():
            for value in values:
                ppi_subset.setdefault(key.strip(), set()).add(value.strip())
                taken += 1
                if taken >= examples:
                    break
            if taken >= examples:
                break
    else:
        random.seed(77)
        keys = list(ppis.keys())
        # Never request more keys than exist; random.sample would raise
        # ValueError, whereas the sequential branch simply returns what it has.
        sample_size = min(int(examples), len(keys))
        for key in random.sample(keys, sample_size):
            # random.sample() rejects sets on Python 3.11+; sorting yields a
            # sequence and makes the pick independent of set iteration order.
            partner = random.sample(sorted(ppis[key]), 1)[0]
            ppi_subset.setdefault(key.strip(), set()).add(partner.strip())

    print("Reactome interactions READY")
    return ppi_subset
示例#2
0
    def test_read_dictionary_skip_header(self):
        """With ignore_header=True the header row must not produce a key."""
        # Create trio file with headers
        trios = [('Column1', 'Column2', 'Column3'), (1, 1, 2), (2, 3, 2),
                 (3, 4, 5)]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            # Remove the precondition file even if an assertion below fails
            self.addCleanup(os.remove, file_name)
            for x, y, z in trios:
                file.write(f"{x}\t{y}\t{z}\n")

        # Execute target method
        result = read_dictionary_one_to_set('',
                                            file_name,
                                            order_pairs=True,
                                            col_indices=(1, 2),
                                            ignore_header=True)

        # Check headers are not taken as key, value pairs. With columns (1, 2)
        # an unskipped header row would show up as key 'Column2', so checking
        # 'Column1' alone could never fail.
        for header in ('Column1', 'Column2', 'Column3'):
            self.assertNotIn(header,
                             result.keys(),
                             msg="Header taken as key in dictionary")
        self.assertIn('1', result.keys(), msg="Missing key in dictionary")
        self.assertIn('2', result.keys(), msg="Missing key in dictionary")
        self.assertIn('4', result.keys(), msg="Missing key in dictionary")
示例#3
0
    def test_read_dictionary_missing_two_columns(self):
        """With a one column file, request default columns 0 and 1, report error"""
        # Create file with a single column
        file_name = self.id() + '_single_column.txt'
        with open(file_name, 'w') as file:
            # Remove the precondition file even if the assertion below fails
            self.addCleanup(os.remove, file_name)
            for x in range(5):
                file.write(f"{x}\n")

        with self.assertRaises(
                ValueError,
                msg=
                'Should raise an exception because needed columns of the file are missing.'
        ):
            read_dictionary_one_to_set('', file_name, order_pairs=True)
示例#4
0
 def setUp(self):
     """Prepare the fixtures: a letter --> index dictionary and a languages file.

     ``self.letters`` maps the 23 letters starting at 'A' to 0..22.
     ``languages.txt`` is written to the working directory as tab separated
     (language, extension) rows, including a repeated row, and then loaded
     into ``self.languages`` with ``read_dictionary_one_to_set``.
     """
     first_code = ord('A')
     self.letters = {
         chr(code): code - first_code
         for code in range(first_code, first_code + 23)
     }

     language_extensions = [
         ('C++', '.cpp'),
         ('Python', '.py'),
         ('JavaScript', '.js'),
         ('C++', '.hpp'),
         ('C++', '.cpp'),
     ]
     with open('languages.txt', 'w') as handle:
         handle.writelines(f"{name}\t{extension}\n"
                           for name, extension in language_extensions)

     self.languages = read_dictionary_one_to_set('./', 'languages.txt')
示例#5
0
    def test_read_dictionary_missing_index_columns(self):
        """With two columns file, indices other than (0, 1), like (1, 2), show error."""
        # Create file with two columns, so column index 2 does not exist
        pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            # Remove the precondition file even if the assertion below fails
            self.addCleanup(os.remove, file_name)
            for x, y in pairs:
                file.write(f"{x}\t{y}\n")

        with self.assertRaises(
                ValueError,
                msg=
                'Should raise an exception because needed columns of the file are missing.'
        ):
            read_dictionary_one_to_set('', file_name, col_indices=(1, 2))
示例#6
0
    def test_read_dictionary_order_pairs_true(self):
        """With order_pairs=True the lexicographically smaller value becomes the key."""
        # Create file with pairs. Some with inverted lexicographic order
        pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            # Remove the precondition file even if an assertion below fails
            self.addCleanup(os.remove, file_name)
            for x, y in pairs:
                file.write(f"{x}\t{y}\n")

        # Execute target method
        result = read_dictionary_one_to_set('', file_name, order_pairs=True)

        # Check the pairs order was corrected, showing them as key and value
        # when word1 < word2 in lexicographical order: ('c', 'b') must be
        # stored under key 'b'.
        self.assertIn(
            'b',
            result.keys(),
            msg="Missing key because it did not order the column values")
        self.assertEqual(3, len(result.keys()), msg="Wrong number of keys")
示例#7
0
    def test_read_dictionary_indices_1_2(self):
        """With col_indices=(1, 2) the pairs come from the second and third columns."""
        # Create file with three columns, some not in lexicographic order
        trios = [(1, 1, 2), (2, 3, 2), (3, 4, 5)]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            # Remove the file even if an assertion below fails
            self.addCleanup(os.remove, file_name)
            for x, y, z in trios:
                file.write(f"{x}\t{y}\t{z}\n")

        # Execute target method
        result = read_dictionary_one_to_set('',
                                            file_name,
                                            order_pairs=True,
                                            col_indices=(1, 2))

        # Check values are correct: row (2, 3, 2) yields the pair (3, 2),
        # which ordering stores under key '2', so '3' must not be a key.
        self.assertIn('1', result.keys(), msg="Missing key in dictionary")
        self.assertIn('2', result.keys(), msg="Missing key in dictionary")
        self.assertNotIn('3', result.keys(), msg="Incorrect key in dictionary")
        self.assertIn('4', result.keys(), msg="Missing key in dictionary")
示例#8
0
def create_ensembl_uniprot_mapping(config):
    """Read the STRING id map file and return a key --> set of accessions dictionary.

    The file ``config['STRING_ID_MAP']`` under ``config['PATH_STRING']`` is
    read with column 2 as key and column 1 as value, without reordering the
    pairs and without skipping a header. Each value is then cut down to the
    text before its first '|'.

    NOTE(review): presumably column 1 holds entries shaped like
    ``accession|name`` and column 2 the identifiers being mapped -- confirm
    against the actual file layout.

    Returns:
        dict mapping each key to the set of truncated values (one to set,
        not one to one).
    """
    print("Reading Entrez -- UniProt mapping...")
    raw_mapping = dictionaries.read_dictionary_one_to_set(
        config['PATH_STRING'],
        config['STRING_ID_MAP'],
        order_pairs=False,
        col_indices=(2, 1),
        ignore_header=False)

    accessions_by_id = {}
    for identifier, entries in raw_mapping.items():
        # Extract the Uniprot accessions: keep only the part before the first '|'
        accessions_by_id[identifier] = {
            entry.split('|')[0] for entry in entries
        }
    return accessions_by_id