def get_ppis(examples=10,
             path_swissprot=config.PATH_SWISSPROT,
             file_swissprot_proteins=config.FILE_SWISSPROT_PROTEINS,
             url_swissprot=config.URL_SWISSPROT_PROTEINS,
             path_reactome=config.PATH_REACTOME,
             file_reactome_internal_edges=config.REACTOME_INTERACTIONS,
             file_reactome_ppis=config.REACTOME_PPIS,
             path_pathwaymatcher=config.PATH_TOOLS,
             file_pathwaymatcher=config.FILE_PATHWAYMATCHER,
             url_pathwaymatcher=config.URL_PATHWAYMATCHER):
    """Return a dictionary of lexicographical interactions: accession --> set of accessions.

    If the cached unique-PPI file does not exist, it is created by running
    PathwayMatcher over the Reactome internal edges; otherwise the cached
    file is read directly.

    :param examples: number of interaction pairs to include in the returned
        subset. Above 8000 the subset is taken greedily in iteration order;
        otherwise `examples` random keys are sampled (seeded for
        reproducibility) with one random partner each.
    :return: dict mapping stripped accession keys to sets of stripped
        accession values.
    """
    if not os.path.exists(path_reactome + file_reactome_ppis):
        # Cache miss: build the PPI file from the Reactome internal edges.
        create_pathwaymatcher_files(path_swissprot, file_swissprot_proteins,
                                    url_swissprot, path_reactome,
                                    file_reactome_internal_edges,
                                    path_pathwaymatcher, file_pathwaymatcher,
                                    url_pathwaymatcher)
        print("Reading Reactome interactions...")
        ppis = dictionaries.read_dictionary_one_to_set(
            path_reactome, file_reactome_internal_edges,
            order_pairs=True, col_indices=(0, 1), ignore_header=True)
        dictionaries.write_dictionary_one_to_set(ppis, path_reactome,
                                                 file_reactome_ppis)
    else:
        print("Reading Reactome unique interactions...")
        ppis = dictionaries.read_dictionary_one_to_set(path_reactome,
                                                       file_reactome_ppis)

    ppi_subset = {}
    if examples > 8000:
        # Take the first `examples` (key, value) pairs in iteration order.
        taken = 0
        for key, values in ppis.items():
            for value in values:
                ppi_subset.setdefault(key.strip(), set()).add(value.strip())
                taken += 1
                if taken >= examples:
                    break
            else:
                continue  # inner loop exhausted without hitting the cap
            break  # cap reached inside the inner loop
    else:
        random.seed(77)  # fixed seed so the sampled subset is reproducible
        keys = random.sample(list(ppis.keys()), int(examples))
        for key in keys:
            # sorted(...) because random.sample on a set is a TypeError
            # since Python 3.11 (deprecated in 3.9); sorting also keeps
            # the seeded choice deterministic across runs.
            partner = random.sample(sorted(ppis[key]), 1)[0]
            ppi_subset.setdefault(key.strip(), set()).add(partner.strip())

    print("Reactome interactions READY")
    return ppi_subset
def test_read_dictionary_skip_header(self):
    """A header row must be skipped, not parsed as a key/value pair."""
    # Fixture: three-column file whose first row is a header.
    rows = [('Column1', 'Column2', 'Column3'),
            (1, 1, 2), (2, 3, 2), (3, 4, 5)]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as out:
        out.writelines(f"{a}\t{b}\t{c}\n" for a, b, c in rows)

    result = read_dictionary_one_to_set('',
                                        file_name,
                                        order_pairs=True,
                                        col_indices=(1, 2),
                                        ignore_header=True)

    # Header cells must not become keys; the data rows must.
    self.assertNotIn('Column1', result.keys(), msg="Missing key in dictionary")
    self.assertIn('1', result.keys(), msg="Missing key in dictionary")
    self.assertIn('2', result.keys(), msg="Missing key in dictionary")
    self.assertIn('4', result.keys(), msg="Missing key in dictionary")

    # Clean up the fixture file.
    os.remove(file_name)
def test_read_dictionary_missing_two_columns(self):
    """With a one column file, request default columns 0 and 1, report error"""
    # Fixture: one-column file, so the default col_indices (0, 1)
    # cannot be satisfied.
    file_name = TestCase.id(self) + '_single_column.txt'
    with open(file_name, 'w') as out:
        out.writelines(f"{n}\n" for n in range(5))

    with self.assertRaises(
            ValueError,
            msg='Should raise an exception because needed columns of the file are missing.'):
        read_dictionary_one_to_set('', file_name, order_pairs=True)

    # Clean up the fixture file.
    os.remove(file_name)
def setUp(self):
    # Letters 'A'..'W' mapped to their 0-based alphabet offsets.
    self.letters = {letter: index
                    for index, letter in enumerate(
                        chr(ord('A') + i) for i in range(23))}
    # Language -> extension pairs, with deliberate duplicates for 'C++'.
    languages = [('C++', '.cpp'), ('Python', '.py'), ('JavaScript', '.js'),
                 ('C++', '.hpp'), ('C++', '.cpp')]
    with open('languages.txt', 'w') as file_languages:
        file_languages.writelines(
            f"{name}\t{extension}\n" for name, extension in languages)
    self.languages = read_dictionary_one_to_set('./', 'languages.txt')
def test_read_dictionary_missing_index_columns(self):
    """With two columns file, indices other than (0, 1), like (1, 2), show error."""
    # Fixture: two-column file; requesting column index 2 must fail.
    pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as out:
        out.writelines(f"{left}\t{right}\n" for left, right in pairs)

    with self.assertRaises(
            ValueError,
            msg='Should raise an exception because needed columns of the file are missing.'):
        read_dictionary_one_to_set('', file_name, col_indices=(1, 2))

    # Clean up the fixture file.
    os.remove(file_name)
def test_read_dictionary_order_pairs_true(self):
    """Pairs in reverse lexicographic order are flipped before insertion."""
    # Fixture: ('c', 'b') is out of lexicographic order on purpose.
    pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as out:
        out.writelines(f"{left}\t{right}\n" for left, right in pairs)

    result = read_dictionary_one_to_set('', file_name, order_pairs=True)

    # ('c', 'b') must be stored as b --> c once ordering is applied,
    # so 'b' appears as a key and there are exactly three keys.
    self.assertIn(
        'b', result.keys(),
        msg="Missing key because it did not order the column values")
    self.assertEqual(3, len(result.keys()), msg="Wrong number of columns")

    # Clean up the fixture file.
    os.remove(file_name)
def test_read_dictionary_indices_1_2(self):
    """Columns 1 and 2 (not 0) supply the key/value pairs."""
    # Fixture: column 0 holds row numbers that must be ignored.
    rows = [(1, 1, 2), (2, 3, 2), (3, 4, 5)]
    file_name = TestCase.id(self) + '_pairs.txt'
    with open(file_name, 'w') as out:
        out.writelines(f"{a}\t{b}\t{c}\n" for a, b, c in rows)

    result = read_dictionary_one_to_set('',
                                        file_name,
                                        order_pairs=True,
                                        col_indices=(1, 2))

    # Keys come from the ordered (col1, col2) pairs: (1,2), (2,3), (4,5).
    self.assertIn('1', result.keys(), msg="Missing key in dictionary")
    self.assertIn('2', result.keys(), msg="Missing key in dictionary")
    self.assertNotIn('3', result.keys(), msg="Incorrect key in dictionary")
    self.assertIn('4', result.keys(), msg="Missing key in dictionary")

    # Clean up the fixture file.
    os.remove(file_name)
def create_ensembl_uniprot_mapping(config):
    """Build a one-to-set mapping from the STRING id-map file.

    Reads column 2 as keys and column 1 as values from
    config['STRING_ID_MAP'] under config['PATH_STRING'] (no header
    skipping, no pair ordering), then keeps only the part of each value
    before the first '|' — presumably the UniProt accession; TODO confirm
    against the file format.

    NOTE(review): naming is inconsistent — the function says Ensembl, the
    progress message says Entrez. The result maps each key to a *set* of
    accessions, not one-to-one.

    :param config: mapping providing 'PATH_STRING' and 'STRING_ID_MAP'.
    :return: dict of key --> set of accession strings.
    """
    print("Reading Entrez -- UniProt mapping...")
    temp_mapping = dictionaries.read_dictionary_one_to_set(
        config['PATH_STRING'],
        config['STRING_ID_MAP'],
        order_pairs=False,
        col_indices=(2, 1),
        ignore_header=False)
    # Extract the Uniprot accessions
    return {k: {p.split('|')[0] for p in v} for k, v in temp_mapping.items()}