def get_pathways_with_multiple_proteoforms():
    """Return the pathways that contain a protein with multiple proteoforms.

    The protein-to-proteoform mapping is read from
    ``mapping_proteins_to_proteoforms.tsv`` under ``config.GRAPHS_PATH``.
    For every pathway id in ``get_pathways()["stId"]`` the interaction
    network built with ``config.proteins`` and ``config.no_sm`` is loaded
    from ``config.PATHWAY_GRAPHS_PATH`` (and created first when its JSON file
    does not exist yet). A pathway is selected when at least one node of its
    network is a protein mapped to more than one proteoform.

    Returns:
        list: The selected pathway ids, in the order in which they appear in
        ``get_pathways()["stId"]``.
    """
    map_proteins_to_proteoforms = read_dictionary_one_to_set(
        config.GRAPHS_PATH,
        "mapping_proteins_to_proteoforms.tsv",
        col_indices=(0, 1))
    pathways = get_pathways()["stId"]

    # Keep the proteins of interest in a set: the membership test below runs
    # once per node of every pathway, so a list lookup would be linear each time.
    selected_proteins = {
        protein
        for protein, proteoforms in map_proteins_to_proteoforms.items()
        if len(proteoforms) > 1
    }

    selected_pathways = []
    for pathway in pathways:
        filename = get_json_filename(config.proteins, config.no_sm,
                                     config.PATHWAY_GRAPHS_PATH, pathway)
        # Build the network on demand so that it can be read right after.
        if not Path(filename).exists():
            create_pathway_interaction_network(pathway, config.proteins,
                                               config.no_sm,
                                               config.PATHWAY_GRAPHS_PATH)
        G = read_graph(filename)
        if any(node in selected_proteins for node in G.nodes):
            selected_pathways.append(pathway)

    return selected_pathways
    def test_read_dictionary_missing_two_columns(self):
        """With a one column file, request default columns 0 and 1, report error"""
        # Create a file with a single column, so that column 1 does not exist
        file_name = self.id() + '_single_column.txt'
        with open(file_name, 'w') as file:
            for x in range(5):
                file.write(f"{x}\n")

        # Remove the precondition file even when the assertion fails, so a
        # failing run does not leave it behind in the working directory.
        try:
            with self.assertRaises(
                    ValueError,
                    msg=
                    'Should raise an exception because needed columns of the file are missing.'
            ):
                read_dictionary_one_to_set('', file_name, order_pairs=True)
        finally:
            os.remove(file_name)
def get_pathways_with_multiple_proteoforms(data_path, output_path):
    """
    Get list of pathways which contain proteins with multiple proteoforms

    The protein-to-proteoform mapping is read from
    ``mapping_proteins_to_proteoforms.tsv`` located through ``data_path``.
    For every pathway id in ``get_pathways()["stId"]`` the participants are
    requested with ``config.proteins``, and the pathway is selected when at
    least one value of their ``"Id"`` column is a protein mapped to more than
    one proteoform.

    Args:
        data_path: Location handed to ``read_dictionary_one_to_set`` to find
            the mapping file.
        output_path: Handed unchanged to ``get_participants_by_pathway``.

    Returns:
        list: The selected pathway ids, in the order in which they appear in
        ``get_pathways()["stId"]``.
    """
    map_proteins_to_proteoforms = read_dictionary_one_to_set(
        data_path, "mapping_proteins_to_proteoforms.tsv", col_indices=(0, 1))

    # Proteins that have multiple proteoforms. A set keeps the membership
    # test in the loop below constant-time instead of scanning a list.
    selected_proteins = {
        protein
        for protein, proteoforms in map_proteins_to_proteoforms.items()
        if len(proteoforms) > 1
    }

    pathways = get_pathways()["stId"]  # Get complete list of pathways
    selected_pathways = []

    for pathway in pathways:
        participants = get_participants_by_pathway(pathway, config.proteins,
                                                   output_path)
        if any(participant in selected_proteins
               for participant in participants["Id"]):
            selected_pathways.append(pathway)

    return selected_pathways
 def setUp(self):
     """Prepare the fixtures used by the tests.

     ``self.letters`` maps the 23 capital letters from 'A' to 'W' to the
     numbers 0 to 22. ``self.languages`` holds what
     ``read_dictionary_one_to_set`` returns for ``languages.txt``, a
     two-column tab-separated file written to the current directory.
     """
     first_code = ord('A')
     self.letters = {
         chr(code): code - first_code
         for code in range(first_code, first_code + 23)
     }

     # Note that the ('C++', '.cpp') row appears twice in the file.
     rows = [('C++', '.cpp'), ('Python', '.py'), ('JavaScript', '.js'),
             ('C++', '.hpp'), ('C++', '.cpp')]
     with open('languages.txt', 'w') as file_languages:
         for language, extension in rows:
             file_languages.write(f"{language}\t{extension}\n")

     self.languages = read_dictionary_one_to_set('./', 'languages.txt')
    def test_read_dictionary_missing_index_columns(self):
        """With two columns file, indices other than (0, 1), like (1, 2), show error."""
        # Create a file with two columns, so that column index 2 does not exist
        pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            for x, y in pairs:
                file.write(f"{x}\t{y}\n")

        # Remove the precondition file even when the assertion fails, so a
        # failing run does not leave it behind in the working directory.
        try:
            with self.assertRaises(
                    ValueError,
                    msg=
                    'Should raise an exception because needed columns of the file are missing.'
            ):
                read_dictionary_one_to_set('', file_name, col_indices=(1, 2))
        finally:
            os.remove(file_name)
    def test_read_dictionary_skip_header(self):
        """With ignore_header=True, the first row is not read as a key, value pair."""
        # Create trio file with headers
        trios = [('Column1', 'Column2', 'Column3'), (1, 1, 2), (2, 3, 2),
                 (3, 4, 5)]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            for x, y, z in trios:
                file.write(f"{x}\t{y}\t{z}\n")

        # Remove the precondition file even when an assertion fails, so a
        # failing run does not leave it behind in the working directory.
        try:
            # Execute target method
            result = read_dictionary_one_to_set('',
                                                file_name,
                                                order_pairs=True,
                                                col_indices=(1, 2),
                                                ignore_header=True)

            # Check headers are not taken as key, value pairs
            self.assertNotIn('Column1',
                             result.keys(),
                             msg="Incorrect key in dictionary")
            # Columns 1 and 2 are the ones requested, so a header that was
            # not skipped would show up under 'Column2', not 'Column1'.
            self.assertNotIn('Column2',
                             result.keys(),
                             msg="Incorrect key in dictionary")
            self.assertIn('1', result.keys(), msg="Missing key in dictionary")
            self.assertIn('2', result.keys(), msg="Missing key in dictionary")
            self.assertIn('4', result.keys(), msg="Missing key in dictionary")
        finally:
            # Remove precondition files
            os.remove(file_name)

        def test_merge_dictionaries(self):
            """Merging two one-to-set dictionaries unites the keys and the value sets."""
            # NOTE(review): this def is indented one level deeper than the
            # sibling test methods; if it ends up nested inside the previous
            # test it is never collected by the test runner -- confirm.
            first = {'A': {'B', 'C'}, 'D': {'C'}, 'C': {'d'}}
            second = {'A': {'a', 'b', 'c'}, 'B': {'b'}, 'C': {'c', 'd', 'e'}}

            merged = merge_dictionaries(first, second)

            # Keys coming from either input are all present: A, B, C and D
            self.assertEqual(
                4,
                len(merged.keys()),
                msg="There is a wrong number of keys in the dictionary")

            # 'A' is in both inputs with disjoint value sets
            self.assertEqual(5,
                             len(merged['A']),
                             msg="The number of elements in 'A' should be 5")
            self.assertIn('a', merged['A'])

            # 'D' only exists in the first input
            self.assertIn('D', merged)
            self.assertEqual(1,
                             len(merged['D']),
                             msg="The number of elements in 'D' should be 1")

            # 'B' only exists in the second input
            self.assertIn('B', merged)
            self.assertEqual(1,
                             len(merged['B']),
                             msg="The number of elements in 'B' should be 1")

            # 'C' is in both inputs and the value 'd' is shared
            self.assertEqual(3,
                             len(merged['C']),
                             msg="The number of elements in 'C' should be 3")
            self.assertIn('c', merged['C'])
    def test_read_dictionary_order_pairs_true(self):
        """With order_pairs=True, each pair is stored with the smaller word as key."""
        # Create file with pairs. Some with inverted lexicographic order
        pairs = [('a', 'b'), ('c', 'b'), ('d', 'e')]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            for x, y in pairs:
                file.write(f"{x}\t{y}\n")

        # Remove the precondition file even when an assertion fails, so a
        # failing run does not leave it behind in the working directory.
        try:
            # Execute target method
            result = read_dictionary_one_to_set('',
                                                file_name,
                                                order_pairs=True)

            # Check the pairs order was corrected, showing them as key and
            # value when word1 < word2 in lexicographical order: ('c', 'b')
            # must be stored under the key 'b'.
            self.assertIn(
                'b',
                result.keys(),
                msg="Missing key because it did not order the column values")
            self.assertEqual(3,
                             len(result.keys()),
                             msg="Wrong number of columns")
        finally:
            os.remove(file_name)
    def test_read_dictionary_indices_1_2(self):
        """With col_indices=(1, 2), keys and values come from the second and third columns."""
        # Create file with three columns, some not in lexicographic order
        trios = [(1, 1, 2), (2, 3, 2), (3, 4, 5)]
        file_name = self.id() + '_pairs.txt'
        with open(file_name, 'w') as file:
            for x, y, z in trios:
                file.write(f"{x}\t{y}\t{z}\n")

        # Remove the file even when an assertion fails, so a failing run
        # does not leave it behind in the working directory.
        try:
            # Execute target method
            result = read_dictionary_one_to_set('',
                                                file_name,
                                                order_pairs=True,
                                                col_indices=(1, 2))

            # Check values are correct: the row (2, 3, 2) is reordered to the
            # pair ('2', '3'), so '3' must never appear as a key.
            self.assertIn('1', result.keys(), msg="Missing key in dictionary")
            self.assertIn('2', result.keys(), msg="Missing key in dictionary")
            self.assertNotIn('3',
                             result.keys(),
                             msg="Incorrect key in dictionary")
            self.assertIn('4', result.keys(), msg="Missing key in dictionary")
        finally:
            # Remove file
            os.remove(file_name)