示例#1
0
    def _paths(finished_paths, path_so_far, start, muts):
        """Collect all paths from the start position through the mutation graph.

        Completed paths are appended to ``finished_paths`` in place, and that
        same list is returned from every branch, so callers can rely on either
        the list they passed in or the return value.

        :param finished_paths: list accumulating completed paths (mutated in place)
        :param path_so_far: the accumulated mutation->mutation path so far; it is
            never mutated here, extended copies are handed to the recursive calls
        :param start: the start position to traverse the muts from
        :param muts: a dictionary in the form {start_position: [Mutation]}
        :return: finished_paths, holding all paths through adjacent mutations
            starting with mutations at chromosome position start
        """
        if not muts or start not in muts:
            # No mutations available to continue this chain
            finished_paths.append(path_so_far)
            return finished_paths

        for mut in muts[start]:
            if path_so_far and not PhasingUtils.is_in_phase(path_so_far[-1], mut):
                # Next mutation not in phase, so stop this path here.
                # NOTE: this appends once per out-of-phase candidate, so the
                # same path can be recorded more than once.
                finished_paths.append(path_so_far)
            else:
                # Keep walking from the position immediately after this
                # mutation's end.
                OnpQueue._paths(finished_paths, path_so_far + [mut],
                                int(mut.end) + 1, muts)
        return finished_paths
示例#2
0
    def test_phasing_info_missing(self):
        """Expect has_phasing_information to be true only when both phasing annotations are present."""
        id_only, complete, bare, genotype_only = (MutationData() for _ in range(4))

        id_only.createAnnotation("phasing_id", "blah")
        complete.createAnnotation("phasing_id", "blah")
        complete.createAnnotation("phasing_genotype", "0|1")
        genotype_only.createAnnotation("phasing_genotype", "0|1")

        has_info = PhasingUtils.has_phasing_information

        # Lacking the genotype, the ID, or both must all be reported as
        # "no phasing information"; only the fully annotated mutation passes.
        self.assertFalse(has_info(id_only))
        self.assertTrue(has_info(complete))
        self.assertFalse(has_info(bare))
        self.assertFalse(has_info(genotype_only))
示例#3
0
    def test_phasing_info_missing(self):
        """Verify has_phasing_information against mutations with full, partial and no phasing annotations."""
        # One entry per mutation: the annotations to attach and whether the
        # mutation is then expected to report phasing information.
        cases = [
            ([("phasing_id", "blah")], False),  # genotype missing
            ([("phasing_id", "blah"), ("phasing_genotype", "0|1")], True),  # complete
            ([], False),  # nothing at all
            ([("phasing_genotype", "0|1")], False),  # ID missing
        ]

        # Build every mutation first, then annotate, then check.
        mutations = [MutationData() for _ in cases]
        for mutation, (annotations, _) in zip(mutations, cases):
            for name, value in annotations:
                mutation.createAnnotation(name, value)

        for mutation, (_, expected) in zip(mutations, cases):
            check = self.assertTrue if expected else self.assertFalse
            check(PhasingUtils.has_phasing_information(mutation))
示例#4
0
    def _paths(finished_paths, path_so_far, start, muts):
        """Return all paths from the start position through the mutation graph.

        ``finished_paths`` is filled in place and is also the return value on
        every branch, including the terminating case where no mutation begins
        at ``start``.

        :param finished_paths: completed paths (list, mutated in place)
        :param path_so_far: the accumulated mutation->mutation path so far; not
            mutated, each recursive call receives an extended copy
        :param start: the start position to traverse the muts from
        :param muts: a dictionary in the form {start_position: [Mutation]}
        :return: finished_paths, containing all paths through adjacent mutations
            starting with mutations at chromosome position start
        """
        if not muts or start not in muts:
            # No mutations available to continue this chain
            finished_paths.append(path_so_far)
            return finished_paths

        for mut in muts[start]:
            if path_so_far and not PhasingUtils.is_in_phase(path_so_far[-1], mut):
                # Next mutation not in phase, so stop this path here.
                # NOTE: one append happens per out-of-phase candidate, so the
                # same path may appear several times in finished_paths.
                finished_paths.append(path_so_far)
            else:
                # Continue from the position right after this mutation ends.
                OnpQueue._paths(finished_paths, path_so_far + [mut], int(mut.end) + 1, muts)
        return finished_paths
示例#5
0
    def test_phasing_check(self):
        """
        Exercise PhasingUtils.is_in_phase with complete, partial and absent phasing annotations.
        """
        (id_only, complete, bare, genotype_only,
         same_id, other_id, bare_too) = (MutationData() for _ in range(7))

        id_only.createAnnotation("phasing_id", "blah")
        complete.createAnnotation("phasing_id", "blah")
        complete.createAnnotation("phasing_genotype", "0|1")
        genotype_only.createAnnotation("phasing_genotype", "0|1")
        same_id.createAnnotation("phasing_id", "blah")
        same_id.createAnnotation("phasing_genotype", "0|1")
        other_id.createAnnotation("phasing_id", "blahdifferent")
        other_id.createAnnotation("phasing_genotype", "0|1")

        in_phase = PhasingUtils.is_in_phase

        # Same ID, but one side lacks the genotype: expected out of phase for
        # either value of unknown_val.
        for unknown_val in (True, False):
            self.assertFalse(in_phase(id_only, complete, unknown_val))
            self.assertFalse(in_phase(complete, id_only, unknown_val))

        # One side lacks the ID: expected out of phase for either value of
        # unknown_val.
        for unknown_val in (True, False):
            self.assertFalse(in_phase(genotype_only, complete, unknown_val))
            self.assertFalse(in_phase(complete, genotype_only, unknown_val))

        # Neither mutation has any phasing annotation: the result is expected
        # to echo unknown_val.
        for unknown_val in (True, False):
            self.assertTrue(in_phase(bare, bare_too, unknown_val) == unknown_val)
            self.assertTrue(in_phase(bare_too, bare, unknown_val) == unknown_val)

        # Both complete with the same ID: in phase regardless of unknown_val.
        for first, second, unknown_val in ((complete, same_id, True),
                                           (same_id, complete, False),
                                           (complete, same_id, False),
                                           (same_id, complete, True)):
            self.assertTrue(in_phase(first, second, unknown_val))

        # Both complete but with different IDs: out of phase regardless of
        # unknown_val.
        for first, second, unknown_val in ((complete, other_id, True),
                                           (other_id, complete, False),
                                           (complete, other_id, False),
                                           (other_id, complete, True)):
            self.assertFalse(in_phase(first, second, unknown_val))
示例#6
0
    def test_phasing_check(self):
        """
        Check the is_in_phase decision for every combination of phasing annotations used below.
        """
        m1, m2, m3, m4, m5, m6, m7 = [MutationData() for _ in range(7)]

        # (mutation, annotation name, annotation value); m3 and m7 stay bare.
        for mutation, name, value in (
                (m1, "phasing_id", "blah"),
                (m2, "phasing_id", "blah"),
                (m2, "phasing_genotype", "0|1"),
                (m4, "phasing_genotype", "0|1"),
                (m5, "phasing_id", "blah"),
                (m5, "phasing_genotype", "0|1"),
                (m6, "phasing_id", "blahdifferent"),
                (m6, "phasing_genotype", "0|1")):
            mutation.createAnnotation(name, value)

        is_in_phase = PhasingUtils.is_in_phase
        both_flags = (True, False)

        # m1 (no genotype) and m4 (no ID) are only partially annotated, so
        # neither may be in phase with the complete m2, whatever unknown_val is.
        for left, right in ((m1, m2), (m4, m2)):
            for unknown_val in both_flags:
                self.assertFalse(is_in_phase(left, right, unknown_val))
                self.assertFalse(is_in_phase(right, left, unknown_val))

        # m3 and m7 have no phasing info at all, so the answer should simply
        # be whatever unknown_val was passed in.
        for unknown_val in both_flags:
            self.assertTrue(
                is_in_phase(m3, m7, unknown_val) == unknown_val)
            self.assertTrue(
                is_in_phase(m7, m3, unknown_val) == unknown_val)

        # m5 shares m2's ID (in phase); m6 has a different ID (not in phase).
        # Neither outcome may depend on the unknown_val argument.
        for partner, check in ((m5, self.assertTrue), (m6, self.assertFalse)):
            for args in ((m2, partner, True),
                         (partner, m2, False),
                         (m2, partner, False),
                         (partner, m2, True)):
                check(is_in_phase(*args))