Пример #1
0
 def test_random_chromosome_compostion(self):
     """Check that a chromosome built without arguments has a valid sequence.

     The sequence must be a non-empty string and every character in it,
     not just the first one, must be one of ``a b c d * : / ? +``.
     """
     chrom = Chromosome()
     assert isinstance(chrom.sequence, str)
     # fullmatch rejects a valid prefix followed by stray characters, which
     # re.match would silently accept.
     assert re.fullmatch(r'[abcd*:/?+]+', chrom.sequence)
Пример #2
0
    def generate(self):
        """Write a plain-text genetics report to ``reports/_<unix time>.txt``.

        The report lists the nucleotide string, the configured expected
        chromosome length, nine example sequences, and the mean and standard
        deviation of the lengths of 999 further chromosomes.
        """
        dna = Chromosome()
        cfg = AppSettings()

        # exist_ok avoids the check-then-create race of a separate exists()
        # test.
        os.makedirs("reports", exist_ok=True)

        report_path = "reports/_" + str(int(time.time())) + ".txt"
        # The context manager closes the file even if building a chromosome
        # or computing the statistics raises part-way through the report.
        with open(report_path, "w") as f:
            f.write(
                "==========================\n"
                "==========================\n"
                "==\n"
                "== Genetics\n\n"
                f"nucleotides: {dna.nucleotides()}\n"
                f"expected length: {cfg.genetics.chromosome_length}\n\n"
                "Examples: \n"
            )
            for _ in range(1, 10):
                f.write(Chromosome().sequence + "\n")

            lengths = [len(Chromosome().sequence) for _ in range(1, 1000)]

            f.write("\n\nmean_length (standard deviation):  "
                    f"{statistics.mean(lengths)} "
                    f"({statistics.stdev(lengths)})")
Пример #3
0
    def test_insertion_length(self):
        """Tests that insertion mutations are of the correct length.

        Performs ``reps`` insertions on fresh chromosomes and asserts that
        the mean growth in sequence length lies within four standard errors
        of the configured ``mutation_length``.
        """
        cfg = AppSettings()
        reps = 1000
        deltas = []

        for _ in range(reps):
            dna = Chromosome()
            init_length = len(dna.sequence)
            dna.insertion()
            deltas.append(len(dna.sequence) - init_length)

        expected_delta = cfg.genetics.mutation_length
        # scipy's nbinom(n, p) has mean n * (1 - p) / p, so a mean of
        # mutation_length with n = 1 requires p = 1 / (1 + mutation_length).
        # Passing the complement describes a distribution whose mean is
        # 1 / mutation_length and gives the wrong variance for the tolerance.
        p = 1 - expected_delta / (1 + expected_delta)
        var = nbinom.var(1, p)

        # Four standard errors of the sample mean.
        conf_99 = ((var / reps) ** (1 / 2)) * 4
        observed_delta = sum(deltas) / reps
        lower = expected_delta - conf_99
        upper = expected_delta + conf_99
        assert lower < observed_delta < upper
Пример #4
0
    def test_substitutions_changes(self):
        """Test that substitutions occur at the expected rate.

        Runs ``substitutions()`` on a fixed all-``a`` sequence many times and
        checks that the mean number of changed positions is close to the
        configured mutation rate scaled by the sequence length.
        """
        settings = AppSettings()
        reps = 1000
        seq_length = 100
        template = "a" * seq_length
        changed_counts = []

        for _ in range(reps):
            dna = Chromosome(template)
            dna.substitutions()
            changed = sum(
                1 for before, after in zip(template, dna.sequence)
                if before != after)
            changed_counts.append(changed)

        # Mean number of substitution events over the whole sequence.
        mean_events = settings.genetics.mutation_rate * seq_length

        # Scale by (1 - 1/alphabet size); presumably this discounts
        # substitutions that pick the nucleotide already in place.
        alphabet_size = len(Chromosome.nucleotides())
        expected_delta = mean_events * (1 - 1 / alphabet_size)

        # The tolerance is widened to ten standard errors to absorb repeated
        # or synonymous substitutions and keep spurious failures rare.
        std_err = (poisson.var(mean_events) / reps) ** (1 / 2)
        conf_99 = std_err * 10

        observed_delta = sum(changed_counts) / reps
        lower = expected_delta - conf_99
        upper = expected_delta + conf_99
        assert lower < observed_delta < upper
Пример #5
0
    def test_deletion_length(self):
        """Test that deletions return the correct average length.

        Performs ``reps`` deletions on fresh chromosomes and asserts that the
        mean reduction in sequence length lies within ten standard errors of
        the configured ``mutation_length``.
        """
        cfg = AppSettings()
        reps = 1000
        deltas = []

        for _ in range(reps):
            dna = Chromosome()
            init_length = len(dna.sequence)
            dna.deletion()
            deltas.append(init_length - len(dna.sequence))

        expected_delta = cfg.genetics.mutation_length
        # scipy's nbinom(n, p) has mean n * (1 - p) / p, so a mean of
        # mutation_length with n = 1 requires p = 1 / (1 + mutation_length).
        # Passing the complement describes a distribution whose mean is
        # 1 / mutation_length and gives the wrong variance for the tolerance.
        p = 1 - expected_delta / (1 + expected_delta)
        var = nbinom.var(1, p)

        # Because there is a little slop around short strings or positions
        # near the end of the string, the tolerance is widened to ten
        # standard errors just to limit the number of failing tests.
        conf_99 = ((var / reps) ** (1 / 2)) * 10
        observed_delta = sum(deltas) / reps
        lower = expected_delta - conf_99
        upper = expected_delta + conf_99
        assert lower < observed_delta < upper
Пример #6
0
    def test_inversion_diffs(self):
        """Test that inversions change the expected number of positions.

        Performs ``reps`` inversions on fresh chromosomes, counts the
        positions that differ before and after each one, and asserts that the
        mean count lies within a tolerance of an expectation derived from a
        negative binomial distribution of inversion lengths.
        """
        cfg = AppSettings()

        reps = 1000
        deltas = []  # observed number of differences

        for _ in range(0, reps):
            dna = Chromosome()
            old_seq = dna.sequence
            dna.inversion()
            deltas.append(
                sum(1 for a, b in zip(old_seq, dna.sequence) if a != b))

        pmfs = []
        expected_deltas = []  # expected differences

        # Assumes the length of an inversion is drawn from a negative binomial
        # distribution. Calculates the probability of each length until
        # 99.99% of the distribution is accounted for. The expected number of
        # differences for each length is multiplied by the probability of that
        # length and the sum of that gives the expected differences overall.
        k = 0
        while sum(pmfs) <= 0.9999:
            pmf = nbinom.pmf(k, 1, (1 - cfg.genetics.mutation_length /
                                    (1 + cfg.genetics.mutation_length)))
            pmfs.append(pmf)

            # An inversion of length k swaps floor(k / 2) pairs of positions
            # (the middle of an odd-length span stays put); each swapped
            # position is weighted by (1 - 1 / alphabet size).
            diffs = math.floor(
                k / 2) * (1 - 1 / len(Chromosome.nucleotides())) * 2
            expected_deltas.append(pmf * diffs)
            k += 1

        expected_delta = sum(expected_deltas)

        # Since we are multiplying the binomial distribution (probability of
        # differences at a given length) by a negative binomial distribution
        # (probability of a length) we must compute the variance of the
        # product of two independent random variables:
        # Var(X * Y) = var(x) * var(y) + var(x) * mean(y) + mean(x) * var(y)
        # http://www.odelama.com/data-analysis/Commonly-Used-Math-Formulas/
        # NOTE(review): the usual identity for independent variables squares
        # the means (var(x) * mean(y)**2 + mean(x)**2 * var(y)); the code
        # below uses them unsquared -- confirm this is intended.

        mean_binom = cfg.genetics.mutation_length
        var_binom = binom.var(mean_binom, 1 / (len(Chromosome.nucleotides())))

        mean_nbinom = cfg.genetics.mutation_length
        # NOTE(review): this call uses n = mutation_length and
        # p = m / (1 + m), whereas the pmf loop above uses n = 1 and
        # p = 1 - m / (1 + m) -- confirm which parameterisation is meant.
        var_nbinom = nbinom.var(cfg.genetics.mutation_length,
                                mean_nbinom / (1 + mean_nbinom))

        var = var_binom * var_nbinom + \
              var_binom * mean_nbinom + \
              mean_binom * var_nbinom

        observed_delta = sum(deltas) / reps
        # Tolerance is five standard errors of the sample mean.
        conf_99 = ((var / reps)**(1 / 2)) * 5
        assert expected_delta - conf_99 < observed_delta < expected_delta + conf_99
Пример #7
0
    def test_random_chromosome_length(self):
        """Check the mean length of randomly created chromosomes.

        Builds ``reps`` chromosomes and asserts that their mean sequence
        length is within four standard errors of the configured
        ``chromosome_length``, using the variance of ``nbinom(1, p)``.
        """
        reps = 1000
        cfg = AppSettings()
        lengths = [len(Chromosome().sequence) for _ in range(reps)]

        expected_length = cfg.genetics.chromosome_length
        # Success probability that gives nbinom(1, p) a mean of
        # expected_length.
        p = 1 - (expected_length / (1 + expected_length))
        std_err = (nbinom.var(1, p) / reps) ** (1 / 2)
        conf_99 = std_err * 4

        mean_length = float(sum(lengths)) / len(lengths)
        lower = expected_length - conf_99
        upper = expected_length + conf_99
        assert lower <= mean_length <= upper
Пример #8
0
 def test_substitutions_length(self):
     """Ensure that substitutions leave the sequence length unchanged."""
     original = "a" * 100
     dna = Chromosome(original)
     dna.substitutions()
     assert len(dna.sequence) == len(original)
Пример #9
0
 def test_nucleotides(self):
     """Check that ``Chromosome.nucleotides()`` returns the expected string."""
     expected_alphabet = "abcd/:*+?"
     assert Chromosome.nucleotides() == expected_alphabet
Пример #10
0
 def test_defined_sequence(self):
     """Check that a chromosome keeps the sequence it was constructed with."""
     given = "abcd"
     chrom = Chromosome(given)
     assert chrom.sequence == given