def test_paralinear_for_determinant_lte_zero(self):
    """Paralinear distance is reported as None if the determinant is <= 0.

    The calculation is run twice on the same ``ParalinearPair`` object and
    the first pairwise distance is checked after each run.
    """
    data = dict(seq1="AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
                seq2="TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
    aln = LoadSeqs(data=data, moltype=DNA)

    paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)
    # Two passes: the second checks that re-running the same calculator
    # still yields None.
    for _ in range(2):
        paralinear_calc.run(show_progress=False)
        dists = paralinear_calc.getPairwiseDistances()
        # Materialise the values before indexing: on Python 3 a dict view
        # is not subscriptable, while list(...) works on both 2 and 3.
        self.assertTrue(list(dists.values())[0] is None)
Exemplo n.º 2
0
    def test_paralinear_for_determinant_lte_zero(self):
        """Paralinear distance is reported as None if the determinant is <= 0.

        The calculation is run twice on the same ``ParalinearPair`` object
        and the first pairwise distance is checked after each run.
        """
        data = dict(
            seq1=
            "AGGGGGGGGGGCCCCCCCCCCCCCCCCCGGGGGGGGGGGGGGGCGGTTTTTTTTTTTTTTTTTT",
            seq2=
            "TAAAAAAAAAAGGGGGGGGGGGGGGGGGGTTTTTTTTTTTTTTTTTTCCCCCCCCCCCCCCCCC")
        aln = LoadSeqs(data=data, moltype=DNA)

        paralinear_calc = ParalinearPair(moltype=DNA, alignment=aln)
        # Two passes: the second checks that re-running the same calculator
        # still yields None.
        for _ in range(2):
            paralinear_calc.run(show_progress=False)
            dists = paralinear_calc.getPairwiseDistances()
            # Materialise the values before indexing: on Python 3 a dict
            # view is not subscriptable, while list(...) works on 2 and 3.
            self.assertTrue(list(dists.values())[0] is None)
Exemplo n.º 3
0
 def test_paralinear_pair_aa(self):
     """Smoke test: paralinear distances can be produced for aa seqs.

     The test passes as long as no step raises; the resulting distances
     are not inspected.
     """
     protein_aln = LoadSeqs('data/brca1_5.paml', moltype=DNA).getTranslation()
     calculator = ParalinearPair(moltype=PROTEIN, alignment=protein_aln)
     calculator.run(show_progress=False)
     # Only the call itself matters here, so the return value is discarded.
     calculator.getPairwiseDistances()
 def test_paralinear_pair_aa(self):
     """Check that paralinear distance calculation does not fail on aa seqs.

     No assertion is made on the distances; success means nothing raised.
     """
     # NOTE(review): a definition with this same name appears directly above
     # in this listing; if both live in one class, this one shadows it —
     # confirm and keep only one.
     dna_aln = LoadSeqs('data/brca1_5.paml', moltype=DNA)
     translated = dna_aln.getTranslation()
     pair_calc = ParalinearPair(moltype=PROTEIN, alignment=translated)
     pair_calc.run(show_progress=False)
     pair_calc.getPairwiseDistances()
Exemplo n.º 5
0
def get_paralinear_distances(gene, data_directory=None, third_position=False, **kw):
    """Return pairwise paralinear distances for the alignment of ``gene``.

    Exactly one file matching ``<gene>.fasta*`` must exist in
    ``data_directory``. A ``.fasta`` file is read with ``open``; a
    ``.fasta.gz`` file is read through ``GzipFile``.

    Args:
        gene: Base name of the alignment file (the part before ``.fasta``).
        data_directory: Directory searched for the alignment file. Must be
            supplied even though the signature defaults it to ``None``.
        third_position: If true, only the slices at indices 2, 5, 8, ... of
            ``range(len(sequences))`` are kept before computing distances
            (presumably third codon positions -- confirm with callers).
        **kw: Accepted and ignored.

    Returns:
        A dict mapping ``frozenset(key)`` to the distance, for every item of
        ``ParalinearPair.getPairwiseDistances()``.

    Raises:
        TypeError: If ``data_directory`` is ``None``.
        AssertionError: If the number of matching files is not exactly one.
        RuntimeError: If the matching file ends in neither ``.fasta`` nor
            ``.fasta.gz``.
    """
    if data_directory is None:
        # Fail with a clear message instead of an obscure error raised from
        # inside os.path.join.
        raise TypeError('data_directory must be a directory path, not None')

    filenames = glob.glob(os.path.join(data_directory, gene + '.fasta*'))
    # Raised explicitly (rather than via ``assert``) so the check is not
    # stripped under ``python -O``; the exception type is unchanged.
    if len(filenames) != 1:
        raise AssertionError('Wrong number of alignment files for ' + gene)
    filename = filenames[0]

    if filename.endswith('.fasta'):
        with open(filename) as fastafile:
            fastadata = fastafile.read()
    elif filename.endswith('.fasta.gz'):
        # NOTE(review): on Python 3, GzipFile.read() returns bytes while the
        # plain-text branch returns str -- confirm LoadSeqs accepts both.
        with GzipFile(filename) as fastafile:
            fastadata = fastafile.read()
    else:
        raise RuntimeError(gene + ' file could not be read')

    sequences = LoadSeqs(data=fastadata)
    if third_position:
        indices = [(i, i + 1) for i in range(2, len(sequences), 3)]
        pos3 = sequences.addFeature('pos3', 'pos3', indices)
        sequences = pos3.getSlice()

    # Build the allowed character set once; the predicate is passed to
    # ``filtered`` and would otherwise rebuild it on every call.
    dna_chars = set(DNA)
    sequences = sequences.filtered(lambda x: set(''.join(x)) <= dna_chars)

    paralinear_calc = ParalinearPair(moltype=DNA, alignment=sequences)
    paralinear_calc.run(show_progress=False)
    dists = paralinear_calc.getPairwiseDistances()

    # frozenset keys make the lookup independent of the order within a pair.
    return {frozenset(k): v for k, v in dists.items()}