Пример #1
0
 def test_clustal_outfile(self):
     """CLUSTAL alignment honours an explicit ``outfile`` path.

     The path returned by ``msa`` must point to an existing file and be
     equal to the requested output path.
     """
     expected_path = self.seq.replace('sequence.fa', 'msa.out.clustal.fa')
     produced_path = msa(CLUSTAL, self.seq, outfile=expected_path)
     file_written = os.path.isfile(produced_path)
     self.assertTrue(file_written)
     self.assertEqual(expected_path, produced_path)
     # Remember the generated file (presumably removed during teardown).
     self.rm = produced_path
Пример #2
0
 def test_clustal_default(self):
     """CLUSTAL alignment without an explicit ``outfile`` yields a file."""
     produced_path = msa(CLUSTAL, self.seq)
     file_written = os.path.isfile(produced_path)
     self.assertTrue(file_written)
     # Remember the generated file (presumably removed during teardown).
     self.rm = produced_path
Пример #3
0
 def test_mafft_outfile(self):
     """MAFFT alignment honours an explicit ``outfile`` path.

     The path returned by ``msa`` must point to an existing file and be
     equal to the requested output path.
     """
     expected_path = self.seq.replace('sequence.fa', 'msa.out.mafft.fa')
     produced_path = msa(MAFFT, self.seq, outfile=expected_path)
     file_written = os.path.isfile(produced_path)
     self.assertTrue(file_written)
     self.assertEqual(expected_path, produced_path)
     # Remember the generated file (presumably removed during teardown).
     self.rm = produced_path
Пример #4
0
 def test_mafft_default(self):
     """MAFFT alignment without an explicit ``outfile`` yields a file."""
     produced_path = msa(MAFFT, self.seq)
     file_written = os.path.isfile(produced_path)
     self.assertTrue(file_written)
     # Remember the generated file (presumably removed during teardown).
     self.rm = produced_path
Пример #5
0
 def test_muscle_default(self):
     """MUSCLE alignment without an explicit ``outfile`` yields a file."""
     produced_path = msa(MUSCLE, self.seq)
     file_written = os.path.isfile(produced_path)
     self.assertTrue(file_written)
     # Remember the generated file (presumably removed during teardown).
     self.rm = produced_path
Пример #6
0
def _trim_or_reuse(alignment):
    """
    Return the trimmed counterpart of an alignment file.

    The trimmed file name is ``utilities.basename(alignment)`` followed by
    ``.trimmed.fasta``. If that file already exists it is reused; otherwise
    ``utilities.trim`` is called to create it.

    :param alignment: str, path to an alignment file.
    :return: the trimmed alignment file as reported by ``utilities.trim``
        (or the pre-existing path).
    """

    trimmed = ''.join([utilities.basename(alignment), '.trimmed.fasta'])
    if os.path.isfile(trimmed):
        info('Using pre-existed trimmed alignment file.')
    else:
        _, trimmed = utilities.trim(alignment, outfile=trimmed)
    return trimmed


def _sequencing(sequence, tree, aligner, ancestor, wd, asr_model, verbose):
    """
    Prepare the sequence data file and load it.

    Without a tree, ``sequence`` is passed to ``_load`` unchanged. With a
    tree, ``sequence`` must be a FASTA file and is processed as follows:

    1. If all records have the same length the file is treated as an
       alignment; it is trimmed only when some record contains characters
       outside ``AMINO_ACIDS``.
    2. Otherwise the records are aligned (and trimmed) with ``aligner``,
       reusing an existing alignment file when one is found.
    3. Ancestral states are reconstructed with ``ancestor`` (reusing an
       existing result file when one is found) and the resulting file
       replaces ``sequence``.

    :param sequence: str, path to a sequence data file.
    :param tree: str, path to a NEWICK tree file (falsy to skip steps 1-3).
    :param aligner: alignment program, forwarded to ``msa._guess`` and
        ``msa.msa``; required when the records are not already aligned.
    :param ancestor: ancestral reconstruction program, forwarded to
        ``asr._guess`` and ``asr.asr``; required when a tree is given.
    :param wd: not used by this function.
    :param asr_model: model argument forwarded to ``asr.asr``.
    :param verbose: verbosity flag forwarded to ``msa.msa`` and ``asr.asr``.
    :return: tuple, ``(tree, rate, records, aps, size, sequence)`` where the
        first five items come from ``_load`` and ``sequence`` is the file
        that was actually loaded.

    The process exits with status 1 (``sys.exit(1)``) when the input is not
    FASTA, when a required program was not provided, or when no trimmed
    alignment could be obtained.
    """

    if tree:
        utilities.Tree(tree, leave=True)
        amino_acids, lengths, clean = set(AMINO_ACIDS), [], []

        with open(sequence) as handle:
            first_line = handle.readline().strip()
            if not first_line.startswith('>'):
                error('NEWICK format tree was provided, but the sequence file '
                      'was not in the FASTA format.')
                sys.exit(1)

            # Rewind so the parser also sees the header line just consumed.
            handle.seek(0)
            for record in SeqIO.parse(handle, 'fasta'):
                lengths.append(len(record.seq))
                clean.append(set(record.seq).issubset(amino_acids))

        if len(set(lengths)) == 1:
            # Equal-length records: the file already is an alignment.
            trimmed = sequence if all(clean) else _trim_or_reuse(sequence)
        elif aligner:
            aler, _ = msa._guess(aligner)
            outfile = ''.join(
                [utilities.basename(sequence), '.{}.fasta'.format(aler)])
            if os.path.isfile(outfile):
                info('Using pre-existed alignment file')
                trimmed = _trim_or_reuse(outfile)
            else:
                trimmed = msa.msa(aligner,
                                  sequence,
                                  verbose=verbose,
                                  outfile=outfile,
                                  trimming=True)
        else:
            error('FASTA format sequence file was provided, but no '
                  'alignment program was provided.')
            sys.exit(1)

        if not trimmed:
            # Previously this path exited without telling the user why.
            error('Failed to obtain a trimmed alignment file.')
            sys.exit(1)

        if not ancestor:
            error('No ancestral reconstruction program was provided.')
            sys.exit(1)

        # Strip only the trailing suffix; str.replace would also remove
        # matches that occur earlier in the path.
        suffix = '.trimmed.fasta'
        if trimmed.endswith(suffix):
            name = trimmed[:-len(suffix)]
        else:
            name = trimmed

        aser, _ = asr._guess(ancestor)
        outfile = '{}.{}.tsv'.format(utilities.basename(name), aser)
        if os.path.isfile(outfile):
            info('Using pre-existed ancestral states sequence file.')
            sequence = outfile
        else:
            sequence = asr.asr(ancestor,
                               trimmed,
                               tree,
                               asr_model,
                               verbose=verbose,
                               outfile=outfile)

    tree, rate, records, aps, size = _load(sequence)
    return tree, rate, records, aps, size, sequence