Python Beauti примеры использования

Язык программирования: Python

Пространство имен/Пакет: beauti

Класс/Тип: Beauti

Примеров на hotexamples.com: 4

Python Beauti - 4 примера найдено. Это лучшие примеры Python кода для beauti.Beauti, полученные из open source проектов. Вы можете ставить оценку каждому примеру, чтобы помочь нам улучшить качество примеров.

Основные методы

Показать Скрыть

Beauti(1)

parse_log(1)

parse_treelog(1)

populate(1)

write(1)

Пример #1

Показать файл

    def __init__(self,
                 csv=None,
                 iso_format=True,
                 origin=date(1970, 1, 1),
                 delimiter='_',
                 field=-1,
                 ft2path=None,
                 Rpath=None,
                 java=None,
                 tmpfile='anchre-tmp',
                 beast_xml_template=None):
        self.csv = csv
        self.iso_format = iso_format
        self.origin = origin
        self.delimiter = delimiter
        self.field = field

        # paths to binaries
        self.ft2path = ft2path
        self.Rpath = Rpath
        self.java = java

        self.pyphy = PyPhy(os.getcwd(), 1)  # instance of HyPhy
        self.beauti = Beauti(beast_xml_template)

        # if given, parse dates from csv
        self.dates = {}
        if self.csv is not None:
            self.parse_date_csv()

        # store sequence records
        self.fasta = {}
        self.last_date = None

        self.tmp = tempfile.gettempdir()
        self.tmpfile = os.path.join(self.tmp, tmpfile)
        self.test()

Пример #2

Показать файл

Файл: anchre.py Проект: ArtPoon/anchre

    def __init__(
        self,
        csv=None,
        iso_format=True,
        origin=date(1970, 1, 1),
        delimiter="_",
        field=-1,
        ft2path=None,
        Rpath=None,
        java=None,
        tmpfile="anchre-tmp",
        beast_xml_template=None,
    ):
        self.csv = csv
        self.iso_format = iso_format
        self.origin = origin
        self.delimiter = delimiter
        self.field = field

        # paths to binaries
        self.ft2path = ft2path
        self.Rpath = Rpath
        self.java = java

        self.pyphy = PyPhy(os.getcwd(), 1)  # instance of HyPhy
        self.beauti = Beauti(beast_xml_template)

        # if given, parse dates from csv
        self.dates = {}
        if self.csv is not None:
            self.parse_date_csv()

        # store sequence records
        self.fasta = {}
        self.last_date = None

        self.tmp = tempfile.gettempdir()
        self.tmpfile = os.path.join(self.tmp, tmpfile)
        self.test()

Пример #3

Показать файл

Файл: anchre.py Проект: ArtPoon/anchre

class Anchre:
    def __init__(
        self,
        csv=None,
        iso_format=True,
        origin=date(1970, 1, 1),
        delimiter="_",
        field=-1,
        ft2path=None,
        Rpath=None,
        java=None,
        tmpfile="anchre-tmp",
        beast_xml_template=None,
    ):
        self.csv = csv
        self.iso_format = iso_format
        self.origin = origin
        self.delimiter = delimiter
        self.field = field

        # paths to binaries
        self.ft2path = ft2path
        self.Rpath = Rpath
        self.java = java

        self.pyphy = PyPhy(os.getcwd(), 1)  # instance of HyPhy
        self.beauti = Beauti(beast_xml_template)

        # if given, parse dates from csv
        self.dates = {}
        if self.csv is not None:
            self.parse_date_csv()

        # store sequence records
        self.fasta = {}
        self.last_date = None

        self.tmp = tempfile.gettempdir()
        self.tmpfile = os.path.join(self.tmp, tmpfile)
        self.test()

    def test(self):
        """
        Check whether expected binaries are accessible
        :return:
        """
        if not os.path.exists(self.ft2path):
            print "ERROR: Failed to detect FastTree2 at", self.ft2path
            sys.exit()
        if not os.path.exists(self.Rpath):
            print "ERROR: Failed to detect R at", self.ft2path
            sys.exit()

    def parse_date(self, date_str):
        """
        Convert a string representation of a sample collection date into
        an integer value (number of days since some time in the past).
        """
        if self.iso_format:
            try:
                days = (dateup.parse(date_str).date() - self.origin).days
            except ValueError:
                print "ERROR: Failed to parse date", date_str
                raise
        else:
            # expressed as number of days since some time in the past (BEAST style)
            try:
                days = int(date_str)
            except:
                print "ERROR: Expected integer value for sequence date, found", date_str
                raise
        return days

    def parse_dates_csv(self):
        """
        Parse dates from CSV.
        """
        reader = DictReader(open(self.csv, "rU"))
        for row in reader:
            self.dates.update({row["header"]: self.parse_date(row["date"])})

    def read(self, handle):
        """
        Parse open file as FASTA.  Clean sequence labels.
        """
        self.fasta = {}  # reset container
        h = None
        sequence = ""
        count = 0
        for line in handle:
            if line.startswith("$"):  # skip comments
                continue

            if line.startswith(">") or line.startswith("#"):
                if sequence:
                    # create record
                    days = self.get_date(h)
                    self.fasta.update({h: {"header": "%d_%d" % (count, days), "sequence": sequence, "days": days}})
                    sequence = ""  # reset container

                h = line.strip(">#\n")
                count += 1
            else:
                sequence += line.strip("\n").upper()

        # append last entry
        days = self.get_date(h)
        self.fasta.update({h: {"header": "%d_%d" % (count, days), "sequence": sequence, "days": days}})

        # determine most recent sample date
        all_dates = [v["days"] for v in self.fasta.itervalues()]
        assert len(set(all_dates)) > 1, "ERROR: Only one sample date in data"
        all_dates.sort(reverse=True)
        self.last_date = all_dates[0]

    def get_date(self, h):
        """
        If dates were provided as a CSV input file, then return the
        date associated with the sequence header supplied as the first argument.
        Otherwise, parse date field from sequence header in FASTA object.
        """
        if self.csv:
            try:
                # this will always be days since X
                return self.dates[h]
            except:
                print "ERROR: sequence header", h, "not found in dates parsed from CSV"
                raise

        # otherwise, parse date from sequence headers
        date_field = h.split(self.delimiter)[self.field]
        return self.parse_date(date_field)

    def newick2phylo(self, nwk):
        handle = StringIO(nwk)
        phy = Phylo.read(handle, "newick")
        return phy

    def phylo2newick(self, t):
        """
        Convert Phylo into Newick tree string.
        """
        output = StringIO()
        Phylo.write(t, output, "newick")
        return output.getvalue()

    def plurality_consensus(self, column, alphabet="ACGT", resolve=False):
        """
        Plurality consensus - nucleotide with highest frequency.
        In case of tie, report mixtures.
        """
        mixture_dict = {
            "W": "AT",
            "R": "AG",
            "K": "GT",
            "Y": "CT",
            "S": "CG",
            "M": "AC",
            "V": "AGC",
            "H": "ATC",
            "D": "ATG",
            "B": "TGC",
            "N": "ATGC",
            "-": "ATGC",
        }
        ambig_dict = dict(("".join(sorted(v)), k) for k, v in mixture_dict.iteritems())
        freqs = {}

        for char in alphabet:
            freqs.update({char: 0})
        # freqs = {"A": 0, "T": 0, "C": 0, "G": 0, "-": 0}
        for char in column:
            if char in alphabet:
                freqs[char] += 1
            elif mixture_dict.has_key(char):
                # handled ambiguous nucleotides with equal weighting
                resolutions = mixture_dict[char]
                for char2 in resolutions:
                    freqs[char2] += 1.0 / len(resolutions)
            else:
                # unrecognized nucleotide character
                pass

        base = max(freqs, key=lambda n: freqs[n])
        max_count = freqs[base]
        possib = filter(lambda n: freqs[n] == max_count, freqs)
        if len(possib) == 1:
            return possib[0]
        elif "-" in possib:
            if resolve:
                possib.remove("-")
                if len(possib) == 0:
                    return "-"
                elif len(possib) == 1:
                    return possib[0]
                else:
                    return ambig_dict["".join(sorted(possib))]
            else:
                # gap character overrides ties
                return "-"
        else:
            return ambig_dict["".join(sorted(possib))]

    def consensus(self, seqs, alphabet="ACGT", resolve=False):
        """
        Return plurality consensus of alignment.
        """
        # transpose the alignment
        n_columns = len(seqs[0])
        columns = []
        for c in range(n_columns):
            columns.append([s[c] for s in seqs])

        consen = []
        for column in columns:
            consen.append(self.plurality_consensus(column, alphabet=alphabet, resolve=resolve))

        return "".join(consen)

    def earliest_sample(self):
        # determine the earliest sample date
        dates = [v["days"] for v in self.fasta.itervalues()]
        dates.sort()  # defaults to increasing order
        earliest_date = dates[0]

        # retrieve all sequences with this date
        first_sample = [v["sequence"] for k, v in self.fasta.iteritems() if v["days"] == earliest_date]
        return first_sample

    def consensus_earliest(self):
        """
        Return the consensus of sequences from the earliest sample.
        :param fasta:
        :return:
        """
        if not self.fasta:
            # no sequences have been parsed
            return None

        sample = self.earliest_sample()  # list of sequences
        return self.consensus(sample)

    def consensus_all(self):
        """
        Return the consensus of all sequences.
        :return:
        """
        all_seqs = [v["sequence"] for v in self.fasta.itervalues()]
        return self.consensus(all_seqs)

    def output_fasta(self):
        """
        Write contents of self.fasta to temporary file
        :return:  Absolute path to temporary file
        """
        with open(self.tmpfile, "w") as f:
            for i, (h, data) in enumerate(self.fasta.iteritems()):
                f.write(">%s\n%s\n" % (data["header"], data["sequence"]))

    def call_fasttree2(self, raw=False):
        """
        Call FastTree2 on FASTA file
        :param raw: if True, retain original sequence headers
        :return:
        """
        self.output_fasta()  # writes to self.tmpfile

        p = subprocess.Popen(
            [self.ft2path, "-quiet", "-nosupport", "-nt", "-gtr"],
            stdin=open(self.tmpfile, "rU"),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
        )
        stdout, stderr = p.communicate()
        return stdout

    def call_rtt(self, tree):
        """
        Call an R script that implements Rosemary's rtt() function for re-rooting
        a tree based on tip dates.
        :param tree: Newick tree string
        :return: dictionary with two key-value pairs for rooted and dated trees
        """
        os.chdir("R/")
        p = subprocess.Popen([self.Rpath, "rtt.r", tree], stdout=subprocess.PIPE)
        stdout, stderr = p.communicate()
        # clean kludge from R stdout
        # rooted_tree, dated_tree = map(lambda s: s.replace('[1] "', '').replace('NA;"', '0:0;'),
        #                              stdout.split('\n')[:2])
        # res = {'rooted': rooted_tree, 'dated': dated_tree}
        rooted_tree = stdout.replace("[1] ", "").strip('"\n')
        os.chdir("../")
        return rooted_tree

    def call_root2tip(self, tree):
        """
        Call jar file that implements a modified version of Andrew Rambaut's
        root-to-tip method (Path-O-Gen).
        :param tree: a Newick tree string
        :return: a dictionary that includes the time-scaled tree
        """
        # write tree to temporary file
        with open(self.tmpfile, "w") as handle:
            handle.write(tree)

        out1 = os.path.join(self.tmp, "anchre.r2t.timetree")
        out2 = os.path.join(self.tmp, "anchre.r2t.csv")

        p = subprocess.check_call(
            [self.java, "-jar", "java/RLRootToTip.jar", "-timetree", out1, "-newick", self.tmpfile, out2],
            stdout=subprocess.PIPE,
        )

        # read outputs
        with open(out1, "rU") as handle:
            timetree = Phylo.read(handle, "nexus")
        with open(out2, "rU") as handle:
            coef = handle.readlines()

        # convert NEXUS to Newick string
        newick = self.phylo2newick(timetree)
        res = {"timetree": newick}
        values = coef[1].strip("\n").split(",")
        for i, key in enumerate(coef[0].strip("\n").split(",")):
            res.update({key: values[i]})

        return res

    def call_hyphy_ancre(self, tree, model_spec="010010", is_codon=False):
        """
        Ancestral reconstruction with HyPhy
        :param tree: Newick tree string
        :param is_codon: if True, interpret alignment as codon sequences
        :return: [ancseq] is a dictionary of header/sequence pairs.
                   "Node0" keys the root node.
                 [lf] is a serialization of the likelihood function.
        """

        # cast Newick tree string as Phylo object to extract tip labels
        phy = self.newick2phylo(tree)
        tips = phy.get_terminals()
        tipnames = [tip.name for tip in tips]
        tipnames.sort()

        # make sure the tree labels match the sequence headers
        headers = [v["header"] for v in self.fasta.itervalues()]
        headers.sort()
        if headers != tipnames:
            print "Warning: tree labels do not match FASTA in call_hyphy_ancre()"
            print set(headers).difference(set(tipnames))
            sys.exit()

        ancseq, lf = self.pyphy.ancre(fasta=self.fasta, newick=tree, model_spec=model_spec, is_codon=is_codon)
        return dict(ancseq), lf

    def call_beast(
        self, chain_length=1e6, screen_step=1e5, log_step=1e4, treelog_step=1e4, sample_size=100, root_height=None
    ):
        """
        Use BEAST to sample trees from the posterior density under a
        strict molecular clock model.  If you want different settings,
        modify the template XML file.
        :return: a list of Newick tree strings
        """
        log, treelog = self.beauti.populate(
            fasta=self.fasta,
            stem=os.path.join(self.tmp, "beast"),
            chain_length=chain_length,
            screen_step=screen_step,
            log_step=log_step,
            treelog_step=treelog_step,
            root_height=root_height,
        )

        self.beauti.write(self.tmpfile)
        # this was tested on version 1.8.1
        # 1.8.1 has a bug that results in zombie processes that fail to terminate - use 1.8.2
        p = subprocess.Popen(
            [self.java, "-Xms64m", "-Xmx256m", "-jar", "java/beast.jar", "-beagle_off", "-overwrite", self.tmpfile],
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE,
            bufsize=0,
        )
        for i, line in enumerate(p.stdout):
            if i % 10 == 0:
                sys.stdout.write(".")  # progress monitor

        sys.stdout.write("\n")

        with open(log, "rU") as f:
            traces = self.beauti.parse_log(f)

        with open(treelog, "rU") as f:
            trees = self.beauti.parse_treelog(f, sample_size=sample_size)

        return traces, trees

Пример #4

Показать файл

class Anchre:
    def __init__(self,
                 csv=None,
                 iso_format=True,
                 origin=date(1970, 1, 1),
                 delimiter='_',
                 field=-1,
                 ft2path=None,
                 Rpath=None,
                 java=None,
                 tmpfile='anchre-tmp',
                 beast_xml_template=None):
        self.csv = csv
        self.iso_format = iso_format
        self.origin = origin
        self.delimiter = delimiter
        self.field = field

        # paths to binaries
        self.ft2path = ft2path
        self.Rpath = Rpath
        self.java = java

        self.pyphy = PyPhy(os.getcwd(), 1)  # instance of HyPhy
        self.beauti = Beauti(beast_xml_template)

        # if given, parse dates from csv
        self.dates = {}
        if self.csv is not None:
            self.parse_date_csv()

        # store sequence records
        self.fasta = {}
        self.last_date = None

        self.tmp = tempfile.gettempdir()
        self.tmpfile = os.path.join(self.tmp, tmpfile)
        self.test()

    def test(self):
        """
        Check whether expected binaries are accessible
        :return:
        """
        if not os.path.exists(self.ft2path):
            print 'ERROR: Failed to detect FastTree2 at', self.ft2path
            sys.exit()
        if not os.path.exists(self.Rpath):
            print 'ERROR: Failed to detect R at', self.ft2path
            sys.exit()

    def parse_date(self, date_str):
        """
        Convert a string representation of a sample collection date into
        an integer value (number of days since some time in the past).
        """
        if self.iso_format:
            try:
                days = (dateup.parse(date_str).date() - self.origin).days
            except ValueError:
                print 'ERROR: Failed to parse date', date_str
                raise
        else:
            # expressed as number of days since some time in the past (BEAST style)
            try:
                days = int(date_str)
            except:
                print 'ERROR: Expected integer value for sequence date, found', date_str
                raise
        return days

    def parse_dates_csv(self):
        """
        Parse dates from CSV.
        """
        reader = DictReader(open(self.csv, 'rU'))
        for row in reader:
            self.dates.update({row['header']: self.parse_date(row['date'])})

    def read(self, handle):
        """
        Parse open file as FASTA.  Clean sequence labels.
        """
        self.fasta = {}  # reset container
        h = None
        sequence = ''
        count = 0
        for line in handle:
            if line.startswith('$'):  # skip comments
                continue

            if line.startswith('>') or line.startswith('#'):
                if sequence:
                    # create record
                    days = self.get_date(h)
                    self.fasta.update({
                        h: {
                            'header': '%d_%d' % (count, days),
                            'sequence': sequence,
                            'days': days
                        }
                    })
                    sequence = ''  # reset container

                h = line.strip('>#\n')
                count += 1
            else:
                sequence += line.strip('\n').upper()

        # append last entry
        days = self.get_date(h)
        self.fasta.update({
            h: {
                'header': '%d_%d' % (count, days),
                'sequence': sequence,
                'days': days
            }
        })

        # determine most recent sample date
        all_dates = [v['days'] for v in self.fasta.itervalues()]
        assert len(set(all_dates)) > 1, 'ERROR: Only one sample date in data'
        all_dates.sort(reverse=True)
        self.last_date = all_dates[0]

    def get_date(self, h):
        """
        If dates were provided as a CSV input file, then return the
        date associated with the sequence header supplied as the first argument.
        Otherwise, parse date field from sequence header in FASTA object.
        """
        if self.csv:
            try:
                # this will always be days since X
                return self.dates[h]
            except:
                print 'ERROR: sequence header', h, 'not found in dates parsed from CSV'
                raise

        # otherwise, parse date from sequence headers
        date_field = h.split(self.delimiter)[self.field]
        return self.parse_date(date_field)

    def newick2phylo(self, nwk):
        handle = StringIO(nwk)
        phy = Phylo.read(handle, 'newick')
        return phy

    def phylo2newick(self, t):
        """
        Convert Phylo into Newick tree string.
        """
        output = StringIO()
        Phylo.write(t, output, 'newick')
        return output.getvalue()

    def plurality_consensus(self, column, alphabet='ACGT', resolve=False):
        """
        Plurality consensus - nucleotide with highest frequency.
        In case of tie, report mixtures.
        """
        mixture_dict = {
            'W': 'AT',
            'R': 'AG',
            'K': 'GT',
            'Y': 'CT',
            'S': 'CG',
            'M': 'AC',
            'V': 'AGC',
            'H': 'ATC',
            'D': 'ATG',
            'B': 'TGC',
            'N': 'ATGC',
            '-': 'ATGC'
        }
        ambig_dict = dict(
            ("".join(sorted(v)), k) for k, v in mixture_dict.iteritems())
        freqs = {}

        for char in alphabet:
            freqs.update({char: 0})
        #freqs = {"A": 0, "T": 0, "C": 0, "G": 0, "-": 0}
        for char in column:
            if char in alphabet:
                freqs[char] += 1
            elif mixture_dict.has_key(char):
                # handled ambiguous nucleotides with equal weighting
                resolutions = mixture_dict[char]
                for char2 in resolutions:
                    freqs[char2] += 1. / len(resolutions)
            else:
                # unrecognized nucleotide character
                pass

        base = max(freqs, key=lambda n: freqs[n])
        max_count = freqs[base]
        possib = filter(lambda n: freqs[n] == max_count, freqs)
        if len(possib) == 1:
            return possib[0]
        elif "-" in possib:
            if resolve:
                possib.remove("-")
                if len(possib) == 0:
                    return "-"
                elif len(possib) == 1:
                    return possib[0]
                else:
                    return ambig_dict["".join(sorted(possib))]
            else:
                # gap character overrides ties
                return "-"
        else:
            return ambig_dict["".join(sorted(possib))]

    def consensus(self, seqs, alphabet='ACGT', resolve=False):
        """
        Return plurality consensus of alignment.
        """
        # transpose the alignment
        n_columns = len(seqs[0])
        columns = []
        for c in range(n_columns):
            columns.append([s[c] for s in seqs])

        consen = []
        for column in columns:
            consen.append(
                self.plurality_consensus(column,
                                         alphabet=alphabet,
                                         resolve=resolve))

        return "".join(consen)

    def earliest_sample(self):
        # determine the earliest sample date
        dates = [v['days'] for v in self.fasta.itervalues()]
        dates.sort()  # defaults to increasing order
        earliest_date = dates[0]

        # retrieve all sequences with this date
        first_sample = [
            v['sequence'] for k, v in self.fasta.iteritems()
            if v['days'] == earliest_date
        ]
        return first_sample

    def consensus_earliest(self):
        """
        Return the consensus of sequences from the earliest sample.
        :param fasta:
        :return:
        """
        if not self.fasta:
            # no sequences have been parsed
            return None

        sample = self.earliest_sample()  # list of sequences
        return self.consensus(sample)

    def consensus_all(self):
        """
        Return the consensus of all sequences.
        :return:
        """
        all_seqs = [v['sequence'] for v in self.fasta.itervalues()]
        return self.consensus(all_seqs)

    def output_fasta(self):
        """
        Write contents of self.fasta to temporary file
        :return:  Absolute path to temporary file
        """
        with open(self.tmpfile, 'w') as f:
            for i, (h, data) in enumerate(self.fasta.iteritems()):
                f.write('>%s\n%s\n' % (data['header'], data['sequence']))

    def call_fasttree2(self, raw=False):
        """
        Call FastTree2 on FASTA file
        :param raw: if True, retain original sequence headers
        :return:
        """
        self.output_fasta()  # writes to self.tmpfile

        p = subprocess.Popen(
            [self.ft2path, '-quiet', '-nosupport', '-nt', '-gtr'],
            stdin=open(self.tmpfile, 'rU'),
            stdout=subprocess.PIPE,
            stderr=subprocess.PIPE)
        stdout, stderr = p.communicate()
        return stdout

    def call_rtt(self, tree):
        """
        Call an R script that implements Rosemary's rtt() function for re-rooting
        a tree based on tip dates.
        :param tree: Newick tree string
        :return: dictionary with two key-value pairs for rooted and dated trees
        """
        os.chdir('R/')
        p = subprocess.Popen([self.Rpath, 'rtt.r', tree],
                             stdout=subprocess.PIPE)
        stdout, stderr = p.communicate()
        # clean kludge from R stdout
        #rooted_tree, dated_tree = map(lambda s: s.replace('[1] "', '').replace('NA;"', '0:0;'),
        #                              stdout.split('\n')[:2])
        #res = {'rooted': rooted_tree, 'dated': dated_tree}
        rooted_tree = stdout.replace('[1] ', '').strip('"\n')
        os.chdir('../')
        return rooted_tree

    def call_root2tip(self, tree):
        """
        Call jar file that implements a modified version of Andrew Rambaut's
        root-to-tip method (Path-O-Gen).
        :param tree: a Newick tree string
        :return: a dictionary that includes the time-scaled tree
        """
        # write tree to temporary file
        with open(self.tmpfile, 'w') as handle:
            handle.write(tree)

        out1 = os.path.join(self.tmp, 'anchre.r2t.timetree')
        out2 = os.path.join(self.tmp, 'anchre.r2t.csv')

        p = subprocess.check_call([
            self.java, '-jar', 'java/RLRootToTip.jar', '-timetree', out1,
            '-newick', self.tmpfile, out2
        ],
                                  stdout=subprocess.PIPE)

        # read outputs
        with open(out1, 'rU') as handle:
            timetree = Phylo.read(handle, 'nexus')
        with open(out2, 'rU') as handle:
            coef = handle.readlines()

        # convert NEXUS to Newick string
        newick = self.phylo2newick(timetree)
        res = {'timetree': newick}
        values = coef[1].strip('\n').split(',')
        for i, key in enumerate(coef[0].strip('\n').split(',')):
            res.update({key: values[i]})

        return res

    def call_hyphy_ancre(self, tree, model_spec='010010', is_codon=False):
        """
        Ancestral reconstruction with HyPhy
        :param tree: Newick tree string
        :param is_codon: if True, interpret alignment as codon sequences
        :return: [ancseq] is a dictionary of header/sequence pairs.
                   "Node0" keys the root node.
                 [lf] is a serialization of the likelihood function.
        """

        # cast Newick tree string as Phylo object to extract tip labels
        phy = self.newick2phylo(tree)
        tips = phy.get_terminals()
        tipnames = [tip.name for tip in tips]
        tipnames.sort()

        # make sure the tree labels match the sequence headers
        headers = [v['header'] for v in self.fasta.itervalues()]
        headers.sort()
        if headers != tipnames:
            print 'Warning: tree labels do not match FASTA in call_hyphy_ancre()'
            print set(headers).difference(set(tipnames))
            sys.exit()

        ancseq, lf = self.pyphy.ancre(fasta=self.fasta,
                                      newick=tree,
                                      model_spec=model_spec,
                                      is_codon=is_codon)
        return dict(ancseq), lf

    def call_beast(self,
                   chain_length=1E6,
                   screen_step=1E5,
                   log_step=1E4,
                   treelog_step=1E4,
                   sample_size=100,
                   root_height=None):
        """
        Use BEAST to sample trees from the posterior density under a
        strict molecular clock model.  If you want different settings,
        modify the template XML file.
        :return: a list of Newick tree strings
        """
        log, treelog = self.beauti.populate(fasta=self.fasta,
                                            stem=os.path.join(
                                                self.tmp, 'beast'),
                                            chain_length=chain_length,
                                            screen_step=screen_step,
                                            log_step=log_step,
                                            treelog_step=treelog_step,
                                            root_height=root_height)

        self.beauti.write(self.tmpfile)
        # this was tested on version 1.8.1
        # 1.8.1 has a bug that results in zombie processes that fail to terminate - use 1.8.2
        p = subprocess.Popen([
            self.java, '-Xms64m', '-Xmx256m', '-jar', 'java/beast.jar',
            '-beagle_off', '-overwrite', self.tmpfile
        ],
                             stdout=subprocess.PIPE,
                             stderr=subprocess.PIPE,
                             bufsize=0)
        for i, line in enumerate(p.stdout):
            if i % 10 == 0:
                sys.stdout.write('.')  # progress monitor

        sys.stdout.write('\n')

        with open(log, 'rU') as f:
            traces = self.beauti.parse_log(f)

        with open(treelog, 'rU') as f:
            trees = self.beauti.parse_treelog(f, sample_size=sample_size)

        return traces, trees