示例#1
0
def simulate(self,
             calculatePatterns=True,
             resetSequences=True,
             resetNexusSetsConstantMask=True):
    """Simulate into the attached data.

    The tree self needs to have a data and model attached.

    This week, generation of random numbers uses the C language random
    function, which is in stdlib on Linux.  It will use the same
    series of random numbers over and over, unless you tell it
    otherwise.  That means that (unless you tell it otherwise) it will
    generate the same simulated data if you run it twice.  To reset
    the randomizer, you can use func.reseedCRandomizer(), eg

        func.reseedCRandomizer(os.getpid())

    Args:
        calculatePatterns (bool): If set, pf.makePatterns() and
            pf.setGlobalInvarSitesVec() are called on the cPart of every
            part in self.data after simulating.
        resetSequences (bool): If set, self.data.resetSequencesFromParts()
            is called after simulating.
        resetNexusSetsConstantMask (bool): If set, then for every alignment
            in self.data that has a nexusSets, the constant mask is
            replaced by the alignment's freshly computed constantMask().
            Needs resetSequences to be set as well.

    Raises:
        Glitch: If resetNexusSetsConstantMask is set but resetSequences is
            not.  Note that this is only detected after the simulation
            itself has already been done.
    """

    self._commonCStuff()

    # If there is a NexusSets object attached to any of the alignments in
    # the Data, the constant sites mask at least will become out of sync.
    # We can't just delete the whole nexusSets object, as it defines what
    # the parts are, so the mask is recalculated at the end instead.

    pf.p4_simulate(self.cTree)

    if calculatePatterns:
        for p in self.data.parts:
            pf.makePatterns(p.cPart)
            pf.setGlobalInvarSitesVec(p.cPart)

    if resetSequences:
        self.data.resetSequencesFromParts()
        if resetNexusSetsConstantMask:
            for a in self.data.alignments:
                if a.nexusSets:
                    a.nexusSets.constant.mask = a.constantMask()
    elif resetNexusSetsConstantMask:
        gm = ['Tree.simulate().']
        gm.append(
            "resetSequences is not set, but resetNexusSetsConstantMask is set,"
        )
        gm.append("which is probably not going to work as you want.")
        # Call form of raise: same meaning as the old "raise Glitch, gm" on
        # Python 2, and the only form that Python 3 accepts.
        raise Glitch(gm)
示例#2
0
def simulate(self, calculatePatterns=True, resetSequences=True, resetNexusSetsConstantMask=True):
    """Simulate new data into the data attached to this tree.

    Self must have both a data and a model attached.

    The random numbers currently come from the C language random
    function (in stdlib on Linux).  Unless told otherwise it replays the
    same series of random numbers, so two runs produce the same simulated
    data.  Reseed it with func.reseedCRandomizer(), for example

        func.reseedCRandomizer(os.getpid())

    Args:
        calculatePatterns (bool): When set, each part of self.data gets
            pf.makePatterns() and pf.setGlobalInvarSitesVec() called on
            its cPart once the simulation is done.
        resetSequences (bool): When set, calls
            self.data.resetSequencesFromParts() afterwards.
        resetNexusSetsConstantMask (bool): When set, every alignment of
            self.data that carries a nexusSets has its constant mask
            replaced with the result of constantMask().  Only meaningful
            together with resetSequences.

    Raises:
        Glitch: When resetNexusSetsConstantMask is set without
            resetSequences (raised after the simulation has run).
    """

    self._commonCStuff()

    # A NexusSets object attached to an alignment defines what the parts
    # are, so it cannot simply be thrown away; only its constant sites
    # mask goes stale, and that is refreshed below.
    pf.p4_simulate(self.cTree)

    if calculatePatterns:
        for thePart in self.data.parts:
            pf.makePatterns(thePart.cPart)
            pf.setGlobalInvarSitesVec(thePart.cPart)

    if not resetSequences:
        if resetNexusSetsConstantMask:
            complaint = [
                'Tree.simulate().',
                "resetSequences is not set, but resetNexusSetsConstantMask is set,",
                "which is probably not going to work as you want.",
            ]
            raise Glitch(complaint)
        return

    self.data.resetSequencesFromParts()
    if resetNexusSetsConstantMask:
        for aligment in self.data.alignments:
            if aligment.nexusSets:
                aligment.nexusSets.constant.mask = aligment.constantMask()
示例#3
0
def _initParts(self):
    """(Re)build self.parts from the sequences of this alignment.

    Without a nexusSets charPartition the whole alignment becomes a single
    Part named 'all': a C part is allocated with pf.newPart(), the equates
    table and the concatenated sequences are poked into it, and patterns
    and the global invariant sites vector are then calculated.

    With a charPartition, one part is made per charPartition subset, by
    calling this same method on the alignment returned by
    self.subsetUsingMask() and taking over its single part.

    Raises:
        Glitch: If there are no sequences, or length, symbols or dim is
            not set, or if the C part could not be obtained.
    """
    gm = ['Alignment._initParts()']

    # Rebinding the list is what lets go of any previous parts.  (A per-item
    # "del p" in a loop would only unbind the loop variable.)
    self.parts = []

    # The equate symbols, sorted, as one string.
    if self.equates:
        eqSymb = ''.join(sorted(self.equates.keys()))
    else:
        eqSymb = ''

    if not (len(self.sequences) and self.length and self.symbols
            and self.dim):
        gm.append("Can't allocate part.")
        if not len(self.sequences):
            gm.append("-no sequences.")
        elif not self.length:
            gm.append("-the sequences have no length")
        elif not self.symbols:
            gm.append("-no symbols")
        elif not self.dim:
            gm.append("-dim not set")
        raise Glitch(gm)

    if not self.nexusSets or not self.nexusSets.charPartition:
        # Its all one part.
        aPart = Part()
        aPart.alignment = self
        aPart.name = 'all'
        aPart.lowName = 'all'
        aPart.dataType = self.dataType
        aPart.dim = self.dim
        aPart.symbols = self.symbols
        aPart.equates = self.equates
        aPart.nTax = len(self.sequences)
        aPart.nChar = self.length
        assert aPart.nChar

        aPart.cPart = pf.newPart(len(self.sequences), self.length, eqSymb,
                                 self.symbols)
        if not aPart or not aPart.cPart:
            gm.append("Failed to get memory for part.")
            raise Glitch(gm)

        # Make the equates table: for each equate symbol (in sorted order)
        # one '1' or '0' per symbol, saying whether that symbol is in the
        # equate.  It is all flattened into a single string.
        equatesTable = []
        for eq in eqSymb:
            e = self.equates[eq]
            for s in self.symbols:
                equatesTable.append('1' if s in e else '0')
        pf.pokeEquatesTable(aPart.cPart, ''.join(equatesTable))

        # All the sequences, end to end, as one string.
        pf.pokeSequences(aPart.cPart,
                         ''.join([s.sequence for s in self.sequences]))
        pf.makePatterns(aPart.cPart)
        pf.setGlobalInvarSitesVec(aPart.cPart)

        self.parts.append(aPart)

    else:
        for cpp in self.nexusSets.charPartition.subsets:
            b = self.subsetUsingMask(cpp.mask)
            # This very method, but now there are no charPartitions in b.
            b._initParts()
            b.parts[0].name = cpp.name
            b.parts[0].lowName = cpp.name.lower()
            self.parts.append(b.parts[0])
            b.parts = []  # so we don't try free-ing it twice
示例#4
0
def resetPartsContentFromSequences(self):
    """Reset the content of the parts from self.sequences.

    With a single part, the sequences of self are used directly.  With
    several parts, each part is refilled from the alignment returned by
    self.subsetUsingMask() for the corresponding charPartition subset.

    If var.doDataPart is not set, the sequences are poked into Part.cPart,
    after which patterns are made and the global invariant sites array is
    set.  If var.doDataPart is set, the characters are instead encoded one
    by one into Part.seq.

    Raises:
        Glitch: If there are no parts, or if there are several parts but
            the nexusSets charPartition subsets do not match them in
            number.
    """

    gm = ['Alignment.resetPartsContentFromSequences()']

    def _refill(aPart, alignment):
        """Load the sequences of alignment into aPart."""
        if not var.doDataPart:
            pf.pokeSequences(
                aPart.cPart,
                ''.join([s.sequence for s in alignment.sequences]))
            # are the following necessary?
            pf.makePatterns(aPart.cPart)
            pf.setGlobalInvarSitesVec(aPart.cPart)
            return

        for sNum, s in enumerate(alignment.sequences):
            for cNum in range(alignment.length):
                theChar = s.sequence[cNum]
                # This needs to be a single if/elif chain.  With a separate
                # "if" for '?', a gap would get its code and then fall on
                # through to the final else, where it is looked up in
                # symbols.
                if theChar == '-':
                    code = var.GAP_CODE
                elif theChar == '?':
                    code = var.QMARK_CODE
                elif aPart.equateSymbols and theChar in aPart.equateSymbols:
                    code = var.EQUATES_BASE + \
                        aPart.equateSymbols.index(theChar)
                else:
                    code = aPart.symbols.index(theChar)
                aPart.seq[sNum, cNum] = code

    nParts = len(self.parts)
    if nParts == 1:  # its all one part
        _refill(self.parts[0], self)

    elif nParts > 1:
        # the number of parts is also the length of the subsets list
        if not (self.nexusSets and self.nexusSets.charPartition and
                self.nexusSets.charPartition.subsets and
                len(self.nexusSets.charPartition.subsets) == nParts):
            gm.append(
                'Something is wrong with the nexusSets or its charPartition')
            raise Glitch(gm)
        for i in range(nParts):
            cpSubset = self.nexusSets.charPartition.subsets[i]
            _refill(self.parts[i], self.subsetUsingMask(cpSubset.mask))

    else:
        gm.append("No parts.")
        raise Glitch(gm)
示例#5
0
    def simulate(self, calculatePatterns=True, resetSequences=True, resetNexusSetsConstantMask=True, refTree=None):
        """Simulate new data into the data attached to this tree.

        Self must have both a data and a model attached.

        The random numbers currently come from the C language random
        function (in stdlib on Linux).  Unless told otherwise it replays the
        same series of random numbers, so two runs produce the same simulated
        data.  Reseed it with func.reseedCRandomizer(), for example

            func.reseedCRandomizer(os.getpid())

        Ordinarily no reference data is involved.  An unusual alternative,
        which comes from (or is inspired by) PhyloBayes, makes the simulation
        conditional on the original data, by way of the conditional
        likelihoods of that reference data at the root.  Turn it on by
        passing the tree+model+data to use as refTree.  Its tree and model
        should be identical to those of self, and a likelihood should have
        been calculated with it beforehand so that its conditional
        likelihoods are set.

        Args:

            calculatePatterns (bool): True by default.  Whether to "compress"
                the newly simulated data to facilitate a faster likelihood
                calculation.

            resetSequences (bool): True by default.  Whether to bring the
                simulated sequences in C back into Python.

            resetNexusSetsConstantMask (bool): True by default.  Simulating
                makes the constant mask of any associated nexus sets go out
                of sync; when this is set a new mask is made and installed.
                Raises a P4Error if set while resetSequences is not.

            refTree (Tree): None by default.  If supplied, a tree+model+data
                which has had its likelihood calculated, where the tree+model
                is identical to self.

        """

        if refTree:
            from tree import Tree
            assert isinstance(refTree, Tree)
            assert refTree.model
            assert refTree.data
            # No C tree yet, so do the likelihood calculation here.
            if not refTree.cTree:
                refTree.calcLogLike(verbose=False)
            assert refTree.model.cModel
            assert refTree.data.cData

        self._commonCStuff()

        if refTree:
            # The reference must be separate data, but of the same shape.
            assert refTree.data.cData != self.data.cData
            assert refTree.data.nParts == self.data.nParts
            assert refTree.data.nTax == self.data.nTax
            for taxNum in range(self.data.nTax):
                assert refTree.data.taxNames[taxNum] == self.data.taxNames[taxNum]
            assert len(refTree.data.alignments) == len(self.data.alignments)
            assert refTree.logLike, "Do a likelihood calculation with the refTree before using it here."
            # could have some more checks ...

        # A NexusSets object attached to an alignment defines what the parts
        # are, so it cannot simply be thrown away; only its constant sites
        # mask goes stale, and that is refreshed below.

        # A zero second argument is passed when there is no reference tree.
        if refTree:
            pf.p4_simulate(self.cTree, refTree.cTree)
        else:
            pf.p4_simulate(self.cTree, 0)

        if calculatePatterns:
            for thePart in self.data.parts:
                pf.makePatterns(thePart.cPart)
                pf.setGlobalInvarSitesVec(thePart.cPart)

        if not resetSequences:
            if resetNexusSetsConstantMask:
                complaint = [
                    'Tree.simulate().',
                    "resetSequences is not set, but resetNexusSetsConstantMask is set,",
                    "which is probably not going to work as you want.",
                ]
                raise P4Error(complaint)
            return

        self.data.resetSequencesFromParts()
        if resetNexusSetsConstantMask:
            for alig in self.data.alignments:
                if alig.nexusSets:
                    alig.nexusSets.constant.mask = alig.constantMask()

    def resetPartsContentFromSequences(self):
        """Reset the content of the parts from self.sequences.

        With a single part, the sequences of self are used directly.  With
        several parts, each part is refilled from the alignment returned by
        self.subsetUsingMask() for the corresponding charPartition subset.

        If var.doDataPart is not set, the sequences are poked into
        Part.cPart, after which patterns are made and the global invariant
        sites array is set.  If var.doDataPart is set, the characters are
        instead encoded one by one into Part.seq.

        Raises:
            P4Error: If there are no parts, or if there are several parts
                but the nexusSets charPartition subsets do not match them in
                number.
        """

        gm = ['Alignment.resetPartsContentFromSequences()']

        def _refill(aPart, alignment):
            """Load the sequences of alignment into aPart."""
            if not var.doDataPart:
                pf.pokeSequences(
                    aPart.cPart,
                    ''.join([s.sequence for s in alignment.sequences]))
                # are the following necessary?
                pf.makePatterns(aPart.cPart)
                pf.setGlobalInvarSitesVec(aPart.cPart)
                return

            for sNum, s in enumerate(alignment.sequences):
                for cNum in range(alignment.length):
                    theChar = s.sequence[cNum]
                    # This needs to be a single if/elif chain.  With a
                    # separate "if" for '?', a gap would get its code and
                    # then fall on through to the final else, where it is
                    # looked up in symbols.
                    if theChar == '-':
                        code = var.GAP_CODE
                    elif theChar == '?':
                        code = var.QMARK_CODE
                    elif aPart.equateSymbols and theChar in aPart.equateSymbols:
                        code = var.EQUATES_BASE + \
                            aPart.equateSymbols.index(theChar)
                    else:
                        code = aPart.symbols.index(theChar)
                    aPart.seq[sNum, cNum] = code

        nParts = len(self.parts)
        if nParts == 1:  # its all one part
            _refill(self.parts[0], self)

        elif nParts > 1:
            # the number of parts is also the length of the subsets list
            if not (self.nexusSets and self.nexusSets.charPartition and
                    self.nexusSets.charPartition.subsets and
                    len(self.nexusSets.charPartition.subsets) == nParts):
                gm.append(
                    'Something is wrong with the nexusSets or its charPartition')
                raise P4Error(gm)
            for i in range(nParts):
                cpSubset = self.nexusSets.charPartition.subsets[i]
                _refill(self.parts[i], self.subsetUsingMask(cpSubset.mask))

        else:
            gm.append("No parts.")
            raise P4Error(gm)

    def _initParts(self):
        """(Re)build self.parts from the sequences of this alignment.

        Without a nexusSets charPartition the whole alignment becomes a
        single Part named 'all': a C part is allocated with pf.newPart(), the
        equates table and the concatenated sequences are poked into it, and
        patterns and the global invariant sites vector are then calculated.

        With a charPartition, one part is made per charPartition subset, by
        calling this same method on the alignment returned by
        self.subsetUsingMask() and taking over its single part.

        Raises:
            P4Error: If there are no sequences, or length, symbols or dim is
                not set, or if the C part could not be obtained.
        """
        gm = ['Alignment._initParts()']

        # Rebinding the list is what lets go of any previous parts.  (A
        # per-item "del p" in a loop would only unbind the loop variable.)
        self.parts = []

        # The equate symbols, sorted, as one string.  sorted() is used as it
        # works whether keys() gives a list or a view.
        if self.equates:
            eqSymb = ''.join(sorted(self.equates.keys()))
        else:
            eqSymb = ''

        if not (len(self.sequences) and self.length and self.symbols
                and self.dim):
            gm.append("Can't allocate part.")
            if not len(self.sequences):
                gm.append("-no sequences.")
            elif not self.length:
                gm.append("-the sequences have no length")
            elif not self.symbols:
                gm.append("-no symbols")
            elif not self.dim:
                gm.append("-dim not set")
            raise P4Error(gm)

        if not self.nexusSets or not self.nexusSets.charPartition:
            # its all one part
            aPart = Part()
            aPart.alignment = self
            aPart.name = 'all'
            aPart.lowName = 'all'
            aPart.dataType = self.dataType
            aPart.dim = self.dim
            aPart.symbols = self.symbols
            aPart.equates = self.equates
            aPart.nTax = len(self.sequences)
            aPart.nChar = self.length
            assert aPart.nChar

            aPart.cPart = pf.newPart(len(self.sequences), self.length,
                                     eqSymb, self.symbols)
            if not aPart or not aPart.cPart:
                gm.append("Failed to get memory for part.")
                raise P4Error(gm)

            # Make the equates table: for each equate symbol (in sorted
            # order) one '1' or '0' per symbol, saying whether that symbol is
            # in the equate.  It is all flattened into a single string.
            equatesTable = []
            for eq in eqSymb:
                e = self.equates[eq]
                for s in self.symbols:
                    equatesTable.append('1' if s in e else '0')
            pf.pokeEquatesTable(aPart.cPart, ''.join(equatesTable))

            # All the sequences, end to end, as one string.
            pf.pokeSequences(aPart.cPart,
                             ''.join([s.sequence for s in self.sequences]))
            pf.makePatterns(aPart.cPart)
            pf.setGlobalInvarSitesVec(aPart.cPart)

            self.parts.append(aPart)

        else:
            for cpp in self.nexusSets.charPartition.subsets:
                b = self.subsetUsingMask(cpp.mask)
                # This very method, but now there are no charPartitions in b.
                b._initParts()
                b.parts[0].name = cpp.name
                b.parts[0].lowName = cpp.name.lower()
                self.parts.append(b.parts[0])
                b.parts = []  # so we don't try free-ing it twice