if [ch.traits[traitName] for ch in k.children].count(kloc) > 0: proceed = True #print k.index,k.parent.index,k.traits,k.parent.traits,k.traits[traitName] if proceed == True: ## if at least one valid tip and no hanging nodes subtree = copy.deepcopy( ll.traverseWithinTrait(k, traitName)) subtree_leaves = [ x.name for x in subtree if isinstance(x, bt.leaf) ] if len(subtree_leaves) > 0: mostRecentTip = max([ bt.decimalDate( x.strip("'").split('|')[-1]) for x in subtree_leaves ]) while sum( [ len(nd.children) - sum([ 1 if ch in subtree else 0 for ch in nd.children ]) for nd in subtree if isinstance( nd, bt.node) and nd.index != 'Root' ] ) > 0: ## keep removing nodes as long as there are nodes with children that are not entirely within subtree for nd in sorted( [ q for q in subtree if isinstance(q, bt.node)
## NOTE(review): this fragment arrived collapsed onto a single line, so its
## original nesting is not recoverable from the text alone. The two flag checks
## below read the current line `l` and presumably belong to the body of a
## line-reading loop whose header is outside this view, while the statements
## after them presumably run once the loop has finished - confirm the
## indentation when splicing this back into the script.

## Toggle tip-name collection. The ';' check runs second, so a line that
## contains both "translate" and ';' leaves the flag switched off.
if 'translate' in l.lower():  ## start looking for tips
    tipFlag = True
if ';' in l:  ## stop looking for tips
    tipFlag = False

## Single parenthesised argument: prints the same text under Python 2 and is
## also valid syntax under Python 3.
print("Number of objects found in tree string: %d" % (len(ll.Objects)))

## rename tips, find the highest tip (in absolute time) in the tree
if not tips:  ## use this if tip names in the string are already the final format
    for k in ll.Objects:
        if isinstance(k, bt.leaf):
            k.name = k.numName
    tipLabels = [x.name for x in ll.Objects if isinstance(x, bt.leaf)]
else:  ## there's a tip name map at the beginning, so translate the names
    ll.renameTips(tips)  ## give each tip a name
    tipLabels = list(tips.values())

## The last '|'-separated field of every label (surrounding single quotes
## stripped, '_' replaced by '-') is handed to bt.decimalDate; the largest
## returned value becomes the calibration point. max() raises ValueError if
## there are no labels at all.
highestTip = max([
    bt.decimalDate(label.strip("'").split('|')[-1].replace("_", "-"), variable=True)
    for label in tipLabels
])

ll.sortBranches()
ll.setAbsoluteTime(highestTip)

## Smallest absoluteTime over every object except the root - the scan covers
## all of ll.Objects, not only leaves, despite the variable's name.
lowestTip = min([x.absoluteTime for x in ll.Objects if x != ll.root])
timeTraversed = highestTip - lowestTip
tips) ## Rename tips so their name refers to sequence name else: for k in ll.Objects: if isinstance(k, leaf): k.name = k.numName ## otherwise every tip gets a name that's the same as tree string names #### calibration dateCerberus = re.compile( tformat) ## search pattern + brackets on actual calendar date if calibration == True: ## Calibrate tree so everything has a known position in actual time tipDatesRaw = [ dateCerberus.search(x).group(1) for x in tips.values() ] tipDates = [] for tip_date in tipDatesRaw: tipDates.append( bt.decimalDate(tip_date, fmt=dformat, variable=True)) maxDate = max(tipDates) ## identify most recent tip ll.setAbsoluteTime(maxDate) outfile.write('%s' % cerberus.group(1) ) ## write MCMC state number to output log file ################################################################################ if 'treeLength' in analyses: treeL = sum([k.length for k in ll.Objects]) ## do analysis outfile.write('\t%s' % (treeL)) ## output to file ################################################### if 'RC' in analyses: ## 'RC' was queued as an analysis Ns = [] ## empty list Ss = [] uNs = [] uSs = [] for k in ll.Objects: ## iterate over branch objects in the tree
bt.make_tree(treestring,ll) ## pass it to make_tree function ll.traverse_tree() ## Traverse the tree - sets the height of each object in the tree #### renaming tips if len(tips)>0: ll.renameTips(tips) ## Rename tips so their name refers to sequence name else: for k in ll.Objects: if isinstance(k,leaf): k.name=k.numName ## otherwise every tip gets a name that's the same as tree string names #### calibration dateCerberus=re.compile(tformat) ## search pattern + brackets on actual calendar date if calibration==True: ## Calibrate tree so everything has a known position in actual time tipDatesRaw=[dateCerberus.search(x).group(1) for x in tips.values()] tipDates=[] for tip_date in tipDatesRaw: tipDates.append(bt.decimalDate(tip_date,fmt=dformat,variable=True)) maxDate=max(tipDates) ## identify most recent tip ll.setAbsoluteTime(maxDate) outfile.write('%s'%cerberus.group(1)) ## write MCMC state number to output log file ################################################################################ if 'treeLength' in analyses: treeL=sum([k.length for k in ll.Objects]) ## do analysis outfile.write('\t%s'%(treeL)) ## output to file ################################################### if 'RC' in analyses: ## 'RC' was queued as an analysis Ns=[] ## empty list Ss=[] uNs=[] uSs=[] for k in ll.Objects: ## iterate over branch objects in the tree if k.traits.has_key('N'): ## if branch has a trait labelled "N"...
elif ';' not in l: ## something's wrong - nothing that matches the tip regex is being captured where it should be in the file print 'tip not captured by regex:',l.replace('\t','') if 'translate' in l.lower(): ## start looking for tips tipFlag=True if ';' in l: ## stop looking for tips tipFlag=False print "Number of objects found in tree string: %d"%(len(ll.Objects)) ## rename tips, find the highest tip (in absolute time) in the tree if len(tips)==0: ## use this if tip names in the string are already the final format for k in ll.Objects: if isinstance(k,bt.leaf): k.name=k.numName highestTip=max([bt.decimalDate(x.name.strip("'").split('|')[-1].replace("_","-"),variable=True) for x in ll.Objects if isinstance(x,bt.leaf)]) else: ## there's a tip name map at the beginning, so translate the names ll.renameTips(tips) ## give each tip a name highestTip=max([bt.decimalDate(x.strip("'").split('|')[-1].replace("_","-"),variable=True) for x in tips.values()]) ll.sortBranches() ll.setAbsoluteTime(highestTip) lowestTip=min([x.absoluteTime for x in ll.Objects if x != ll.root]) timeTraversed = highestTip - lowestTip coords = pd.read_csv("data/coordinates.csv", names=["Country", "lat", "lng", "Continent"]) coords["Country_NA_EARTH"] = coords["Country"] # Match country names in coordinates.csv with names in shape file