def testTemplateIsOpenFile(self):
    """
    A BEAST2XML instance whose template is an open file-like object must
    still produce parseable XML.
    """
    # Render XML from an argument-less BEAST2XML, then feed that text back
    # in as the template via an in-memory file object.
    templateFp = StringIO(BEAST2XML().toString())
    rendered = BEAST2XML(template=templateFp).toString()

    # Parsing is the assertion: ET.fromstring raises on malformed XML.
    ET.fromstring(rendered)
def testChainLength(self):
    """
    The chain length given to toString must show up as the 'chainLength'
    attribute of the 'run' element in the XML.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    root = ET.fromstring(beast.toString(chainLength=100))
    found = BEAST2XML.findElements(ET.ElementTree(root))
    run = found['run']
    # Attribute values come back from the XML as strings.
    self.assertEqual('100', run.get('chainLength'))
def testDateUnit(self):
    """
    The dateUnit given to toString must show up as the 'units' attribute
    of the trait element in the XML.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    root = ET.fromstring(beast.toString(dateUnit='day'))
    found = BEAST2XML.findElements(ET.ElementTree(root))
    units = found['./run/state/tree/trait'].get('units')
    self.assertEqual('day', units)
def testScreenLogEvery(self):
    """
    The screenLogEvery value given to toString must show up as the
    'logEvery' attribute of the screenlog logger in the XML.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    root = ET.fromstring(beast.toString(screenLogEvery=300))
    found = BEAST2XML.findElements(ET.ElementTree(root))
    screenLogger = found["./run/logger[@id='screenlog']"]
    # Attribute values come back from the XML as strings.
    self.assertEqual('300', screenLogger.get('logEvery'))
def testDefaultDateDirection(self):
    """
    Passing no dateDirection to toString must result in a trait whose
    'traitname' attribute is 'date-backward'.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    root = ET.fromstring(beast.toString())
    found = BEAST2XML.findElements(ET.ElementTree(root))
    traitName = found['./run/state/tree/trait'].get('traitname')
    self.assertEqual('date-backward', traitName)
def testNoDateUnit(self):
    """
    When no dateUnit is given to toString, the trait element in the XML
    must have no 'units' attribute.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    root = ET.fromstring(beast.toString())
    found = BEAST2XML.findElements(ET.ElementTree(root))
    # Element.get returns None for an attribute that is not present.
    units = found['./run/state/tree/trait'].get('units')
    self.assertEqual(None, units)
def testDefaultAge(self):
    """
    Passing a default age to toString must result in the expected XML:
    a sequence added without an age is listed in the trait text with
    that default age.
    """
    xml = BEAST2XML(clockModel=self.CLOCK_MODEL)
    xml.addSequence(Read('id1', 'ACTG'))
    tree = ET.ElementTree(ET.fromstring(xml.toString(defaultAge=33.0)))
    elements = BEAST2XML.findElements(tree)

    # The sequence id with the passed default age (33.0) must be in the
    # traits.
    trait = elements['./run/state/tree/trait']
    # assertIn reports both operands on failure, unlike a bare boolean.
    self.assertIn('id1=33.0', trait.text)
def testOneSequenceWithAgeAddedTogether(self):
    """
    Adding a sequence with an age (both passed to addSequence) must
    result in the expected XML.
    """
    xml = BEAST2XML(clockModel=self.CLOCK_MODEL)
    xml.addSequence(Read('id1', 'ACTG'), 44)
    tree = ET.ElementTree(ET.fromstring(xml.toString()))
    elements = BEAST2XML.findElements(tree)

    # The sequence id with the given age must be in the traits. The age
    # was passed as the int 44 and is expected to be rendered as 44.0.
    trait = elements['./run/state/tree/trait']
    # assertIn reports both operands on failure, unlike a bare boolean.
    self.assertIn('id1=44.0', trait.text)
def testSequenceIdDateRegex(self):
    """
    Using a sequence id date regex must result in the expected XML: the
    value captured from the sequence id appears in the trait text.
    """
    xml = BEAST2XML(clockModel=self.CLOCK_MODEL,
                    sequenceIdDateRegex='^.*_([0-9]+)')
    xml.addSequence(Read('id1_80_xxx', 'ACTG'))
    tree = ET.ElementTree(ET.fromstring(xml.toString()))
    elements = BEAST2XML.findElements(tree)

    # The sequence id, with the value (80) captured from it by the regex,
    # must be in the traits.
    trait = elements['./run/state/tree/trait']
    # assertIn reports both operands on failure, unlike a bare boolean.
    self.assertIn('id1_80_xxx=80.0', trait.text)
def testSequenceIdDateRegexNonMatchingNotAnError(self):
    """
    Using a sequence id date regex that doesn't match is not an error if
    we pass sequenceIdDateRegexMayNotMatch=True, in which case the default
    age should be assigned.
    """
    xml = BEAST2XML(clockModel=self.CLOCK_MODEL,
                    sequenceIdDateRegex='^.*_([0-9]+)',
                    sequenceIdDateRegexMayNotMatch=True)
    # 'id1_xxx' has no digits after an underscore, so the regex cannot
    # match it.
    xml.addSequence(Read('id1_xxx', 'ACTG'))
    tree = ET.ElementTree(ET.fromstring(xml.toString(defaultAge=50)))
    elements = BEAST2XML.findElements(tree)

    # The sequence id with the passed default age must be in the traits.
    trait = elements['./run/state/tree/trait']
    # assertIn reports both operands on failure, unlike a bare boolean.
    self.assertIn('id1_xxx=50.0', trait.text)
def testTemplateWithNoRun(self, mock):
    """
    toString must raise a ValueError when the template given to BEAST2XML
    has no <run> tag.
    """
    # NOTE(review): 'mock' is presumably injected by a patch decorator that
    # is not visible in this chunk -- confirm what it substitutes for the
    # contents of 'filename'.
    expectedError = "^Could not find 'run' tag in XML template$"
    beast = BEAST2XML(template='filename')
    assertRaisesRegex(self, ValueError, expectedError, beast.toString)
def testLogFileBaseName(self):
    """
    The log file base name given to toString must be used as the prefix
    of the trace log and tree log file names in the XML.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    root = ET.fromstring(beast.toString(logFileBasename='xxx'))
    found = BEAST2XML.findElements(ET.ElementTree(root))

    # Trace log: base name followed by the class's trace log suffix.
    traceLogger = found["./run/logger[@id='tracelog']"]
    expectedTrace = 'xxx' + BEAST2XML.TRACELOG_SUFFIX
    self.assertEqual(expectedTrace, traceLogger.get('fileName'))

    # Tree log: base name followed by the class's tree log suffix.
    treeLogger = found["./run/logger[@id='treelog.t:alignment']"]
    expectedTree = 'xxx' + BEAST2XML.TREELOG_SUFFIX
    self.assertEqual(expectedTree, treeLogger.get('fileName'))
def testAddSequences(self):
    """
    Adding several sequences must result in the expected XML: one
    <sequence> child of the <data> tag per read, in the order added, and
    every id present in the trait text with an age of 0.0.
    """
    # (id, sequence) pairs, in the order they are added.
    expected = (('id1', 'GG'), ('id2', 'CC'), ('id3', 'AA'))

    xml = BEAST2XML(clockModel=self.CLOCK_MODEL)
    xml.addSequences([Read(id_, sequence) for id_, sequence in expected])
    tree = ET.ElementTree(ET.fromstring(xml.toString()))
    elements = BEAST2XML.findElements(tree)

    # The sequences must be the children of the <data> tag.
    data = elements['data']
    self.assertEqual(len(expected), len(data))

    for index, (id_, sequence) in enumerate(expected):
        child = data[index]
        self.assertEqual('sequence', child.tag)
        self.assertEqual(sequence, child.get('value'))
        self.assertEqual('4', child.get('totalcount'))
        self.assertEqual(id_, child.get('taxon'))
        self.assertEqual('seq_' + id_, child.get('id'))
        self.assertIsNone(child.text)

    # The sequence ids with the default age of 0.0 must be in the traits.
    trait = elements['./run/state/tree/trait']
    for id_, _ in expected:
        # assertIn reports both operands on failure, unlike a bare boolean.
        self.assertIn(id_ + '=0.0', trait.text)
def testTemplateWithNoTracelog(self, mock):
    """
    Passing a template that has no tracelog logger tag to BEAST2XML must
    raise a ValueError when toString is called.
    """
    # NOTE(review): 'mock' is presumably injected by a patch decorator that
    # is not visible in this chunk -- confirm what it substitutes for the
    # contents of 'filename'.
    xml = BEAST2XML(template='filename')
    # Raw strings keep the regex escapes (\. \[ \]) literal. In a normal
    # string these are invalid escape sequences, which Python warns about.
    # Triple quotes let the pattern hold both quote characters unescaped.
    error = (r'''^Could not find "\./run/logger\[@id='tracelog'\]" tag '''
             r'in XML template$')
    assertRaisesRegex(self, ValueError, error, xml.toString)
def testTemplateWithNoTrait(self, mock):
    """
    Passing a template that has no <trait> tag to BEAST2XML must raise a
    ValueError when toString is called.
    """
    # NOTE(review): 'mock' is presumably injected by a patch decorator that
    # is not visible in this chunk -- confirm what it substitutes for the
    # contents of 'filename'.
    xml = BEAST2XML(template='filename')
    # Raw strings keep the regex escape (\.) literal. In a normal string it
    # is an invalid escape sequence, which Python warns about.
    error = (r"^Could not find '\./run/state/tree/trait' tag in XML "
             r"template$")
    assertRaisesRegex(self, ValueError, error, xml.toString)
def testDontMimicBEAUti(self):
    """
    When mimicBEAUti is not given to toString, the root element of the
    XML must carry neither of the BEAUti attributes.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    parsed = ET.fromstring(beast.toString())
    root = ET.ElementTree(parsed).getroot()
    # Element.get returns None for an attribute that is not present.
    for attribute in ('beautitemplate', 'beautistatus'):
        self.assertEqual(None, root.get(attribute))
def testMimicBEAUti(self):
    """
    With mimicBEAUti=True given to toString, the root element of the XML
    must carry the expected BEAUti attributes.
    """
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    parsed = ET.fromstring(beast.toString(mimicBEAUti=True))
    root = ET.ElementTree(parsed).getroot()

    template = root.get('beautitemplate')
    self.assertEqual('Standard', template)
    # The status attribute must be present but empty.
    status = root.get('beautistatus')
    self.assertEqual('', status)
def testOneSequenceWithDateRegexAndDateUnitInDays(self):
    """
    Adding a sequence with a date regex and date units in days must
    result in the expected XML.
    """
    # Build a sequence id that embeds the date ten days before today.
    sequenceDate = (date.today() -
                    timedelta(days=10)).strftime('%Y-%m-%d')
    r = r'^.*_(?P<year>\d\d\d\d)-(?P<month>\d\d)-(?P<day>\d\d)'
    id_ = 'id1_' + sequenceDate

    xml = BEAST2XML(clockModel=self.CLOCK_MODEL,
                    sequenceIdDateRegex=r, dateUnit='day')
    xml.addSequence(Read(id_, 'ACTG'))
    tree = ET.ElementTree(ET.fromstring(xml.toString()))
    elements = BEAST2XML.findElements(tree)

    # The sequence id with an age of 10 days must be in the traits.
    trait = elements['./run/state/tree/trait']
    # assertIn reports both operands on failure, unlike a bare boolean.
    self.assertIn(id_ + '=10.0', trait.text)
    self.assertEqual('day', trait.get('units'))
def testExpectedTemplate(self):
    """
    Passing the clock model under test (self.CLOCK_MODEL) must result in
    the expected XML template being loaded: the tracelog logger must
    contain a <log> element referring to 'clockRate.c:alignment'.
    """
    xml = BEAST2XML(clockModel=self.CLOCK_MODEL)
    tree = ET.ElementTree(ET.fromstring(xml.toString()))
    root = tree.getroot()
    # Element.find returns None when nothing matches the path.
    log = root.find(
        './run/logger[@id="tracelog"]/log[@idref="clockRate.c:alignment"]')
    self.assertIsNotNone(log)
def testSequenceIdDateRegexNonMatching(self):
    """
    addSequence must raise a ValueError when a sequence id date regex is
    in use and the sequence id does not match it.
    """
    expectedError = ("^No sequence date could be found in 'id1' using the "
                     "sequence id date regex$")
    beast = BEAST2XML(clockModel=self.CLOCK_MODEL,
                      sequenceIdDateRegex='^.*_([0-9]+)')
    # 'id1' contains no underscore, so the regex cannot match it.
    read = Read('id1', 'ACTG')
    assertRaisesRegex(self, ValueError, expectedError, beast.addSequence,
                      read)
def testOneSequence(self):
    """
    Adding a sequence must result in the expected XML: a single
    <sequence> child of the <data> tag and the sequence id present in the
    trait text with an age of 0.0.
    """
    xml = BEAST2XML(clockModel=self.CLOCK_MODEL)
    xml.addSequence(Read('id1', 'ACTG'))
    tree = ET.ElementTree(ET.fromstring(xml.toString()))
    elements = BEAST2XML.findElements(tree)

    # The sequence must be the only child of the <data> tag.
    data = elements['data']
    self.assertEqual(1, len(data))
    child = data[0]
    self.assertEqual('sequence', child.tag)
    self.assertEqual('ACTG', child.get('value'))
    self.assertEqual('4', child.get('totalcount'))
    self.assertEqual('id1', child.get('taxon'))
    self.assertEqual('seq_id1', child.get('id'))
    self.assertIsNone(child.text)

    # The sequence id with the default age of 0.0 must be in the traits.
    trait = elements['./run/state/tree/trait']
    # assertIn reports both operands on failure, unlike a bare boolean.
    self.assertIn('id1=0.0', trait.text)
def testTransformFunction(self):
    """
    When a transform function is given to toString, the returned string
    must be the serialization of the tree that function returns.
    """
    def replaceTree(_tree):
        # Ignore the incoming tree and hand back a fixed one-element tree.
        replacement = ET.fromstring(
            "<?xml version='1.0' encoding='UTF-8'?><hello/>")
        return ET.ElementTree(replacement)

    # The expected case of the encoding name depends on the Python version.
    if PY3:
        encoding = 'UTF-8'
    else:
        encoding = 'utf-8'
    expected = ("<?xml version='1.0' encoding='" + encoding +
                "'?>\n<hello />")

    beast = BEAST2XML(clockModel=self.CLOCK_MODEL)
    self.assertEqual(expected, beast.toString(transformFunc=replaceTree))
# backwards compatibility. '--sequenceIdRegexMayNotMatch', '--sequenceIdDateRegexMayNotMatch', action='store_false', dest='sequenceIdRegexMustMatch', help=('If specified (and --sequenceIdDateRegex or --sequenceIdAgeRegex is ' 'given) it will not be considered an error if a sequence id does ' 'not match the given regular expression. In that case, sequences ' 'will be assigned an age of zero unless one is given via --age.')) addFASTACommandLineOptions(parser) args = parser.parse_args() reads = parseFASTACommandLineOptions(args) xml = BEAST2XML( template=args.templateFile, clockModel=args.clockModel, sequenceIdDateRegex=args.sequenceIdDateRegex, sequenceIdAgeRegex=args.sequenceIdAgeRegex, sequenceIdRegexMustMatch=args.sequenceIdRegexMustMatch, dateUnit=args.dateUnit) xml.addSequences(reads) if args.age: # Flatten lists of lists that we get from using both nargs='+' and # action='append'. We use both because it allows people to use --age on the # command line either via "--age id1=33 --age id2=21" or "--age id1=33 # id2=21", or a combination of these. That way it's not necessary to # remember which way you're supposed to use it and you also can't be hit by # the subtle problem encountered in # https://github.com/acorg/dark-matter/issues/453 ages = list(chain.from_iterable(args.age))
def testNoArgsGivesValidXML(self):
    """
    A BEAST2XML built with no template or clock model, and a toString call
    with no arguments, must produce valid XML.
    """
    rendered = BEAST2XML().toString()
    # Parsing is the assertion: ET.fromstring raises on malformed XML.
    ET.fromstring(rendered)
'match the id from its beginning.')) parser.add_argument( '--sequenceIdDateRegexMayNotMatch', action='store_true', default=False, help=('If specified (and --sequenceIdDateRegex is given) it will not be ' 'considered an error if a sequence id does not match the given ' 'regular expression. In that case, sequences will be assigned the ' 'default date unless one is given via --age.')) addFASTACommandLineOptions(parser) args = parser.parse_args() reads = parseFASTACommandLineOptions(args) xml = BEAST2XML( template=args.templateFile, clockModel=args.clockModel, sequenceIdDateRegex=args.sequenceIdDateRegex, sequenceIdDateRegexMayNotMatch=args.sequenceIdDateRegexMayNotMatch, ) xml.addSequences(reads) if args.age: # Flatten lists of lists that we get from using both nargs='+' and # action='append'. We use both because it allows people to use --age on the # command line either via "--age id1=33 --age id2=21" or "--age id1=33 # id2=21", or a combination of these. That way it's not necessary to # remember which way you're supposed to use it and you also can't be hit by # the subtle problem encountered in # https://github.com/acorg/dark-matter/issues/453 ages = list(chain.from_iterable(args.age)) for ageInfo in ages: