def test_missing_end1():
    """Check that a record missing its end marker is reported with a position.

    The record expression is any number of "b" lines followed by one "a"
    line, and records are split with ``RecordReader.EndsWith`` using "a"
    as the end text.  Input in which every record ends with an "a" line
    must iterate without error.  After a trailing "c" line is appended,
    iteration must raise ``Parser.ParserPositionException`` whose ``pos``
    attribute is 15.
    """
    exp = Martel.ParseRecords(
        "dataset", {},
        Martel.Group("record", Martel.Re(r"(b*\R)*a\R")),
        RecordReader.EndsWith, ("a",))

    lines = ["bbb", "bb", "a", "b", "a", "a"]
    text = "\n".join(lines) + "\n"

    iterator = exp.make_iterator("record")

    # This should work
    for x in iterator.iterateString(text):
        pass

    # This should not: the final "c" line is never followed by an "a" line.
    lines.append("c")
    text = "\n".join(lines) + "\n"
    try:
        for x in iterator.iterateString(text):
            pass
    except Parser.ParserPositionException as exc:
        # Offset 15 is where the appended "c" line starts in ``text``
        # (the six preceding lines occupy 4+3+2+2+2+2 characters).
        assert exc.pos == 15, exc.pos
    else:
        raise AssertionError(
            "expected Parser.ParserPositionException for the unterminated "
            "trailing record")
def delimiter(delim):
    """Return a Martel record format for text split by a one-character delimiter.

    ``delim`` must have length 1 and must not be a newline or carriage
    return; both conditions are enforced with ``assert``.

    Each line is a "field" group followed by repeated (delimiter, "field")
    pairs and an end-of-line.  A field is a repetition of characters other
    than the delimiter, "\\r" and "\\n".  The line is wrapped in a "record"
    group, and the returned "delimited" format is built with
    ``Martel.ParseRecords`` using ``RecordReader.CountLines`` and the
    argument tuple ``(1,)`` (presumably one line per record -- confirm
    against RecordReader).
    """
    assert len(delim) == 1, (
        "delimiter can only be a single character long, not %s" % repr(delim))
    assert delim not in "\n\r", "Cannot use %s as a delimiter" % repr(delim)

    # Anything except the delimiter or an end-of-line character is field text.
    field_text = Martel.Rep(Martel.AnyBut(delim + "\r\n"))
    field = Martel.Group("field", field_text)

    # The first field, then any number of delimiter-prefixed fields.
    trailing_fields = Martel.Rep(Martel.Str(delim) + field)
    whole_line = field + trailing_fields + Martel.AnyEol()

    return Martel.ParseRecords(
        "delimited", {},
        Martel.Group("record", whole_line),
        RecordReader.CountLines, (1, ))
def test_make_iterparsers1():
    """Check that a ParseRecords expression yields an IterRecords iterator.

    Builds ten lines holding 0 through 9 "a" characters, iterates over
    them as "spam" records, and asserts that the n-th record's text
    (minus its final character) has length n and that exactly ten
    records are produced.
    """
    spam_record = Martel.Group("spam", Martel.Re(r"a*\R"))
    exp = Martel.ParseRecords("dataset", {}, spam_record,
                              RecordReader.CountLines, (1,))

    iterator = exp.make_iterator("spam")
    assert isinstance(iterator, IterParser.IterRecords)

    # Line n holds n copies of "a" followed by a newline.
    text = "".join(["a" * n + "\n" for n in range(10)])

    seen = 0
    for rec in iterator.iterateString(text):
        spam_text = rec["spam"][0]
        # Drop the final character (the newline) before measuring.
        assert len(spam_text[:-1]) == seen, (seen, spam_text)
        seen = seen + 1

    assert seen == 10
def test_record_parser():
    """Check the elements reported when parsing records with a ParseRecords parser.

    Each record is one line made of two three-character "term" groups
    (with ``field`` attributes "first" and "second"), a one-character
    "last" group and an end-of-line.  The records are wrapped in an "all"
    element carrying the attribute ``author="guido"``.  Parsing two
    records must leave seven entries in the handler's ``elements`` list:
    the "all" element followed by the three groups of each record, in
    order.
    """
    # The final piece is a raw string so that "\R" reaches Martel as a
    # literal backslash-R rather than relying on Python leaving an
    # unrecognised escape sequence untouched.
    record_exp = Martel.Re("(?P<term?field=first>...)"
                           "(?P<term?field=second>...)"
                           r"(?P<last>.)\R")
    record_format = Martel.ParseRecords("all", {"author": "guido"},
                                        record_exp,
                                        RecordReader.CountLines, (1, ))
    parser = record_format.make_parser()

    # GrabElements is defined elsewhere in this file; this test reads its
    # ``elements`` attribute after parsing.
    grab = GrabElements()
    parser.setContentHandler(grab)
    parser.parseString("aaabbbZ\ncccdddZ\n")

    expected = [
        ("all", {"author": "guido"}),
        # first record: "aaabbbZ"
        ("term", {"field": "first"}),
        ("term", {"field": "second"}),
        ("last", {}),
        # second record: "cccdddZ"
        ("term", {"field": "first"}),
        ("term", {"field": "second"}),
        ("last", {}),
    ]

    elements = grab.elements
    assert len(elements) == len(expected), len(elements)
    for element, wanted in zip(elements, expected):
        check_element(element, wanted)
#--- //

# The record terminator line: the text "//" followed by an end-of-line.
end = Martel.Group("END", Martel.Str("//") + Martel.AnyEol())

####################### put it all together

# One complete record.  The pieces are concatenated in this fixed order;
# those wrapped in Martel.Opt are optional.  The individual line and
# block expressions are defined earlier in this module.
record = Martel.Group(
    "swissprot38_record",
    ID
    + AC
    + DT_created
    + DT_seq_update
    + DT_ann_update
    + Martel.Opt(DE_block)
    + Martel.Opt(GN_block)
    + Martel.Opt(OS_block)
    + Martel.Opt(OG_block)
    + Martel.Opt(OC_block)
    + Martel.Group("reference_block", Martel.Rep(reference))
    + comment
    + Martel.Opt(DR_block)
    + Martel.Opt(KW_block)
    + Martel.Opt(feature_block)
    + sequence
    + end
)

# The whole file expressed as a single expression: repeated records
# inside a "swissprot38" group.
format_expression = Martel.Group("swissprot38", Martel.Rep1(record))

# The same format built with ParseRecords, splitting the input into
# records with RecordReader.EndsWith and the end text "//\n".
format = Martel.ParseRecords("swissprot38", {}, record,
                             RecordReader.EndsWith, ("//\n", ))