Exemplo n.º 1
0
def test_missing_end2():
    # Same as the test_missing_end1 but using HeaderFooter
    exp = Martel.HeaderFooter("dataset", {},
                              None, None, None,
                              Martel.Group("record", Martel.Re(r"(b*\R)*a\R")),
                              RecordReader.EndsWith, ("a",),
                              None, None, None
                              )
    lines = [
        "bbb",
        "bb",
        "a",
        "b",
        "a",
        "a",
        ]
    text = "\n".join(lines) + "\n"

    iterator = exp.make_iterator("record")
    # This should work
    for x in iterator.iterateString(text):
        pass

    # This should not
    lines.append("c")
    text = "\n".join(lines) + "\n"
    try:
        for x in iterator.iterateString(text):
            pass
        raise AssertionError
    except Parser.ParserPositionException, exc:
        assert exc.pos == 15, exc.pos
Exemplo n.º 2
0
def test_header_footer_parser():
    """Header, record and footer may share a tag name without colliding."""
    hf_format = Martel.HeaderFooter(
        "all", {"state": "New Mexico"},
        Martel.Re("(?P<term?pos=header>a+)\R"), RecordReader.CountLines, (1, ),
        Martel.Re("(?P<term?pos=body>b+)\R"), RecordReader.CountLines, (1, ),
        Martel.Re("(?P<term?pos=footer>c+)\R"), RecordReader.CountLines, (1, ),
    )

    grab = GrabElements()
    parser = hf_format.make_parser()
    parser.setContentHandler(grab)
    parser.parseString("a\nbb\nbb\nccc\n")

    # One enclosing element, then one "term" per input line.
    expected = [
        ("all", {"state": "New Mexico"}),
        ("term", {"pos": "header"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "body"}),
        ("term", {"pos": "footer"}),
    ]
    elements = grab.elements
    assert len(elements) == 5, len(elements)
    for pos in range(len(expected)):
        check_element(elements[pos], expected[pos])
Exemplo n.º 3
0
def test_hf4():
    """Iterate "spam" records that follow a one-line header, with no footer."""
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser()
    ip = IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        None, None, None,
        "spam")

    text = "\n".join(["aaaaaaaaa",
                      "b", "c",
                      "bb", "cc",
                      "bbb", "ccc"]) + "\n"

    # The n-th record (counting from 1) holds n b's then n c's.
    seen = 0
    for rec in ip.iterateString(text):
        seen = seen + 1
        assert rec["spam"][0] == "b" * seen + "\n" + "c" * seen + "\n"
Exemplo n.º 4
0
def test_Spaces():
    """Check what Martel.Spaces accepts, rejects, and how it names groups."""
    parse = Martel.Spaces().make_parser().parseString

    accepted = (" ", "\t", "  ", " \t  \t\t  ")
    rejected = ("\n", " \n", " X ", "")
    for text in accepted:
        must_parse("Spaces", parse, text)
    for text in rejected:
        must_not_parse("not Spaces", parse, text)

    has_group(Martel.Spaces("spam", {"x": "pick"}), " " * 100, "spam", "pick")
    has_group(Martel.Spaces("eggs", {"x": "name"}), "\t" * 200, "eggs", "name")
    has_no_group(Martel.Spaces(), " ")
Exemplo n.º 5
0
def test_Word():
    """Check what Martel.Word accepts, rejects, and how it names groups."""
    parse = Martel.Word().make_parser().parseString

    accepted = ("Andrew", "Dalke", "was_here", "test12", "12df")
    rejected = ("*", "", "this-that")
    for text in accepted:
        must_parse("Word", parse, text)
    for text in rejected:
        must_not_parse("not Word", parse, text)

    has_group(Martel.Word("spam", {"x": "fly"}), "_", "spam", "fly")
    has_group(Martel.Word("eggs", {"x": "boy"}), "9", "eggs", "boy")
    has_no_group(Martel.Word(), "__")
Exemplo n.º 6
0
def test_DelimitedFields():
    exp = Martel.Group("test", Martel.DelimitedFields("Field", "/"))
    parser = exp.make_parser()

    file = StringIO.StringIO()
    parser.setContentHandler(saxutils.XMLGenerator(file))
    parser.parseString("a/b/cde/f//\n")
    s = file.getvalue()
    expect = "<test><Field>a</Field>/<Field>b</Field>/<Field>cde</Field>/" \
             "<Field>f</Field>/<Field></Field>/<Field></Field>\n</test>"
    assert string.find(s, expect) != -1, ("Got: %s" % (repr(s), ))
Exemplo n.º 7
0
def test_Digits():
    """Check what Martel.Digits accepts, rejects, and how it names groups."""
    parse = Martel.Digits().make_parser().parseString

    accepted = ("0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
                "10", "20", "99", "453", "34653", "34359739467623")
    rejected = ("A", "1A", "123123123T", "-1")
    for text in accepted:
        must_parse("Digits", parse, text)
    for text in rejected:
        must_not_parse("not Digits", parse, text)

    has_group(Martel.Digits("spam", {"x": "this"}), "5", "spam", "this")
    has_group(Martel.Digits("eggs", {"x": "that"}), "9", "eggs", "that")
    has_no_group(Martel.Digits(), "00")
Exemplo n.º 8
0
def test_ToSep():
    exp = Martel.Group("test",
                       Martel.ToSep("colon", ":") + \
                       Martel.ToSep("space", " ") + \
                       Martel.ToSep("empty", "!"))
    parser = exp.make_parser()

    file = StringIO.StringIO()
    parser.setContentHandler(saxutils.XMLGenerator(file))
    parser.parseString("q:wxy !")
    s = file.getvalue()
    expect = "<test><colon>q</colon>:<space>wxy</space> <empty></empty>!</test>"
    assert string.find(s, expect) != -1, ("Got: %s" % (repr(s), ))
Exemplo n.º 9
0
def test_Punctuation():
    """Each char 0..255 must parse exactly when it is in string.punctuation."""
    parse = Martel.Punctuation().make_parser().parseString
    for code in range(0, 256):
        char = chr(code)
        if char not in string.punctuation:
            must_not_parse("not Punctuation", parse, char)
        else:
            must_parse("Punctuation", parse, char)

    first = string.punctuation[0]
    last = string.punctuation[-1]
    has_group(Martel.Punctuation("spam", {"x": "Iran"}), first,
              "spam", "Iran")
    has_group(Martel.Punctuation("eggs", {"x": "Iraq"}), last,
              "eggs", "Iraq")
    has_no_group(Martel.Punctuation(), string.punctuation[1])
Exemplo n.º 10
0
def gen_iterator():
    """Build an IterHeaderFooter yielding "spam" records between a/d lines."""
    # Parsers are created in header, record, footer order, as before.
    header_parser = Martel.Re(r"a*\R").make_parser()
    record_parser = Martel.Group(
        "spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level = 1)
    footer_parser = Martel.Re(r"d*\R").make_parser()

    return IterParser.IterHeaderFooter(
        header_parser, RecordReader.CountLines, (1,),
        record_parser, RecordReader.CountLines, (2,),
        footer_parser, RecordReader.CountLines, (1,),
        "spam")
Exemplo n.º 11
0
def test_header_footer7():
    exp = Martel.HeaderFooter("dataset", {}, None, None, None,
                              Martel.Re("a(?P<b>b*)a\R"),
                              RecordReader.CountLines, (1, ), None, None, None)
    lines = [
        "aa",
        "aBbbba",
        "abba",
    ]
    text = "\n".join(lines) + "\n"

    try:
        for info in exp.make_iterator("b").iterateString(text):
            pass
    except Parser.ParserPositionException, exc:
        assert exc.pos == 4, exc.pos
Exemplo n.º 12
0
    def __init__(self, debug = 0):
        """Build and cache the Martel parser used by this scanner.

        Parser construction is slow, so it is done once here and the
        result is kept on the instance.

        Arguments:
        o debug - Debugging level handed to the parser.  Level 0 is no
        debugging, Level 2 displays the most debugging info (but is much
        slower).  See the Martel documentation for more info on this.
        """
        # Tags the MartelParser should report; everything else is dropped.
        self.interest_tags = [
            "input_file_name", "num_int_metabolites",
            "num_reactions", "metabolite_line", "unbalanced_metabolite",
            "num_rows", "num_cols", "irreversible_vector",
            "branch_metabolite", "non_branch_metabolite",
            "stoichiometric_tag", "kernel_tag", "subsets_tag",
            "reduced_system_tag", "convex_basis_tag",
            "conservation_relations_tag", "elementary_modes_tag",
            "reaction", "enzyme", "matrix_row", "sum_is_constant_line",
            "end_stochiometric", "end_kernel", "end_subsets",
            "end_reduced_system", "end_convex_basis",
            "end_conservation_relations", "end_elementary_modes",
        ]

        # Restrict the record expression to those tags before building.
        selected = Martel.select_names(metatool_format.metatool_record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level = debug)
Exemplo n.º 13
0
def make_expression(format, tag_format="%s"):
    """format, tag_format = "%s" -> Martel Expression

    Turn the given time format string into the corresponding Martel
    Expression.  A format term may contain a Group name and attribute
    information.  If present, the group name is %'ed with the
    tag_format to produce the tag name to use.  Use None to specify
    that named groups should not be used.

    >>> from Martel import Time
    >>> from xml.sax import saxutils
    >>> exp = Time.make_expression("%m-%Y\\n", "created-%s")
    >>> parser = exp.make_parser()
    >>> parser.setContentHandler(saxutils.XMLGenerator())
    >>> parser.parseString("05-1921\\n")
    <?xml version="1.0" encoding="iso-8859-1"?>
    <created-month type="numeric">05</created-month>-<created-year type="long">1921</created-year>

    See the Time module docstring for more information.

    """
    # Delegate to the shared format walker, asking it to emit Martel
    # expression objects for text, groups and regular expressions.
    return _parse_time(format,
                       tag_format,
                       text_to_result=Martel.Str,
                       group_to_result=Martel.Group,
                       re_to_result=Martel.Re,
                       t=Martel.NullOp())
Exemplo n.º 14
0
def test_Float():
    """Check what Martel.Float accepts, rejects, and how it names groups."""
    parse = Martel.Float().make_parser().parseString

    signs = ("", "-", "+", "-1", "+2", "3")
    exponents = ("", "E0", "E+0", "E-0", "E4", "e+5", "e-6", "E10", "E-19",
                 "e+28")
    mantissas = (".1", "5.", "7.6", "989", ".0001")

    # Every sign/mantissa/exponent combination must be accepted.
    for sign in signs:
        for exponent in exponents:
            for mantissa in mantissas:
                must_parse("Float", parse, sign + mantissa + exponent)

    rejected = ("1E", ".E", "1.E", "1/", "E0", "1.2E0K", "=1", "+-1", ".",
                "e", "-e", "-e0")
    for text in rejected:
        must_not_parse("not Float", parse, text)

    has_group(Martel.Float("spam", {"x": "spot"}), "1.0", "spam", "spot")
    has_group(Martel.Float("eggs", {"x": "SPOT"}), "0.8", "eggs", "SPOT")
    has_no_group(Martel.Float(), "+1")
Exemplo n.º 15
0
    def __init__(self, debug = 0):
        """Build and cache the Martel parser used by this scanner.

        Parser construction is slow, so it is done once here and the
        result is kept on the instance.

        Arguments:
        o debug - Debugging level handed to the parser.  Level 0 is no
        debugging, Level 2 displays the most debugging info (but is much
        slower).  See the Martel documentation for more info on this.
        """
        # Tags the MartelParser should report; everything else is dropped.
        self.interest_tags = [
            "input_file_name", "num_int_metabolites",
            "num_reactions", "metabolite_line", "unbalanced_metabolite",
            "num_rows", "num_cols", "irreversible_vector",
            "branch_metabolite", "non_branch_metabolite",
            "stoichiometric_tag", "kernel_tag", "subsets_tag",
            "reduced_system_tag", "convex_basis_tag",
            "conservation_relations_tag", "elementary_modes_tag",
            "reaction", "enzyme", "matrix_row", "sum_is_constant_line",
            "end_stochiometric", "end_kernel", "end_subsets",
            "end_reduced_system", "end_convex_basis",
            "end_conservation_relations", "end_elementary_modes",
        ]

        # Restrict the record expression to those tags before building.
        selected = Martel.select_names(metatool_format.metatool_record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level = debug)
Exemplo n.º 16
0
def test_Unprintable():
    """Each char 0..255 must parse exactly when not in string.printable."""
    parse = Martel.Unprintable().make_parser().parseString

    # Collect the accepted characters for the group checks below.
    unprintables = []
    for code in range(0, 256):
        char = chr(code)
        if char not in string.printable:
            must_parse("Unprintable", parse, char)
            unprintables.append(char)
        else:
            must_not_parse("not Unprintable", parse, char)

    has_group(Martel.Unprintable("spam", {"x": "import"}), unprintables[0],
              "spam", "import")
    has_group(Martel.Unprintable("eggs", {"x": "export"}), unprintables[-1],
              "eggs", "export")
    has_no_group(Martel.Unprintable(), unprintables[1])
Exemplo n.º 17
0
def test_make_iterparsers1():
    """ParseRecords.make_iterator must give an IterRecords over each line."""
    spam = Martel.Group("spam", Martel.Re(r"a*\R"))
    exp = Martel.ParseRecords("dataset", {}, spam,
                              RecordReader.CountLines, (1,))
    iterator = exp.make_iterator("spam")
    assert isinstance(iterator, IterParser.IterRecords)

    # Line n (counting from 0) holds n a's followed by a newline.
    text = "".join(["a" * n + "\n" for n in range(0, 10)])

    seen = 0
    for rec in iterator.iterateString(text):
        assert len(rec["spam"][0][:-1]) == seen, (seen, rec["spam"][0])
        seen = seen + 1
    assert seen == 10
def index(handle, index_fn=None):
    """index(handle[, index_fn]) -> list of (PMID, MedlineID, start, end)

    Index a Medline XML file.  Returns where the records are, as
    offsets from the beginning of the handle.  index_fn is a callback
    function with parameters (PMID, MedlineID, start, end) and is
    called as soon as each record is indexed.

    """
    # Sniff the first 1000 characters to pick the format module, then wrap
    # the handle together with the data that was already read.
    head = handle.read(1000)
    format_module = choose_format(head)
    handle = _SavedDataHandle(handle, head)

    # Only these tags are kept in the expression that gets parsed.
    selected = Martel.select_names(
        format_module.format, ["MedlineCitation", "PMID", "MedlineID"])

    # Record every citation; the caller's callback, if any, runs first.
    found = []
    def citation_fn(pmid, medline_id, start, end,
                    indexes=found, index_fn=index_fn):
        if index_fn is not None:
            index_fn(pmid, medline_id, start, end)
        indexes.append((pmid, medline_id, start, end))

    # Build the parser, wire up the handlers and run it over the handle.
    parser = selected.make_parser(debug_level=0)
    parser.setContentHandler(_IndexerHandler(citation_fn))
    parser.setErrorHandler(handler.ErrorHandler())
    parser.parseFile(handle)
    return found
Exemplo n.º 19
0
def test_header_footer3():
    """Iterating on tag "b" must yield one match per line, of growing length."""
    record = Martel.Re("a(?P<b>b*)a\R")
    exp = Martel.HeaderFooter("dataset", {},
                              None, None, None,
                              record, RecordReader.CountLines, (1, ),
                              None, None, None)
    text = "\n".join(["aa", "aba", "abba"]) + "\n"

    # Line n (counting from 0) has exactly n b's between the two a's.
    seen = 0
    for info in exp.make_iterator("b").iterateString(text):
        matches = info["b"]
        assert len(matches) == 1
        assert len(matches[0]) == seen, (matches[0], seen)
        seen = seen + 1
    assert seen == 3, seen
Exemplo n.º 20
0
def test_multi():
    """Several '&'-separated attributes on one group name are all decoded."""
    pattern = Martel.Re("(?P<qwe?def=%7Edalke&a=1&b=2&cc=33>X)")
    expected_attrs = {"def": "~dalke", "a": "1", "b": "2", "cc": "33"}
    check_element(get_element(pattern), ("qwe", expected_attrs))
Exemplo n.º 21
0
    def __init__(self, debug = 0):
        """Build the parser, limited to the tags listed in interest_tags.

        Arguments:
        o debug - Debugging level passed through to make_parser.
        """
        self.interest_tags = [
            "primer_name",
            "amplifier",
            "amplifier_sequence",
            "amplifier_length",
            "end_record",
        ]

        selected = Martel.select_names(primersearch_format.record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level = debug)
Exemplo n.º 22
0
def test_record_parser():
    """Bad records are counted as (non-fatal) errors; good ones are parsed."""
    record = Martel.Group("A", Martel.Str("X\n") + Martel.Re("a*\n"))
    tagtable = record.make_parser().tagtable

    parser = Parser.RecordParser("blah", {}, tagtable, (0, 1, {}),
                                 RecordReader.StartsWith, ("X", ))

    errors = CountErrors()
    records = CountRecords("A")
    parser.setErrorHandler(errors)
    parser.setContentHandler(records)

    # Six "X" records; the ones holding "b" and "q" do not match.
    parser.parseString("X\na\nX\nb\nX\naaa\nX\naaaa\nX\nq\nX\na\n")

    assert errors.fatal_error_count == 0, errors.fatal_error_count
    assert errors.error_count == 2, errors.error_count
    assert records.count == 4, records.count
Exemplo n.º 23
0
def test_make_iterparsers2():
    """HeaderFooter.make_iterator must give an IterHeaderFooter.

    The input has a block of "a" lines, a block of "b" lines and a block
    of "c" lines; iterating on "record" is expected to yield exactly one
    record.
    """
    exp = Martel.HeaderFooter("dataset", {},
                              Martel.Group("header", Martel.Re(r"(a*\R)*")),
                              RecordReader.Until, ("b",),

                              Martel.Group("record", Martel.Re(r"(b*\R)*")),
                              RecordReader.Until, ("c",),

                              Martel.Group("footer", Martel.Re(r"(c*\R)*")),
                              RecordReader.Everything, (),)

    iterator = exp.make_iterator("record")
    assert isinstance(iterator, IterParser.IterHeaderFooter), iterator
    # The comma after "a" was missing, which silently merged the first two
    # entries into the single string "aaa".
    lines = ["a",
             "aa",
             "aaaaaaa",
             "b",
             "bb",
             "bbbb",
             "bbbbbbbb",
             "bbbbbbbbbbbbbbbb",
             "bbbbbbbbbbbbbbbbbbbbbbbbbbbbbbbb",
             "cccc",
             "cc",
             "c",
             ]

    text = "\n".join(lines) + "\n"

    i = 0
    for rec in iterator.iterateString(text):
        i = i + 1
    assert i == 1, i
Exemplo n.º 24
0
    def __init__(self, debug=0):
        """Build the parser, limited to the tags listed in interest_tags.

        Arguments:
        o debug - Debugging level passed through to make_parser.
        """
        self.interest_tags = [
            "primer_name",
            "amplifier",
            "amplifier_sequence",
            "amplifier_length",
            "end_record",
        ]

        selected = Martel.select_names(primersearch_format.record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level=debug)
Exemplo n.º 25
0
def test_record_parser():
    """Attributes survive when a pattern is wrapped in ParseRecords."""
    line_format = Martel.Re("(?P<term?field=first>...)"
                            "(?P<term?field=second>...)"
                            "(?P<last>.)\R")
    records_format = Martel.ParseRecords("all", {"author": "guido"},
                                         line_format,
                                         RecordReader.CountLines, (1, ))

    grab = GrabElements()
    parser = records_format.make_parser()
    parser.setContentHandler(grab)
    parser.parseString("aaabbbZ\ncccdddZ\n")

    # The enclosing element, then the same three elements for each line.
    per_line = [
        ("term", {"field": "first"}),
        ("term", {"field": "second"}),
        ("last", {}),
    ]
    expected = [("all", {"author": "guido"})] + per_line + per_line

    elements = grab.elements
    assert len(elements) == 7
    for pos in range(len(expected)):
        check_element(elements[pos], expected[pos])
Exemplo n.º 26
0
def test_same_tag():
    """Two groups sharing the tag "char" keep their own attributes."""
    grab = GrabElements()
    parser = Martel.Re("(?P<char?type=a>a+)(?P<char?type=b>b+)").make_parser()
    parser.setContentHandler(grab)
    parser.parseString("aaabb")

    found = grab.elements
    assert len(found) == 2, len(found)
    check_element(found[0], ("char", {"type": "a"}))
    check_element(found[1], ("char", {"type": "b"}))
Exemplo n.º 27
0
def test_header_footer3():
    """Parse input that has a footer but no header."""
    source = """\
ID 1
This is some data
//
ID 2
This is some more data
//
Okay, that was all of the data.
"""
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><record>ID 1
This is some data
//
</record><record>ID 2
This is some more data
//
</record><footer>Okay, that was all of the data.
</footer></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    # Require at least 5 characters (just to be safe)
    footer_exp = Martel.Group("footer", Martel.Re(r".....(.|\n)*"))

    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Nothing, (), (),
        RecordReader.EndsWith, ("//\n", ), record_parser.tagtable,
        RecordReader.Everything, (), footer_parser.tagtable,
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(source))

    text = outfile.getvalue()
    assert text == gold, (text, gold)
Exemplo n.º 28
0
def delimiter(delim):
    """Return a ParseRecords format for lines of delim-separated fields.

    delim must be a single character other than newline or carriage
    return; anything else trips an assertion.
    """
    assert len(delim) == 1, \
           "delimiter can only be a single character long, not %s" % repr(delim)
    assert delim not in "\n\r", "Cannot use %s as a delimiter" % repr(delim)

    # A field is any run of characters that is neither the delimiter nor
    # an end-of-line character.
    field = Martel.Group("field", Martel.Rep(Martel.AnyBut(delim + "\r\n")))
    more_fields = Martel.Rep(Martel.Str(delim) + field)
    record = Martel.Group("record", field + more_fields + Martel.AnyEol())

    # One record per input line.
    return Martel.ParseRecords("delimited", {}, record,
                               RecordReader.CountLines, (1, ))
Exemplo n.º 29
0
    def __init__(self, debug=0):
        """Build the parser, limited to the tags listed in interest_tags.

        Arguments:
        o debug - Debugging level passed through to make_parser.
        """
        self.interest_tags = [
            "comments", "single_primer_line", "start_primer", "product_size",
            # forward primer
            "forward_start", "forward_length", "forward_tm", "forward_gc",
            "forward_seq",
            # reverse primer
            "reverse_start", "reverse_length", "reverse_tm", "reverse_gc",
            "reverse_seq",
            # internal oligo
            "internal_start", "internal_length", "internal_tm", "internal_gc",
            "internal_seq",
            "end_record",
        ]

        selected = Martel.select_names(primer3_format.record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level=debug)
Exemplo n.º 30
0
def test_header_footer2():
    """Parse input that has a header but no footer."""
    source = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID 2
This is some more data
"""
    gold = """\
<?xml version="1.0" encoding="iso-8859-1"?>
<hf><header>
This is some misc. header text
that goes on until the end.
</header><record>ID 1
This is some data
</record><record>ID 2
This is some more data
</record></hf>"""

    # Don't use a regexp like this in your code - for testing only!
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        RecordReader.Nothing, (), (),
        (0, 1, {}))

    outfile = StringIO()
    hf.setContentHandler(saxutils.XMLGenerator(outfile))
    hf.setErrorHandler(handler.ErrorHandler())
    hf.parseFile(StringIO(source))

    text = outfile.getvalue()
    assert text == gold, (text, gold)
Exemplo n.º 31
0
def test_header_footer7():
    """Parse input with a header and a footer but no record data."""
    source = """\
This is some misc. header text
that goes on until the end.
FOOTER
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))
    footer_exp = Martel.Group("footer", Martel.Re("FOOTER(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()
    footer_parser = footer_exp.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.CountLines, (2, ), header_parser.tagtable,
        RecordReader.EndsWith, ("//", ), record_parser.tagtable,
        RecordReader.StartsWith, ("FOOTER", ), footer_parser.tagtable,
        (0, 1, {}))

    records = CountRecords("record")
    hf.setContentHandler(records)
    errors = CountErrors()
    hf.setErrorHandler(errors)
    hf.parseFile(StringIO(source))

    # No errors of either kind, and no "record" element at all.
    assert errors.error_count == 0, errors.error_count
    assert errors.fatal_error_count == 0, errors.fatal_error_count
    assert records.count == 0, records.count
Exemplo n.º 32
0
def test_header_footer5():
    """Bad records can be skipped when there are no footer records."""
    source = """
This is some misc. header text
that goes on until the end.
ID 1
This is some data
ID A
This is some more data
ID 3
This is again some more data
ID Q
This blah
ID W
QWE
ID 987
To be
ID 897
Or not to be
"""
    header_exp = Martel.Group("header", Martel.Re(r"(.|\n)*"))
    record_exp = Martel.Group("record", Martel.Re(r"ID \d+(.|\n)*"))

    header_parser = header_exp.make_parser()
    record_parser = record_exp.make_parser()

    hf = Parser.HeaderFooterParser(
        "hf", {},
        RecordReader.Until, ("ID", ), header_parser.tagtable,
        RecordReader.StartsWith, ("ID", ), record_parser.tagtable,
        None, (), (),
        (0, 1, {}))

    records = CountRecords("record")
    hf.setContentHandler(records)
    errors = CountErrors()
    hf.setErrorHandler(errors)
    hf.parseFile(StringIO(source))

    # "ID A", "ID Q" and "ID W" lack the digits the record pattern needs.
    assert errors.error_count == 3, errors.error_count
    assert errors.fatal_error_count == 0, errors.fatal_error_count
    assert records.count == 4, records.count
Exemplo n.º 33
0
def test_ri3():
    # error in the second record
    ip = IterParser.IterRecords(
        Martel.Group("spam", Martel.Re(r"b*\Rc*\R")).make_parser(debug_level = 1),
        RecordReader.CountLines,
        (2,),
        
        "spam")

    lines = ["b",
             "c",
             "b-",
             "cc",
             "bbb",
             "ccc",
             ]
    text = "\n".join(lines) + "\n"
    try:
        for x in ip.iterateString(text):
            pass
    except Parser.ParserPositionException, exc:
        assert exc.pos == 5, exc.pos
Exemplo n.º 34
0
    def __init__(self, debug = 0):
        """Build the parser, limited to the tags listed in interest_tags.

        Arguments:
        o debug - Debugging level passed through to make_parser.
        """
        self.interest_tags = [
            "comments", "single_primer_line", "start_primer", "product_size",
            # forward primer
            "forward_start", "forward_length", "forward_tm", "forward_gc",
            "forward_seq",
            # reverse primer
            "reverse_start", "reverse_length", "reverse_tm", "reverse_gc",
            "reverse_seq",
            # internal oligo
            "internal_start", "internal_length", "internal_tm", "internal_gc",
            "internal_seq",
            "end_record",
        ]

        selected = Martel.select_names(primer3_format.record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level = debug)
    def make_iterator(self, tag="record", select_names=None, debug_level=0):
        """S.make_iterator([tag][, select_names][, debug_level]) -> iterator

        Iterators are cached per (sorted select_names, debug_level); every
        call hands back a copy of the cached one.
        """
        # Normalise the names so that their order does not affect the key.
        if select_names is None:
            names_key = None
        else:
            select_names = list(select_names)
            select_names.sort()
            names_key = tuple(select_names)
        key = names_key, debug_level

        cache = self._iterator_cache
        if not cache.has_key(key):
            # Cache miss: build the iterator from the stored expression.
            import Martel
            exp = self.expression
            if select_names is not None:
                exp = Martel.select_names(exp, select_names)
            cache[key] = exp.make_iterator(tag, debug_level = debug_level)
        return cache[key].copy()
    def make_parser(self, select_names=None, debug_level=0):
        """S.make_parser([select_names][, debug_level]) -> parser

        Parsers are cached per (sorted select_names, debug_level); every
        call hands back a copy of the cached one.
        """
        # Normalise the names so that their order does not affect the key.
        if select_names is None:
            names_key = None
        else:
            select_names = list(select_names)
            select_names.sort()
            names_key = tuple(select_names)
        key = names_key, debug_level

        cache = self._parser_cache
        if not cache.has_key(key):
            # Cache miss: build the parser from the stored expression.
            import Martel
            exp = self.expression
            if select_names is not None:
                exp = Martel.select_names(exp, select_names)
            cache[key] = exp.make_parser(debug_level = debug_level)
        return cache[key].copy()
Exemplo n.º 37
0
    def __init__(self, debug = 0):
        """Build and cache the Martel parser used by this scanner.

        Parser construction is slow, so it is done once here and the
        result is kept on the instance.

        Arguments:
        o debug - Debugging level handed to the parser.  Level 0 is no
        debugging, Level 2 displays the most debugging info (but is much
        slower).  See the Martel documentation for more info on this.
        """
        # Tags the MartelParser should report; everything else is dropped.
        self.interest_tags = ["comment", "title_line", "sequence"]

        # Restrict the record expression to those tags before building.
        selected = Martel.select_names(
            intelligenetics_format.intelligenetics_record,
            self.interest_tags)
        self._parser = selected.make_parser(debug_level = debug)
Exemplo n.º 38
0
    def __init__(self, debug = 0):
        """Build and cache the Martel parser used by this scanner.

        Parser construction is slow, so it is done once here and the
        result is kept on the instance.

        Arguments:
        o debug - Debugging level handed to the parser.  Level 0 is no
        debugging, Level 2 displays the most debugging info (but is much
        slower).  See the Martel documentation for more info on this.
        """
        # Tags the MartelParser should report; everything else is dropped.
        self.interest_tags = [
            'header_line',
            'system_line',
            'substance_multiline',
            'reactor_multiline',
            'include_line',
        ]

        # Restrict the record expression to those tags before building.
        selected = Martel.select_names(ecell_format.ecell_record,
                                       self.interest_tags)
        self._parser = selected.make_parser(debug_level = debug)
Exemplo n.º 39
0
    def __init__(self, debug_level = 0):
        """Initialize the scanner by setting up our caches.

        Creating the parser takes a long time, so we want to cache it
        to reduce parsing time.

        Arguments:
        o debug_level - The level of debugging that the parser should
        display. Level 0 is no debugging, Level 2 displays the most
        debugging info (but is much slower). See Martel documentation
        for more info on this.
        """
        # a listing of all tags we are interested in scanning for
        # in the MartelParser
        self.interest_tags = [
            "cd_tag",
            "description_tag",
            "status_tag",
            "source_tag",
            "date_tag",
            "taxonomy_tag",
            "aligned_tag",
            "representative_tag",
            "range_tag",
            "sequence_tag",
            "description_contents_multiline",
            "status_contents_multiline",
            "source_contents_multiline",
            "date_contents_multiline",
            "reference_contents_multiline",
            "taxonomy_contents_multiline",
            "aligned_contents_multiline",
            "representative_contents_multiline",
            "range_contents_multiline",
            "cd_contents_multiline",
            "sequence_contents_multiline",
            "table_entry",
        ]

        # make a parser that returns only the tags we are interested in
        expression = Martel.select_names(cdd_format.cdd_record,
                                         self.interest_tags)
        # Pass the level by keyword so the call does not depend on the
        # position of make_parser's parameters.
        self._parser = expression.make_parser(debug_level = debug_level)
Exemplo n.º 40
0
def _fixup_sp_pattern(exp):
    """Turn a dbxref expression into a compiled regular expression.

    Returns (pattern, dbstyle, primary_type, secondary_type).  The one
    dbname group and the two dbid groups are renamed to "dbname",
    "primary_dbid" and "secondary_dbid" and stripped of their attributes;
    the "style" and "type" attribute values are returned alongside.
    """
    import re
    import Martel
    exp = Martel.select_names(exp, (Std.dbxref_dbname.tag,Std.dbxref_dbid.tag))

    # Exactly one database-name group is expected.
    name_groups = exp._find_groups(Std.dbxref_dbname.tag)
    assert len(name_groups) == 1
    name_group = name_groups[0]
    name_group.name = "dbname"
    dbstyle = name_group.attrs["style"]
    name_group.attrs = {}

    # Exactly two id groups are expected: primary first, then secondary.
    id_groups = exp._find_groups(Std.dbxref_dbid.tag)
    assert len(id_groups) == 2
    primary = id_groups[0]
    primary.name = "primary_dbid"
    primary_type = primary.attrs["type"]
    primary.attrs = {}
    secondary = id_groups[1]
    secondary.name = "secondary_dbid"
    secondary_type = secondary.attrs["type"]
    secondary.attrs = {}

    # Anchor the pattern at the end of the string.
    compiled = re.compile(str(exp) + "$")
    return compiled, dbstyle, primary_type, secondary_type
Exemplo n.º 41
0
			print "acc Start"
			self.m_accession=1
		if name=="pname":
			self.m_pname=1
		if name=="bioformat:sequence":
			self.m_seq=1
		if name=="bioformat:sequence_block":
			self.seq=''
			self.m_seq_block=1


	def endElement(self, name):
		if name == "accession":
			print "acc End"
			self.m_accession=0
		if name == "pname":
			self.m_pname=0
		if name=="bioformat:sequence":
			self.m_seq=0
		if name=="bioformat:sequence_block":
			#self.outf.write(self.seq+'\n')
			self.m_seq_block=0


if __name__=="__main__":
	# Script entry point: sys.argv[1] is handed to parser.parse() and
	# sys.argv[2] to the my_handler constructor.
	# NOTE(review): `exp` is built here but never used -- the parser below
	# comes from embl.format_expression instead; confirm which is intended.
	exp = Martel.select_names(embl.format, ("record", "accession", "pname", "bioformat:sequence",))
	parser=embl.format_expression.make_parser()
	#outf=open(sys.argv[2],'w')
	parser.setContentHandler(my_handler(sys.argv[2]))
	parser.parse(sys.argv[1])
Exemplo n.º 42
0
          Martel.Opt(Martel.Str(" ") + bib) +
          Martel.AnyEol())

# Here's the new SQ line format -- uses a CRC64
# SQ   SEQUENCE   889 AA;  100368 MW;  ABD7E3CD53961B78 CRC64;
SQ_exp = Martel.Re(
    "SQ   SEQUENCE +(?P<sequence_length>\d+) AA;"
    " +(?P<molecular_weight>\d+) MW;"
    " +(?P<crc?type=64>\w+) CRC64;\R")

# (group name, replacement expression) pairs applied to the sprot38
# definitions below.
replacements = [
    ("DT_created", DT_created_exp),
    ("OX_block", OX_exp),
    ("RX", RX_exp),
    ("SQ", SQ_exp),
]

# The same replacements are applied to each of the three sprot38 entry
# points.
record = Martel.replace_groups(sprot38.record, replacements)

format_expression = Martel.replace_groups(sprot38.format_expression,
                                          replacements)

format = Martel.replace_groups(sprot38.format, replacements)

if __name__ == "__main__":
    # Ad-hoc manual check against a hard-coded local database file.
    parser = format.make_parser()
    filename = "/home/dalke/ftps/databases/swiss-prot/release_compressed/sprot40.dat"
    # Disabled alternative that would read the file through `zcat`:
##    import os
##    infile = os.popen("zcat " + filename)
    infile = open(filename)
    # Jump to a fixed byte offset before reading.
    # NOTE(review): nothing visible here uses `parser` or `infile` after the
    # seek; this block looks truncated -- confirm against the full file.
    infile.seek(107976062)
Exemplo n.º 43
0
    AC_block +
    DT_created +
    DT_seq_update +
    DT_ann_update +
    Martel.Opt(DE_block) +
    Martel.Opt(GN_block) +
    Martel.Opt(OS_block) +
    Martel.Opt(OG_block) +
    Martel.Opt(OC_block) +
    Martel.Group("OX_block", Martel.NullOp()) +
    Martel.Group("reference_block", Martel.Rep(reference)) +
    comment +
    Martel.Opt(DR_block) +
    Martel.Opt(KW_block) +
    Martel.Opt(feature_block) +
    sequence +
    end,
                      {"format": "swissprot/38"})


# The whole dataset as one expression: one or more records.
format_expression = Martel.Group(
    "dataset",
    Martel.Rep1(record),
    {"format": "swissprot/38"})

# Record-at-a-time variant: records are delimited by a "//" line.
format = Martel.ParseRecords(
    "dataset", {"format": "swissprot/38"},
    record,
    RecordReader.EndsWith, ("//\n",))
                             
if __name__ == "__main__":
    # Manual smoke test: parse a local SWISS-PROT 38 file, keeping only the
    # "entry_name" and "sequence" tags.
    exp = Martel.select_names(format, ("entry_name", "sequence"))
    parser = exp.make_parser()
    infile = open("/home/dalke/ftps/swissprot/sprot38.dat")
    try:
        parser.parseFile(infile)
    finally:
        # Release the file handle even if parsing fails.
        infile.close()