示例#1
0
                        Martel.ToEol(block_data) +
                        Martel.Rep(Martel.AnyEol() |
                                   (Martel.Str(" " * INDENT) + Martel.ToEol(block_data))))
                                   

# First line of a record, for example:
# LOCUS       AC007323    86436 bp    DNA             PLN       19-JAN-2000

# The locus name: a single Martel word, tagged through Std.dbid with the
# attributes dbname="gb" and type="primary".
locus = Std.dbid(
    Martel.Word(),
    {"dbname": "gb", "type": "primary"})

# The sequence size: one or more Martel integers captured in a "size" group.
size = Martel.Group("size", Martel.Rep1(Martel.Integer()))

# Handle the different kinds of residues a record can declare.

# Literal prefixes that may precede the residue keyword.
residue_prefixes = Martel.Str("ss-", "ds-", "ms-")

# One Std.alphabet-tagged literal per residue keyword, kept in the order in
# which the alternatives are listed.  The list is built from a generator
# expression (rather than a list comprehension) so that the loop variables
# never end up in the module namespace, whatever the Python version.
residue_types = list(
    Std.alphabet(Martel.Str(_keyword), {"alphabet": _alphabet})
    for _keyword, _alphabet in (
        ("DNA", "iupac-ambiguous-dna"),
        ("RNA", "iupac-ambiguous-rna"),
        ("mRNA", "iupac-ambiguous-rna"),
        ("tRNA", "iupac-ambiguous-rna"),
        ("rRNA", "iupac-ambiguous-rna"),
        ("uRNA", "iupac-ambiguous-rna"),
        ("snRNA", "iupac-ambiguous-rna"),
        ("PROTEIN", "iupac-protein"),
    )
)


# The "residue_type" group is a sequence of three parts, each of them
# optional:
#   1. one of the residue_prefixes literals,
#   2. one of the residue_types alternatives,
#   3. the literal "circular" or "linear", itself optionally preceded by
#      blank_space (blank_space is defined outside this part of the file).
# NOTE(review): because every part is optional, this group presumably can
# match an empty string as well -- confirm against Martel's Opt semantics.
residue_type = Martel.Group("residue_type",
                            Martel.Opt(Martel.Alt(residue_prefixes)) +
                            Martel.Opt(Martel.Alt(*residue_types)) +
                            Martel.Opt(Martel.Opt(blank_space) + 
                                       Martel.Str("circular", "linear")))
## ID - identification             (begins each entry; 1 per entry)
# ID   entryname  dataclass; molecule; division; sequencelength BP.

# Division codes accepted in the ID line: the explicitly listed three-letter
# codes, followed by a catch-all alternative for any three upper-case letters.
# The two adjacent literals are joined into a single pattern string.
divisions = Martel.Re(
    "EST|PHG|FUN|GSS|HTC|HTG|HUM|INV|ORG|MAM|VRT|PLN|"
    "PRO|ROD|SYN|STS|UNC|VRL|[A-Z]{3}"
)

# Grammar for the ID line.  The pieces are concatenated in this order:
#   "ID   ", entry_name, whitespace, dataclass, whitespace, molecule,
#   "; ", division, "; ", length, " BP.", end of line.
# The "XXX" molecule alternative is accepted because it is found in S40706.
# (whitespace is defined outside this part of the file.)
ID_line = (
    Martel.Str("ID   ")
    # Entry name: tagged as the primary "embl" database identifier.
    + Std.dbid(Martel.UntilSep("entry_name", " "),
               {"type": "primary", "dbname": "embl"})
    + whitespace
    + Martel.ToSep("dataclass", ";")
    + whitespace
    # Molecule: each alternative carries the alphabet attribute it implies.
    + Martel.Group(
        "molecule",
        Std.alphabet(Martel.Str("DNA", "circular DNA"),
                     {"alphabet": "iupac-ambiguous-dna"})
        | Std.alphabet(Martel.Str("RNA", "circular RNA"),
                       {"alphabet": "iupac-ambiguous-rna"})
        | Std.alphabet(Martel.Str("XXX"),
                       {"alphabet": "nucleotide"}))
    + Martel.Str("; ")
    + Martel.Group("division", divisions)
    + Martel.Str("; ")
    + Martel.Digits("length")
    + Martel.Str(" BP.")
    + Martel.AnyEol()
)


## AC - accession number           (>=1 per entry)
accession = Std.dbid(Martel.UntilSep("accession", ";"),
                     {"type": "accession",