Martel.ToEol(block_data) + Martel.Rep(Martel.AnyEol() | (Martel.Str(" " * INDENT) + Martel.ToEol(block_data)))) # The first line # LOCUS AC007323 86436 bp DNA PLN 19-JAN-2000 locus = Std.dbid(Martel.Word(), {"dbname": "gb", "type": "primary"}) size = Martel.Group("size", Martel.Rep1(Martel.Integer())) # deal with the different kinds of residues we can have residue_prefixes = Martel.Str("ss-", "ds-", "ms-") residue_types = [ Std.alphabet(Martel.Str("DNA"), {"alphabet": "iupac-ambiguous-dna"}), Std.alphabet(Martel.Str("RNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("mRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("tRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("rRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("uRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("snRNA"), {"alphabet": "iupac-ambiguous-rna"}), Std.alphabet(Martel.Str("PROTEIN"), {"alphabet": "iupac-protein"}), ] residue_type = Martel.Group("residue_type", Martel.Opt(Martel.Alt(residue_prefixes)) + Martel.Opt(Martel.Alt(*residue_types)) + Martel.Opt(Martel.Opt(blank_space) + Martel.Str("circular", "linear")))
## ID - identification (begins each entry; 1 per entry) # ID entryname dataclass; molecule; division; sequencelength BP. divisions = Martel.Re("EST|PHG|FUN|GSS|HTC|HTG|HUM|INV|ORG|MAM|VRT|PLN|" + \ "PRO|ROD|SYN|STS|UNC|VRL|[A-Z]{3}") # XXX is found in S40706 ID_line = Martel.Str("ID ") + \ Std.dbid(Martel.UntilSep("entry_name", " "), {"type": "primary", "dbname": "embl"}) + \ whitespace + \ Martel.ToSep("dataclass", ";") + \ whitespace + \ Martel.Group("molecule", Std.alphabet(Martel.Str("DNA", "circular DNA"), {"alphabet": "iupac-ambiguous-dna"}) | Std.alphabet(Martel.Str("RNA", "circular RNA"), {"alphabet": "iupac-ambiguous-rna"}) | Std.alphabet(Martel.Str("XXX"), {"alphabet": "nucleotide"})) + \ Martel.Str("; ") + \ Martel.Group("division", divisions) + \ Martel.Str("; ") + \ Martel.Digits("length") + \ Martel.Str(" BP.") + \ Martel.AnyEol() ## AC - accession number (>=1 per entry) accession = Std.dbid(Martel.UntilSep("accession", ";"), {"type": "accession",