_to_secondary_end = Martel.Re(r"([^.\R]|(?!.\R)\.)+") database_id = Std.dbxref_dbname(Martel.UntilSep("database_identifier", ";"), {"style": "sp"}) primary_id = Std.dbxref_dbid(Martel.UntilSep("primary_identifier", ";"), {"type": "primary"}) secondary_id = Std.dbxref_dbid(Martel.Group("secondary_identifier", _to_secondary_end), {"type": "accession"}) # used in StdHandler for fast dxbref - don't rename! real_DR_general = Std.dbxref(database_id + Martel.Str("; ") + \ primary_id + Martel.Str("; ") + \ secondary_id, ) fast_DR_general = Std.fast_dbxref(real_DR_general, {"style": "sp-general"}) DR_general = Martel.FastFeature(fast_DR_general, "fast-sp-dbxref", real_DR_general.group_names() ) # used in StdHandler for fast dxbref - don't rename! real_DR_prosite = Std.dbxref( Std.dbxref_dbname(Martel.Group("database_identifier", Martel.Str("PROSITE", "PFAM")), {"style": "sp"}) + Martel.Str("; ") + primary_id +
FH_block = Martel.Str("FH Key Location/Qualifiers") + \ Martel.AnyEol() + \ Martel.Str("FH") + \ Martel.AnyEol() ## FT - feature table data (>=0 per entry) ##FT_line = Martel.Str("FT ") + \ ## Martel.ToEol("ft_data") ##FT_block = Martel.Rep1(FT_line) fq_dbxref = Std.feature_qualifier_name(Martel.Str("db_xref")) + \ Martel.Str('=') + \ Std.feature_qualifier_description( Martel.Str('"') + \ Std.dbxref(Std.dbxref_dbname(Martel.UntilSep(None, ":")) + \ Martel.Str(":") + \ Std.dbxref_dbid(Martel.UntilSep(None, '"'))) + \ Martel.Str('"')) + \ Martel.AnyEol() fq_generic = \ Martel.Assert(Martel.Word() + Martel.Str("=")) + \ Std.feature_qualifier_name(Martel.Word()) + \ Martel.Str("=") + \ Std.feature_qualifier_description(Martel.UntilEol()) + \ Martel.AnyEol() + \ Martel.Rep( Martel.Str("FT ") + \ (Martel.AssertNot(Martel.Str("/")) | Martel.AssertNot(Martel.Re(r"/\w+="))) + \
# General database identifier gnl|database|identifier
# Both fields are read up to the next "|" or space.
gnl_id = (
    Str("gnl|") +
    Std.dbxref_dbname(UntilSep(sep = "| ")) +
    Str("|") +
    Std.dbxref_dbid(UntilSep(sep = "| "))
)
ids.append(gnl_id)

# NCBI Reference Sequence ref|accession|locus
ids.append(make_2id("ref", "x-ref", "primary", "secondary"))

# Local Sequence identifier lcl|identifier
ids.append(make_1id("lcl", "local", "primary"))

# "|" them all together
# (alternation over every identifier pattern collected in `ids`)
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

#ncbi_term = Assert(Re("[^ \R]+\|")) + \
# One identifier followed by zero or more "|"-separated identifiers.
ncbi_term = ncbi_word + Rep(Str("|") + ncbi_word)

# Anything else
# Text up to the first space, tagged as a dbxref id with dbname "local".
generic_term = Std.dbxref(
    Std.dbxref_dbid(UntilSep(sep = " "), {"dbname": "local"})
    )
id_term = ncbi_term | generic_term
###########################################################

# Zero or more lines starting with "#".
comment_lines = Rep(Str("#") + ToEol())
# ">" then the identifier term and the rest of the line as the description.
title = Str(">") + Std.description_line(id_term + UntilEol()) + AnyEol()
# Any line that does not start with ">" is captured as sequence data.
seqline = AssertNot(Str(">")) + Std.sequence(UntilEol()) + AnyEol()

# can get a sequence line without an Eol at the end of a file