def make_2id(s, dbname, primary_name, secondary_name):
    """Build the expression for a tagged identifier with two id fields.

    The expression has the shape ``<s>|<primary>|<secondary>``.  Each id
    field is wrapped with ``Std.dbxref_dbid`` and annotated with ``dbname``
    and the matching ``type`` attribute.

    s -- literal tag text that starts the identifier
    dbname -- value stored under the "dbname" attribute of every id field
    primary_name -- "type" attribute of the first id field; when it is
        None the first field is not captured and the expression expects
        the literal ``<s>||`` directly in front of the secondary field
    secondary_name -- "type" attribute of the second id field (required)

    Returns the combined expression.
    """
    assert secondary_name is not None

    def id_field(separators, type_name):
        # One id field: text read with UntilSep, tagged as a dbxref id.
        return Std.dbxref_dbid(UntilSep(sep=separators),
                               {"dbname": dbname, "type": type_name})

    if primary_name is not None:
        return (Str(s + "|")
                + id_field("|", primary_name)
                + Str("|")
                + id_field("| ", secondary_name))

    # No primary field: the two bars are adjacent in the input.
    return Str(s + "||") + id_field("| ", secondary_name)
Martel.AnyEol() + \
Martel.Str("FH") + \
Martel.AnyEol()

## FT - feature table data (>=0 per entry)
## (older, simpler definition kept for reference; it is disabled)
##FT_line = Martel.Str("FT ") + \
##          Martel.ToEol("ft_data")
##FT_block = Martel.Rep1(FT_line)

# A db_xref feature qualifier: the literal  db_xref="<dbname>:<dbid>"
# followed by an end of line.  The qualifier name, the quoted value, and
# the dbname / dbid parts inside the value each get their own Std tag.
fq_dbxref = Std.feature_qualifier_name(Martel.Str("db_xref")) + \
            Martel.Str('=') + \
            Std.feature_qualifier_description(
                Martel.Str('"') + \
                Std.dbxref(Std.dbxref_dbname(Martel.UntilSep(None, ":")) + \
                           Martel.Str(":") + \
                           Std.dbxref_dbid(Martel.UntilSep(None, '"'))) + \
                Martel.Str('"')) + \
            Martel.AnyEol()

# Any other qualifier written as  <word>=<text to end of line>.
# The leading Martel.Assert is presumably a lookahead that requires
# "<word>=" before the name is consumed -- TODO confirm against Martel.
# After the first line, zero or more "FT " lines are accepted as more
# description text; each one is guarded by the two AssertNot alternatives
# (on "/" and on the pattern /\w+=).
# NOTE(review): this statement continues past the end of this excerpt.
fq_generic = \
    Martel.Assert(Martel.Word() + Martel.Str("=")) + \
    Std.feature_qualifier_name(Martel.Word()) + \
    Martel.Str("=") + \
    Std.feature_qualifier_description(Martel.UntilEol()) + \
    Martel.AnyEol() + \
    Martel.Rep( Martel.Str("FT ") + \
        (Martel.AssertNot(Martel.Str("/")) | Martel.AssertNot(Martel.Re(r"/\w+="))) + \
        Std.feature_qualifier_description(Martel.UntilEol()) + \
) )

#--- DR

# This is needed for things like
#   DR MGD; MGI:95401; EPB4.1.
# where I need to scan up to the last ".".  That is, I want
# "EPB4.1" to be the secondary identifier, not "EPB4" nor "EPB4.1."
# The pattern repeats either a character that is neither "." nor \R, or
# a "." that passes the (?!.\R) lookahead.
# NOTE(review): \R is presumably Martel's end-of-line shorthand -- confirm.
_to_secondary_end = Martel.Re(r"([^.\R]|(?!.\R)\.)+")

# The three fields of a general DR line.  Each is given a group name and
# wrapped in the matching Std dbxref tag with the attributes shown.
database_id = Std.dbxref_dbname(Martel.UntilSep("database_identifier", ";"),
                                {"style": "sp"})
primary_id = Std.dbxref_dbid(Martel.UntilSep("primary_identifier", ";"),
                             {"type": "primary"})
secondary_id = Std.dbxref_dbid(Martel.Group("secondary_identifier",
                                            _to_secondary_end),
                               {"type": "accession"})

# used in StdHandler for fast dbxref - don't rename!
# Full expression: database_id, "; ", primary_id, "; ", secondary_id.
real_DR_general = Std.dbxref(database_id + Martel.Str("; ") + \
                             primary_id + Martel.Str("; ") + \
                             secondary_id,
                             )

# The same expression wrapped by Std.fast_dbxref with style "sp-general".
fast_DR_general = Std.fast_dbxref(real_DR_general,
                                  {"style": "sp-general"})

# Registers the fast form under the feature name "fast-sp-dbxref" and
# passes along the group names of the full expression.
DR_general = Martel.FastFeature(fast_DR_general, "fast-sp-dbxref",
                                real_DR_general.group_names() )
def make_1id(s, dbname, name):
    """Build the expression for a tagged identifier with a single id field.

    The expression has the shape ``<s>|<id>``.  The id field is wrapped
    with ``Std.dbxref_dbid`` and annotated with ``dbname`` and ``name``.

    s -- literal tag text that starts the identifier
    dbname -- value stored under the "dbname" attribute of the id field
    name -- value stored under the "type" attribute of the id field

    Returns the combined expression.
    """
    tag_prefix = Str(s + "|")
    id_field = Std.dbxref_dbid(UntilSep(sep="| "),
                               {"dbname": dbname, "type": name})
    return tag_prefix + id_field
# Register the remaining identifier forms.  The order of "ids" is kept
# exactly as before, since the entries are or-ed together in list order.
ids.extend([
    make_2id("sp", "sp", "primary", "secondary"),
    # Brookhaven Protein Data Bank: pdb|entry|chain
    make_2id("pdb", "x-pdb", "primary", "secondary"),  # XXX not correct
    # Patents: pat|country|number
    make_2id("pat", "x-pat", "primary", "secondary"),  # XXX not correct
    # GenInfo Backbone Id: bbs|number
    make_1id("bbs", "x-bbs", "primary"),
])

# General database identifier: gnl|database|identifier
# Built by hand because the database name is read from the input itself.
gnl_id = (
    Str("gnl|")
    + Std.dbxref_dbname(UntilSep(sep="| "))
    + Str("|")
    + Std.dbxref_dbid(UntilSep(sep="| "))
)
ids.append(gnl_id)

ids.extend([
    # NCBI Reference Sequence: ref|accession|locus
    make_2id("ref", "x-ref", "primary", "secondary"),
    # Local Sequence identifier: lcl|identifier
    make_1id("lcl", "local", "primary"),
])

# "|" them all together: one dbxref that accepts any registered form.
ncbi_word = Std.dbxref(reduce(operator.or_, ids))

# One or more identifiers joined by "|".
# (A disabled earlier variant put Assert(Re("[^ \R]+\|")) in front.)
ncbi_term = ncbi_word + Rep(Str("|") + ncbi_word)

# Anything else
# Block header line, for example:
#   BL CCY; width=14; seqs=44; 99.5%=717; strength=1059
# Captured groups: protomat_id, width, numseqs, protomat_count, strength.
BL = (
    Str("BL ")
    + ToSep("protomat_id", ";")
    + Str(" width=")
    + Digits("width")
    + Str("; seqs=")
    + Digits("numseqs")
    + Str("; 99.5%=")
    + Digits("protomat_count")
    + Str("; strength=")
    + Digits("strength")
    + AnyEol()
)

# Segment line, for example:
#   PLMN_BOVIN|P06868 ( 60) CEEETDFVCRAFQY 26
# Fields in order: the identifier, a number in parentheses (captured as
# "position"), the matching sequence, and the weight.
#
# The identifier is either "<primary>|<accession>", tagged as swissprot
# ids, or a single untagged id that runs up to the next space.
identifier = (
    (
        Std.dbxref_dbid(UntilSep(sep="|."),
                        {"dbname": "swissprot", "type": "primary"})
        + Str("|")
        + Std.dbxref_dbid(UntilSep(sep=" "),
                          {"dbname": "swissprot", "type": "accession"})
    )
    | Std.dbxref_dbid(UntilSep(sep=" "))
)

# The leading AssertNot rejects lines that start with two characters
# followed by a space.
segment = (
    AssertNot(Re(r".. "))
    + identifier
    + Re(r" *\( *")
    + Integer("position")
    + Re(r"\) *")
    + Word("matching_sequence")
    + Spaces()
    + Digits("weight")
    + AnyEol()
)

# One or more segment lines, with bare end-of-line entries allowed between.
segment_block = Rep1(segment | AnyEol())