def accs():
    """Yield the motif accessions this run should process.

    Accessions given on the command line are yielded in order. With no
    command-line arguments, the ``id`` field of every motif returned by
    ``w.itermotif()`` is yielded instead.

    Side effect: rebinds the module-level ``gtc`` to a new ``GlyTouCan``
    built with ``usecache=False``; ``prefetch`` is False when accessions
    were given explicitly and True otherwise.
    """
    global gtc
    requested = sys.argv[1:]
    if requested:
        gtc = GlyTouCan(prefetch=False, usecache=False)
        for accession in requested:
            yield accession
        return
    gtc = GlyTouCan(prefetch=True, usecache=False)
    for motif in w.itermotif():
        yield motif.get('id')
def __init__(self, **kwargs):
    """Initialise a GlyTouCan-collection motif from keyword fields.

    ``accession`` is required. Before the keyword arguments are handed to
    the parent initialiser:

    * ``collection`` is set to ``self.id``;
    * ``glytoucan`` defaults to the value of ``accession``;
    * a missing ``wurcs`` or ``glycoct`` sequence is looked up with
      ``getseq(glytoucan, <format>)`` on a GlyTouCan client.

    Raises AssertionError if ``accession`` is missing or None.
    """
    assert kwargs.get('accession') is not None
    kwargs['collection'] = self.id
    if kwargs.get('glytoucan') is None:
        kwargs['glytoucan'] = kwargs['accession']

    need_wurcs = kwargs.get('wurcs') is None
    need_glycoct = kwargs.get('glycoct') is None
    if need_wurcs or need_glycoct:
        # Cache the lazily created client on the class, not the instance:
        # assigning to self.gtc would only remember it for this one object,
        # so every other motif would construct a client of its own.
        client = self.gtc
        if not client:
            client = GlyTouCanMotif.gtc = GlyTouCan()
        if need_wurcs:
            kwargs['wurcs'] = client.getseq(kwargs['glytoucan'], 'wurcs')
        if need_glycoct:
            kwargs['glycoct'] = client.getseq(kwargs['glytoucan'], 'glycoct')
    super(GlyTouCanMotif, self).__init__(**kwargs)
class GlyTouCanMotif(Motif):
    """Motif whose ``collection`` field is the GlyTouCan collection id."""

    # GlyTouCan client, created on first use and shared by all instances.
    gtc = None
    # Value written into every instance's 'collection' keyword.
    id = 'GTC'

    def __init__(self, **kwargs):
        """Initialise the motif from keyword fields.

        ``accession`` is required. Before the keyword arguments are handed
        to ``Motif.__init__``:

        * ``collection`` is set to ``self.id``;
        * ``glytoucan`` defaults to the value of ``accession``;
        * a missing ``wurcs`` or ``glycoct`` sequence is looked up with
          ``getseq(glytoucan, <format>)`` on the shared GlyTouCan client.

        Raises AssertionError if ``accession`` is missing or None.
        """
        assert kwargs.get('accession') is not None
        kwargs['collection'] = self.id
        if kwargs.get('glytoucan') is None:
            kwargs['glytoucan'] = kwargs['accession']

        need_wurcs = kwargs.get('wurcs') is None
        need_glycoct = kwargs.get('glycoct') is None
        if need_wurcs or need_glycoct:
            # Populate the class attribute rather than an instance
            # attribute; otherwise the class-level ``gtc`` stays None and
            # each motif that needs a sequence builds its own client.
            client = self.gtc
            if not client:
                client = GlyTouCanMotif.gtc = GlyTouCan(usecache=False,
                                                        prefetch=False)
            if need_wurcs:
                kwargs['wurcs'] = client.getseq(kwargs['glytoucan'], 'wurcs')
            if need_glycoct:
                kwargs['glycoct'] = client.getseq(kwargs['glytoucan'],
                                                  'glycoct')
        super(GlyTouCanMotif, self).__init__(**kwargs)
def accessions():
    """Yield the accessions selected by the command line.

    * a single argument ``-``: one accession per line of standard input,
      stripped of surrounding whitespace;
    * a single argument ``*``: everything from ``gtc.allaccessions()``
      except accessions present in the module-level ``replace``;
    * anything else: the command-line arguments themselves, in order.

    Side effect: rebinds the module-level ``gtc``. The first two modes
    build it with ``usecache=False``, the last with ``prefetch=False``.
    """
    global gtc
    args = sys.argv[1:]
    if args == ["-"]:
        gtc = GlyTouCan(usecache=False)
        for line in sys.stdin:
            yield line.strip()
        return
    if args == ["*"]:
        gtc = GlyTouCan(usecache=False)
        for accession in gtc.allaccessions():
            if accession not in replace:
                yield accession
        return
    gtc = GlyTouCan(prefetch=False)
    for accession in args:
        yield accession
import findpygly
from pygly.GlycanResource import GlyTouCan


def accessions(args):
    """Yield whitespace-stripped lines naming the accessions to process.

    args: list of file names. When empty, lines are read from standard
    input; otherwise each named file is read in turn.
    """
    if not args:
        for line in sys.stdin:
            yield line.strip()
        return
    for fn in args:
        # The context manager closes each file once it has been consumed
        # (or when the generator is closed early) instead of leaking it.
        with open(fn) as handle:
            for line in handle:
                yield line.strip()


gtc = GlyTouCan()

# Index the GlyTouCan motifs by accession.
allmotifs = dict()
for acc, label, redend in gtc.allmotifs():
    allmotifs[acc] = dict(label=label, redend=redend)

current = set()
for gtcacc in accessions(sys.argv[1:]):
    start = time.time()
    g = w.get(gtcacc)
    newgly = False
    if not g:
        # Nothing stored under this accession yet: start a new record.
        newgly = True
        g = Glycan(accession=gtcacc)
#!/bin/env python2 import sys, glob import findpygly from pygly.GlycanResource import GlyTouCan, GlyCosmos seqtype = sys.argv[1].split('/')[-1] assert (seqtype in ('wurcs', 'glycoct', 'genglycoct')) allfn = set(glob.glob(sys.argv[1] + "/G*.txt")) gco = GlyCosmos(usecache=False) archived = set(map(lambda d: d['accession'], gco.archived())) gtc = GlyTouCan(verbose=False, usecache=False) for acc in gtc.allaccessions(): if acc in archived: continue filename = sys.argv[1] + "/" + acc + ".txt" if filename in allfn: allfn.remove(filename) continue seq = None if seqtype == 'wurcs': seq = gtc.getseq(acc, 'wurcs') if seqtype == 'glycoct':
#!/bin/env python2 import findpygly # from pygly.GlyTouCan import GlyTouCan from pygly.GlycanResource import GlyTouCan as GTC import sys, re, time import urllib import hashlib gtc = GTC(prefetch=False) # Need wurcs skeleton codes for UniCarbKB and Byonic symbols... symbol2wurcs_definition = """ NeuAc AUd21122h_5*NCC/3=O 1 NeuGc AUd21122h_5*NCCO/3=O 2 Fuc u1221m 6 Hex uxxxxh 5 HexNAc uxxxxh_2*NCC/3=O 4 dHex uxxxxm 3 Pent uxxxh 10 P *OPO/3O/3=O -1 Phospho *OPO/3O/3=O -1 S *OSO/3=O/3=O -1 Sulpho *OSO/3=O/3=O -1 """ symbol2wurcs = {} wurcsorder = {} for l in symbol2wurcs_definition.splitlines(): if not l.strip(): continue
for it in open(fn): yield it.strip() notation = 'snfg' style = 'extended' format = 'svg' if len(sys.argv) > 1: notation = sys.argv[1] if len(sys.argv) > 2: style = sys.argv[2] if len(sys.argv) > 3: format = sys.argv[3] gtc = GlyTouCan(usecache=False) for gtcacc in accessions(sys.argv[4:]): imgfn = "%s.%s" % (gtcacc, format) if os.path.exists(imgfn): continue imgstr = gtc.getimage(gtcacc, style=style, notation=notation, format=format) if not imgstr: if style == "extended": try: imgstr = urllib.urlopen( "https://image.glycosmos.org/%s/%s/%s" % ( notation, format,
#
# G45924NL is floating in-link substituent - IMO this is not a correct representation
# G99993XU ditto
# G94401PZ ditto
# G10633KT ditto
# G48302UE ditto
#
badacc = set(""" """.split())

# f = open('../data/basecomplist1.txt','w')

# Make sure we get the latest version of everything
gtc = GlyTouCan(usecache=False)


def accessions():
    """Yield the accessions selected by the command line.

    * a single argument ``-``: one accession per line of standard input,
      stripped of surrounding whitespace;
    * a single argument ``*``: everything from ``gtc.allaccessions()``;
    * anything else: the command-line arguments themselves, in order.
    """
    args = sys.argv[1:]
    if args == ["-"]:
        for line in sys.stdin:
            yield line.strip()
        return
    if args == ["*"]:
        source = gtc.allaccessions()
    else:
        source = args
    for accession in source:
        yield accession


allskel = set()
seen = defaultdict(set)
#!/bin/env python27
# Synchronise the wiki's GlyTouCanMotif pages with the motifs reported by
# GlyTouCan: update every reported motif, then delete pages whose
# accession was not reported.

import sys

from getwiki import GlycoMotifWiki, GlyTouCanMotif
w = GlycoMotifWiki()

import findpygly
from pygly.GlycanResource import GlyTouCan

gtc = GlyTouCan(usecache=False)

current = set()
# The third field was previously bound to ``re``, shadowing the stdlib
# module name; ``redend`` matches the keyword it is passed as.
for m, l, redend in sorted(gtc.allmotifs()):
    # The sequences used to be read from ``gtccache``, a name this script
    # never defines or imports (NameError on the first motif). Fetch them
    # from the GlyTouCan client instead.
    motif = GlyTouCanMotif(accession=m, prefname=l, name=l, redend=redend,
                           wurcs=gtc.getseq(m, 'wurcs'),
                           glycoct=gtc.getseq(m, 'glycoct'))
    if w.update(motif):
        print >> sys.stderr, m
    # Recorded whether or not the page changed, so unchanged motifs are
    # not treated as stale below.
    current.add(m)

# Remove pages for motifs that were not seen in the loop above.
for m in w.itermotif(collection=GlyTouCanMotif):
    if m.get('accession') not in current:
        print >> sys.stderr, "Deleting:", m.get('pagename')
        w.delete(m.get('pagename'))
import findpygly from pygly.GlycanResource import GlyTouCan from pygly.GlycanResource import GlyCosmos def accessions(args): if len(args) == 0: for it in sys.stdin: yield it.strip() else: for fn in args: for it in open(fn): yield it.strip() gtc = GlyTouCan(verbose=False, usecache=False) gco = GlyCosmos(verbose=False, usecache=False) allgco = set(gco.allaccessions()) # allmotifs = dict() # for acc,label,redend in gtc.allmotifs(): # allmotifs[acc] = dict(label=label,redend=redend) archived = set(map(lambda d: d['accession'], gco.archived())) print "%d accessions archived." % (len(archived), ) current = set() for gtcacc in accessions(sys.argv[1:]): start = time.time()