#!/bin/env python2 import re, sys from getwiki import GlycanData from collections import defaultdict headers = """ accession Hex HexNAc dHex NeuAc NeuGc HexA HexN S P aldi Xxx X Count """.split() w = GlycanData() print "\t".join(headers) for acc in w.iterglycanid(): g = w.get(acc) row = defaultdict(lambda: [0, False]) row['accession'] = (acc, False) for ann in g.annotations(type="MonosaccharideCount", source="EdwardsLab"): try: value = [int(ann.get('value')), False] except ValueError: value = [int(ann.get('value')[:-1]), True] prop = ann.get('property') if prop.endswith('Count'): prop = prop[:-5] row[prop] = value # print row row['Count'] = row['Monosaccharide'] if 'Xxx' not in row: row['Xxx'] = [0, False] for k in row:
# NOTE(review): documentation for the line above.
#
# Purpose: writes a tab-separated table of monosaccharide counts. The header
# row is the whitespace-split `headers` list (accession, Hex, HexNAc, ...,
# Count), emitted with the Python 2 `print` statement.
#
# Per accession yielded by `w.iterglycanid()`:
#   * `g = w.get(acc)` fetches the glycan record from the GlycanData store.
#   * `row` is a defaultdict whose missing keys produce `[0, False]`.
#     `row['accession']` is stored as the tuple `(acc, False)`, whereas every
#     other entry is a two-element list.
#   * Each annotation with type="MonosaccharideCount" and source="EdwardsLab"
#     has its 'value' parsed with int(). If that raises ValueError, the last
#     character is dropped before parsing and the second element is set to
#     True (presumably marking a qualified count such as a trailing "+" --
#     TODO confirm against the annotation data).
#   * A trailing 'Count' is stripped from the annotation's 'property' to form
#     the row key.
#   * `row['Count']` is copied from `row['Monosaccharide']`, and `row['Xxx']`
#     is set to `[0, False]` when absent.
#
# NOTE(review): this script has been collapsed onto one physical line. Because
# the line begins with `#`, Python lexes ALL of it as a comment, so none of
# the statements above execute until the original line breaks and indentation
# are restored. The nesting is not recoverable from this text alone (e.g.
# whether `row[prop] = value` belongs under the `if`, and where the
# `# print row` comment ends) -- restore from version control, do not guess.
#
# NOTE(review): the final `for k in row:` has no visible body; the rest of
# the loop lies outside this chunk.
# NOTE(review): documentation for the line below.
#
# The line starts inside the body of a generator whose `def` header is not
# visible in this chunk (presumably `accessions(*args)` / `accessions(args)`,
# given the call `accessions(sys.argv[1:])` further along -- TODO confirm).
# For every file name in `args` it yields each line of that file with
# surrounding whitespace stripped. The file object returned by `open(fn)` is
# never closed explicitly.
#
# Top-level script that follows:
#   * `gtc = GlyTouCan()` creates the GlyTouCan client.
#   * `allmotifs` maps each motif accession from `gtc.allmotifs()` to
#     `dict(label=..., redend=...)`.
#   * `current` starts as an empty set (its use is not visible here).
#   * For each accession read from the files named on the command line:
#       - `start = time.time()` records a timestamp (use not visible here);
#       - `g = w.get(gtcacc)` looks the glycan up; when the result is falsy,
#         `newgly` is set to True and a fresh `Glycan(accession=gtcacc)` is
#         created;
#       - 'wurcs', 'glycoct' and 'iupac' are deleted from `g`, as are all
#         annotations with source='GlyTouCan' and type='Sequence';
#       - a 'WURCS' Sequence annotation (source='GlyTouCan') is set from
#         `gtc.getseq(gtcacc, 'wurcs')`.
#
# `w`, `time`, `sys`, `Glycan` and `GlyTouCan` are defined or imported outside
# this chunk.
#
# NOTE(review): the script has been collapsed onto one physical line and is
# not valid Python in this form; the original line breaks and indentation must
# be restored. The nesting is ambiguous from this text (e.g. whether the
# `g.delete(...)` calls sit under `if not g:` or directly in the loop body --
# presumably the latter, TODO confirm). The `for gtcacc` loop may also
# continue past the end of this chunk.
for fn in args: for it in open(fn): yield it.strip() gtc = GlyTouCan() allmotifs = dict() for acc, label, redend in gtc.allmotifs(): allmotifs[acc] = dict(label=label, redend=redend) current = set() for gtcacc in accessions(sys.argv[1:]): start = time.time() g = w.get(gtcacc) newgly = False if not g: newgly = True g = Glycan(accession=gtcacc) g.delete('wurcs') g.delete('glycoct') g.delete('iupac') g.delete_annotations(source='GlyTouCan', type='Sequence') g.set_annotation(value=gtc.getseq(gtcacc, 'wurcs'), property='WURCS', source='GlyTouCan', type='Sequence')