def init_associations(self, hdr_only=False):
    """Read GPAD file. HTTP address okay. GZIPPED/BZIPPED file okay.

    Leading lines that start with '!' are handed to a ``GpadHdr`` object;
    the version is taken from the first '!gpa-version:' line. Every line
    from the first non-'!' line onward is parsed into a namedtuple whose
    field names come from ``self.gpad_columns[version]``.

    Args:
        hdr_only: If True, return an empty list as soon as the header has
            been stored in ``self.hdr`` (no annotation is parsed).

    Returns:
        list: One namedtuple per data line, in file order. Empty if
        ``self.filename`` is None.

    Side effects:
        Sets ``self.hdr`` and writes a one-line summary to ``sys.stdout``
        when the whole file has been read.
    """
    associations = []
    if self.filename is None:
        return associations
    ver = None
    ntgpadobj = None
    hdrobj = GpadHdr()
    for line in nopen(self.filename):
        # Read header
        if ntgpadobj is None:
            # startswith() rather than line[0] so an empty line cannot raise
            if line.startswith('!'):
                if ver is None and line[1:13] == 'gpa-version:':
                    ver = line[13:].strip()
                hdrobj.chkaddhdr(line)
                continue
            # First non-header line: finalize the header, then fall through
            # so this same line is parsed as the first data record.
            self.hdr = hdrobj.get_hdr()
            if hdr_only:
                return associations
            ntgpadobj = cx.namedtuple("ntgpadobj", " ".join(self.gpad_columns[ver]))
        # Read data
        flds = self._split_line(line)
        associations.append(self._get_ntgpad(ntgpadobj, flds))
    if ntgpadobj is None:
        # The file held no data line, so the header was never finalized
        # inside the loop; store it now so self.hdr is always populated.
        self.hdr = hdrobj.get_hdr()
    # GPAD file has been read
    readmsg = " READ {N:7,} associations: {FIN}\n"
    sys.stdout.write(readmsg.format(N=len(associations), FIN=self.filename))
    return associations
def read_gaf(self, fin_gaf, hdr_only, prt):
    """Read GAF file. HTTP address okay. GZIPPED/BZIPPED file okay.

    Leading lines that start with '!' are handed to a ``GafHdr`` object;
    the version is taken from a '!gaf-version:' line. Every line from the
    first non-'!' line onward is parsed into a namedtuple whose field
    names come from ``self.gaf_columns[version]``.

    Args:
        fin_gaf: Name of the GAF file, passed to ``nopen``.
        hdr_only: If True, return an empty list as soon as the header has
            been stored in ``self.hdr`` (no annotation is parsed).
        prt: Stream with a ``write`` method that receives a one-line
            summary, or None to stay silent.

    Returns:
        The parsed namedtuples after ``self.evobj.sort_nts`` has ordered
        them by 'Evidence_Code'; the raw empty list in the ``hdr_only``
        early-return case.
    """
    ga_lst = []
    ver = None
    ntgafobj = None
    exp_numcol = None
    hdrobj = GafHdr()
    for line in nopen(fin_gaf):
        # Read header
        if ntgafobj is None:
            # startswith() rather than line[0] so an empty line cannot raise
            if line.startswith('!'):
                if line[1:13] == 'gaf-version:':
                    ver = line[13:].strip()
                hdrobj.chkaddhdr(line)
                continue
            # First non-header line: finalize the header, then fall through
            # so this same line is parsed as the first data record.
            self.hdr = hdrobj.get_hdr()
            if hdr_only:
                return ga_lst
            ntgafobj = cx.namedtuple("ntgafobj", " ".join(self.gaf_columns[ver]))
            exp_numcol = self.gaf_numcol[ver]
        # Read data
        flds = self._split_line(line, exp_numcol)
        ga_lst.append(self._get_ntgaf(ntgafobj, flds, ver))
    if ntgafobj is None:
        # The file held no data line, so the header was never finalized
        # inside the loop; store it now so self.hdr is always populated.
        self.hdr = hdrobj.get_hdr()
    # GAF file has been read
    if prt is not None:
        readmsg = " READ {N:,} associations: {FIN}\n"
        prt.write(readmsg.format(N=len(ga_lst), FIN=fin_gaf))
    return self.evobj.sort_nts(ga_lst, 'Evidence_Code')
def init_associations(self, hdr_only=False, namespaces=None, prt=sys.stdout):
    """Read GPAD file. HTTP address okay. GZIPPED/BZIPPED file okay.

    Leading lines that start with '!' are handed to a ``GpadHdr`` object;
    the version is taken from the first '!gpa-version:' line. Every line
    from the first non-'!' line onward (including that first line) is
    split into fields and, if it passes the namespace filter, turned into
    a namedtuple.

    Args:
        hdr_only: If True, return an empty list as soon as the header has
            been stored in ``self.hdr`` (no annotation is parsed).
        namespaces: Container of namespaces to keep. Whether filtering is
            active at all is decided by ``self._get_b_all_nss(namespaces)``.
        prt: Stream with a ``write`` method that receives the summary line.

    Returns:
        list: The retained annotation namedtuples, in file order. Empty if
        ``self.filename`` is None.

    Side effects:
        Sets ``self.hdr``. If any data line fails to parse, prints a
        traceback plus the offending line and its fields to ``sys.stdout``
        and terminates the process with ``sys.exit(1)``.
    """
    import datetime
    import timeit
    import traceback
    associations = []
    tic = timeit.default_timer()
    if self.filename is None:
        return associations
    ver = None
    ntgpadobj_make = None
    hdrobj = GpadHdr()
    ifstrm = nopen(self.filename)
    _add_ns = self.godag is not None
    _get_ntgpadvals = self._get_ntgpadvals
    get_all_nss = self._get_b_all_nss(namespaces)
    for lnum, line in enumerate(ifstrm, 1):
        # Read header
        if ntgpadobj_make is None:
            # startswith() rather than line[0] so an empty line cannot raise
            if line.startswith('!'):
                if ver is None and line[1:13] == 'gpa-version:':
                    ver = line[13:].strip()
                hdrobj.chkaddhdr(line)
                continue
            # First non-header line: finalize the header, then fall through
            # to the data parser. Previously this line only triggered the
            # header-to-data switch and was itself never parsed, so the
            # first annotation of every file was silently dropped.
            self.hdr = hdrobj.get_hdr()
            if hdr_only:
                return associations
            ntgpadobj_make = self._get_ntgpadnt(ver, _add_ns)._make
        # Read data
        flds = self._split_line(line)
        try:
            # pylint: disable=not-callable
            goid = flds[3]
            nspc = self._get_namespace(goid) if _add_ns else None
            if get_all_nss or nspc in namespaces:
                ntgpad = ntgpadobj_make(_get_ntgpadvals(flds, goid, nspc, _add_ns))
                associations.append(ntgpad)
        # pylint: disable=broad-except
        except Exception as inst:
            # Any parse failure is fatal: report where it happened, then exit
            traceback.print_exc()
            sys.stdout.write("\n **FATAL: {MSG}\n\n".format(MSG=str(inst)))
            sys.stdout.write("**FATAL: {FIN}[{LNUM}]:\n{L}\n".format(
                FIN=self.filename, L=line, LNUM=lnum))
            for idx, (key, val) in enumerate(zip(self.gpadhdr, flds)):
                sys.stdout.write('{I:2} {KEY:13} {VAL}\n'.format(I=idx, KEY=key, VAL=val))
            sys.exit(1)
    if ntgpadobj_make is None:
        # The file held no data line, so the header was never finalized
        # inside the loop; store it now so self.hdr is always populated.
        self.hdr = hdrobj.get_hdr()
    # GPAD file has been read
    prt.write('HMS:{HMS} {N:7,} annotations READ: {ANNO} {NSs}\n'.format(
        N=len(associations), ANNO=self.filename,
        NSs=','.join(namespaces) if namespaces else '',
        HMS=str(datetime.timedelta(seconds=(timeit.default_timer()-tic)))))
    return associations
def init_associations(self, hdr_only=False, namespaces=None, prt=sys.stdout):
    """Read GPAD file. HTTP address okay. GZIPPED/BZIPPED file okay.

    Leading lines that start with '!' are handed to a ``GpadHdr`` object;
    the version is taken from the first '!gpa-version:' line. Every line
    from the first non-'!' line onward (including that first line) is
    split into fields and, if it passes the namespace filter, turned into
    a namedtuple.

    Args:
        hdr_only: If True, return an empty list as soon as the header has
            been stored in ``self.hdr`` (no annotation is parsed).
        namespaces: Container of namespaces to keep. Whether filtering is
            active at all is decided by ``self._get_b_all_nss(namespaces)``.
        prt: Stream with a ``write`` method that receives the summary line.

    Returns:
        list: The retained annotation namedtuples, in file order. Empty if
        ``self.filename`` is None.

    Side effects:
        Sets ``self.hdr``. If any data line fails to parse, prints a
        traceback plus the offending line and its indexed fields to
        ``sys.stdout`` and terminates the process with ``sys.exit(1)``.
    """
    import datetime
    import timeit
    import traceback
    associations = []
    tic = timeit.default_timer()
    if self.filename is None:
        return associations
    ver = None
    ntgpadobj_make = None
    hdrobj = GpadHdr()
    ifstrm = nopen(self.filename)
    _add_ns = self.godag is not None
    _get_ntgpadvals = self._get_ntgpadvals
    get_all_nss = self._get_b_all_nss(namespaces)
    for lnum, line in enumerate(ifstrm, 1):
        # Read header
        if ntgpadobj_make is None:
            # startswith() rather than line[0] so an empty line cannot raise
            if line.startswith('!'):
                if ver is None and line[1:13] == 'gpa-version:':
                    ver = line[13:].strip()
                hdrobj.chkaddhdr(line)
                continue
            # First non-header line: finalize the header, then fall through
            # to the data parser. Previously this line only triggered the
            # header-to-data switch and was itself never parsed, so the
            # first annotation of every file was silently dropped.
            self.hdr = hdrobj.get_hdr()
            if hdr_only:
                return associations
            ntgpadobj_make = self._get_ntgpadnt(ver, _add_ns)._make
        # Read data
        flds = self._split_line(line)
        try:
            # pylint: disable=not-callable
            goid = flds[3]
            nspc = self._get_namespace(goid) if _add_ns else None
            if get_all_nss or nspc in namespaces:
                ntgpad = ntgpadobj_make(_get_ntgpadvals(flds, goid, nspc, _add_ns))
                associations.append(ntgpad)
        # pylint: disable=broad-except
        except Exception as inst:
            # Any parse failure is fatal: report where it happened, then exit
            traceback.print_exc()
            sys.stdout.write("\n **FATAL: {MSG}\n\n".format(MSG=str(inst)))
            sys.stdout.write("**FATAL: {FIN}[{LNUM}]:\n{L}\n".format(
                FIN=self.filename, L=line, LNUM=lnum))
            for idx, val in enumerate(flds):
                sys.stdout.write('{I:2} {VAL}\n'.format(I=idx, VAL=val))
            sys.exit(1)
    if ntgpadobj_make is None:
        # The file held no data line, so the header was never finalized
        # inside the loop; store it now so self.hdr is always populated.
        self.hdr = hdrobj.get_hdr()
    # GPAD file has been read
    prt.write('HMS:{HMS} {N:7,} annotations READ: {ANNO} {NSs}\n'.format(
        N=len(associations), ANNO=self.filename,
        NSs=','.join(namespaces) if namespaces else '',
        HMS=str(datetime.timedelta(seconds=(timeit.default_timer()-tic)))))
    return associations
def read_gaf(self, fin_gaf):
    """Read GAF file. HTTP address okay. GZIPPED/BZIPPED file okay.

    Nothing is parsed until a line starting with '!gaf-version:' has been
    seen; that line selects the namedtuple fields from
    ``self.gaf_columns`` and the value from ``self.gaf_numcol`` that is
    passed to ``_split_line``. After that, every line not starting with
    '!' becomes one namedtuple.

    Args:
        fin_gaf: Name of the GAF file, passed to ``nopen``.

    Returns:
        list: One namedtuple per data line, in file order.

    Side effects:
        Writes a one-line summary to ``self.log``.
    """
    records = []
    version = None
    nt_cls = None
    num_cols = None
    for row in nopen(fin_gaf):
        if nt_cls is None:
            # Version not known yet: only the version line is of interest
            if row.startswith('!gaf-version:'):
                version = row[13:].strip()
                field_names = " ".join(self.gaf_columns[version])
                nt_cls = namedtuple("ntgafobj", field_names)
                num_cols = self.gaf_numcol[version]
            continue
        if row.startswith('!'):
            # Comment/header line after the version line: ignore
            continue
        fields = self._split_line(row, num_cols)
        records.append(self._get_ntgaf(nt_cls, fields, version))
    self.log.write(" READ {N:,} items: {FIN}\n".format(N=len(records), FIN=fin_gaf))
    return records
def read_gaf(self, fin_gaf):
    """Read GAF file. HTTP address okay. GZIPPED/BZIPPED file okay.

    Nothing is parsed until a line starting with "!gaf-version:" has been
    seen; that line selects the namedtuple fields from
    ``self.gaf_columns`` and the value from ``self.gaf_numcol`` that is
    passed to ``_split_line``. After that, every line not starting with
    "!" becomes one namedtuple.

    Args:
        fin_gaf: Name of the GAF file, passed to ``nopen``.

    Returns:
        The parsed namedtuples as ordered by
        ``self.evobj.sort_nts(..., "Evidence_Code")``.

    Side effects:
        Writes a one-line summary to ``self.log`` before sorting.
    """
    annotations = []
    gaf_version = None
    nt_type = None
    expected_cols = None
    for text in nopen(fin_gaf):
        if nt_type is not None:
            # Version already known: each non-"!" line is one annotation
            if not text.startswith("!"):
                fields = self._split_line(text, expected_cols)
                annotations.append(self._get_ntgaf(nt_type, fields, gaf_version))
        elif text.startswith("!gaf-version:"):
            # The version line fixes the record layout for the rest of the file
            gaf_version = text[13:].strip()
            field_names = " ".join(self.gaf_columns[gaf_version])
            nt_type = cx.namedtuple("ntgafobj", field_names)
            expected_cols = self.gaf_numcol[gaf_version]
    self.log.write(" READ {N:,} associations: {FIN}\n".format(N=len(annotations), FIN=fin_gaf))
    return self.evobj.sort_nts(annotations, "Evidence_Code")