Пример #1
0
 def init_associations(self, hdr_only=False):
     """Read the GPAD file named by ``self.filename`` into a list of namedtuples.

     Per the original note: HTTP address okay, GZIPPED/BZIPPED file okay
     (the path is opened through ``nopen``).

     Leading lines that start with '!' are fed to a ``GpadHdr`` object. The
     first line that does not start with '!' completes the header, which is
     stored on ``self.hdr``, and is itself parsed as the first record.

     Args:
         hdr_only: When True, return as soon as the header is complete
             without parsing any record.

     Returns:
         A list with one ``ntgpadobj`` namedtuple per data line. The list is
         empty when ``self.filename`` is None or when ``hdr_only`` is True.

     Note:
         The namedtuple fields come from ``self.gpad_columns[ver]``, where
         ``ver`` is the text after the first '!gpa-version:' header line
         (None if there is no such line); a failed lookup propagates.
     """
     associations = []
     if self.filename is None:
         return associations
     ver = None
     ntgpadobj = None
     hdrobj = GpadHdr()
     # NOTE(review): the stream returned by nopen is never closed here --
     # confirm who owns the handle before adding a close/with.
     for line in nopen(self.filename):
         if ntgpadobj is None:
             if line[0] == '!':
                 # Only the first version line is honoured.
                 if ver is None and line[1:13] == 'gpa-version:':
                     ver = line[13:].strip()
                 hdrobj.chkaddhdr(line)
                 continue
             # First non-header line: the header is complete.
             self.hdr = hdrobj.get_hdr()
             if hdr_only:
                 return associations
             ntgpadobj = cx.namedtuple("ntgpadobj", " ".join(self.gpad_columns[ver]))
         # Data line (including the one that ended the header).
         flds = self._split_line(line)
         associations.append(self._get_ntgpad(ntgpadobj, flds))
     if ntgpadobj is None:
         # No data line was seen (empty or header-only file): publish the
         # header anyway so self.hdr reflects what was read.
         self.hdr = hdrobj.get_hdr()
     # GPAD file has been read
     readmsg = "  READ {N:7,} associations: {FIN}\n"
     sys.stdout.write(readmsg.format(N=len(associations), FIN=self.filename))
     return associations
Пример #2
0
 def read_gaf(self, fin_gaf, hdr_only, prt):
     """Read a GAF file and return its annotations sorted by evidence code.

     Per the original note: HTTP address okay, GZIPPED/BZIPPED file okay
     (``fin_gaf`` is opened through ``nopen``).

     Leading lines that start with '!' are fed to a ``GafHdr`` object. The
     first line that does not start with '!' completes the header, which is
     stored on ``self.hdr``, and is itself parsed as the first record.

     Args:
         fin_gaf: Location of the GAF file, passed to ``nopen``.
         hdr_only: When True, return as soon as the header is complete
             without parsing any record.
         prt: Object with a ``write`` method that receives a one-line
             summary, or None to stay silent.

     Returns:
         The result of ``self.evobj.sort_nts(records, 'Evidence_Code')``,
         where ``records`` holds one ``ntgafobj`` namedtuple per data line.
         When ``hdr_only`` stops the read early, the empty list is returned
         directly instead.

     Note:
         The namedtuple fields and expected column count come from
         ``self.gaf_columns[ver]`` and ``self.gaf_numcol[ver]``, where
         ``ver`` is the text after the last '!gaf-version:' header line
         (None if there is no such line); a failed lookup propagates.
     """
     ga_lst = []
     ver = None
     ntgafobj = None
     exp_numcol = None
     hdrobj = GafHdr()
     for line in nopen(fin_gaf):
         if ntgafobj is None:
             if line[0] == '!':
                 # Every version line overwrites ver, so the last one wins.
                 if line[1:13] == 'gaf-version:':
                     ver = line[13:].strip()
                 hdrobj.chkaddhdr(line)
                 continue
             # First non-header line: the header is complete.
             self.hdr = hdrobj.get_hdr()
             if hdr_only:
                 return ga_lst
             ntgafobj = cx.namedtuple("ntgafobj",
                                      " ".join(self.gaf_columns[ver]))
             exp_numcol = self.gaf_numcol[ver]
         # Data line (including the one that ended the header).
         flds = self._split_line(line, exp_numcol)
         ga_lst.append(self._get_ntgaf(ntgafobj, flds, ver))
     if ntgafobj is None:
         # No data line was seen (empty or header-only file): publish the
         # header anyway so self.hdr reflects what was read.
         self.hdr = hdrobj.get_hdr()
     # GAF file has been read
     if prt is not None:
         readmsg = "  READ {N:,} associations: {FIN}\n"
         prt.write(readmsg.format(N=len(ga_lst), FIN=fin_gaf))
     return self.evobj.sort_nts(ga_lst, 'Evidence_Code')
Пример #3
0
 def init_associations(self, hdr_only=False, namespaces=None, prt=sys.stdout):
     """Read the GPAD file named by ``self.filename`` into a list of namedtuples.

     Per the original note: HTTP address okay, GZIPPED/BZIPPED file okay
     (the path is opened through ``nopen``).

     Leading lines that start with '!' are fed to a ``GpadHdr`` object. The
     first line that does not start with '!' completes the header, which is
     stored on ``self.hdr``, and is then parsed as the first record. The
     previous if/else layout used that line only to switch into data mode,
     so the first annotation of every file was silently dropped.

     Args:
         hdr_only: When True, return as soon as the header is complete
             without parsing any record.
         namespaces: Collection of namespaces to keep. It is consulted only
             when ``self._get_b_all_nss(namespaces)`` is falsy; otherwise
             every record is kept.
         prt: Object with a ``write`` method that receives a one-line
             timing/count summary after the file has been read.

     Returns:
         A list of namedtuples built by the ``_make`` of
         ``self._get_ntgpadnt(ver, add_ns)``, one per retained data line.
         The list is empty when ``self.filename`` is None or when
         ``hdr_only`` is True.

     Raises:
         SystemExit: Any exception raised while converting a split line is
             reported on stdout (traceback, file name, line number and the
             fields paired with ``self.gpadhdr``), then ``sys.exit(1)``.
     """
     import timeit
     import datetime
     import traceback
     associations = []
     tic = timeit.default_timer()
     if self.filename is None:
         return associations
     ver = None
     ntgpadobj_make = None
     hdrobj = GpadHdr()
     ifstrm = nopen(self.filename)
     # Namespaces are only looked up when a GO DAG is attached.
     _add_ns = self.godag is not None
     _get_ntgpadvals = self._get_ntgpadvals
     get_all_nss = self._get_b_all_nss(namespaces)
     for lnum, line in enumerate(ifstrm, 1):
         # Read header
         if ntgpadobj_make is None:
             if line[0] == '!':
                 # Only the first version line is honoured.
                 if ver is None and line[1:13] == 'gpa-version:':
                     ver = line[13:].strip()
                 hdrobj.chkaddhdr(line)
                 continue
             # First non-header line: the header is complete.
             self.hdr = hdrobj.get_hdr()
             if hdr_only:
                 return associations
             ntgpadobj_make = self._get_ntgpadnt(ver, _add_ns)._make
             # Fall through: this same line is the first data record.
         # Read data
         flds = self._split_line(line)
         try:
             # pylint: disable=not-callable
             goid = flds[3]
             nspc = self._get_namespace(goid) if _add_ns else None
             if get_all_nss or nspc in namespaces:
                 ntgpad = ntgpadobj_make(_get_ntgpadvals(flds, goid, nspc, _add_ns))
                 associations.append(ntgpad)
         # pylint: disable=broad-except
         except Exception as inst:
             # A malformed record is fatal: dump the context, then exit.
             traceback.print_exc()
             sys.stdout.write("\n  **FATAL: {MSG}\n\n".format(MSG=str(inst)))
             sys.stdout.write("**FATAL: {FIN}[{LNUM}]:\n{L}\n".format(
                 FIN=self.filename, L=line, LNUM=lnum))
             for idx, (key, val) in enumerate(zip(self.gpadhdr, flds)):
                 sys.stdout.write('{I:2} {KEY:13} {VAL}\n'.format(I=idx, KEY=key, VAL=val))
             sys.exit(1)
     if ntgpadobj_make is None:
         # No data line was seen (empty or header-only file): publish the
         # header anyway so self.hdr reflects what was read.
         self.hdr = hdrobj.get_hdr()
     # GPAD file has been read
     prt.write('HMS:{HMS} {N:7,} annotations READ: {ANNO} {NSs}\n'.format(
         N=len(associations), ANNO=self.filename,
         NSs=','.join(namespaces) if namespaces else '',
         HMS=str(datetime.timedelta(seconds=(timeit.default_timer()-tic)))))
     return associations
Пример #4
0
 def init_associations(self, hdr_only=False, namespaces=None, prt=sys.stdout):
     """Read the GPAD file named by ``self.filename`` into a list of namedtuples.

     Per the original note: HTTP address okay, GZIPPED/BZIPPED file okay
     (the path is opened through ``nopen``).

     Leading lines that start with '!' are fed to a ``GpadHdr`` object. The
     first line that does not start with '!' completes the header, which is
     stored on ``self.hdr``, and is then parsed as the first record. The
     previous if/else layout used that line only to switch into data mode,
     so the first annotation of every file was silently dropped.

     Args:
         hdr_only: When True, return as soon as the header is complete
             without parsing any record.
         namespaces: Collection of namespaces to keep. It is consulted only
             when ``self._get_b_all_nss(namespaces)`` is falsy; otherwise
             every record is kept.
         prt: Object with a ``write`` method that receives a one-line
             timing/count summary after the file has been read.

     Returns:
         A list of namedtuples built by the ``_make`` of
         ``self._get_ntgpadnt(ver, add_ns)``, one per retained data line.
         The list is empty when ``self.filename`` is None or when
         ``hdr_only`` is True.

     Raises:
         SystemExit: Any exception raised while converting a split line is
             reported on stdout (traceback, file name, line number and the
             indexed fields), then ``sys.exit(1)`` is called.
     """
     import timeit
     import datetime
     import traceback
     associations = []
     tic = timeit.default_timer()
     if self.filename is None:
         return associations
     ver = None
     ntgpadobj_make = None
     hdrobj = GpadHdr()
     ifstrm = nopen(self.filename)
     # Namespaces are only looked up when a GO DAG is attached.
     _add_ns = self.godag is not None
     _get_ntgpadvals = self._get_ntgpadvals
     get_all_nss = self._get_b_all_nss(namespaces)
     for lnum, line in enumerate(ifstrm, 1):
         # Read header
         if ntgpadobj_make is None:
             if line[0] == '!':
                 # Only the first version line is honoured.
                 if ver is None and line[1:13] == 'gpa-version:':
                     ver = line[13:].strip()
                 hdrobj.chkaddhdr(line)
                 continue
             # First non-header line: the header is complete.
             self.hdr = hdrobj.get_hdr()
             if hdr_only:
                 return associations
             ntgpadobj_make = self._get_ntgpadnt(ver, _add_ns)._make
             # Fall through: this same line is the first data record.
         # Read data
         flds = self._split_line(line)
         try:
             # pylint: disable=not-callable
             goid = flds[3]
             nspc = self._get_namespace(goid) if _add_ns else None
             if get_all_nss or nspc in namespaces:
                 ntgpad = ntgpadobj_make(_get_ntgpadvals(flds, goid, nspc, _add_ns))
                 associations.append(ntgpad)
         # pylint: disable=broad-except
         except Exception as inst:
             # A malformed record is fatal: dump the context, then exit.
             traceback.print_exc()
             sys.stdout.write("\n  **FATAL: {MSG}\n\n".format(MSG=str(inst)))
             sys.stdout.write("**FATAL: {FIN}[{LNUM}]:\n{L}\n".format(
                 FIN=self.filename, L=line, LNUM=lnum))
             for idx, val in enumerate(flds):
                 sys.stdout.write('{I:2} {VAL}\n'.format(I=idx, VAL=val))
             sys.exit(1)
     if ntgpadobj_make is None:
         # No data line was seen (empty or header-only file): publish the
         # header anyway so self.hdr reflects what was read.
         self.hdr = hdrobj.get_hdr()
     # GPAD file has been read
     prt.write('HMS:{HMS} {N:7,} annotations READ: {ANNO} {NSs}\n'.format(
         N=len(associations), ANNO=self.filename,
         NSs=','.join(namespaces) if namespaces else '',
         HMS=str(datetime.timedelta(seconds=(timeit.default_timer()-tic)))))
     return associations
Пример #5
0
 def read_gaf(self, fin_gaf):
     """Read a GAF file and return its records as a list of namedtuples.

     Per the original note: HTTP address okay, GZIPPED/BZIPPED file okay
     (``fin_gaf`` is opened through ``nopen``).

     Every line before the first '!gaf-version:' line is skipped. That line
     selects the namedtuple fields (``self.gaf_columns[ver]``) and the
     expected column count (``self.gaf_numcol[ver]``). After it, lines
     starting with '!' are skipped and all other lines become records.

     Args:
         fin_gaf: Location of the GAF file, passed to ``nopen``.

     Returns:
         A list of ``ntgafobj`` namedtuples in file order.
     """
     records = []
     stream = nopen(fin_gaf)
     version_tag = '!gaf-version:'
     version = None
     nt_cls = None
     num_cols = None
     for row in stream:
         if nt_cls is None:
             # Still looking for the version line; nothing else counts yet.
             if row.startswith(version_tag):
                 version = row[len(version_tag):].strip()
                 nt_cls = namedtuple("ntgafobj", " ".join(self.gaf_columns[version]))
                 num_cols = self.gaf_numcol[version]
             continue
         if row.startswith('!'):
             # Comment/header lines after the version line carry no data.
             continue
         fields = self._split_line(row, num_cols)
         records.append(self._get_ntgaf(nt_cls, fields, version))
     summary = "  READ {N:,} items: {FIN}\n".format(N=len(records), FIN=fin_gaf)
     self.log.write(summary)
     return records
Пример #6
0
 def read_gaf(self, fin_gaf):
     """Read a GAF file and return its records sorted by evidence code.

     Per the original note: HTTP address okay, GZIPPED/BZIPPED file okay
     (``fin_gaf`` is opened through ``nopen``).

     Every line before the first '!gaf-version:' line is skipped. That line
     selects the namedtuple fields (``self.gaf_columns[ver]``) and the
     expected column count (``self.gaf_numcol[ver]``). After it, lines
     starting with '!' are skipped and all other lines become records.

     Args:
         fin_gaf: Location of the GAF file, passed to ``nopen``.

     Returns:
         Whatever ``self.evobj.sort_nts(records, "Evidence_Code")`` returns
         for the list of ``ntgafobj`` namedtuples that was read.
     """
     records = []
     stream = nopen(fin_gaf)
     version_tag = "!gaf-version:"
     version = None
     nt_cls = None
     num_cols = None
     for row in stream:
         if nt_cls is None:
             # Still looking for the version line; nothing else counts yet.
             if row.startswith(version_tag):
                 version = row[len(version_tag):].strip()
                 nt_cls = cx.namedtuple("ntgafobj", " ".join(self.gaf_columns[version]))
                 num_cols = self.gaf_numcol[version]
             continue
         if row.startswith("!"):
             # Comment/header lines after the version line carry no data.
             continue
         fields = self._split_line(row, num_cols)
         records.append(self._get_ntgaf(nt_cls, fields, version))
     # The count is logged before sorting, as in the original order.
     summary = "  READ {N:,} associations: {FIN}\n".format(N=len(records), FIN=fin_gaf)
     self.log.write(summary)
     return self.evobj.sort_nts(records, "Evidence_Code")