def open_json(self, x=None):
    '''
    open the entry of ``self.jpaths`` that corresponds to the first item
    currently selected in ``self.jlist``

    logs a warning and does nothing if the selection is empty

    :param x: ignored (NOTE(review): presumably the event object handed
        over when this method is used as a GUI callback -- confirm)
    '''
    selection = self.jlist.curselection()
    if selection:
        # only the first selected item is opened
        first = int(selection[0])
        self.do_open_json(self.jpaths[first])
        return
    lo.warning('cannot open JsonExplorer : idxs='+str(selection))
def update(args):
    '''
    load the results stored in ``args.json``, decode them into an
    ``analyse.Analyser`` together with the discovered testsuites, call
    ``Analyser.update_testsuites()`` and write the re-encoded results back
    to the same file

    :param args: parsed command line arguments; the attributes ``fastq``,
        ``json`` and ``testsuite_directory`` are read
    '''
    if args.fastq:
        lo.warning('re-reading of hits not currently implemented')

    # the input handle must be closed before the same path is re-opened for
    # writing below
    with open(args.json) as infile:
        data = json.load(infile)

    testsuite_paths = discover_testsuites(args.testsuite_directory or [])
    testsuites = {}
    update_testsuites(testsuites, data['info']['testsuites'], testsuite_paths)

    analyser = analyse.Analyser()
    analyser.decode(testsuites, data)
    analyser.update_testsuites()

    # save results back to .json
    data = analyser.encode(hits = analyser.hits is not None)
    with codecs.open(args.json, 'w', 'utf-8') as outfile:
        json.dump(data, outfile, indent=2)

    # report success only after the file has been written and closed
    lo.info('re-wrote results to file ' + args.json)
def scan_illustrate(self, MTBC_fastq, scan_params=[]):
    '''
    run ``scan -l MTBC -f`` on a fastq file, writing to a temporary file,
    then run ``illustrate -r`` on that temporary file

    the logger is set to ``WARNING`` while the commands run and reset to
    ``INFO`` afterwards; the temporary file is always removed

    :param MTBC_fastq: path of the fastq file to scan
    :param scan_params: list of additional command line arguments that are
        inserted before the file names of the ``scan`` command
    :returns: ``(out, err)`` as returned by ``self.main()`` for the
        ``illustrate`` command
    '''
    tmp = tempfile.NamedTemporaryFile(delete=False)
    lo.setLevel(logging.WARNING)
    try:
        started = time.time()
        scan_args = ['scan', '-l', 'MTBC', '-f'] + scan_params
        scan_args += [MTBC_fastq, tmp.name]
        out, err = self.main(scan_args)

        # slow scans are reported but do not fail
        if time.time() - started > 10:
            fastq_name = os.path.basename(MTBC_fastq)
            lo.warning('scanning of %s took %.2fs' % (
                    fastq_name, time.time() - started))

        out, err = self.main(['illustrate', '-r', tmp.name])
    finally:
        lo.setLevel(logging.INFO)
        tmp.close()
        os.remove(tmp.name)

    return out, err
def test_load_testsuites(self):
    '''
    check which testsuites the ``info`` command reports when they are
    selected by name, by group, by filename, via the ``-t`` switch, via the
    ``KVARQ_TESTSUITES`` environment variable and via ``-L``

    the logger level, the working directory and the environment variable
    are restored in ``finally`` clauses so that a failing assertion does
    not leak state into subsequent tests
    '''

    def get_testsuites(out):
        # parse the "testsuites=a,b,c" line into a set of non-empty entries
        line = [line for line in out.split('\n')
                if line.startswith('testsuites=')][0]
        return set(testsuite
                   for testsuite in line[line.index('=')+1:].split(',')
                   if testsuite)

    def name_and_version(testsuites):
        # keep everything before the first '['
        # NOTE: find() returns -1 if '[' is missing, which would silently
        # drop the last character instead
        return set(testsuite[:testsuite.find('[')]
                   for testsuite in testsuites)

    def name_only(testsuites):
        # keep everything before the first '-' (same find() caveat)
        return set(testsuite[:testsuite.find('-')]
                   for testsuite in testsuites)

    cwd = os.getcwd()
    try:
        out, err = self.main(['info'])
        assert get_testsuites(out) == set()

        # suppress "loaded testsuite" messages
        lo.setLevel(logging.WARNING)

        MTBC_testsuites = set(['MTBC/phylo', 'MTBC/resistance', 'MTBC/spoligo'])

        # select testsuite
        out, err = self.main(['info', '-l', 'MTBC/phylo'])
        assert name_only(get_testsuites(out)) == set(['MTBC/phylo'])

        # select group
        out, err = self.main(['info', '-l', 'MTBC'])
        assert name_only(get_testsuites(out)) == MTBC_testsuites

        # select testsuite by filename
        path = os.path.join(testsuites_alt, 'MTBC', 'phylo.py')
        out, err = self.main(['info', '-l', path])
        assert name_and_version(get_testsuites(out)) == set(['MTBC/phylo-0.0'])

        # override testsuite directory using switch
        out, err = self.main(['info', '-l', 'MTBC/phylo'])
        assert name_and_version(get_testsuites(out)) != set(['MTBC/phylo-0.0'])
        out, err = self.main(['-t', testsuites_alt, 'info', '-l', 'MTBC/phylo'])
        assert name_and_version(get_testsuites(out)) == set(['MTBC/phylo-0.0'])

        # override testsuite directory using KVARQ_TESTSUITES
        os.environ['KVARQ_TESTSUITES'] = testsuites_alt
        try:
            out, err = self.main(['info', '-l', 'MTBC/phylo'])
            assert name_and_version(get_testsuites(out)) == set(['MTBC/phylo-0.0'])
        finally:
            # must not stay set for the remaining checks nor for other tests
            del os.environ['KVARQ_TESTSUITES']

        # time load all
        t0 = time.time()
        out, err = self.main(['info', '-L'])
        assert len(name_only(get_testsuites(out))) > 4
        elapsed = time.time() - t0
        if elapsed > 2:
            lo.warning('loading all testsuites takes %.2f' % elapsed)
    finally:
        lo.setLevel(logging.INFO)
        os.chdir(cwd)
def __init__(self, fname, variant=None, fd=None, paired=False, quiet=False):
    '''
    open ``.fastq`` or ``.fastq.gz`` file and determine its variant
    (setting attributes ``.variants``, ``.dQ`` and ``.Azero`` accordingly)

    :param fname: name of file to open; must end with ``.fastq`` or
        ``.fastq.gz``
    :param variant: specify one of ``.vendor_variants`` -- if ``None``,
        the PHRED scores found in the file are used to collect all
        compatible vendor variants, which must agree on a single ``dQ``
    :param fd: file object to use instead of opening ``fname`` (it is
        read with ``.readline()`` and rewound with ``.seek(0)``)
    :param paired: include second file in a paired set if it is available
        (i.e. specify "file_1.fastq" as input file and "file_2.fastq" will
        be included in functions ``.filesizes()`` and ``.filenames()``)
    :param quiet: do not log the summary line at the end
    :raises FastqFileFormatException: if the extension is not supported,
        the file is empty, ``variant`` is unknown, or no unambiguous
        variant can be guessed
    '''
    self.fname = fname
    self.fd = fd if fd else None

    # the extension decides whether the file is read through gzip
    gzipped = self.fname.endswith('.fastq.gz')
    if not (gzipped or self.fname.endswith('.fastq')):
        raise FastqFileFormatException(
                'fastq file must have extension ".fastq" or ".fastq.gz"')
    self.gz = gzipped
    if not self.fd:
        if gzipped:
            self.fd = gzip.GzipFile(self.fname, 'rb')
        else:
            self.fd = open(self.fname, 'rb')

    # remember the "_2" mate of a "_1" file if it exists on disk
    self.fname2 = None
    if paired:
        ext_at = fname.rindex('.fastq')
        stem, suffix = fname[:ext_at], fname[ext_at:]
        if stem.endswith('_1'):
            mate = stem[:-2] + '_2' + suffix
            if os.path.exists(mate):
                lo.info('including paired file "%s"' % mate)
                self.fname2 = mate

    if sum(self.filesizes()) == 0:
        raise FastqFileFormatException('cannot scan empty file')

    # scan some records
    lowest, highest = self.min_max_score_check_file()
    lo.debug('min_pos=%d max_pos=%d' % (lowest, highest))

    if variant and variant not in self.vendor_variants:
        raise FastqFileFormatException(
                'unknown vendor variant "%s"' % variant)

    # collect variants whose Qrange contains both extreme scores once
    # they are shifted by the variant's dQ
    compatible = [
            (name, candidate.dQ)
            for name, candidate in Fastq.vendor_variants.items()
            if ((lowest - candidate.dQ) in candidate.Qrange and
                (highest - candidate.dQ) in candidate.Qrange)]
    variants = [name for name, _ in compatible]
    dQs = [dQ for _, dQ in compatible]

    if variant is not None:
        # an explicitly specified variant wins, even if it looks wrong
        if variant not in variants:
            lo.warning('specified vendor variant "%s" seems not to be '
                    'compatible with file' % variant)
        self.variants = [variant]
        self.dQ = self.vendor_variants[variant].dQ
    else:
        # set variant from guesses
        if not variants:
            raise FastqFileFormatException(
                    'could not find any suitable fastq vendor variant')
        if len(set(dQs)) > 1:
            raise FastqFileFormatException(
                    'cannot determine dQ with guessed vendor variants "%s"' %
                    str(variants))
        self.variants = variants
        self.dQ = dQs[0]

    self.Azero = self.ASCII[self.dQ]

    # estimate readlength/records_approx from the first four lines
    self.fd.seek(0)
    lines = [self.fd.readline() for i in range(4)]
    self.readlength = len(lines[1].strip('\r\n'))
    if self.gz:
        # size on disk says nothing about the number of records
        self.records_approx = None
    else:
        record_size = len(''.join(lines))
        self.records_approx = os.path.getsize(self.fname) / record_size
        if self.fname2 is not None:
            self.records_approx *= 2

    # output some infos
    if quiet:
        return
    if self.gz:
        lo.info('gzipped fastq : readlength=? records_approx=? dQ=%d variants=%s' % (
                self.dQ, str(self.variants)))
    else:
        lo.info('fastq : readlength=%d records_approx=%d dQ=%d variants=%s' % (
                self.readlength, self.records_approx, self.dQ, str(self.variants)))