示例#1
0
文件: explorer.py 项目: kvarq/kvarq
 def open_json(self, x=None):
     '''
     open the entry that is currently selected in ``.jlist``

     the first selected index is looked up in ``.jpaths`` and the
     resulting path is handed to ``.do_open_json()``; a warning is
     logged if nothing is selected

     :param x: not used by this method (presumably the event object
         passed in by a GUI binding -- TODO confirm against caller)
     '''
     selection = self.jlist.curselection()
     if selection:
         # only the first selected entry is considered
         chosen = self.jpaths[int(selection[0])]
         self.do_open_json(chosen)
     else:
         lo.warning('cannot open JsonExplorer : idxs='+str(selection))
示例#2
0
文件: cli.py 项目: kvarq/kvarq
def update(args):
    '''
    re-evaluate the results stored in a ``.json`` file with the current
    testsuites and write the updated results back to the same file

    :param args: parsed command line arguments; the attributes
        ``.fastq``, ``.json`` and ``.testsuite_directory`` are read

    the input file is closed right after it is parsed and the output
    file is closed (and therefore flushed) before the success message
    is logged
    '''

    if args.fastq:
        lo.warning('re-reading of hits not currently implemented')

    # read the whole file before it is opened for writing further down
    with open(args.json) as fd:
        data = json.load(fd)

    testsuite_paths = discover_testsuites(args.testsuite_directory or [])
    testsuites = {}
    update_testsuites(testsuites, data['info']['testsuites'], testsuite_paths)

    analyser = analyse.Analyser()
    analyser.decode(testsuites, data)
    analyser.update_testsuites()

    # save results back to .json
    data = analyser.encode(hits = analyser.hits is not None)
    with codecs.open(args.json, 'w', 'utf-8') as j:
        json.dump(data, j, indent=2)

    # only report success once the data has actually been written
    lo.info('re-wrote results to file ' + args.json)
示例#3
0
文件: test_cli.py 项目: kvarq/kvarq
    def scan_illustrate(self, MTBC_fastq, scan_params=None):
        '''
        run the ``scan`` command on a fastq file and then the
        ``illustrate`` command on the result

        the scan result is written to a temporary file that is removed
        again before returning; the log level is raised to
        ``logging.WARNING`` while the commands run and set to
        ``logging.INFO`` afterwards; a warning is logged if the scan
        takes longer than 10 seconds

        :param MTBC_fastq: name of the fastq file to scan
        :param scan_params: sequence of additional command line
            arguments that are inserted between the fixed
            ``scan -l MTBC -f`` arguments and the file names
        :returns: ``(out, err)`` as returned by ``self.main()`` for the
            ``illustrate`` command
        '''
        # ``None`` instead of a shared mutable ``[]`` default
        extra = [] if scan_params is None else list(scan_params)

        ntf = tempfile.NamedTemporaryFile(delete=False)
        lo.setLevel(logging.WARNING)

        try:

            t0 = time.time()
            out, err = self.main(['scan', '-l', 'MTBC', '-f'] + extra + [
                MTBC_fastq, ntf.name])

            # measure once so the logged duration is the one that was
            # compared against the threshold
            elapsed = time.time() - t0
            if elapsed > 10:
                lo.warning('scanning of %s took %.2fs' % (
                    os.path.basename(MTBC_fastq), elapsed))

            out, err = self.main(['illustrate', '-r',
                ntf.name])

        finally:
            # clean up even if one of the commands failed
            lo.setLevel(logging.INFO)
            ntf.close()
            os.remove(ntf.name)

        return out, err
示例#4
0
文件: test_cli.py 项目: kvarq/kvarq
    def test_load_testsuites(self):
        '''
        check which testsuites the ``info`` command reports for the
        different ways of selecting them : by name, by group, by
        filename, with the ``-t`` switch, with the ``KVARQ_TESTSUITES``
        environment variable and with ``-L``

        log level, environment and working directory are restored in
        ``finally`` clauses so that a failing assertion does not leak
        state into other tests
        '''

        def get_testsuites(out):
            # non-empty, comma separated entries of the first output
            # line that starts with "testsuites="
            line = [line for line in out.split('\n')
                    if line.startswith('testsuites=')][0]
            return set([testsuite
                for testsuite in line[line.index('=')+1:].split(',')
                if testsuite])

        def name_and_version(testsuites):
            # cut every entry at its first "["
            return set([testsuite[:testsuite.find('[')]
                for testsuite in testsuites])

        def name_only(testsuites):
            # cut every entry at its first "-"
            return set([testsuite[:testsuite.find('-')]
                for testsuite in testsuites])

        cwd = os.getcwd()

        try:

            out, err = self.main(['info'])
            assert get_testsuites(out) == set()

            # suppress "loaded testsuite" messages
            lo.setLevel(logging.WARNING)

            MTBC_testsuites = set(['MTBC/phylo', 'MTBC/resistance', 'MTBC/spoligo'])

            # select testsuite
            out, err = self.main(['info', '-l', 'MTBC/phylo'])
            assert name_only(get_testsuites(out)) == set(['MTBC/phylo'])

            # select group
            out, err = self.main(['info', '-l', 'MTBC'])
            assert name_only(get_testsuites(out)) == MTBC_testsuites

            # select testsuite by filename
            path = os.path.join(testsuites_alt, 'MTBC', 'phylo.py')
            out, err = self.main(['info', '-l', path])
            assert name_and_version(get_testsuites(out)) == set(['MTBC/phylo-0.0'])

            # override testsuite directory using switch
            out, err = self.main(['info', '-l', 'MTBC/phylo'])
            assert name_and_version(get_testsuites(out)) != set(['MTBC/phylo-0.0'])
            out, err = self.main(['-t', testsuites_alt, 'info', '-l', 'MTBC/phylo'])
            assert name_and_version(get_testsuites(out)) == set(['MTBC/phylo-0.0'])

            # override testsuite directory using KVARQ_TESTSUITES
            os.environ['KVARQ_TESTSUITES'] = testsuites_alt
            try:
                out, err = self.main(['info', '-l', 'MTBC/phylo'])
                assert name_and_version(get_testsuites(out)) == set(['MTBC/phylo-0.0'])
            finally:
                # never leave the override behind for later tests
                del os.environ['KVARQ_TESTSUITES']

            # time load all
            t0 = time.time()
            out, err = self.main(['info', '-L'])
            assert len(name_only(get_testsuites(out))) > 4
            # measure once so the logged duration is the one that was
            # compared against the threshold
            elapsed = time.time() - t0
            if elapsed > 2:
                lo.warning('loading all testsuites takes %.2f' % elapsed)

        finally:
            lo.setLevel(logging.INFO)
            os.chdir(cwd)
示例#5
0
文件: fastq.py 项目: kvarq/kvarq
    def __init__(self, fname, variant=None, fd=None, paired=False, quiet=False):
        '''
        open ``.fastq`` or ``.fastq.gz`` file and determine its
        variant (setting attribute ``.Azero`` accordingly)

        :param fname: name of file to open
        :param variant: specify one of ``.vendor_variants`` -- if none
            is specified, then the PHRED scores found in the fastq file
            are analyzed and the variant is guessed from the entries of
            ``.vendor_variants`` that are compatible with these scores
        :param fd: specify a file object (``.seek()`` and
            ``.readline()`` are used) to use instead of opening
            ``fname``
        :param paired: include second file in a paired set if it is
            available (i.e. specify "file_1.fastq" as input file and
            "file_2.fastq" will be included in functions ``.filesize()``
            and ``.filenames()``)
        :param quiet: do not log the summary line (readlength,
            approximate number of records, dQ, variants)

        :raises FastqFileFormatException: if the file name has an
            unsupported extension, the file(s) are empty, ``variant`` is
            not in ``.vendor_variants`` or no single dQ can be
            determined from the file

        a file that was opened by this constructor is closed again if
        the constructor fails; a ``fd`` passed in by the caller is never
        closed here
        '''
        self.fname = fname

        # a falsy ``fd`` means that the file is opened here
        self.fd = fd if fd else None
        opened_here = False

        if self.fname.endswith('.fastq.gz'):
            self.gz = True
            if not self.fd:
                self.fd = gzip.GzipFile(self.fname, 'rb')
                opened_here = True
        elif self.fname.endswith('.fastq'):
            self.gz = False
            if not self.fd:
                self.fd = open(self.fname, 'rb')
                opened_here = True
        else:
            raise FastqFileFormatException(
                        'fastq file must have extension ".fastq" or ".fastq.gz"')

        try:

            # save second name of base if exists
            self.fname2 = None
            if paired:
                base = fname[:fname.rindex('.fastq')]
                if base[-2:] == '_1':
                    fname2 = base[:-2] + '_2' + fname[fname.rindex('.fastq'):]
                    if os.path.exists(fname2):
                        lo.info('including paired file "%s"' % fname2)
                        self.fname2 = fname2

            if sum(self.filesizes()) == 0:
                raise FastqFileFormatException('cannot scan empty file')

            # scan some records
            min_pos, max_pos = self.min_max_score_check_file()
            lo.debug('min_pos=%d max_pos=%d' % (min_pos, max_pos))

            if variant and variant not in self.vendor_variants:
                raise FastqFileFormatException(
                        'unknown vendor variant "%s"' % variant)

            # create list of variants compatible with PHRED scores
            # (looked up via ``self`` like everywhere else in this method)
            variants = []
            dQs = []
            for name, vendor_variant in self.vendor_variants.items():

                if ((min_pos - vendor_variant.dQ) in vendor_variant.Qrange
                        and (max_pos - vendor_variant.dQ) in vendor_variant.Qrange):
                    dQs.append(vendor_variant.dQ)
                    variants.append(name)

            if variant is None:
                # set variant from guesses
                if not variants:
                    raise FastqFileFormatException(
                            'could not find any suitable fastq vendor variant')
                # several guesses are fine as long as they agree on dQ
                if len(set(dQs)) > 1:
                    raise FastqFileFormatException(
                            'cannot determine dQ with guessed vendor variants "%s"'
                            % str(variants))
                self.variants = variants
                self.dQ = dQs[0]
            else:
                # check specified variant -- a mismatch is only reported,
                # the caller's choice is used anyway
                if variant not in variants:
                    lo.warning('specified vendor variant "%s" seems not to be '
                            'compatible with file' % variant)
                self.variants = [variant]
                self.dQ = self.vendor_variants[variant].dQ

            self.Azero = self.ASCII[self.dQ]

            # estimate readlength/records_approx from the first record
            # (first four lines of the file)
            self.fd.seek(0)
            lines = [self.fd.readline() for i in range(4)]
            self.readlength = len(lines[1].strip('\r\n'))
            if self.gz:
                # no estimate for gzipped files (presumably because the
                # compressed size says little about the number of records)
                self.records_approx = None
            else:
                self.records_approx = os.path.getsize(self.fname) / len(''.join(lines))
                if self.fname2 is not None:
                    self.records_approx *= 2

            # output some infos
            if not quiet:
                if self.gz:
                    lo.info('gzipped fastq : readlength=? records_approx=? dQ=%d variants=%s' % (
                            self.dQ, str(self.variants)))
                else:
                    lo.info('fastq : readlength=%d records_approx=%d dQ=%d variants=%s' % (
                            self.readlength, self.records_approx, self.dQ, str(self.variants)))

        except Exception:
            # the half-initialized object is discarded by the caller, so
            # nobody else could close a handle that was opened above
            if opened_here:
                self.fd.close()
            raise