def determine_sys_cnf(opts):
    """Resolve, verify, and cache the system configuration path on ``opts``.

    If ``opts.sys_cnf`` is already set, that path is verified; otherwise the
    path is auto-detected by location. Verification is critical: a missing
    file aborts the run.

    :param opts: parsed options object; ``opts.sys_cnf`` is updated in place
    :return: the verified system configuration file path
    """
    if 'sys_cnf' in opts.__dict__ and opts.sys_cnf:
        # Assign the verified path back so opts always carries the canonical
        # value (the original returned here without assigning or logging,
        # inconsistent with determine_run_cnf).
        opts.sys_cnf = verify_file(opts.sys_cnf, is_critical=True)
    else:
        opts.__dict__['sys_cnf'] = verify_file(detect_sys_cnf_by_location(),
                                               is_critical=True)

    debug('Using system configuration ' + opts.sys_cnf)
    return opts.sys_cnf
Exemplo n.º 2
0
 def _get_json(self, url_post, callee, params=None):
     """Perform a GET against the API and return the decoded JSON body.

     Records the request time for ``callee`` (used for rate limiting) on a
     200 response; logs and returns None on any other status code.

     :param url_post: path appended to the base API URL
     :param callee: the bound method making the request (rate-limit key)
     :param params: optional query parameters passed through to requests
     :return: parsed JSON on HTTP 200, otherwise None
     """
     resp = requests.get("{0:s}{1:s}".format(self._api_url, url_post),
                         auth=self._auth, params=params, timeout=15.00)
     if resp.status_code != 200:
         logger.debug("Response not OK. Status {0:d} - {1:s}".format(resp.status_code, resp.reason))
         return None
     self._last_requests[callee] = time.time()
     return resp.json()
def determine_run_cnf(opts, is_wgs=False, is_targetseq=False):
    """Resolve, verify, and log the run configuration path on ``opts``.

    An explicitly provided path is normalized with adjust_path; otherwise a
    default is chosen by sequencing type (WGS, targeted deep-seq, or exome).

    :param opts: parsed options object; ``opts.run_cnf`` is updated in place
    :param is_wgs: select the whole-genome default config
    :param is_targetseq: select the targeted deep-sequencing default config
    :return: the verified run configuration file path
    """
    if opts.run_cnf:
        opts.run_cnf = adjust_path(opts.run_cnf)
    else:
        if is_wgs:
            default_key = 'run_cnf_wgs'
        elif is_targetseq:
            default_key = 'run_cnf_deep_seq'
        else:
            default_key = 'run_cnf_exome_seq'
        opts.run_cnf = defaults[default_key]

    verify_file(opts.run_cnf, is_critical=True)
    debug('Using run configuration ' + opts.run_cnf)
    return opts.run_cnf
Exemplo n.º 4
0
def index_bam(cnf, bam_fpath, sambamba=None, samtools=None, use_grid=False):
    """Ensure an up-to-date ``.bai`` index exists next to ``bam_fpath``.

    Indexes with sambamba first; if that leaves no fresh index, falls back
    to samtools. With ``use_grid`` the work is delegated to a cluster job.

    :param cnf: pipeline configuration object
    :param bam_fpath: path to the BAM file to index
    :param sambamba: optional path to the sambamba binary (auto-located if None)
    :param samtools: optional path to the samtools binary (auto-located if None)
    :param use_grid: submit an indexing job instead of running locally
    :return: a job object when ``use_grid`` is set, otherwise None
    """
    if use_grid:
        return index_bam_grid(cnf, bam_fpath, sambamba)

    # NOTE: variable names below are significant — cmdline uses .format(**locals()).
    sambamba = sambamba or get_system_path(
        cnf, join(get_ext_tools_dirname(), 'sambamba'), is_critical=True)
    indexed_bam = bam_fpath + '.bai'
    # An index is stale when it is missing or older than the BAM itself.
    if isfile(indexed_bam) and not getmtime(indexed_bam) < getmtime(bam_fpath):
        debug('Actual "bai" index exists.')
        return

    info('Indexing BAM, writing ' + indexed_bam + '...')
    cmdline = '{sambamba} index {bam_fpath}'.format(**locals())
    res = call(cnf, cmdline, exit_on_error=False)
    if not isfile(indexed_bam) or getmtime(indexed_bam) < getmtime(bam_fpath):
        # sambamba did not produce a fresh index — retry with samtools.
        samtools = samtools or get_system_path(cnf, 'samtools')
        cmdline = '{samtools} index {bam_fpath}'.format(**locals())
        call(cnf, cmdline)
Exemplo n.º 5
0
 def get_my_states(self, time_secs=0, icao24=None, serials=None):
     """ Retrieve state vectors for your own sensors. Authentication is required for this operation.
     If time = 0 the most recent ones are taken. Optional filters may be applied for ICAO24 addresses and sensor
     serial numbers.

     :param time_secs: time as Unix time stamp (seconds since epoch) or datetime. The datetime must be in UTC!
     :param icao24: optionally retrieve only state vectors for the given ICAO24 address(es). The parameter can either be a single address as str or an array of str containing multiple addresses
     :param serials: optionally retrieve only states of vehicles as seen by the given sensor(s). The parameter can either be a single sensor serial number (int) or a list of serial numbers.
     :return: OpenSkyStates if request was successful, None otherwise
     :raises Exception: if no username/password were configured
     """
     if len(self._auth) < 2:
         raise Exception("No username and password provided for get_my_states!")
     if not self._check_rate_limit(0, 1, self.get_my_states):
         logger.debug("Blocking request due to rate limit")
         return None
     t = time_secs
     # isinstance (not type() ==) so datetime subclasses are also converted
     # to a Unix timestamp instead of being passed to int() and failing.
     if isinstance(time_secs, datetime):
         t = calendar.timegm(t.timetuple())
     states_json = self._get_json("/states/own", self.get_my_states,
                                  params={"time": int(t), "icao24": icao24,
                                          "serials": serials})
     if states_json is not None:
         return OpenSkyStates(states_json)
     return None
Exemplo n.º 6
0
def index_bam_grid(cnf, bam_fpath, sambamba=None):
    """Submit a cluster job that builds a ``.bai`` index for ``bam_fpath``.

    :param cnf: pipeline configuration object
    :param bam_fpath: path to the BAM file to index
    :param sambamba: optional path to the sambamba binary (auto-located if None)
    :return: the submitted job object, or None when the index is already fresh
    """
    indexed_bam = bam_fpath + '.bai'
    # Compare modification times (not getctime as before): ctime changes on
    # metadata-only updates, and the local index_bam() already uses getmtime.
    if not isfile(indexed_bam) or getmtime(indexed_bam) < getmtime(bam_fpath):
        info('Indexing BAM, writing ' + indexed_bam + '...')
        # is_critical=True aborts on failure, so the former duplicated
        # "if sambamba is None" re-fetch was dead code and is removed.
        sambamba = sambamba or get_system_path(
            cnf, join(get_ext_tools_dirname(), 'sambamba'), is_critical=True)
        cmdline = '{sambamba} index {bam_fpath}'.format(**locals())
        j = submit_job(cnf,
                       cmdline,
                       basename(bam_fpath) + '_index',
                       output_fpath=indexed_bam,
                       stdout_to_outputfile=False)
        info()
        return j
    else:
        debug('Actual "bai" index exists.')
        return None
Exemplo n.º 7
0
    def get_states(self, time_secs=0, icao24=None, serials=None, bbox=()):
        """ Retrieve state vectors for a given time. If time = 0 the most recent ones are taken.
        Optional filters may be applied for ICAO24 addresses.

        :param time_secs: time as Unix time stamp (seconds since epoch) or datetime. The datetime must be in UTC!
        :param icao24: optionally retrieve only state vectors for the given ICAO24 address(es). The parameter can either be a single address as str or an array of str containing multiple addresses
        :param serials: accepted for signature compatibility but not used by this request (only get_my_states forwards it)
        :param bbox: optionally retrieve state vectors within a bounding box. The bbox must be a tuple of exactly four values [min_latitude, max_latitude, min_longitude, max_longitude] each in WGS84 decimal degrees.
        :return: OpenSkyStates if request was successful, None otherwise
        :raises ValueError: if bbox is non-empty but does not contain exactly four values
        """
        if not self._check_rate_limit(10, 5, self.get_states):
            logger.debug("Blocking request due to rate limit")
            return None

        t = time_secs
        # isinstance (not type() ==) so datetime subclasses are also converted
        # to a Unix timestamp instead of failing in int().
        if isinstance(time_secs, datetime):
            t = calendar.timegm(t.timetuple())

        params = {"time": int(t), "icao24": icao24}

        if len(bbox) == 4:
            # Validate before adding to the request: [lamin, lamax, lomin, lomax].
            OpenSkyApi._check_lat(bbox[0])
            OpenSkyApi._check_lat(bbox[1])
            OpenSkyApi._check_lon(bbox[2])
            OpenSkyApi._check_lon(bbox[3])

            params["lamin"] = bbox[0]
            params["lamax"] = bbox[1]
            params["lomin"] = bbox[2]
            params["lomax"] = bbox[3]
        elif len(bbox) > 0:
            # Fixed message: the fourth element is max_longitude, not max_latitude.
            raise ValueError("Invalid bounding box! Must be [min_latitude, max_latitude, min_longitude, max_longitude]")

        states_json = self._get_json("/states/all", self.get_states,
                                     params=params)
        if states_json is not None:
            return OpenSkyStates(states_json)
        return None
Exemplo n.º 8
0
def _check_paths(sys_cnf=None, run_cnf=None):
    """Verify that the given config files exist and look like YAML.

    Both paths are verified critically (a missing file aborts the run), and
    any path not ending in ``.yaml`` is reported via critical().

    :param sys_cnf: path to the system configuration file
    :param run_cnf: optional path to the run configuration file
    :return: tuple of the verified (sys_cnf, run_cnf) paths
    """
    debug('System configuration file: ' + str(sys_cnf))
    if run_cnf:
        debug('Run configuration file: ' + str(run_cnf))
    debug()

    sys_cnf = verify_file(sys_cnf, 'System config', is_critical=True)
    if run_cnf:
        run_cnf = verify_file(run_cnf, 'Run config', is_critical=True)

    # A non-.yaml extension usually means a wrong argument was passed.
    errors = [fpath + ' does not end with .yaml, maybe incorrect parameter?'
              for fpath in (sys_cnf, run_cnf)
              if fpath and not fpath.endswith('.yaml')]
    if errors:
        critical(errors)

    return sys_cnf, run_cnf
def verify_vcf(vcf_fpath, silent=False, is_critical=False):
    """Check that a VCF file exists, declares the VCF format, and is parseable.

    Three levels of checking: the file exists and is non-empty, its first
    line starts with ``##fileformat=VCF``, and pyvcf can read at least one
    record (a header-only VCF is accepted with a warning).

    :param vcf_fpath: path to a (possibly gzipped) VCF file
    :param silent: suppress the warning for a record-less VCF
    :param is_critical: abort / re-raise on failure instead of returning None
    :return: vcf_fpath on success (including the no-records case), else None
    """
    if not verify_file(vcf_fpath, silent=silent, is_critical=is_critical):
        return None
    debug('File ' + vcf_fpath + ' exists and not empty')
    vcf = open_gzipsafe(vcf_fpath)
    debug('File ' + vcf_fpath + ' opened')
    # The very first line must carry the ##fileformat=VCF header.
    l = next(vcf, None)
    if l is None:
        (critical if is_critical else err)('Error: cannot read the VCF file ' + vcf_fpath)
        return None
    if not l.startswith('##fileformat=VCF'):
        (critical if is_critical else err)('Error: VCF must start with ##fileformat=VCF ' + vcf_fpath)
        return None

    try:
        # NOTE(review): the bare excepts below also swallow SystemExit /
        # KeyboardInterrupt unless is_critical re-raises — kept as-is to
        # preserve behavior.
        reader = vcf_parser.Reader(vcf)
    except:
        err('Error: cannot open the VCF file ' + vcf_fpath)
        if is_critical: raise
    else:
        debug('File ' + vcf_fpath + ' opened as VCF')
        try:
            # Pull one record to prove the body is parseable.
            rec = next(reader)
        except IndexError:
            err('Error: cannot parse records in the VCF file ' + vcf_fpath)
            debug('IndexError parsing VCF file ' + vcf_fpath)
            if is_critical: raise
        except ValueError:
            err('Error: cannot parse records in the VCF file ' + vcf_fpath)
            debug('ValueError parsing VCF file ' + vcf_fpath)
            if is_critical: raise
        except StopIteration:
            # Header-only VCF: still considered valid.
            debug('No records in the VCF file ' + vcf_fpath)
            if not silent:
                warn('VCF file ' + vcf_fpath + ' has no records.')
            return vcf_fpath
        except:
            err('Error: cannot parse records in the VCF file ' + vcf_fpath)
            debug('Other error parsing VCF file ' + vcf_fpath)
            if is_critical: raise
        else:
            debug('A record was read from the VCF file ' + vcf_fpath)
            return vcf_fpath
        # f = open_gzipsafe(output_fpath)
        # l = f.readline()
        # if 'Cannot allocate memory' in l:
        #     f.close()
        #     f = open_gzipsafe(output_fpath)
        #     contents = f.read()
        #     if not silent:
        #         if is_critical:
        #             critical('SnpSift failed with memory issue:\n' + contents)
        #         else:
        #             err('SnpSift failed with memory issue:\n' + contents)
        #             return None
        #     f.close()
        #     return None
        # return output_fpath
    finally:
        # Runs before every return above, so the handle never leaks.
        vcf.close()
Exemplo n.º 10
0
def _annotate(cnf, samples):
    """Run varannotate.py on every sample, batching jobs by cnf.threads.

    Samples whose annotated output already exists (with cnf.reuse_intermediate)
    are reused. The outer while-loop submits at most cnf.threads jobs per
    round, waits for them, then continues with the remaining samples. Per-job
    work dirs are removed on success. Prints per-round and total counters.

    :param cnf: pipeline configuration (project name, genome, threads, flags)
    :param samples: sample objects; varannotate_dirpath / anno_vcf_fpath are
        filled in on each sample in place
    """
    # Shared command-line prefix; per-sample arguments are appended below.
    varannotate_cmdl = (get_script_cmdline(
        cnf, 'python', join('scripts', 'post', 'varannotate.py')) +
                        ' --sys-cnf ' + cnf.sys_cnf + ' --run-cnf ' +
                        cnf.run_cnf + ' --project-name ' + cnf.project_name +
                        (' --reuse ' if cnf.reuse_intermediate else '') +
                        ' --log-dir -' + ' --genome ' + cnf.genome.name +
                        (' --no-check ' if cnf.no_check else '') +
                        (' --qc' if cnf.qc else ' --no-qc') +
                        ((' --caller ' + cnf.caller) if cnf.caller else ''))

    total_reused = 0
    total_processed = 0
    total_success = 0
    total_failed = 0

    # Each round submits up to cnf.threads jobs; remaining samples wait.
    not_submitted_samples = samples
    while not_submitted_samples:
        jobs_to_wait = []
        submitted_samples = []
        reused_samples = []
        for sample in not_submitted_samples:
            # Fill in output locations lazily, once per sample.
            if not sample.varannotate_dirpath:
                sample.varannotate_dirpath = join(sample.dirpath,
                                                  source.varannotate_name)
            if not sample.anno_vcf_fpath:
                sample.anno_vcf_fpath = join(
                    sample.varannotate_dirpath,
                    add_suffix(basename(sample.vcf), 'anno'))
            output_fpath = sample.anno_vcf_fpath
            if not output_fpath.endswith('.gz'):
                output_fpath += '.gz'
            debug('Checking ' + output_fpath)
            # Reuse a previously annotated, verifiable VCF instead of re-running.
            if cnf.reuse_intermediate and isfile(output_fpath) and verify_vcf(
                    output_fpath):
                info('Annotated results ' + output_fpath + ' exist, reusing.')
                reused_samples.append(sample)
                info()
                continue

            work_dir = join(cnf.work_dir,
                            source.varannotate_name + '_' + sample.name)
            j = submit_job(
                cnf,
                cmdline=varannotate_cmdl + ' --vcf ' + sample.vcf + ' -o ' +
                sample.varannotate_dirpath + ' -s ' + sample.name +
                ' --work-dir ' + work_dir + ' --output-file ' + output_fpath,
                job_name='VA_' + cnf.project_name + '_' + sample.name,
                output_fpath=output_fpath,
                stdout_to_outputfile=False,
                work_dir=work_dir)
            if not j.is_done:
                jobs_to_wait.append(j)
            submitted_samples.append(sample)
            # Batch limit reached: recompute the remaining samples and stop
            # submitting until this batch finishes.
            if len(jobs_to_wait) >= cnf.threads:
                not_submitted_samples = [
                    s for s in not_submitted_samples
                    if s not in submitted_samples and s not in reused_samples
                ]

                if not_submitted_samples:
                    info('Submitted ' + str(len(jobs_to_wait)) +
                         ' jobs, waiting them to finish before '
                         'submitting more ' + str(len(not_submitted_samples)))
                else:
                    info('Submitted ' + str(len(jobs_to_wait)) + ' last jobs.')
                info()
                break
            info()

        info()
        info('-' * 70)
        if jobs_to_wait:
            info('Submitted ' + str(len(jobs_to_wait)) + ' jobs, waiting...')
            jobs_to_wait = wait_for_jobs(cnf, jobs_to_wait)
        else:
            info('No annotation jobs to submit.')
        info('')
        info('-' * 70)
        # NOTE(review): "Finihsed" is a typo in this log message; the string
        # is runtime output and is deliberately left unchanged here.
        info('Finihsed annotating ' + str(len(jobs_to_wait)) + ' jobs')
        for j in jobs_to_wait:
            # A job that "finished" but produced an unverifiable VCF is a failure.
            if j.is_done and not j.is_failed and not verify_vcf(
                    j.output_fpath):
                j.is_failed = True
            if j.is_done and not j.is_failed:
                # Clean up the per-job scratch directory on success.
                if isdir(j.work_dir):
                    os.system('rm -rf ' + j.work_dir)
                else:
                    err('Job was done, but j.work_dir ' + j.work_dir +
                        ' does not exist')

        processed = sum(1 for j in jobs_to_wait if j.is_done)
        failed = sum(1 for j in jobs_to_wait if j.is_failed)
        success = sum(1 for j in jobs_to_wait if j.is_done and not j.is_failed)
        total_failed += failed
        total_reused += len(reused_samples)
        total_processed += processed
        total_success += success
        info('Reused: ' + str(len(reused_samples)))
        info('Processed: ' + str(processed))
        info('Success: ' + str(success))
        info('Failed: ' + str(failed))
        info()

        # Drop everything handled this round (submitted or reused) and loop.
        not_submitted_samples = [
            s for s in not_submitted_samples
            if s not in submitted_samples and s not in reused_samples
        ]

    info('-' * 70)
    info('Done with all ' + str(len(samples)) + ' samples.')
    info('Total reused: ' + str(total_reused))
    info('Total processed: ' + str(total_processed))
    info('Total success: ' + str(total_success))
    info('Total failed: ' + str(total_failed))
    info()