def determine_sys_cnf(opts):
    if 'sys_cnf' in opts.__dict__ and opts.sys_cnf:
        return verify_file(opts.sys_cnf, is_critical=True)
    else:
        opts.__dict__['sys_cnf'] = verify_file(detect_sys_cnf_by_location(), is_critical=True)

    debug('Using system configuration ' + opts.sys_cnf)
    return opts.sys_cnf

def _get_json(self, url_post, callee, params=None):
    r = requests.get("{0:s}{1:s}".format(self._api_url, url_post),
                     auth=self._auth, params=params, timeout=15.00)
    if r.status_code == 200:
        self._last_requests[callee] = time.time()
        return r.json()
    else:
        logger.debug("Response not OK. Status {0:d} - {1:s}".format(r.status_code, r.reason))
    return None

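# _get_json is the single HTTP entry point for the public methods below: it
# prefixes url_post with the REST root, authenticates, and records the request
# time per caller for rate limiting. A sketch of the flow (the values are
# illustrative, not part of the library):
#
#   api = OpenSkyApi()
#   resp = api._get_json("/states/all", api.get_states, params={"time": 0})
#   # resp is the decoded JSON dict on HTTP 200, None on any other status
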
def determine_run_cnf(opts, is_wgs=False, is_targetseq=False):
    if opts.run_cnf:
        opts.run_cnf = adjust_path(opts.run_cnf)
    elif is_wgs:
        opts.run_cnf = defaults['run_cnf_wgs']
    elif is_targetseq:
        opts.run_cnf = defaults['run_cnf_deep_seq']
    else:
        opts.run_cnf = defaults['run_cnf_exome_seq']

    verify_file(opts.run_cnf, is_critical=True)
    debug('Using run configuration ' + opts.run_cnf)
    return opts.run_cnf

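# A minimal sketch of how the two resolvers above are typically chained from a
# parsed options object (the opts attribute names below are illustrative, not
# taken from this module):
#
#   opts, args = parser.parse_args()
#   sys_cnf = determine_sys_cnf(opts)  # falls back to detect_sys_cnf_by_location()
#   run_cnf = determine_run_cnf(opts, is_wgs=getattr(opts, 'is_wgs', False))
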
def index_bam(cnf, bam_fpath, sambamba=None, samtools=None, use_grid=False):
    if use_grid:
        return index_bam_grid(cnf, bam_fpath, sambamba)

    sambamba = sambamba or get_system_path(
        cnf, join(get_ext_tools_dirname(), 'sambamba'), is_critical=True)
    indexed_bam = bam_fpath + '.bai'
    if not isfile(indexed_bam) or getmtime(indexed_bam) < getmtime(bam_fpath):
        info('Indexing BAM, writing ' + indexed_bam + '...')
        cmdline = '{sambamba} index {bam_fpath}'.format(**locals())
        call(cnf, cmdline, exit_on_error=False)
        # Fall back to samtools if sambamba did not produce a fresh index
        if not isfile(indexed_bam) or getmtime(indexed_bam) < getmtime(bam_fpath):
            samtools = samtools or get_system_path(cnf, 'samtools')
            cmdline = '{samtools} index {bam_fpath}'.format(**locals())
            call(cnf, cmdline)
    else:
        debug('Actual "bai" index exists.')

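# Usage sketch for index_bam (the paths are hypothetical; `cnf` is the run
# configuration object used throughout this codebase):
#
#   index_bam(cnf, '/data/project/sample1.bam')                     # index in-process
#   j = index_bam(cnf, '/data/project/sample1.bam', use_grid=True)  # grid job or None
#   if j is not None:
#       wait_for_jobs(cnf, [j])
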
def get_my_states(self, time_secs=0, icao24=None, serials=None):
    """ Retrieve state vectors for your own sensors. Authentication is required for this operation.
    If time = 0 the most recent ones are taken. Optional filters may be applied for ICAO24 addresses
    and sensor serial numbers.

    :param time_secs: time as Unix time stamp (seconds since epoch) or datetime. The datetime must be in UTC!
    :param icao24: optionally retrieve only state vectors for the given ICAO24 address(es).
        The parameter can either be a single address as str or an array of str containing multiple addresses.
    :param serials: optionally retrieve only states of vehicles as seen by the given sensor(s).
        The parameter can either be a single sensor serial number (int) or a list of serial numbers.

    :return: OpenSkyStates if request was successful, None otherwise
    """
    if len(self._auth) < 2:
        raise Exception("No username and password provided for get_my_states!")
    if not self._check_rate_limit(0, 1, self.get_my_states):
        logger.debug("Blocking request due to rate limit")
        return None

    t = time_secs
    if type(time_secs) == datetime:
        t = calendar.timegm(t.timetuple())

    states_json = self._get_json("/states/own", self.get_my_states,
                                 params={"time": int(t), "icao24": icao24, "serials": serials})
    if states_json is not None:
        return OpenSkyStates(states_json)
    return None

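# Example for get_my_states (a sketch: it needs an OpenSky account that owns
# at least one sensor; the credentials and serial below are placeholders):
#
#   api = OpenSkyApi(username="USERNAME", password="PASSWORD")
#   states = api.get_my_states(serials=123456)
#   if states is not None:
#       for s in states.states:
#           print(s.icao24, s.callsign)
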
def index_bam_grid(cnf, bam_fpath, sambamba=None):
    indexed_bam = bam_fpath + '.bai'
    if not isfile(indexed_bam) or getmtime(indexed_bam) < getmtime(bam_fpath):
        info('Indexing BAM, writing ' + indexed_bam + '...')
        sambamba = sambamba or get_system_path(
            cnf, join(get_ext_tools_dirname(), 'sambamba'), is_critical=True)
        cmdline = '{sambamba} index {bam_fpath}'.format(**locals())
        j = submit_job(cnf, cmdline, basename(bam_fpath) + '_index',
                       output_fpath=indexed_bam, stdout_to_outputfile=False)
        info()
        return j
    else:
        debug('Actual "bai" index exists.')
        return None

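# index_bam_grid returns the submitted Job, or None when a fresh index already
# exists, so callers can batch the jobs (bam_fpaths is a hypothetical list):
#
#   jobs = [index_bam_grid(cnf, bam) for bam in bam_fpaths]
#   wait_for_jobs(cnf, [j for j in jobs if j is not None])
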
def get_states(self, time_secs=0, icao24=None, bbox=()):
    """ Retrieve state vectors for a given time. If time = 0 the most recent ones are taken.
    Optional filters may be applied for ICAO24 addresses.

    :param time_secs: time as Unix time stamp (seconds since epoch) or datetime. The datetime must be in UTC!
    :param icao24: optionally retrieve only state vectors for the given ICAO24 address(es).
        The parameter can either be a single address as str or an array of str containing multiple addresses.
    :param bbox: optionally retrieve state vectors within a bounding box. The bbox must be a tuple of
        exactly four values [min_latitude, max_latitude, min_longitude, max_longitude],
        each in WGS84 decimal degrees.

    :return: OpenSkyStates if request was successful, None otherwise
    """
    if not self._check_rate_limit(10, 5, self.get_states):
        logger.debug("Blocking request due to rate limit")
        return None

    t = time_secs
    if type(time_secs) == datetime:
        t = calendar.timegm(t.timetuple())

    params = {"time": int(t), "icao24": icao24}
    if len(bbox) == 4:
        OpenSkyApi._check_lat(bbox[0])
        OpenSkyApi._check_lat(bbox[1])
        OpenSkyApi._check_lon(bbox[2])
        OpenSkyApi._check_lon(bbox[3])
        params["lamin"] = bbox[0]
        params["lamax"] = bbox[1]
        params["lomin"] = bbox[2]
        params["lomax"] = bbox[3]
    elif len(bbox) > 0:
        raise ValueError("Invalid bounding box! Must be "
                         "[min_latitude, max_latitude, min_longitude, max_longitude].")

    states_json = self._get_json("/states/all", self.get_states, params=params)
    if states_json is not None:
        return OpenSkyStates(states_json)
    return None

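# Example for get_states (a sketch; the bounding box roughly covers
# Switzerland and is purely illustrative):
#
#   api = OpenSkyApi()  # anonymous access; stricter rate limits apply
#   states = api.get_states(bbox=(45.8389, 47.8229, 5.9962, 10.5226))
#   if states is not None:
#       for s in states.states:
#           print(s.icao24, s.callsign, s.geo_altitude)
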
def _check_paths(sys_cnf=None, run_cnf=None):
    debug('System configuration file: ' + str(sys_cnf))
    if run_cnf:
        debug('Run configuration file: ' + str(run_cnf))
    debug()

    sys_cnf = verify_file(sys_cnf, 'System config', is_critical=True)
    if run_cnf:
        run_cnf = verify_file(run_cnf, 'Run config', is_critical=True)

    errors = []
    for fn in [sys_cnf, run_cnf]:
        if fn and not fn.endswith('.yaml'):
            errors.append(fn + ' does not end with .yaml, maybe an incorrect parameter?')
    if errors:
        critical(errors)

    return sys_cnf, run_cnf

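# Usage sketch for _check_paths (the paths are placeholders):
#
#   sys_cnf, run_cnf = _check_paths('/path/to/system_info.yaml',
#                                   '/path/to/run_info.yaml')
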
def verify_vcf(vcf_fpath, silent=False, is_critical=False):
    if not verify_file(vcf_fpath, silent=silent, is_critical=is_critical):
        return None
    debug('File ' + vcf_fpath + ' exists and not empty')
    vcf = open_gzipsafe(vcf_fpath)
    debug('File ' + vcf_fpath + ' opened')

    l = next(vcf, None)
    if l is None:
        (critical if is_critical else err)('Error: cannot read the VCF file ' + vcf_fpath)
        return None
    if not l.startswith('##fileformat=VCF'):
        (critical if is_critical else err)('Error: VCF must start with ##fileformat=VCF ' + vcf_fpath)
        return None

    try:
        reader = vcf_parser.Reader(vcf)
    except:
        err('Error: cannot open the VCF file ' + vcf_fpath)
        if is_critical:
            raise
        return None  # reader is unusable; do not fall through to record parsing
    else:
        debug('File ' + vcf_fpath + ' opened as VCF')

    try:
        rec = next(reader)
    except IndexError:
        err('Error: cannot parse records in the VCF file ' + vcf_fpath)
        debug('IndexError parsing VCF file ' + vcf_fpath)
        if is_critical:
            raise
    except ValueError:
        err('Error: cannot parse records in the VCF file ' + vcf_fpath)
        debug('ValueError parsing VCF file ' + vcf_fpath)
        if is_critical:
            raise
    except StopIteration:
        debug('No records in the VCF file ' + vcf_fpath)
        if not silent:
            warn('VCF file ' + vcf_fpath + ' has no records.')
        return vcf_fpath
    except:
        err('Error: cannot parse records in the VCF file ' + vcf_fpath)
        debug('Other error parsing VCF file ' + vcf_fpath)
        if is_critical:
            raise
    else:
        debug('A record was read from the VCF file ' + vcf_fpath)
        return vcf_fpath
    # f = open_gzipsafe(output_fpath)
    # l = f.readline()
    # if 'Cannot allocate memory' in l:
    #     f.close()
    #     f = open_gzipsafe(output_fpath)
    #     contents = f.read()
    #     if not silent:
    #         if is_critical:
    #             critical('SnpSift failed with memory issue:\n' + contents)
    #         else:
    #             err('SnpSift failed with memory issue:\n' + contents)
    #         return None
    #     f.close()
    #     return None
    # return output_fpath
    finally:
        vcf.close()

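# Usage sketch for verify_vcf (hypothetical path; open_gzipsafe handles both
# plain and gzipped VCFs):
#
#   ok = verify_vcf('/data/project/sample1.anno.vcf.gz', is_critical=True)
#   # Returns the path when the file exists, starts with a ##fileformat=VCF
#   # header, and parses; returns None (or aborts via critical()) otherwise.
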
def _annotate(cnf, samples):
    varannotate_cmdl = (get_script_cmdline(cnf, 'python', join('scripts', 'post', 'varannotate.py')) +
        ' --sys-cnf ' + cnf.sys_cnf +
        ' --run-cnf ' + cnf.run_cnf +
        ' --project-name ' + cnf.project_name +
        (' --reuse ' if cnf.reuse_intermediate else '') +
        ' --log-dir -' +
        ' --genome ' + cnf.genome.name +
        (' --no-check ' if cnf.no_check else '') +
        (' --qc' if cnf.qc else ' --no-qc') +
        ((' --caller ' + cnf.caller) if cnf.caller else ''))

    total_reused = 0
    total_processed = 0
    total_success = 0
    total_failed = 0

    not_submitted_samples = samples
    while not_submitted_samples:
        jobs_to_wait = []
        submitted_samples = []
        reused_samples = []

        for sample in not_submitted_samples:
            if not sample.varannotate_dirpath:
                sample.varannotate_dirpath = join(sample.dirpath, source.varannotate_name)
            if not sample.anno_vcf_fpath:
                sample.anno_vcf_fpath = join(sample.varannotate_dirpath,
                                             add_suffix(basename(sample.vcf), 'anno'))
            output_fpath = sample.anno_vcf_fpath
            if not output_fpath.endswith('.gz'):
                output_fpath += '.gz'
            debug('Checking ' + output_fpath)
            if cnf.reuse_intermediate and isfile(output_fpath) and verify_vcf(output_fpath):
                info('Annotated results ' + output_fpath + ' exist, reusing.')
                reused_samples.append(sample)
                info()
                continue

            work_dir = join(cnf.work_dir, source.varannotate_name + '_' + sample.name)
            j = submit_job(cnf, cmdline=varannotate_cmdl +
                    ' --vcf ' + sample.vcf +
                    ' -o ' + sample.varannotate_dirpath +
                    ' -s ' + sample.name +
                    ' --work-dir ' + work_dir +
                    ' --output-file ' + output_fpath,
                job_name='VA_' + cnf.project_name + '_' + sample.name,
                output_fpath=output_fpath,
                stdout_to_outputfile=False,
                work_dir=work_dir)
            if not j.is_done:
                jobs_to_wait.append(j)
            submitted_samples.append(sample)

            if len(jobs_to_wait) >= cnf.threads:
                not_submitted_samples = [s for s in not_submitted_samples
                                         if s not in submitted_samples and s not in reused_samples]
                if not_submitted_samples:
                    info('Submitted ' + str(len(jobs_to_wait)) + ' jobs, waiting for them to finish '
                         'before submitting ' + str(len(not_submitted_samples)) + ' more')
                else:
                    info('Submitted ' + str(len(jobs_to_wait)) + ' last jobs.')
                info()
                break
            info()

        info()
        info('-' * 70)
        if jobs_to_wait:
            info('Submitted ' + str(len(jobs_to_wait)) + ' jobs, waiting...')
            jobs_to_wait = wait_for_jobs(cnf, jobs_to_wait)
        else:
            info('No annotation jobs to submit.')
        info('')
        info('-' * 70)
        info('Finished annotating ' + str(len(jobs_to_wait)) + ' jobs')

        for j in jobs_to_wait:
            if j.is_done and not j.is_failed and not verify_vcf(j.output_fpath):
                j.is_failed = True
            if j.is_done and not j.is_failed:
                if isdir(j.work_dir):
                    os.system('rm -rf ' + j.work_dir)
                else:
                    err('Job was done, but j.work_dir ' + j.work_dir + ' does not exist')

        processed = sum(1 for j in jobs_to_wait if j.is_done)
        failed = sum(1 for j in jobs_to_wait if j.is_failed)
        success = sum(1 for j in jobs_to_wait if j.is_done and not j.is_failed)
        total_failed += failed
        total_reused += len(reused_samples)
        total_processed += processed
        total_success += success
        info('Reused: ' + str(len(reused_samples)))
        info('Processed: ' + str(processed))
        info('Success: ' + str(success))
        info('Failed: ' + str(failed))
        info()

        not_submitted_samples = [s for s in not_submitted_samples
                                 if s not in submitted_samples and s not in reused_samples]

    info('-' * 70)
    info('Done with all ' + str(len(samples)) + ' samples.')
    info('Total reused: ' + str(total_reused))
    info('Total processed: ' + str(total_processed))
    info('Total success: ' + str(total_success))
    info('Total failed: ' + str(total_failed))
    info()

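# The loop above caps in-flight annotation jobs at cnf.threads. The pattern,
# distilled (generic names, not from this module):
#
#   pending = list(samples)
#   while pending:
#       batch, pending = pending[:max_jobs], pending[max_jobs:]
#       jobs = [submit(s) for s in batch if not reusable(s)]
#       wait_for_jobs(cnf, jobs)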