def get():
    """Return the launcher Chapel should use.

    Honors $CHPL_LAUNCHER when set; otherwise infers a default from the
    target platform and the communication layer, falling back to 'none'
    when nothing can be detected.
    """
    launcher_val = os.environ.get('CHPL_LAUNCHER')
    if not launcher_val:
        comm_val = chpl_comm.get()
        platform_val = chpl_platform.get('target')
        # NOTE: a dead `chpl_compiler.get('target')` lookup was removed here;
        # its result was never used in this variant of the function.
        if platform_val.startswith('cray-x'):
            has_aprun = utils.find_executable('aprun')
            has_slurm = utils.find_executable('srun')
            if has_aprun and has_slurm:
                # Both present: ambiguous, so don't pick one automatically.
                launcher_val = 'none'
            elif has_aprun:
                launcher_val = 'aprun'
            elif has_slurm:
                launcher_val = 'slurm-srun'
            else:
                # FIXME: Need to detect aprun/srun differently. On a cray
                # system with an eslogin node, it is possible that aprun
                # will not be available on the eslogin node (only on the
                # login node).
                #
                # has_aprun and has_slurm should look other places
                # (maybe the modules?) to decide.
                # (thomasvandoren, 2014-08-12)
                sys.stderr.write(
                    'Warning: Cannot detect launcher on this system. Please '
                    'set CHPL_LAUNCHER in the environment.\n')
        elif platform_val == 'marenostrum':
            launcher_val = 'marenostrum'
        elif comm_val == 'gasnet':
            substrate_val = chpl_comm_substrate.get()
            if substrate_val == 'udp':
                launcher_val = 'amudprun'
            elif substrate_val == 'mpi':
                launcher_val = 'gasnetrun_mpi'
            elif substrate_val == 'ibv':
                if platform_val == 'pwr6':
                    # our loadleveler launcher is not yet portable/stable/
                    # flexible enough to serve as a good default
                    #launcher_val = 'loadleveler'
                    launcher_val = 'none'
                else:
                    launcher_val = 'gasnetrun_ibv'
            elif substrate_val == 'mxm':
                launcher_val = 'gasnetrun_ibv'
            elif substrate_val == 'lapi':
                # our loadleveler launcher is not yet portable/stable/flexible
                # enough to serve as a good default
                #launcher_val = 'loadleveler'
                launcher_val = 'none'
        elif comm_val == 'mpi':
            launcher_val = 'mpirun'
        else:
            launcher_val = 'none'
    # Guarantee a string result even when detection above found nothing.
    if launcher_val is None:
        launcher_val = 'none'
    return launcher_val
def install(self):
    """Install nginx via the first available package manager, then verify."""
    print('Installing Nginx')
    # Try package managers in priority order; only the first match runs.
    managers = (
        ('yum', 'yum install -y nginx'),
        ('apt-get', 'apt-get install -y nginx'),
        ('brew', 'brew install nginx'),
    )
    for name, command in managers:
        if find_executable(name):
            subprocess.check_call(command, shell=True)
            break
    # Verify the install by asking nginx for its version.
    try:
        subprocess.check_call('nginx -v', shell=True)
        print('OK')
    except subprocess.CalledProcessError:
        print('LHC installation failed')
def filter_variants(vcf_file):
    """Extract annotation fields from a VCF into a SnpSift table file.

    :param vcf_file: annotated VCF file
    :return: path to the ``<sample>.snpSift.table.txt`` output file
    """
    # locate the executable
    snpsift = find_executable(['SnpSift'])
    sample = os.path.basename(vcf_file).rsplit(".", 2)[0]
    snpsift_file = os.path.join(os.path.dirname(vcf_file),
                                sample + '.snpSift.table.txt')
    if os.path.exists(snpsift_file):
        logging.critical("SnpSift file {} exists!".format(snpsift_file))
    else:
        # single shell pipeline: SnpSift extractFields redirected to the table
        cmd = (
            '{} extractFields -s "," -e "." {} CHROM POS REF ALT "ANN[*].GENE" "ANN[*].GENEID" "ANN[*].IMPACT" '
            '"ANN[*].EFFECT" "ANN[*].FEATURE" "ANN[*].FEATUREID" "ANN[*].BIOTYPE" "ANN[*].RANK" "ANN[*].HGVS_C" '
            '"ANN[*].HGVS_P" "ANN[*].CDNA_POS" "ANN[*].CDNA_LEN" "ANN[*].CDS_POS" "ANN[*].CDS_LEN" "ANN[*].AA_POS" '
            '"ANN[*].AA_LEN" "ANN[*].DISTANCE" "EFF[*].EFFECT" "EFF[*].FUNCLASS" "EFF[*].CODON" "EFF[*].AA" '
            '"EFF[*].AA_LEN" > {}'.format(snpsift, vcf_file, snpsift_file))
        # filter
        run_shell_command(cmd=cmd, raise_errors=False, extra_env=None)
    return snpsift_file
def build_standard(db, task='standard', n_threads=4, extra_args=""):
    """Build a standard kraken2 database.

    :param db: <str> path/location to the database
    :param task: <str> operation to be performed
    :param n_threads: <int> number of cpus/threads
    :param extra_args: <str> extra arguments passed to the taxonomy executable
    :return: the database path (with trailing separator)
    """
    # locate the executable
    kraken2_build = find_executable(["kraken2-build"])
    # normalize to an absolute path ending in a separator, then ensure it exists
    db = os.path.join(os.path.realpath(db), '')
    mkdir(db)
    # a non-empty library directory means the download already happened
    lib_dir = os.path.join(db, 'library')
    already_populated = os.path.exists(lib_dir) and len(os.listdir(lib_dir)) > 0
    if already_populated:
        logging.critical(
            'library already downloaded -> \n\t{}'.format(lib_dir))
    else:
        cmd = "{} --{} --threads {} --use-ftp --db {} {}".format(
            kraken2_build, task, n_threads, db, extra_args)
        logging.info("building standard database")
        run_shell_command(cmd=cmd, raise_errors=False, extra_env=None)
    return db
def get_ps_argument_spec(filename):
    """Return the argument spec defined by a PowerShell module.

    Runs ``ps_argspec.ps1`` (next to this file) under ``pwsh`` against
    *filename* and parses the JSON it emits.

    :param filename: path to the PowerShell module to inspect
    :return: ``(argument_spec, (), kwargs)`` mirroring the Python code path
    :raises FileNotFoundError: if ``pwsh`` is not on PATH
    :raises AnsibleModuleImportError: if the inspection script fails
    """
    # This uses a very small skeleton of Ansible.Basic.AnsibleModule to return
    # the argspec defined by the module. This is pretty rudimentary and will
    # probably require something better going forward.
    pwsh = find_executable('pwsh')
    if not pwsh:
        raise FileNotFoundError(
            'Required program for PowerShell arg spec inspection "pwsh" not found.'
        )
    script_path = os.path.join(os.path.dirname(os.path.realpath(__file__)),
                               'ps_argspec.ps1')
    # Fix: run the script under the pwsh interpreter we just located.
    # Previously `pwsh` was found but never used and the .ps1 file was
    # executed directly, which fails on POSIX systems.
    proc = subprocess.Popen([pwsh, script_path, filename],
                            stdout=subprocess.PIPE,
                            stderr=subprocess.PIPE,
                            shell=False)
    stdout, stderr = proc.communicate()
    if proc.returncode != 0:
        raise AnsibleModuleImportError(stderr.decode('utf-8'))
    kwargs = json.loads(stdout)
    # the validate-modules code expects the options spec to be under the
    # argument_spec key not options as set in PS
    kwargs['argument_spec'] = kwargs.pop('options', {})
    return kwargs['argument_spec'], (), kwargs
def gen_ca_certs(self):
    """Generate the CA key, CSR and self-signed certificate with openssl.

    Equivalent shell steps:
        openssl genrsa -out ca.key 2048
        openssl req -new -subj '/C=ZH/ST=wakanda/L=lhc/O=lhc/OU=lhc/CN=*' -key ca.key -out any.csr
        openssl x509 -req -days 3650 -in any.csr -signkey ca.key -out any.crt
    """
    openssl = find_executable('openssl')
    if not openssl:
        raise SSLError('openssl executable not found')
    if os.path.exists(CA_CRT):
        log.warn('ca cert file %s already exists' % CA_CRT)
    if not os.path.exists(CA_CERT_FILES_PATH):
        mkdirs(CA_CERT_FILES_PATH)
    # NOTE(review): the x509 step passes -days 3560 while the docstring shows
    # 3650 — one of the two looks like a typo; confirm the intended lifetime.
    steps = (
        (openssl, 'genrsa', '-out', CA_KEY, '2048'),
        (openssl, 'req', '-new', '-subj', CA_SUB, '-key', CA_KEY,
         '-out', CA_CSR),
        (openssl, 'x509', '-req', '-days', '3560', '-sha256', '-extensions',
         'v3_ca', '-signkey', CA_KEY, '-in', CA_CSR, '-out', CA_CRT),
    )
    for step in steps:
        try:
            log.info(' '.join(step))
            subprocess.check_call(step)
        except subprocess.CalledProcessError:
            log.error('Fail')
            raise
def __init__(self, certificate=None, ssl_command="openssl",
             force_ssl_command=False, disable_executable_search=False,
             debug=False):
    """Set up a secure connection context for the given certificate.

    Prefers the stdlib ``ssl`` module; falls back to driving the openssl
    command-line tool when the import fails or *force_ssl_command* is set.
    """
    self.certificate = certificate
    self.connectionContext = None
    self.debug = debug
    if not os.path.exists(certificate):
        raise APNSCertificateNotFoundError(
            "Apple Push Notification Service Certificate file %s "
            "not found." % certificate)
    try:
        if force_ssl_command:
            # Deliberately jump to the command-line fallback below.
            raise ImportError("force_ssl_command skipping import")
        # use ssl library to handle secure connection
        import ssl as ssl_module
        self.connectionContext = SSLModuleConnection(
            certificate, ssl_module=ssl_module)
    except ImportError:
        # use command line openssl tool to handle secure connection
        executable = (ssl_command if disable_executable_search
                      else find_executable(ssl_command))
        if not executable:
            raise APNSNoCommandFound(
                "SSL Executable [%s] not found in your PATH " % ssl_command)
        self.connectionContext = OpenSSLCommandLine(
            certificate, executable, debug=debug)
def __init__(self, certificate=None, ssl_command="openssl",
             force_ssl_command=False, disable_executable_search=False,
             debug=False):
    """Initialize the APNS connection.

    Tries the stdlib ``ssl`` module first; if it cannot be imported (or
    *force_ssl_command* is True) the openssl command-line tool is used.
    """
    self.certificate = certificate
    self.connectionContext = None
    self.debug = debug
    if not os.path.exists(certificate):
        raise APNSCertificateNotFoundError(
            "Apple Push Notification Service Certificate file %s "
            "not found." % certificate)
    try:
        if force_ssl_command:
            # Skip the ssl-module path and fall through to the except clause.
            raise ImportError("force_ssl_command skipping import")
        # use ssl library to handle secure connection
        import ssl as ssl_module
        context = SSLModuleConnection(certificate, ssl_module=ssl_module)
        self.connectionContext = context
    except ImportError:
        # use command line openssl tool to handle secure connection
        if disable_executable_search:
            executable = ssl_command
        else:
            executable = find_executable(ssl_command)
        if not executable:
            raise APNSNoCommandFound(
                "SSL Executable [%s] not found in your PATH " % ssl_command)
        self.connectionContext = OpenSSLCommandLine(certificate,
                                                    executable,
                                                    debug=debug)
def get():
    """Return the launcher Chapel should use.

    Honors $CHPL_LAUNCHER when set; otherwise infers a default from the
    target platform, target compiler and communication layer.  Always
    returns a string ('none' when nothing can be detected).
    """
    launcher_val = os.environ.get('CHPL_LAUNCHER')
    if not launcher_val:
        comm_val = chpl_comm.get()
        platform_val = chpl_platform.get('target')
        compiler_val = chpl_compiler.get('target')
        if platform_val.startswith('cray-x'):
            has_aprun = utils.find_executable('aprun')
            has_slurm = utils.find_executable('srun')
            if has_aprun and has_slurm:
                # Both present: ambiguous, so don't pick one automatically.
                launcher_val = 'none'
            elif has_aprun:
                launcher_val = 'aprun'
            elif has_slurm:
                launcher_val = 'slurm-srun'
        elif platform_val == 'marenostrum':
            launcher_val = 'marenostrum'
        elif compiler_val == 'tile-cc':
            launcher_val = 'tile-monitor'
        elif comm_val == 'gasnet':
            substrate_val = chpl_comm_substrate.get()
            if substrate_val == 'udp':
                launcher_val = 'amudprun'
            elif substrate_val == 'mpi':
                launcher_val = 'gasnetrun_mpi'
            elif substrate_val == 'ibv':
                if platform_val == 'pwr6':
                    # our loadleveler launcher is not yet portable/stable/
                    # flexible enough to serve as a good default
                    #launcher_val = 'loadleveler'
                    launcher_val = 'none'
                else:
                    launcher_val = 'gasnetrun_ibv'
            elif substrate_val == 'mxm':
                launcher_val = 'gasnetrun_ibv'
            elif substrate_val == 'lapi':
                # our loadleveler launcher is not yet portable/stable/flexible
                # enough to serve as a good default
                #launcher_val = 'loadleveler'
                launcher_val = 'none'
        elif comm_val == 'mpi':
            launcher_val = 'mpirun'
        else:
            launcher_val = 'none'
    # Fix: fall back to 'none' when detection above left launcher_val unset
    # (e.g. cray-x with neither aprun nor srun on PATH).  The sibling variant
    # of this function already has this guard; previously this one could
    # return None.
    if launcher_val is None:
        launcher_val = 'none'
    return launcher_val
def check_cert(self, path):
    """Validate a certificate by dumping it as text via ``openssl x509``.

    Returns the textual dump on success; raises SSLError when openssl is
    missing or the certificate cannot be parsed.
    """
    openssl = find_executable('openssl')
    if not openssl:
        raise SSLError('openssl executable not found')
    dump_cmd = [openssl, 'x509', '-in', path, '-noout', '-text']
    try:
        return subprocess.check_output(dump_cmd, stderr=subprocess.PIPE)
    except subprocess.CalledProcessError:
        raise SSLError('cert ' + path + 'not valid')
def gen_and_sign_certs_for(self, cn):
    # https://my.oschina.net/itblog/blog/651434
    """Generate a key/CSR for *cn* (with SAN entries) and sign it with the CA.

    Equivalent shell steps:
        openssl genrsa -out ca.key 2048
        openssl req -new -subj '/C=ZH/ST=wakanda/L=lhc/O=lhc/OU=lhc/CN=*' -key ca.key -out any.csr
        openssl x509 -req -days 3650 -in any.csr -signkey ca.key -out any.crt
    """
    self.check_ca()
    openssl = find_executable('openssl')
    if not openssl:
        raise SSLError('openssl executable not found')
    PATH, KEY, CRT, CSR = self.get_host_cert_paths(cn)
    SUB = '/O=Wakanda/OU=LHC/CN=' + cn
    if os.path.exists(CRT):
        log.warn('ca cert file %s already exists' % CRT)
    if not os.path.exists(PATH):
        mkdirs(PATH)
    # SAN covers the bare domain, its www. subdomain and the wildcard
    san = 'subjectAltName=DNS:{0},DNS:www.{0},DNS:*.{0}'.format(cn)
    req = '\n'.join((
        '[ req ]',
        'req_extensions = v3_req',
        'distinguished_name = req_distinguished_name',
        '[ v3_req ]',
        'basicConstraints = CA:FALSE',
        'keyUsage = nonRepudiation, digitalSignature, keyEncipherment',
        # 'subjectAltName = @alt_names',
        '[ req_distinguished_name ]',
        'commonName = Common Name (eg, fully qualified host name)',
        '[SAN]',
        san))
    # Fix: NamedTemporaryFile defaults to binary mode ('w+b'); writing the
    # str payloads above would raise TypeError on Python 3.  Open both
    # temporary files in text mode instead.
    with tempfile.NamedTemporaryFile(mode='w') as f, \
            tempfile.NamedTemporaryFile(mode='w') as s:
        s.write(san)
        s.flush()
        f.write(req)
        f.flush()
        cmds = (
            (openssl, 'genrsa', '-out', KEY, '2048'),
            (openssl, 'req', '-new', '-subj', SUB, '-reqexts', 'SAN',
             '-config', f.name, '-key', KEY, '-out', CSR),
            (openssl, 'x509', '-req', '-days', '3560', '-sha256',
             '-CAcreateserial', '-extfile', s.name, '-CA', CA_CRT,
             '-CAkey', CA_KEY, '-in', CSR, '-out', CRT),
        )
        for cmd in cmds:
            try:
                log.info(' '.join(cmd))
                subprocess.check_call(cmd)
            except subprocess.CalledProcessError:
                log.error('Fail')
                raise
def download_library(db, library, task="download-library", n_threads=4,
                     extra_args=""):
    """Download a named set of reference genomes/proteins into a kraken2 db.

    :param db: <str> path/location to the database
    :param library: <str> name of the reference library whose sequences will
        be downloaded
    :param task: <str> operation to be performed
    :param n_threads: <int> number of cpus/threads
    :param extra_args: <str> extra arguments passed to the executable
    :return: the database path
    """
    # locate the executable
    kraken2 = find_executable(["kraken2-build"])
    # create the database directory if not existing
    mkdir(db)
    lib_dir = os.path.join(db, 'library', library)
    # all of these files present => the library was already fetched
    expected = [os.path.join(lib_dir, name) for name in (
        'assembly_summary.txt', 'manifest.txt', 'prelim_map.txt',
        'library.fna.masked', 'library.fna')]
    existing = [path for path in expected if os.path.isfile(path)]
    if len(existing) == len(expected):
        logging.critical("library files exist \n\t{}".format(
            '\n\t'.join(existing)))
    else:
        cmd = "{} --{} {} --threads {} --use-ftp --db {} {}".format(
            kraken2, task, library, n_threads, db, extra_args)
        logging.info("downloading library")
        p = run_shell_command(cmd=cmd, logfile=f_out, raise_errors=False,
                              extra_env=None)
    return db
def download_taxonomy(db, task="download-taxonomy", n_threads=4,
                      extra_args=""):
    """Download the accession-to-taxon maps plus the taxonomic name and tree
    information from NCBI.

    :param task: <str> operation to be performed
    :param db: <str> path/location to the database
    :param n_threads: <int> number of cpus/threads
    :param extra_args: <str> extra arguments passed to the taxonomy executable
    :return: the database path
    """
    # locate the executable
    kraken2 = find_executable(["kraken2-build"])
    # create the database directory if not existing
    mkdir(db)
    taxonomy_dir = os.path.join(db, 'taxonomy')
    if os.path.exists(taxonomy_dir) and len(os.listdir(taxonomy_dir)) > 0:
        logging.critical(
            'taxonomy already downloaded \n\t{}'.format(taxonomy_dir))
    else:
        # Fix: '--use-ftp' was previously passed twice on the same command
        # line; pass it once, matching the other kraken2-build wrappers.
        cmd = "{} --{} --threads {} --use-ftp --db {} {}".format(
            kraken2, task, n_threads, db, extra_args)
        logging.info("downloading taxonomy")
        run_shell_command(cmd=cmd, logfile=f_out, raise_errors=False,
                          extra_env=None)
    return db
def get():
    """Return the make utility Chapel should use (honors $CHPL_MAKE)."""
    make_val = os.environ.get('CHPL_MAKE')
    if make_val:
        return make_val
    platform_val = chpl_platform.get()
    if platform_val.startswith('cygwin') or platform_val == 'darwin':
        return 'make'
    if platform_val.startswith('linux'):
        # prefer GNU make when installed under its traditional name
        return 'gmake' if utils.find_executable('gmake') else 'make'
    return 'gmake'
def get():
    """Pick the make command: $CHPL_MAKE if set, else a platform default."""
    make_val = os.environ.get('CHPL_MAKE')
    if not make_val:
        platform_val = chpl_platform.get()
        if platform_val.startswith('cygwin') or platform_val == 'darwin':
            make_val = 'make'
        elif platform_val.startswith('linux'):
            # on linux, use gmake only when it actually exists on PATH
            make_val = 'gmake' if utils.find_executable('gmake') else 'make'
        else:
            make_val = 'gmake'
    return make_val
def build_snpeff_db(reference, gff, snpeff_config, snpeff_db):
    """Build a SnpEff database for a reference genome.

    :param reference: reference genome FASTA
    :param gff: annotation file in GFF3 format
    :param snpeff_config: NOTE(review): this parameter is immediately
        overwritten with <snpeff_db>/snpeff.config below and its incoming
        value is never used — confirm whether callers expect otherwise.
    :param snpeff_db: directory in which the database is created
    :return: (snpeff_config, snpeff_data_dir)
    """
    # locate the executable
    snpeff = find_executable(['snpEff'])
    snpeff_db = os.path.abspath(snpeff_db)
    # derive the genome name (index_base) from the reference filename
    base_dir = os.path.abspath(os.path.dirname(reference))
    prefix = os.path.join(
        base_dir, os.path.splitext(os.path.basename(reference))[0])
    index_base = os.path.basename(prefix)
    snpeff_data_dir = os.path.join(snpeff_db, 'data')
    snpeff_genes_dir = os.path.join(snpeff_data_dir, index_base)
    for directory in (snpeff_data_dir, snpeff_genes_dir):
        mkdir(directory)
    # stage annotation and sequence under the names SnpEff expects
    copy_file(src=gff, dest=os.path.join(snpeff_genes_dir, 'genes.gff'))
    copy_file(src=reference,
              dest=os.path.join(snpeff_genes_dir, 'sequences.fa'))
    # register the genome in the configuration file (overwrites the file)
    snpeff_config = os.path.join(snpeff_db, 'snpeff.config')
    with open(snpeff_config, 'w') as f_obj:
        f_obj.write('{}.genome : {}\n'.format(index_base, index_base))
    # build the database only if the predictor binary is absent
    db_bin = os.path.join(snpeff_genes_dir, 'snpEffectPredictor.bin')
    if os.path.exists(db_bin):
        logging.critical("SnpEff database exist for {}".format(index_base))
    else:
        cmd = "{} build -config {} -dataDir {} -gff3 -v {}".format(
            snpeff, snpeff_config, snpeff_data_dir, index_base)
        logging.info("building SnpEFF database: {}".format(gff))
        run_shell_command(cmd=cmd, raise_errors=False, extra_env=None)
    return snpeff_config, snpeff_data_dir
def get_stats(vcf_file, stats_file):
    """Calculate variant call stats with bcftools.

    :param vcf_file: <str> VCF file
    :param stats_file: <str> file to write stats
    :return: the stats file path
    """
    # locate the executable
    bcftools = find_executable(['bcftools'])
    cmd = "{} stats {} > {}".format(bcftools, vcf_file, stats_file)
    logging.info("calculating stats ")
    run_shell_command(cmd=cmd, raise_errors=False, extra_env=None)
    return stats_file
def count_reads(infile, outfile): """ Perform a read count on FASTQ files in a directory, return dict of counts (int) :param infile: <str> path to the input file in fastq format :param outfile: <str> file to write the summary :return: <str> log file """ # locate the executable reformat = find_executable(["reformat.sh"]) summary_dict = dict() # loop through the dict items # for sample, reads in read_type(datadir).items(): # lines = [] # for read in reads: #print("\n# sample: {}\n".format(read), sep=' ', end='\n', file=f_out, flush=True) call = ["{} in={} -Xmx4G".format(reformat, infile)] cmd = " ".join(call) try: logging.info( "[counting reads]\n\t" + cmd + "\n\tBrian Bushnell (2017)." "\n\tBBTools: a suite of fast, multithreaded bioinformatics tools designed for " "analysis of DNA and RNA sequence data. " "\n\thttps://jgi.doe.gov/data-and-tools/bbtools//\n ") process = run_shell_command(cmd=cmd, logfile=f_out, raise_errors=True) if process: with open(logfile, 'r') as infile: for line in infile: m1 = re.match(r"^java.*$", line.strip()) m2 = re.match(r"^Input:.*$", line.strip()) if m1: read = os.path.basename( m1.group(0).split()[-2]).strip("in=") summary_dict[read] = '' if m2: read_count = int(m2.group(0).split()[1]) if read in summary_dict: summary_dict[read] = read_count except Exception: raise Exception("ERROR: COUNTING READS FAILED") df = pd.DataFrame(summary_dict.items(), columns=['sample', 'reads']) df.to_csv(outfile.name, index=False, sep="\t") return outfile
def get(flag='host'):
    """Return the compiler Chapel should use for *flag* ('host' or 'target').

    CHPL_HOST_COMPILER / CHPL_TARGET_COMPILER take precedence when set;
    otherwise a default is derived from the platform.

    :param flag: 'host' or 'target'
    :return: compiler identifier string (e.g. 'gnu', 'clang', 'ibm')
    :raises ValueError: for any other *flag* value
    """
    if flag == 'host':
        compiler_val = os.environ.get('CHPL_HOST_COMPILER', '')
    elif flag == 'target':
        compiler_val = os.environ.get('CHPL_TARGET_COMPILER', '')
    else:
        raise ValueError("Invalid flag: '{0}'".format(flag))
    if compiler_val:
        return compiler_val
    platform_val = chpl_platform.get(flag)
    # The cray platforms are a special case in that we want to "cross-compile"
    # by default. (the compiler is different between host and target, but the
    # platform is the same)
    if platform_val.startswith('cray-x'):
        if flag == 'host':
            compiler_val = 'gnu'
        else:
            # the loaded PrgEnv module determines the target compiler
            subcompiler = os.environ.get('PE_ENV', 'none')
            if subcompiler == 'none':
                stderr.write(
                    "Warning: Compiling on {0} without a PrgEnv loaded\n".
                    format(platform_val))
            compiler_val = "cray-prgenv-{0}".format(subcompiler.lower())
    else:
        # Normal compilation (not "cross-compiling")
        # inherit the host compiler if the target compiler is not set and
        # the host and target platforms are the same
        if flag == 'target':
            if chpl_platform.get('host') == platform_val:
                compiler_val = get('host')
        elif platform_val.startswith('pwr'):
            compiler_val = 'ibm'
        elif platform_val == 'marenostrum':
            compiler_val = 'ibm'
        elif platform_val == 'darwin':
            # prefer clang on macOS when it is available
            if utils.find_executable('clang'):
                compiler_val = 'clang'
            else:
                compiler_val = 'gnu'
        else:
            compiler_val = 'gnu'
    return compiler_val
def run(self):
    """Start the nginx proxy as a Docker container.

    Regenerates nginx.conf, removes any stopped leftover container, then
    launches NGINX_DOCKER_IMAGE with the config, cache and conf paths
    bind-mounted.  Raises ProxyError if already running; exits the process
    when the docker run command fails.
    """
    super(ProxyDocker, self).run()
    status = self.status()
    if status == 'running':
        raise ProxyError('Already Running')
    if status:
        # a container with our name exists but is not running — remove it
        # so the name is free for the new one
        subprocess.check_call('docker rm -f ' + PROXY_CONTAINER_NAME,
                              shell=True)
    self.dump_nginx_conf()
    docker = find_executable('docker')
    if not docker:
        raise ProxyError('Need docker client executable')
    cmd = [
        docker, 'run', '--name', PROXY_CONTAINER_NAME, '-d', '--restart',
        'always', '-v', NGINX_CONF_FILE_PATH + ':/etc/nginx/nginx.conf',
        '-v', '{0}:{0}'.format(self.config.cache_path), '-v',
        '{0}:{0}'.format(CONF_PATH), '--dns', self.config.dns_resolver
    ]
    if MAC:
        # on macOS, bind the published ports to a loopback alias address
        print('configure port binding ip')
        subprocess.check_call('sudo ifconfig lo0 alias %s/24' % MAC_ALIAS_IP,
                              shell=True)
        # TODO
        portmapping = [
            '-p', MAC_ALIAS_IP + ':{0}:{0}'.format(self.config.http_port),
            '-p', MAC_ALIAS_IP + ':{0}:{0}'.format(self.config.https_port)
        ]
    else:
        portmapping = [
            '-p', '{0}:{0}'.format(self.config.http_port),
            '-p', '{0}:{0}'.format(self.config.https_port)
        ]
    cmd += portmapping
    cmd += [NGINX_DOCKER_IMAGE]
    print('staring proxy container')
    print(' '.join(cmd))
    try:
        subprocess.check_call(cmd)
        print('OK')
    except subprocess.CalledProcessError:
        print('fail running proxy container')
        sys.exit(1)
def annotate_snps(index_base, config, vcf_file, db, snpeff_csv, snpeff_vcf):
    """Annotate and predict the effect of variants with SnpEff.

    :param index_base: genome name as registered in the SnpEff config
    :param config: path to the SnpEff configuration file
    :param vcf_file: input VCF file to annotate
    :param db: SnpEff data directory
    :param snpeff_csv: path for the CSV stats output
    :param snpeff_vcf: path for the bgzipped annotated VCF output
    :return: path to the annotated VCF
    """
    # locate the executable
    snpeff = find_executable(['snpEff'])
    if os.path.exists(snpeff_vcf):
        logging.critical(
            "variant annotation file {} exists".format(snpeff_vcf))
    else:
        # Fix: use the executable path found above instead of the hard-coded
        # literal 'snpEff', which silently ignored the find_executable lookup.
        call = [
            "{} {} -config {} -dataDir {} {} -csvStats {} | bgzip -c > {}"
            "".format(snpeff, index_base, config, db, vcf_file, snpeff_csv,
                      snpeff_vcf)
        ]
        cmd = " ".join(call)
        logging.info(
            "annotating variants and predicting effects: {}".format(vcf_file))
        p = run_shell_command(cmd=cmd, raise_errors=False, extra_env=None)
        if p:
            # index the bgzipped output so downstream tools can query it
            index_vcf(vcf_file=snpeff_vcf)
            # SnpEff drops its HTML summary in the CWD; move it beside the VCF
            sample = os.path.basename(snpeff_vcf).rsplit(".", 2)[0]
            out_dir = os.path.dirname(snpeff_vcf)
            html = os.path.join(os.getcwd(), 'snpEff_summary.html')
            snpeff_html = os.path.join(out_dir,
                                       sample + ".snpEff.summary.html")
            copy_file(src=html, dest=snpeff_html)
            os.remove(html)
    return snpeff_vcf
def get(flag='host'):
    """Return the compiler Chapel should use for *flag* ('host' or 'target').

    CHPL_HOST_COMPILER / CHPL_TARGET_COMPILER take precedence when set;
    otherwise a default is derived from the platform.

    :param flag: 'host' or 'target'
    :return: compiler identifier string (e.g. 'gnu', 'clang', 'ibm')
    :raises ValueError: for any other *flag* value
    """
    if flag == 'host':
        compiler_val = os.environ.get('CHPL_HOST_COMPILER', '')
    elif flag == 'target':
        compiler_val = os.environ.get('CHPL_TARGET_COMPILER', '')
    else:
        raise ValueError("Invalid flag: '{0}'".format(flag))
    if compiler_val:
        return compiler_val
    platform_val = chpl_platform.get(flag)
    # The cray platforms are a special case in that we want to "cross-compile"
    # by default. (the compiler is different between host and target, but the
    # platform is the same)
    if platform_val.startswith('cray-x'):
        if flag == 'host':
            compiler_val = 'gnu'
        else:
            # the loaded PrgEnv module determines the target compiler
            subcompiler = os.environ.get('PE_ENV', 'none')
            if subcompiler == 'none':
                stderr.write("Warning: Compiling on {0} without a PrgEnv loaded\n".format(platform_val))
            compiler_val = "cray-prgenv-{0}".format(subcompiler.lower())
    else:
        # Normal compilation (not "cross-compiling")
        # inherit the host compiler if the target compiler is not set and
        # the host and target platforms are the same
        if flag == 'target':
            if chpl_platform.get('host') == platform_val:
                compiler_val = get('host')
        elif platform_val.startswith('pwr'):
            compiler_val = 'ibm'
        elif platform_val == 'marenostrum':
            compiler_val = 'ibm'
        elif platform_val == 'darwin':
            # prefer clang on macOS when it is available
            if utils.find_executable('clang'):
                compiler_val = 'clang'
            else:
                compiler_val = 'gnu'
        else:
            compiler_val = 'gnu'
    return compiler_val
def dbcan_cazymes(input_file, seq_type, tools, db_dir, out_dir, dbcan_args=""):
    """Run CAZyme prediction with run_dbcan.py.

    :param input_file: <str> input file in FASTA format
    :param seq_type: <str> sequence type of the input
    :param tools: <str> comma-separated tools for cazyme annotation
    :param db_dir: <str> path to the database directory
    :param out_dir: <str> path to output directory
    :param dbcan_args: <str> extra arguments passed on to the executable
    :return: the output directory
    """
    # locate the executable
    dbcan = find_executable(['run_dbcan.py'])
    tools = list(map(str, tools.split(',')))
    # all three tool outputs already present => nothing to do
    expected_outputs = [os.path.join(out_dir, name)
                        for name in ('diamond.out', 'hmmer.out', 'Hotpep.out')]
    if all(os.path.exists(path) for path in expected_outputs):
        pass
    else:
        cmd = "{} {} {} --tools {} --db_dir {} --out_dir {} {}".format(
            dbcan, input_file, seq_type, ",".join(tools), db_dir, out_dir,
            dbcan_args)
        logging.info("CAZyme prediction on {}".format(
            os.path.basename(input_file)))
        run_shell_command(cmd=cmd, logfile=f_out, raise_errors=False,
                          extra_env=None)
    return out_dir
def build(db, task="build", n_threads=4, extra_args=""):
    """Build the kraken2 database ('kraken2-build --build') once the library
    has been installed.

    :param db: <str> path/location to the database
    :param task: <str> operation to be performed
    :param n_threads: <int> number of cpus/threads
    :param extra_args: <str> extra arguments passed to the taxonomy executable
    :return: the database path (with trailing separator)
    """
    # locate the executable
    kraken2 = find_executable(["kraken2-build"])
    # normalize the path and make sure the directory exists
    db = os.path.join(os.path.realpath(db), '')
    mkdir(db)
    # presence of every index file means the build already completed
    index_files = [os.path.join(db, ext) for ext in
                   ('opts.k2d', 'hash.k2d', 'taxo.k2d', 'seqid2taxid.map')]
    present = [path for path in index_files if os.path.isfile(path)]
    if len(present) == len(index_files):
        logging.critical("indices exist \n\t{}".format('\n\t'.join(present)))
    else:
        cmd = "{} --{} --threads {} --db {} {}".format(
            kraken2, task, n_threads, db, extra_args)
        logging.info("building database")
        run_shell_command(cmd=cmd, raise_errors=False, extra_env=None)
    return db
def get_genomes(metadata_file, out_dir):
    """
    get taxids from the metadata file for use in downloading the genomes

    :param metadata_file: Excel workbook with a "Taxa_metadata" sheet
    :param out_dir: directory under which one subdirectory per taxid is made
    :return: None (downloads files as a side effect)
    """
    # locate the executable
    tool = find_executable(["ncbi-genome-download"])
    # read the sheet having genomes metadata
    meta_df = pd.read_excel(metadata_file, sheet_name="Taxa_metadata")
    # create a dict with tax ids as key and genome names and bioproject
    # accessions as values
    subset_df = meta_df[['Tax_ID', 'Species', 'BioProject Accession',
                         'Scientific_Name']]
    meta_dic = subset_df.set_index('Tax_ID').T.to_dict('list')
    # first pass: resolve a GenBank FTP path for each taxid via Entrez tools
    for tax_id, records in meta_dic.items():
        bioproj_acc = records[1]
        # fetch the genbank ftp path for genome
        call = ['esearch -db assembly -query "txid{}[Organism] AND {}[BioProject]" '
                '| efetch -format docsum '
                '| xtract -pattern DocumentSummary -element FtpPath_GenBank'.format(tax_id, bioproj_acc)]
        cmd = " ".join(call)
        # call = ['esearch -db assembly -query "{}[All Fields] AND {}[BioProject]" '
        #         '| efetch -format docsum '
        #         '| xtract -pattern DocumentSummary -element FtpPath_GenBank'.format(genome, bioproj_acc)]
        # cmd = " ".join(call)
        try:
            sys.stdout.write("\nfetching Genebank ftp path for taxonomy id {}".format(tax_id))
            p = subprocess.Popen(cmd, shell=True, stdout=subprocess.PIPE)
            ftp_path = p.communicate()[0].strip().decode('utf-8')
            # empty esearch output yields the sentinel "_genomic.fna.gz"
            # basename that the second pass checks for below
            ftp_path = os.path.join(ftp_path,
                                    os.path.basename(ftp_path) + '_genomic.fna.gz')
            meta_dic[tax_id].append(ftp_path)
        except (subprocess.CalledProcessError, OSError) as error:
            # NOTE(review): OSError has no .returncode attribute — this
            # handler itself would raise AttributeError for OSError; verify.
            rc = error.returncode
            sys.stderr.write("\nerror {} occurred when fetching genome accession\ncommand running: {}".format(rc, cmd))
    # second pass: download each genome, either directly from the FTP path
    # or via ncbi-genome-download when no path was resolved
    for tax_id, records in meta_dic.items():
        # NOTE(review): records[3] only exists when the first pass appended a
        # path for this taxid; a failure there would make this IndexError.
        species, bioproj_acc, sciname, ftp_path = records[0], records[1], records[2], records[3]
        fna = os.path.basename(ftp_path)
        output_dir = os.path.join(out_dir, str(tax_id))
        if not os.path.exists(output_dir):
            os.makedirs(output_dir)
        if fna != "_genomic.fna.gz":
            # a concrete FTP path was found: wget + gunzip it
            unzipped = os.path.join(output_dir, os.path.splitext(fna)[0])
            if os.path.exists(unzipped):
                print(f"unzipped file {unzipped} exists")
            else:
                call = ['wget --continue -q \
 --directory-prefix={} {} && gunzip {}'.format(output_dir, ftp_path, unzipped)]
                cmd = ' '.join(call)
                try:
                    sys.stdout.write("\nfetching sequence file {}".format(ftp_path))
                    subprocess.check_call(cmd, shell=True)
                except (subprocess.CalledProcessError, OSError) as error:
                    rc = error.returncode
                    sys.stderr.write("\nerror {} occurred when fetching genome accession\ncommand running: {}".format(rc, cmd))
        else:
            # no FTP path: fall back to ncbi-genome-download by genus/strain
            species, genus = records[0], records[0].split()[0]
            strain = records[2].replace('[', '').replace(']', '').rsplit(" ", 2)[1:]
            strain = " ".join(strain)
            output_dir = os.path.join(out_dir, str(tax_id))
            if not os.path.exists(output_dir):
                os.makedirs(output_dir)
            call = ['ncbi-genome-download --section genbank --formats "fasta" '
                    '--assembly-levels "all" --genera "{}" --strains "{}" --taxids {} '
                    '--output-folder {} --flat-output -v bacteria'.format(genus, strain, tax_id, output_dir)]
            cmd = " ".join(call)
            try:
                sys.stdout.write("\nfetching genbank genome assembly for taxid {}\n".format(tax_id))
                p = subprocess.check_call(cmd, shell=True)
                if p == 0:
                    # decompress everything that was downloaded
                    for fn in os.listdir(output_dir):
                        uncompress_fasta(filename=os.path.join(output_dir, fn), suffix=".fna")
            except (subprocess.CalledProcessError, OSError) as error:
                rc = error.returncode
                sys.stderr.write("\nerror {} occurred when fetching genome accession\ncommand running: {}".format(rc, cmd))
print 'failed to import encoder_binary_name' encoder_binary_name = 'x265' # do not use debug builds for long-running tests debugs = [key for key in my_builds if 'debug' in my_builds[key][3]] for k in debugs: del my_builds[k] utils.buildall() if logger.errors: # send results to mail logger.email_results() sys.exit(1) always = '--no-info --hash=1' hasffplay = find_executable('ffplay') try: cumulative_count = 0 for key in my_builds: tests = utils.parsetestfile(key, False) cumulative_count += logger.testcountlline(len(tests)) logger.settestcount(cumulative_count) for build in my_builds: logger.setbuild(build) tests = utils.parsetestfile(key, False) for seq, command in tests: if '--codec "x264"' in command:
def __init__(self, qmake_path=None):
    """Remember which qmake to use; fall back to the one on PATH."""
    self._qmake_path = qmake_path or find_executable("qmake")
def __init__(self, qmake_path=None):
    """Store the qmake executable path, locating it on PATH if not given."""
    if not qmake_path:
        qmake_path = find_executable("qmake")
    self._qmake_path = qmake_path
# __PIP__: {PIP} # global section defines the default values of cache and settings of proxy [global] # will cache files that has these filename extension extensions = {extensions} cache_size_limit = {cache_size_limit} cache_expire = {cache_expire} cache_key = {cache_key} http_port = {http_port} https_port = {https_port} dns_resolver = {dns_resolver} # run proxy as local process (nginx) or as docker container # local or docker mode = {mode} cache_path = {cache_path} # set it when use a outside proxy server proxy_ip = {proxy_ip} """.format(WEB=COMMON_EXTENSIONS['__WEB__'], PKG=COMMON_EXTENSIONS['__PKG__'], PIP=COMMON_EXTENSIONS['__PIP__'], **DEFAULT_CONF_ITEMS) MAC = 'Darwin' in platform.platform() LINUX = 'Linux' in platform.platform() AMD64 = 'x86_64' in platform.platform() REDHAT = os.path.exists('/etc/redhat-release') DEBIAN = find_executable('apt-get')