def main(self): self.copy_file() try: try: self.file_type = get_file_type(self.file_name) except Exception, e: self.errors.append(str(e)) return print "Starting " + self.name if self.file_type == 'bam': self.working_file = self.file_name.replace('.bam', '.fastq.gz') cmd = 'samtools bam2fq %s | gzip > %s' % ( os.path.join(self.base_dir, self.file_name), os.path.join(self.base_dir, self.working_file)) out, err = execute_command(cmd) if err: self.execution_error = err return if self.file_type == 'cram': self.working_file = self.file_name.replace( '.cram', '.fastq.gz') cmd = 'cramtools fastq -I %s | gzip > %s' % ( os.path.join(self.base_dir, self.file_name), os.path.join(self.base_dir, self.working_file)) out, err = execute_command(cmd) if err: self.execution_error = err return elif self.file_type == 'bz2': self.working_file = self.file_name.replace('.bz2', '.gz') cmd = 'bunzip2 -c < %s | gzip -c > %s' % ( os.path.join(self.base_dir, self.file_name), os.path.join(self.base_dir, self.working_file)) out, err = execute_command(cmd) if err: self.execution_error = err return elif self.file_type == 'sff': self.working_file = self.file_name.replace('.sff', '.fastq.gz') cmd = 'sff2fastq %s | gzip -c > %s' % ( os.path.join(self.base_dir, self.file_name), os.path.join(self.base_dir, self.working_file)) out, err = execute_command(cmd) if err: self.execution_error = err return self.validate()
def start_servers():
    """Restart the AE2 framework on each environment (production, perftest,
    curator) by ssh-ing to banana; returns the collected stdout/stderr of
    the three restart commands joined by newlines."""
    results = []
    for env in ('ae2_production', 'ae2_perftest', 'ae2_curator'):
        out, err = execute_command(
            'ssh banana "source /etc/profile.d/lsf.sh;'
            ' /ebi/microarray/home/arrayexpress/%s/software/framework/restart.sh"'
            % env)
        results.append(out)
        results.append(err)
    return '\n'.join(results)
def download_soft_file(geo_acc, by='platform'): # adf_tmp_dir =os.path.join(settings.TEMP_FOLDER, geo_accession.replace('GPL', 'A-GEOD-')) adf_tmp_dir = os.path.join(settings.ADF_LOAD_DIR, geo_acc.replace('GPL', 'A-GEOD-')) if not os.path.exists(adf_tmp_dir): os.mkdir(adf_tmp_dir) file_name = geo_acc + '_family.soft.gz' host = settings.GEO_SOFT_URL % by + geo_acc url = settings.GEO_SOFT_URL % by + geo_acc + '/' + file_name print url # link = FTP(host=settings.GEO_SOFT_URL % by + geo_acc + , timeout=5) # r = requests.get(url, stream=True) # with open(os.path.join(adf_tmp_dir, file_name), 'wb') as f: # for chunk in r.iter_content(chunk_size=1024): # if chunk: # filter out keep-alive new chunks # f.write(r.content) # with closing(urllib2.urlopen(settings.GEO_SOFT_URL % by + geo_acc + '/' + file_name)) as r: # with open(os.path.join(adf_tmp_dir, file_name), 'wb') as f: # shutil.copyfileobj(r, f) # local_filename = os.path.join(adf_tmp_dir, file_name) # print host # with closing(FTP()) as ftp: # try: # ftp.connect('ftp.ncbi.nih.gov',port=21, timeout= 30 * 60) # 30 mins timeout # # print ftp.getwelcome() # ftp.login('anonymous', '') # ftp.set_pasv(True) # ftp.sock.setsockopt(socket.SOL_SOCKET, socket.SO_KEEPALIVE, 1) # ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPINTVL, 75) # ftp.sock.setsockopt(socket.IPPROTO_TCP, socket.TCP_KEEPIDLE, 60) # with open(local_filename, 'w+b') as f: # res = ftp.retrbinary('RETR %s' % url.split('ftp.ncbi.nih.gov/')[1], f.write) # # if not res.startswith('226 Transfer complete'): # # logging.error('Downloaded of file {0} is not compile.'.format(orig_filename)) # os.remove(local_filename) # return None # # # os.rename(local_filename, self.storage + filename + file_ext) # # ftp.rename(orig_filename, orig_filename + '.copied') # # # return filename + file_ext # # except: # raise # # logging.exception('Error during download from FTP') command = """wget -m %s -O %s """ % (url, os.path.join(adf_tmp_dir, file_name)) print command print 
execute_command(command) return os.path.join(adf_tmp_dir, file_name)
def main(): out, err = execute_command("""sudo -u fg_cur -H sh -c "ssh oy-ena-login-1 ls -d /fire/staging/aexpress/*/" """) # print out # print '-' * 30 # print err # print '-' * 30 dirs = out.split('\n') removed = [] not_removed = [] for d in dirs: directory = d.replace('/fire/staging/aexpress/', '').replace('/','') if directory.startswith('E-MTAB'): acc = '-'.join(directory.split('-')[:3]) print acc id = retrieve_study_id_by_acc(acc) if id: out1, err1 = execute_command("""sudo -u fg_cur -H sh -c "ssh oy-ena-login-1 rm -rf %s" """%d) if err1: not_removed.append('%s:%s' % (directory, err1)) print err1 else: removed.append(directory) if removed: send_email(from_email='AE Automation<*****@*****.**>', to_emails=[CURATION_EMAIL], # to_emails=['*****@*****.**'], subject='/fire/staging Cleaning Report', body="""Dear Curator, The directories below have been deleted from /fire/staging. These were found loaded in AE database. Thanks, AE Automation Tool. REMOVED DIRECTORY LIST: ======================= %s""" % '\n'.join(removed)) if not_removed: send_email(from_email='AE Automation<*****@*****.**>', # to_emails=['*****@*****.**', CURATION_EMAIL], to_emails=['*****@*****.**'], subject='/fire/staging Error Report', body=""" ERROR DIRECTORY LIST: ======================= %s""" % '\n'.join(not_removed))
def run(self): print self.command out, err = execute_command(self.command) print self.command print 'Out:\n', out print '-' * 30 print 'Error:\n', err print '----------------------'
def status(self): cmd = "ssh ebi-login-001 \"source /etc/profile.d/lsf.sh; bjobs %s\"" % self.job_id out, err = execute_command(cmd) lines = out.split('\n') if len(lines) == 1: return 'completed' job_line = lines[1] print lines[0] print[i for i in job_line.split(' ') if i != '']
def get_jobs_ids(): out, err = execute_command('ssh banana "source /etc/profile.d/lsf.sh;bjobs -w | grep server"') ids = [] for l in out.split('\n'): try: ids.append(int(l.split(' ')[0])) except: print l return ids
def tf_install(constants: dict, warn: bool = True, max_retries: int = 2) -> int:
    """Run through terraform installation

    Args:
        constants (dict): config dict
        warn (bool, optional): require user confirmation. Defaults to True.
        max_retries (int): Number of times to retry in case of a failure.

    Returns:
        int: return code (0 on success, 1 on failure)
    """
    tf_init = ["terraform", "init"]
    tf_orc8r = ["terraform", "apply", "-target=module.orc8r", "-auto-approve"]
    tf_secrets = [
        "terraform",
        "apply",
        "-target=module.orc8r-app.null_resource.orc8r_seed_secrets",
        "-auto-approve"]
    tf_orc8r_app = ["terraform", "apply", "-auto-approve"]
    for tf_cmd in [tf_init, tf_orc8r, tf_secrets, tf_orc8r_app]:
        cmd = " ".join(tf_cmd)
        if warn and not click.confirm(f'Do you want to continue with {cmd}?'):
            print_warning_msg(f"Skipping Command {cmd}")
            continue
        for i in range(max_retries):
            # terraform fails randomly due to timeouts
            click.echo(f"Running {tf_cmd}, iteration {i}")
            rc = execute_command(tf_cmd, cwd=constants['project_dir'])
            if rc == 0:
                break
            print_error_msg(f"Install failed when running {cmd} !!!")
            if i == (max_retries - 1):
                print_error_msg("Max retries exceeded!!!")
                return 1
        # set the kubectl after bringing up the infra
        if tf_cmd in (tf_orc8r, tf_orc8r_app):
            kubeconfigs = glob.glob(
                constants['project_dir'] + "/kubeconfig_*")
            if len(kubeconfigs) != 1:
                print_error_msg(
                    "zero or multiple kubeconfigs found %s!!!" % repr(
                        kubeconfigs))
                # Fix: previously a bare `return` (None) despite the declared
                # `-> int`; callers compare the rc, so report failure as 1.
                return 1
            kubeconfig = kubeconfigs[0]
            os.environ['KUBECONFIG'] = kubeconfig
            print_info_msg(
                'For accessing kubernetes cluster, set'
                f' `export KUBECONFIG={kubeconfig}`')
        print_success_msg(f"Command {cmd} ran successfully")
    return 0
def kill_jobs(job_ids): print 'start kill' for i in job_ids: out, err = execute_command('ssh banana "source /etc/profile.d/lsf.sh;bkill -r %d"' % i) # print out, err # print 'end kill' while True: ids = get_jobs_ids() if not ids: break time.sleep(10)
def run_playbook(play: AnsiblePlay) -> int:
    """Run ansible playbook

    Args:
        play (AnsiblePlay): object describing the current play

    Returns:
        int: return code
    """
    # With an inventory file, shell out to the ansible-playbook CLI (host
    # key checking disabled) and return its exit code directly.
    if play.inventory:
        env = {"ANSIBLE_HOST_KEY_CHECKING": "False"}
        return execute_command(
            [
                "ansible-playbook",
                "-i",
                play.inventory,
                "-e",
                json.dumps(play.extra_vars),
                "--tags",
                ",".join(play.tags),
                play.playbook,
            ],
            env=env,
        )
    # Otherwise drive ansible through its Python API: set the process-wide
    # CLI arguments, then wire loader / variable manager / inventory
    # together before executing the playbook in-process.
    context.CLIARGS = ImmutableDict(
        tags=play.tags,
        skip_tags=play.skip_tags,
        connection='smart',
        verbosity=play.verbosity,
        forks=10,
        become=None,
        become_method=None,
        become_user=None,
        check=False,
        syntax=None,
        start_at_task=None,
        diff=False,
    )
    loader = DataLoader()
    variable_manager = VariableManager(loader=loader)
    variable_manager.extra_vars.update(play.extra_vars)
    # NOTE(review): InventoryManager is created without sources, so this
    # API path runs against an empty/implicit inventory -- confirm intended.
    inventory = InventoryManager(loader=loader)
    variable_manager.set_inventory(inventory)
    passwords = {}
    pbex = PlaybookExecutor(
        playbooks=[play.playbook],
        inventory=inventory,
        variable_manager=variable_manager,
        loader=loader,
        passwords=passwords,
    )
    return pbex.run()
def copy_file(self): if settings.LOCAL_EXECUTION: cmd = 'cp %s/%s %s' % (self.ena_dir, self.file_name, self.base_dir) else: cmd = 'scp -oStrictHostKeyChecking=no oy-ena-login-1:%s/%s %s' % ( self.ena_dir, self.file_name, self.base_dir) print cmd out, err = execute_command(cmd) print out, err # out, err = execute_command('cp -r %s %s' % (ena_dir, local_dir)) # print out, err # exit() return out, err
def copy_files(ena_dir, local_dir): """Copying the data directory for the the study from ENA machine to EBI local cluster. :param ena_dir: The directory containing data files to be validated and also the SDRF file. This directory should be in `/fire/staging/aexpress/` :type ena_dir: str :param local_dir: The temp directory created on the local shared storage. This is removed after the validation ended. :type local_dir: str :return: std_out and std_err of the copy command. :rtype: :obj:`tuple` of :obj:`str` """ if not os.path.exists(local_dir): print 'creating %s' % local_dir os.mkdir(local_dir) cmd = 'scp -oStrictHostKeyChecking=no sra-login-1:%s/*.txt %s' % ( ena_dir, local_dir) print cmd if LOCAL_EXECUTION: out, err = execute_command('cp %s/*.txt %s' % (ena_dir, local_dir)) else: out, err = execute_command(cmd) print 'executed' return out, err
def import_geo_platform(geo_acc): try: soft_file = download_soft_file(geo_acc) header, table = parse_soft_file(soft_file) generate_adf(geo_acc, header, table) adf_file = os.path.join(settings.ADF_LOAD_DIR, geo_acc.replace('GPL', 'A-GEOD-'), geo_acc + '.adf.txt') print execute_command('magetab_insert_array.pl -f %s -a %s -c' % (adf_file, geo_acc.replace('GPL', 'A-GEOD-'))) # shutil.copyfile(os.path.join(settings.ADF_LOAD_DIR, geo_acc.replace('GPL', 'A-GEOD-')), ) out, err = execute_command('reset_array.pl -a A-GEOD-%s -c' % geo_acc.replace('GPL', '')) if 'error' in out.lower() or 'error' in err.lower(): msg = """Dear Curators, While trying to execute rest_array.pl for %s the we had the following output: %s %s""" % (geo_acc, out, err) send_email(from_email='AE Automation<*****@*****.**>', to_emails=['*****@*****.**', '*****@*****.**'], subject='GEO Array Error ' + geo_acc.replace('GPL', 'A-GEOD-'), body=msg) return submit_conan_task(accession=geo_acc.replace('GPL', 'A-GEOD-'), pipeline_name=CONAN_PIPELINES.load_adf) except Exception, e: msg = """The following error occurred while importing: %s %s""" % (geo_acc, str(e)) send_email(from_email='AE Automation<*****@*****.**>', to_emails=['*****@*****.**'], subject='Platform imported', body=msg)
def validate_data_files_view(request, job_id=''):
    """Django view for fastq data-file validation.

    POST: create/reset the Validate record for the posted id, then submit
    an LSF job (via ssh + bsub) that runs fastq_validators.py on the data
    directory. GET: return the stored validation report for *job_id* as
    JSON, with the human-readable status added and execution errors removed.
    """
    if request.method == "POST":
        req_id = request.POST.get('id')
        data_dir = request.POST.get('data_dir')
        v = Validate.objects.filter(job_id=req_id)
        # Empty report skeleton stored until the validator job fills it in.
        report = {
            'file_errors': {},
            'pairs_errors': [],
            'valid_files': [],
            'execution_errors': [],
            'integrity_errors': []
        }
        if v:
            # Existing record: reset it to 'P' (pending, presumably) with a
            # fresh empty report.
            v = v[0]
            v.data_dir = data_dir
            v.validation_report = json.dumps(report)
            v.status = 'P'
        else:
            v = Validate(job_id=str(req_id), data_dir=data_dir,
                         validation_report=json.dumps(report))
        v.save()
        # Absolute path of the validator script relative to this module.
        py_file = os.path.abspath(
            os.path.join(os.path.dirname(__file__), '..', '..', 'utils',
                         'validators', 'fastq_validators.py'))
        # Submit the validator as an LSF job on the cluster.
        cmd = """ssh ebi-login-001 'source /etc/profile.d/lsf.sh;bsub -u ahmed -q production-rh7 "source /nfs/production3/ma/home/arrayexpress/ae_automation/resources-rh7/bashrc;which python; python %s %s %s"' """ % (
            py_file, req_id, data_dir)
        # cmd = """export PYTHONPATH="${PYTHONPATH}:/home/gemmy/PycharmProjects/ae_automation";source /home/gemmy/automation/bin/activate; python %s %s %s """ % (
        #     py_file, req_id, data_dir)
        print cmd
        out, err = execute_command(cmd)
        print out
        print '=' * 30
        print err
        # NOTE(review): HttpResponse's 2nd positional argument is
        # content_type, not an HTTP status -- these 200/400/404 values are
        # probably not setting the status code; verify against the Django
        # version in use.
        return HttpResponse({}, 200)
    if request.method == "GET":
        # job_id = getattr(request, 'job_id')
        if not job_id:
            return HttpResponse('Bad Request', 400)
        v = Validate.objects.filter(job_id=job_id)
        if not v:
            # NOTE(review): 'Not Fount' is a typo for 'Not Found' (runtime
            # string, left unchanged here).
            return HttpResponse('Not Fount', 404)
        report = json.loads(v[0].validation_report)
        # Map the single-letter status code to its display name.
        report['status'] = [i[1] for i in STATUS if i[0] == v[0].status][0]
        # Execution errors are internal; strip them from the public report.
        del report['execution_errors']
        return HttpResponse(json.dumps(report), 200)
def tf_destroy(
    constants: dict,
    warn: bool = True,
    max_retries: int = 2,
) -> int:
    """Run through terraform cleanup

    Args:
        constants (dict): Config definitions
        warn (bool): require user confirmation. Defaults to True.
        max_retries (int): Number of times to retry in case of a failure.

    Returns:
        int: Return code
    """
    if warn and not click.confirm(
        'Do you want to continue with cleanup?',
        abort=True,
    ):
        return 0
    # Keep a backup copy of the terraform state before destroying anything.
    project_dir = constants['project_dir']
    try:
        copyfile(tf_state_fn(project_dir), tf_backup_fn(project_dir))
    except OSError:
        print_error_msg('Unable to backup terraform state')
        return 1
    destroy_cmd = ["terraform", "destroy", "-auto-approve"]
    cmd_text = " ".join(destroy_cmd)
    # terraform destroy can fail transiently, so retry a bounded number of
    # times.
    for attempt in range(max_retries):
        click.echo(f"Running {cmd_text}, iteration {attempt}")
        if execute_command(destroy_cmd, cwd=project_dir) == 0:
            return 0
        print_error_msg("Destroy Failed!!!")
        if attempt == max_retries - 1:
            print_error_msg(
                "Max retries exceeded!!! Attempt cleaning up using"
                " 'orcl cleanup raw' subcommand",
            )
            return 1
    return 0
def upgrade(ctx):
    """ Upgrade existing orc8r deployment """
    tf_cmds = [
        ["terraform", "init", "--upgrade"],
        ["terraform", "refresh"],
        ["terraform", "apply", "-auto-approve"],
    ]
    if ctx.invoked_subcommand is None:
        # Offer to run the precheck subcommand before touching anything.
        if click.confirm('Do you want to run upgrade prechecks?'):
            ctx.invoke(precheck)
        else:
            print_warning_msg("Skipping upgrade prechecks")
        joined_cmds = "\n".join(" ".join(c) for c in tf_cmds)
        click.echo(f"Following commands will be run during upgrade\n{joined_cmds}")
        # Run each terraform step only with explicit confirmation; abort
        # the whole upgrade on the first failing command.
        for tf_cmd in tf_cmds:
            if not click.confirm('Do you want to continue with %s?' % " ".join(tf_cmd)):
                continue
            if execute_command(tf_cmd) != 0:
                print_error_msg("Upgrade Failed!!!")
                return
def main():
    """One-off repair script: for a hard-coded list of corrupted E-MTAB
    experiments, rewrite the SDRF section of the Annotare MAGE-TAB file so
    paired-end rows get distinct _1/_2 fastq.gz names, then reset and
    reload each changed experiment.
    """
    dirs = [
        d for d in os.listdir(TEMP_FOLDER) if d.startswith('E-MTAB-') \
        and os.path.isdir(os.path.join(TEMP_FOLDER, d))
    ]
    # print dirs
    # Experiments previously identified as corrupted (see the commented-out
    # detection loop below).
    corrupted = [
        'E-MTAB-3800', 'E-MTAB-3964', 'E-MTAB-4002', 'E-MTAB-4069',
        'E-MTAB-4082', 'E-MTAB-4096', 'E-MTAB-4159', 'E-MTAB-4549',
        'E-MTAB-4694', 'E-MTAB-4686', 'E-MTAB-4723', 'E-MTAB-4846',
        'E-MTAB-4264', 'E-MTAB-5044', 'E-MTAB-5169', 'E-MTAB-5362'
    ]
    errors = []
    # for d in dirs:
    #     print 'working on: ', d
    #     combined_path = os.path.join(TEMP_FOLDER, d, 'combined.txt')
    #     if not os.path.exists(combined_path):
    #         continue
    #     try:
    #         sdrf = SdrfCollection(file_path=combined_path, combined=True)
    #         paired = [r for r in sdrf.rows if r.is_paired]
    #         if paired:
    #             r = paired[0]
    #             if not(r.ena_run+'_1' in r.fastq_url or r.ena_run+'_1' in r.fastq_url):
    #                 corrupted.append(d)
    #     except Exception, e:
    #         errors.append([d,e])
    print corrupted
    print errors
    for exp in corrupted:
        exp_sub_tracking_id = retrieve_experiment_id_by_accession(exp)
        print 'MAGE-TAB_' + str(exp_sub_tracking_id)
        if not exp_sub_tracking_id:
            # NOTE(review): the format string below has no % argument, so
            # it prints the literal '%s ...'; likely intended `% exp`.
            print "%s doesn't exist in subs tracking"
            continue
        mage_tab = os.path.join(ANNOTARE_DIR,
                                'MAGE-TAB_' + str(exp_sub_tracking_id))
        try:
            idf_file = extract_idf_file_name(mage_tab, exp)
        except Exception, e:
            print e
            continue
        print idf_file
        f = open(idf_file, 'r')
        lines = f.readlines()
        f.close()
        is_sdrf = False  # becomes True once the '[SDRF]' marker is seen
        replace = '_1.fastq.gz'  # NOTE(review): unused local
        write_lines = []  # output lines for the rewritten file
        d = collections.OrderedDict()  # ENA run id -> its SDRF data lines
        run_index = -1  # column index of Comment[ENA_RUN] in the header
        changed = False
        for line in lines:
            if line.strip() == '[SDRF]':
                is_sdrf = True
                write_lines.append(line.strip())
            if not is_sdrf:
                # IDF part: copied through unchanged.
                write_lines.append(line.strip())
            else:
                if line.startswith('Source Name'):
                    # SDRF header row: copy it and locate the ENA_RUN column.
                    write_lines.append(line.strip())
                    parts = line.strip().split('\t')
                    run_index = parts.index('Comment[ENA_RUN]')
                    continue
                # Group SDRF data rows by their ENA run id.
                run = line.split('\t')[run_index]
                if run in d.keys():
                    d[run].append(line.strip())
                else:
                    d[run] = [line.strip()]
        for k, v in d.items():
            if len(v) > 1:
                # Multiple rows for one run: paired-end. If _1/_2 names are
                # already present, the file is damaged differently -- stop.
                if '_1.fastq.gz' in ' '.join(v) or '_2.fastq.gz' in ' '.join(
                        v):
                    print colored.red('DAMAGED: ' + 'MAGE-TAB_' + str(exp_sub_tracking_id))
                    break
                changed = True
                v[0] = v[0].replace('.fastq.gz', '_1.fastq.gz')
                v[1] = v[1].replace('.fastq.gz', '_2.fastq.gz')
            # NOTE(review): indentation reconstructed from a collapsed
            # source line -- this append is taken to be per-run, outside the
            # len(v) > 1 check, so single rows are also written back.
            write_lines += v
        if changed:
            # Write the fixed file back, then reset and (after AE export)
            # reload the experiment.
            f = open(idf_file, 'w')
            f.write('\n'.join(write_lines))
            f.close()
            out, err = execute_command('reset_experiment.pl -c ' + 'MAGE-TAB_' + str(exp_sub_tracking_id))
            print 'out: ', out
            print colored.red('error: ' + str(err))
            if not err:
                wait_for_ae_export(exp, 'MAGE-TAB_' + str(exp_sub_tracking_id))
                reload_experiment(exp)