def __init__(self, *args, **kwrds):
    """Initialize this alignment step: concurrency limits and chunk directories.

    Reads ``additional_attributes['chunks_in_flight']`` (required) to bound how
    many chunks are processed concurrently, and creates local/S3 paths where
    per-chunk results are staged.
    """
    PipelineStep.__init__(self, *args, **kwrds)
    # Bound the number of chunks dispatched at once; the limit comes from the
    # step's configuration and will raise KeyError if absent.
    self.chunks_in_flight = threading.Semaphore(self.additional_attributes['chunks_in_flight'])
    self.chunks_result_dir_local = os.path.join(self.output_dir_local, "chunks")
    self.chunks_result_dir_s3 = os.path.join(self.output_dir_s3, "chunks")
    # Cross-process cap on simultaneous chunk uploads.
    self.iostream_upload = multiprocessing.Semaphore(MAX_CONCURRENT_CHUNK_UPLOADS)
    # Use command.make_dirs instead of shelling out with a string-interpolated
    # `mkdir -p`: consistent with the other steps in this file and safe for
    # paths containing shell metacharacters.
    command.make_dirs(self.chunks_result_dir_local)
def __init__(self, *args, **kwrds):
    """Set up alignment configuration, chunk result directories, and the
    batch job description bucket for this step."""
    PipelineStep.__init__(self, *args, **kwrds)
    # TODO: (tmorse) remove service compatibility https://jira.czi.team/browse/IDSEQ-2568
    attrs = self.additional_attributes
    legacy_name = attrs.get("service")
    self.alignment_algorithm = attrs.get("alignment_algorithm", legacy_name)
    assert self.alignment_algorithm in ("gsnap", "rapsearch2")
    # Cap the number of chunks processed concurrently.
    self.chunks_in_flight_semaphore = threading.Semaphore(MAX_CHUNKS_IN_FLIGHT)
    self.chunks_result_dir_local = os.path.join(self.output_dir_local, "chunks")
    self.chunks_result_dir_s3 = os.path.join(self.output_dir_s3, "chunks")
    self.batch_job_desc_bucket = get_batch_job_desc_bucket()
    command.make_dirs(self.chunks_result_dir_local)
def __init__(self, *args, **kwrds):
    """Configure alignment settings, local-vs-remote run mode, and chunk
    result directories for this step."""
    PipelineStep.__init__(self, *args, **kwrds)
    attrs = self.additional_attributes
    self.alignment_algorithm = attrs.get("alignment_algorithm")
    assert self.alignment_algorithm in ("gsnap", "rapsearch2")
    # Cap the number of chunks processed concurrently.
    self.chunks_in_flight_semaphore = threading.Semaphore(MAX_CHUNKS_IN_FLIGHT)
    self.chunks_result_dir_local = os.path.join(self.output_dir_local, "chunks")
    self.chunks_result_dir_s3 = os.path.join(self.output_dir_s3, "chunks")
    self.batch_job_desc_bucket = get_batch_job_desc_bucket()
    self.is_local_run = bool(attrs.get("run_locally"))
    self.genome_name = attrs.get("genome_name", "nt_k16")
    self.index = self.additional_files.get("index")
    # An explicit index is mandatory for local runs and forbidden otherwise.
    if self.is_local_run:
        assert self.index, "local runs require an index to be passed in"
    else:
        assert not self.index, "passing in an index is not supported for remote runs"
    command.make_dirs(self.chunks_result_dir_local)
def fetch_input_files_from_s3(input_files, input_dir_s3, result_dir_local):
    """Download each named input file from S3 into the local result directory,
    writing a timestamped done-file next to each successful download.

    Raises RuntimeError when a fetch returns nothing (the S3 object likely
    does not exist).
    """
    for name in input_files:
        s3_file = os.path.join(input_dir_s3, name)
        local_path = os.path.join(result_dir_local, name)
        dest_dir = os.path.dirname(local_path)
        command.make_dirs(dest_dir)
        # copy the file over
        fetched = idseq_dag.util.s3.fetch_from_s3(s3_file, dest_dir, allow_s3mi=True)
        if not fetched:
            raise RuntimeError(f"{s3_file} likely doesn't exist")
        # write the done_file
        done_file = PipelineStep.done_file(local_path)
        fmt_now = datetime.datetime.now(tz=pytz.UTC).strftime("%a %b %e %H:%M:%S %Z %Y")
        command.write_text_to_file(fmt_now, done_file)
def fetch_input_files_from_s3(input_files, input_dir_s3, result_dir_local):
    """Download each named input file from S3 into the local result directory,
    writing a timestamped done-file next to each successful download.

    Raises RuntimeError when a fetch returns nothing (the S3 object likely
    does not exist).
    """
    for f in input_files:
        s3_file = os.path.join(input_dir_s3, f)
        local_file = os.path.join(result_dir_local, f)
        local_dir = os.path.dirname(local_file)
        # Create the destination directory without shelling out: the previous
        # string-interpolated `mkdir -p` broke on paths containing shell
        # metacharacters, and the other implementation of this helper in the
        # file already uses command.make_dirs.
        command.make_dirs(local_dir)
        # copy the file over
        output_file = idseq_dag.util.s3.fetch_from_s3(s3_file, local_dir, allow_s3mi=True)
        if output_file:
            # write the done_file with a `date(1)`-style UTC timestamp, without
            # spawning a shell (replaces the unsafe `date > %s` redirect).
            done_file = PipelineStep.done_file(local_file)
            fmt_now = datetime.datetime.now(tz=datetime.timezone.utc).strftime("%a %b %e %H:%M:%S %Z %Y")
            command.write_text_to_file(fmt_now, done_file)
        else:
            raise RuntimeError(f"{s3_file} likely doesn't exist")