def build_dts_path(self, path, dts_registry, input_type):
    for prefix in dts_registry:
        if path.startswith(prefix):
            if not self.bucket:
                raise RuntimeError('Transfer bucket shall be set for DTS locations')
            relative_path = path.replace(prefix, '')
            s3_path = self.join_paths(self.bucket, relative_path)
            if input_type == ParameterType.OUTPUT_PARAMETER:
                local_path = self.analysis_dir
            else:
                local_dir = self.get_local_dir(input_type)
                local_path = self.join_paths(local_dir, relative_path)
            Logger.info('Found remote {} path {} matching DTS prefix {}. '
                        'It will be uploaded to bucket path {} and localized {} {}.'
                        .format(input_type, path, prefix, s3_path,
                                'from' if input_type == ParameterType.OUTPUT_PARAMETER else 'to',
                                local_path),
                        task_name=self.task_name)
            return LocalizedPath(path, s3_path, local_path, PathType.DTS, prefix=prefix)
    raise RuntimeError('Remote path %s does not match any of DTS prefixes.' % path)
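# A minimal, self-contained sketch of the prefix-to-bucket mapping performed
# above. The registry, bucket, and path values are hypothetical, and only the
# string handling is mirrored, not LocalizedPath construction.
def _demo_dts_mapping():
    dts_registry = {'dts://ngs-archive/': 'https://dts.example/api'}  # hypothetical registry
    bucket = 's3://transfer-bucket'                                   # hypothetical bucket
    path = 'dts://ngs-archive/run1/sample_R1.fastq.gz'
    for prefix in dts_registry:
        if path.startswith(prefix):
            relative_path = path.replace(prefix, '')
            # -> s3://transfer-bucket/run1/sample_R1.fastq.gz
            print('%s/%s' % (bucket, relative_path))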
def find_files(self, recursive=True):
    Logger.info("Starting parsing input directory: {}.".format(self.folder),
                task_name=self.TASK_NAME)
    all_files = bucket.ls_s3(self.folder, self.MAX_ATTEMPTS, recursive=recursive)
    patterns_files = {}
    if recursive:
        all_folders = self.get_folders(all_files)
        for folder in all_folders:
            self.check_file_match(self.samples, folder, patterns_files)
    for file in all_files:
        # The recursive version of s3 ls returns paths from the bucket root,
        # while the non-recursive one returns paths relative to the requested folder.
        if recursive:
            file_name = file[len(self.get_path_without_bucket()) - 1:]
        else:
            file_name = file
        self.check_file_match(self.samples, file_name, patterns_files)
    Logger.info('Collected batch files: {}.'.format(str(patterns_files)),
                task_name=self.TASK_NAME)
    if len(patterns_files) != len(self.samples):
        self.fail_task("Failed to find all parameters for all samples.")
    Logger.success('Successfully collected batch files: {}.'.format(str(patterns_files)),
                   task_name=self.TASK_NAME)
    return patterns_files
def run(self):
    analysis_folder = os.environ['ANALYSIS_FOLDER']
    machine_run_folder = os.environ['MACHINE_RUN_FOLDER']
    sample_sheet = os.environ['SAMPLE_SHEET']
    Logger.info('Starting analytical processing for sample sheet %s' % sample_sheet,
                task_name=self.task)
    samples = SampleSheetParser(
        sample_sheet, [SAMPLE_ID, SAMPLE_NAME, SAMPLE_PROJECT]).parse_sample_sheet()
    launched_runs = {}
    for sample in samples:
        Logger.info('Starting "%s" sample processing.' % sample[SAMPLE_NAME],
                    task_name=self.task)
        launched_runs[sample[SAMPLE_NAME]] = self.__run_sample(
            sample[SAMPLE_NAME], analysis_folder, machine_run_folder)
    failed_runs = self.__wait_runs_completion(launched_runs)
    if failed_runs:
        for sample, run_id in failed_runs.iteritems():
            Logger.fail('Processing failed for sample "%s". '
                        'Check run %d logs for more information.' % (sample, run_id),
                        task_name=self.task)
        sys.exit(1)
    Logger.success("All samples processed successfully.", task_name=self.task)
def __run_sample(self, sample, analysis_folder, machine_run_folder):
    Logger.info('Launching analytical pipeline "%s" with version "%s" for sample %s.'
                % (self.pipeline['name'], self.version, sample),
                task_name=self.task)
    read1, read2 = self.__fetch_reads(sample, analysis_folder, machine_run_folder)
    pipeline_params = {
        'SAMPLE': {'value': sample},
        'READ1': {'value': read1, 'type': 'input'},
        'READ2': {'value': read2, 'type': 'input'},
        'OUTPUT_FOLDER': {'value': analysis_folder, 'type': 'output'}
    }
    run = self.api.launch_pipeline(self.pipeline['id'], self.version, pipeline_params,
                                   instance=self.instance_type, disk=self.instance_disk,
                                   parent_run_id=os.environ['RUN_ID'])
    return run['id']
def await_workers_start(self, nodes_number, parent_id):
    if nodes_number == 0:
        Logger.success('No workers requested. Processing will run on the master node.',
                       task_name=self.task_name)
        return []
    try:
        Logger.info('Waiting for {} worker node(s)'.format(nodes_number),
                    task_name=self.task_name)
        # TODO: we should probably check several times, as workers may not be submitted yet
        worker_ids = self.get_workers(parent_id)
        total_number = len(worker_ids)
        started = []
        # 60 attempts with a 10-second delay: approximately 10 minutes
        attempts = 60
        while len(started) != total_number and attempts != 0:
            started = self.get_started_workers(worker_ids)
            attempts -= 1
            Logger.info('Started {} worker(s) of {} total'.format(len(started), total_number),
                        task_name=self.task_name)
            time.sleep(10)
        if len(started) != total_number:
            raise RuntimeError('Failed to start all workers')
        Logger.success('All workers started', task_name=self.task_name)
        return started
    except Exception as e:
        self.fail_task(e.message)
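# The wait loop above (and the master-node wait below) is a bounded-polling
# pattern; a generic sketch of the same idea follows. The helper and its
# arguments are illustrative, not part of the pipeline API.
import time

def poll_until(condition, attempts, delay_seconds):
    # Call condition() up to 'attempts' times, sleeping between calls;
    # return the first truthy result, or None when attempts are exhausted.
    while attempts > 0:
        result = condition()
        if result:
            return result
        attempts -= 1
        time.sleep(delay_seconds)
    return None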
def perform_transfer(self, path, source, destination, cluster, upload, rules=None):
    Logger.info('Uploading files from {} to {}'.format(source, destination),
                self.task_name)
    if path.type == PathType.HTTP_OR_FTP or cluster is None or self.is_file(source):
        if upload or self.rules is None:
            S3Bucket().pipe_copy(source, destination, TRANSFER_ATTEMPTS)
        else:
            S3Bucket().pipe_copy_with_rules(source, destination,
                                            TRANSFER_ATTEMPTS, self.rules)
    else:
        common_folder = os.path.join(os.environ['SHARED_WORK_FOLDER'], 'transfer')
        applied_rules = None if upload else rules
        chunks = self.split_source_into_chunks(cluster, source, destination,
                                               common_folder, applied_rules)
        transfer_pool = Pool(len(chunks))
        transfer_pool.map(transfer_async, chunks)
        shutil.rmtree(common_folder, ignore_errors=True)
def await_master_start(self, master_id, task_name):
    try:
        Logger.info('Waiting for master node (run id: {}), task: {}'.format(
            master_id, task_name), task_name=self.task_name)
        # 8640 attempts with a 10-second delay: approximately 1 day. We do not
        # really need this timeout, since workers are killed automatically if
        # something goes wrong with the master, but it guarantees that this
        # task is killed eventually in any unpredictable case.
        attempts = 8640
        master = None
        Logger.info('Waiting for master node ...', task_name=self.task_name)
        while not master and attempts > 0:
            master = self.get_master_node_info(master_id, task_name)
            attempts -= 1
            time.sleep(10)
        if not master:
            raise RuntimeError('Failed to attach to master node')
        Logger.success('Attached to master node (run id {})'.format(master_id),
                       task_name=self.task_name)
        return master
    except Exception as e:
        self.fail_task(e.message)
def execute_mount(self, command, params):
    result = common.execute_cmd_command(command, silent=True)
    if result == 0:
        Logger.info('--> {path} mounted to {mount}'.format(**params),
                    task_name=self.task_name)
    else:
        Logger.warn('--> Failed mounting {path} to {mount}'.format(**params),
                    task_name=self.task_name)
def read(cls, report_file, task): Logger.info("Reading Flagstats report from file %s." % report_file, task_name=task) with open(report_file, 'r') as report: line_index = 0 for line in report.readlines(): if line_index < 2: line_index += 1 continue return int(line.split('+')[0].strip())
def __fill_trim_data(self, sample_metrics):
    Logger.info("Fetching data from FASTQC reports after trimming.", task_name=self.task)
    r1_total_reads, r1_poor_reads, r1_gc, r1_read_length = FastQCReader \
        .read(os.path.join(self.folder, "FastQC_Trimmed",
                           self.file_suffix + ".Trimmomatic.R1.trimmed_fastqc.zip"),
              self.task)
    r2_total_reads, r2_poor_reads, r2_gc, r2_read_length = FastQCReader \
        .read(os.path.join(self.folder, "FastQC_Trimmed",
                           self.file_suffix + ".Trimmomatic.R2.trimmed_fastqc.zip"),
              self.task)
    sample_metrics["ReadsAfterTrim"] = r1_total_reads + r2_total_reads
def __fill_starting_data(self, sample_metrics):
    Logger.info("Fetching data from FASTQC Initial reports.", task_name=self.task)
    r1_total_reads, r1_poor_reads, r1_gc, r1_read_length = FastQCReader \
        .read(os.path.join(self.folder, "FastQC_Initial",
                           self.sample + "_R1_fastqc.zip"), self.task)
    r2_total_reads, r2_poor_reads, r2_gc, r2_read_length = FastQCReader \
        .read(os.path.join(self.folder, "FastQC_Initial",
                           self.sample + "_R2_fastqc.zip"), self.task)
    sample_metrics["StartingReads"] = r1_total_reads + r2_total_reads
    sample_metrics["QCFailedReads"] = r1_poor_reads + r2_poor_reads
    sample_metrics["ReadLength"] = r1_read_length
    sample_metrics["GC"] = r1_gc
def read(cls, report_file, task): Logger.info("Reading Coverage report from file %s." % report_file, task_name=task) total_bases = 0 total_coverage = 0 with open(report_file, 'r') as report: for line in report.readlines(): if line: total_bases += 1 total_coverage += int(line.split("\t")[2]) return 0 if total_bases == 0 else total_coverage / total_bases
def __wait_run_completion(self, run_id):
    current_status = self.api.load_run(run_id)['status']
    while current_status == 'RUNNING':
        Logger.info('Run %d status is %s. Waiting for completion...'
                    % (run_id, current_status), task_name=self.task)
        time.sleep(60)
        current_status = self.api.load_run(run_id)['status']
    Logger.info('Run %d finished with status %s' % (run_id, current_status),
                task_name=self.task)
    return current_status
def fetch_dts_registry(self):
    result = {}
    try:
        dts_data = self.api.load_dts_registry()
    except BaseException as e:
        Logger.info("DTS is not available: %s" % e.message, task_name=self.task_name)
        return result
    for registry in dts_data:
        for prefix in registry['prefixes']:
            result[prefix] = registry['url']
    return result
def run(self): Logger.info("Reading %s file to collect variants metrics." % self.vcf_file, task_name=self.task) with open(self.output_file, 'w+') as output, open(self.vcf_file, 'r') as vcf: self.__write_header(output) lines_started = False for vcf_line in vcf.readlines(): if lines_started and vcf_line: self.__process_variant(output, vcf_line) elif vcf_line.startswith("#CHROM"): lines_started = True
def _build_remote_path(self, path, input_type, path_type):
    if input_type == ParameterType.OUTPUT_PARAMETER:
        local_path = self.analysis_dir
    else:
        remote = urlparse.urlparse(path)
        relative_path = path.replace('%s://%s' % (remote.scheme, remote.netloc), '')
        local_dir = self.get_local_dir(input_type)
        local_path = self.join_paths(local_dir, relative_path)
    Logger.info('Found %s %s path %s. It will be localized to %s.'
                % (path_type.lower(), input_type, path, local_path),
                task_name=self.task_name)
    return LocalizedPath(path, path, local_path, path_type)
def run(self, upload):
    Logger.info('Starting localization of remote data...', task_name=self.task_name)
    try:
        dts_registry = self.fetch_dts_registry()
        parameter_types = {ParameterType.INPUT_PARAMETER, ParameterType.COMMON_PARAMETER} \
            if upload else {ParameterType.OUTPUT_PARAMETER}
        remote_locations = self.find_remote_locations(dts_registry, parameter_types)
        if len(remote_locations) == 0:
            Logger.info('No remote sources found', task_name=self.task_name)
        else:
            dts_locations = [path for location in remote_locations
                             for path in location.paths if path.type == PathType.DTS]
            if upload:
                self.transfer_dts(dts_locations, dts_registry, upload)
                self.localize_data(remote_locations, upload)
                if self.report_file:
                    with open(self.report_file, 'w') as report:
                        for location in remote_locations:
                            env_name = location.env_name
                            original_value = location.original_value
                            localized_value = location.delimiter.join(
                                [path.local_path for path in location.paths])
                            report.write('export {}="{}"\n'.format(
                                env_name, localized_value))
                            report.write('export {}="{}"\n'.format(
                                env_name + '_ORIGINAL', original_value))
            else:
                rule_patterns = DataStorageRule.read_from_file(self.rules)
                rules = []
                for rule in rule_patterns:
                    if rule.move_to_sts:
                        rules.append(rule.file_mask)
                self.localize_data(remote_locations, upload, rules=rules)
                self.transfer_dts(dts_locations, dts_registry, upload, rules=rules)
        Logger.success('Finished localization of remote data', task_name=self.task_name)
    except BaseException as e:
        Logger.fail('Localization of remote data failed due to exception: %s' % e.message,
                    task_name=self.task_name)
        exit(1)
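# A sketch of the report the upload branch above writes: a shell snippet that
# maps each parameter to its localized path plus an '_ORIGINAL' companion
# (parameter name and paths hypothetical):
def _demo_localization_report():
    env_name = 'READ1'
    original_value = 's3://data-bucket/sample_R1.fastq.gz'
    localized_value = '/common/input/sample_R1.fastq.gz'
    print('export {}="{}"'.format(env_name, localized_value))
    print('export {}="{}"'.format(env_name + '_ORIGINAL', original_value))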
def run(self, worker_ids, status):
    try:
        Logger.info('Shutting down {} node(s)'.format(len(worker_ids)),
                    task_name=self.task_name)
        api = PipelineAPI(os.environ['API'], 'logs')
        for pod in worker_ids:
            Logger.info('Shutting down node {} with status {}.'.format(
                pod.run_id, status.status), task_name=self.task_name)
            api.update_status(pod.run_id, status)
        Logger.success('Successfully scaled cluster down', task_name=self.task_name)
    except Exception as e:
        self.fail_task(e.message)
def read(cls, report_file, task): Logger.info("Reading InsertSizeMetrics report from file %s." % report_file, task_name=task) with open(report_file, 'r') as report: data_started = False for line in report.readlines(): if data_started and line: chunks = line.split("\t") # MEDIAN_INSERT_SIZE return int(chunks[0]) elif line.startswith("MEDIAN_INSERT_SIZE"): data_started = True return 0
def launch(self, instance_size, instance_disk, docker_image, cmd, wait_finish=False):
    running = 0
    Logger.info('Starting {} sample(s) scheduling.'.format(len(self.run_dirs)),
                task_name=self.TASK_NAME)
    for folder in self.run_dirs:
        self.launch_pipeline(folder, self.param_names, instance_size, instance_disk,
                             docker_image, cmd)
        running = running + 1
        Logger.info('Processing {} sample(s).'.format(running), task_name=self.TASK_NAME)
    Logger.info('Successfully scheduled {} sample(s).'.format(running),
                task_name=self.TASK_NAME)
    if wait_finish:
        Logger.info('Waiting for all runs to finish.', task_name=self.TASK_NAME)
        self.wait_all_samples_finish()
        Logger.success('All child pipelines successfully finished.',
                       task_name=self.TASK_NAME)
def run(self, worker_pods, path, run_id):
    try:
        Logger.info('Creating hostfile {}'.format(path), task_name=self.task_name)
        with open(path, 'w') as file:
            master_pod = self.kube.get_pod(run_id)
            file.write('{}\n'.format(master_pod.name))
            for pod in worker_pods:
                file.write('{}\n'.format(pod.name))
                self.add_to_hosts(pod)
        Logger.success('Successfully created hostfile {}'.format(path),
                       task_name=self.task_name)
    except Exception as e:
        self.fail_task(e.message)
def read(cls, report_file, task): Logger.info("Reading MarkDuplicates report from file %s." % report_file, task_name=task) with open(report_file, 'r') as report: data_started = False for line in report.readlines(): if data_started and line: chunks = line.split("\t") # UNPAIRED_READ_DUPLICATES READ_PAIR_DUPLICATES READ_PAIR_OPTICAL_DUPLICATES return int( chunks[5]) + 2 * int(chunks[6]) + 2 * int(chunks[7]) elif line.startswith("LIBRARY"): data_started = True return 0
def find_files(self, recursive=False):
    Logger.info("Starting parsing input directory: {}.".format(self.folder),
                task_name=self.TASK_NAME)
    all_files = bucket.ls_s3(self.folder, self.MAX_ATTEMPTS, recursive=recursive)
    result = [[] for x in xrange(len(all_files))]
    index = 0
    for file in all_files:
        result[index].append(os.path.join(self.folder, file))
        index = index + 1
    Logger.success("Found {} directories to process.".format(len(result)),
                   task_name=self.TASK_NAME)
    return result
def __wait_runs_completion(self, launched_runs):
    finished = {}
    failed = {}
    while True:
        for sample, run_id in launched_runs.iteritems():
            current_status = self.api.load_run(run_id)['status']
            Logger.info('Processing sample: %s. Run %d status is %s.'
                        % (sample, run_id, current_status), task_name=self.task)
            if current_status != 'RUNNING':
                finished[sample] = run_id
                if current_status != 'SUCCESS':
                    failed[sample] = run_id
        if len(finished) == len(launched_runs):
            Logger.info("Processing for all samples completed.", task_name=self.task)
            return failed
        time.sleep(60)
def launch(self, nodes, instance_size, instance_disk, docker_image, cmd,
           wait_finish=False):
    running = 0
    scheduled = 0
    Logger.info('Starting {} sample(s) scheduling.'.format(len(self.samples)),
                task_name=self.TASK_NAME)
    while scheduled != len(self.samples):
        if running < nodes:
            sample = self.samples[scheduled]
            self.launch_pipeline(self.run_files[sample[SAMPLE_NAME]], self.param_names,
                                 instance_size, instance_disk, docker_image, cmd,
                                 sample=sample)
            scheduled = scheduled + 1
            running = running + 1
        else:
            Logger.info('Processing {} sample(s).'.format(running),
                        task_name=self.TASK_NAME)
            Logger.info('Total scheduled {} sample(s).'.format(scheduled),
                        task_name=self.TASK_NAME)
            time.sleep(self.POLL_TIMEOUT)
            running = self.get_running_samples()
    while self.child_run_active():
        Logger.info('Waiting for child run {} to finish.'.format(self.child_id),
                    task_name=self.TASK_NAME)
        time.sleep(self.POLL_TIMEOUT)
    if wait_finish:
        Logger.info('Waiting for all runs to finish.', task_name=self.TASK_NAME)
        self.wait_all_samples_finish()
    Logger.success('Successfully scheduled {} sample(s).'.format(scheduled),
                   task_name=self.TASK_NAME)
def transfer_dts(self, dts_locations, dts_registry, upload, rules=None):
    grouped_paths = {}
    for path in dts_locations:
        if path.prefix not in grouped_paths:
            grouped_paths[path.prefix] = [path]
        else:
            grouped_paths[path.prefix].append(path)
    for prefix, paths in grouped_paths.iteritems():
        dts_url = dts_registry[prefix]
        Logger.info('Uploading {} paths using DTS service {}'.format(len(paths), dts_url),
                    self.task_name)
        dts_client = DataTransferServiceClient(dts_url, self.token, self.api_url,
                                               self.token, 10)
        dts_client.transfer_data(
            [self.create_dts_path(path, upload, rules) for path in paths],
            self.task_name)
def launch(self, nodes, instance_size, instance_disk, docker_image, cmd,
           wait_finish=False):
    running = 0
    current_index = 0
    Logger.info('Starting {} sample(s) scheduling.'.format(self.samples_number),
                task_name=self.TASK_NAME)
    while current_index != self.samples_number:
        if running < nodes:
            self.launch_pipeline(self.run_files[current_index], self.param_names,
                                 instance_size, instance_disk, docker_image, cmd)
            current_index = current_index + 1
            running = running + 1
        else:
            Logger.info('Processing {} sample(s).'.format(running),
                        task_name=self.TASK_NAME)
            Logger.info('Total scheduled {} sample(s).'.format(current_index),
                        task_name=self.TASK_NAME)
            time.sleep(self.POLL_TIMEOUT)
            running = self.get_running_samples()
    while self.child_run_active():
        Logger.info('Waiting for child run {} to finish.'.format(self.child_id),
                    task_name=self.TASK_NAME)
        time.sleep(self.POLL_TIMEOUT)
    if wait_finish:
        Logger.info('Waiting for all runs to finish.', task_name=self.TASK_NAME)
        self.wait_all_samples_finish()
    Logger.success('Successfully scheduled {} sample(s).'.format(current_index),
                   task_name=self.TASK_NAME)
def check_file_match(self, samples, file_name, patterns_files):
    for sample in samples:
        sample_name = sample[SAMPLE_NAME]
        patterns = self.format_sample_patterns(sample, self.patterns)
        exclude = self.format_sample_patterns(sample, self.exclude_patterns)
        for pattern_name, glob in patterns.iteritems():
            if self.match_patterns(file_name, glob):
                if pattern_name in exclude:
                    # Use a separate name for the exclude globs so that the
                    # 'exclude' dict is not clobbered for later patterns.
                    exclude_glob = exclude[pattern_name]
                    if self.match_patterns(file_name, exclude_glob):
                        Logger.info("Skipping filename '{}' since it matches "
                                    "exclude patterns '{}'.".format(file_name,
                                                                    str(exclude_glob)))
                        continue
                if sample_name not in patterns_files:
                    patterns_files[sample_name] = {}
                if pattern_name not in patterns_files[sample_name]:
                    patterns_files[sample_name][pattern_name] = []
                patterns_files[sample_name][pattern_name].append(
                    os.path.join(self.folder, file_name))
def transfer_async(chunk):
    if not chunk.files:
        Logger.info('Skipping empty chunk', task_name=chunk.task_name)
        return
    file_list_name = ''.join(
        random.choice(string.ascii_lowercase) for _ in range(10)) + '.list'
    file_list_path = os.path.join(chunk.common_folder, file_list_name)
    with open(file_list_path, 'w') as file_list:
        for file in chunk.files:
            file_list.write('%s\t%d\n' % (file.filename, file.size))
    bucket = S3Bucket()
    cmd = bucket.build_pipe_cp_command(chunk.source, chunk.destination,
                                       file_list=file_list_path, include=chunk.rules)
    if chunk.hostname != 'localhost':
        cmd = '(ssh %s API=$API API_TOKEN=$API_TOKEN RUN_ID=$RUN_ID "%s") & _CHUNK_PID=$! && wait $_CHUNK_PID' % \
              (chunk.hostname, cmd)
    Logger.info('Executing chunk transfer with cmd: %s' % cmd, task_name=chunk.task_name)
    bucket.execute_command(cmd, TRANSFER_ATTEMPTS)
def find_remote_locations(self, dts_registry, parameter_types):
    remote_locations = []
    for env in os.environ:
        param_type_name = env + '_PARAM_TYPE'
        if os.environ[env] and param_type_name in os.environ:
            param_type = os.environ[param_type_name]
            if param_type in parameter_types:
                value = os.environ[env].strip()
                Logger.info('Found remote parameter %s with type %s' % (value, param_type),
                            task_name=self.task_name)
                original_paths = [value]
                delimiter = ''
                for supported_delimiter in VALUE_DELIMITERS:
                    if value.find(supported_delimiter) != -1:
                        original_paths = re.split(supported_delimiter, value)
                        delimiter = supported_delimiter
                        break
                paths = []
                for path in original_paths:
                    resolved_path = replace_all_system_variables_in_path(path).strip()
                    if self.match_dts_path(resolved_path, dts_registry):
                        paths.append(self.build_dts_path(resolved_path, dts_registry,
                                                         param_type))
                    elif self.match_s3_path(resolved_path):
                        paths.append(self.build_s3_path(resolved_path, param_type))
                    elif self.match_ftp_or_http_path(resolved_path):
                        paths.append(self.build_ftp_or_http_path(resolved_path,
                                                                 param_type))
                if len(paths) != 0:
                    remote_locations.append(
                        RemoteLocation(env, value, param_type, paths, delimiter))
    return remote_locations
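# A sketch of the environment convention the scan above relies on: each remote
# parameter arrives as a value variable plus a '<NAME>_PARAM_TYPE' companion
# describing its role (variable names and values here are hypothetical):
def _demo_env_convention():
    import os
    os.environ['READ1'] = 's3://data-bucket/sample_R1.fastq.gz'
    os.environ['READ1_PARAM_TYPE'] = 'input'
    for env in list(os.environ):
        param_type_name = env + '_PARAM_TYPE'
        if os.environ[env] and param_type_name in os.environ:
            # -> READ1 (input): s3://data-bucket/sample_R1.fastq.gz
            print('{} ({}): {}'.format(env, os.environ[param_type_name],
                                       os.environ[env]))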