Example #1
    def read(self, raise_timeout=False):
        """Read from heartbeat file.
        If a heartbeat file is not fresh (mtime difference < timeout)
        then None is returned.

        Returns:
            Tuple of (hostname, port)
        """
        try:
            u = AutoURI(self._heartbeat_file)
            if (time.time() - u.mtime) * 1000.0 > self._heartbeat_timeout:
                raise ServerHeartbeatTimeoutError
            else:
                hostname, port = u.read().strip('\n').split(':')
                logger.info(
                    'Reading hostname/port from a heartbeat file. {h}:{p}'.format(
                        h=hostname, p=port
                    )
                )
                return hostname, int(port)

        except ServerHeartbeatTimeoutError:
            logger.error(
                'Found a heartbeat file but it has expired '
                '(mtime difference > timeout). {f}'.format(f=self._heartbeat_file)
            )
            if raise_timeout:
                raise

        except Exception:
            logger.error(
                'Failed to read from a heartbeat file. {f}'.format(
                    f=self._heartbeat_file
                )
            )
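
For reference, the heartbeat file that this method reads is a single 'hostname:port' line (see Example #12 below, which writes it). A minimal stdlib-only sketch of the same freshness check, where heartbeat_file and timeout_ms are hypothetical stand-ins for self._heartbeat_file and self._heartbeat_timeout:

import os
import time

def read_heartbeat(heartbeat_file, timeout_ms):
    # Stdlib-only sketch of the check above; not the class's actual API.
    if (time.time() - os.path.getmtime(heartbeat_file)) * 1000.0 > timeout_ms:
        return None  # stale: mtime is older than the timeout
    with open(heartbeat_file) as fp:
        hostname, port = fp.read().strip('\n').split(':')
    return hostname, int(port)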
Example #2
    def save_to_file(self):
        html = CrooHtmlReport.HTML

        head = ''
        head += self._file_table.get_html_head_str()
        html = html.replace(CrooHtmlReport.HEAD, head)

        body = ''
        body += self._file_table.get_html_body_str()
        body += self._task_graph.get_html_body_str()
        body += self._ucsc_tracks.get_html_body_str()
        html = html.replace(CrooHtmlReport.BODY, body)

        # write to file and return HTML string
        uri_report = os.path.join(
            self._out_dir,
            CrooHtmlReport.REPORT_HTML.format(workflow_id=self._workflow_id),
        )
        with tempfile.TemporaryDirectory() as tmp_dir:
            local_uri_report = os.path.join(
                tmp_dir,
                CrooHtmlReport.REPORT_HTML.format(
                    workflow_id=self._workflow_id),
            )
            AutoURI(local_uri_report).write(html, no_lock=True)
            AutoURI(local_uri_report).cp(uri_report, no_lock=True)
        return html
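
The temporary-directory detour matters when self._out_dir is a remote URI (e.g. a gs:// or s3:// bucket): the HTML is written to a local file first and then copied to the final destination with a single cp() call. A minimal sketch of the pattern, assuming the autouri package's import path and hypothetical html/destination values:

import os
import tempfile

from autouri import AutoURI  # assumed import path for the AutoURI class

html = '<html>...</html>'                  # hypothetical report contents
uri_report = 'gs://my-bucket/report.html'  # hypothetical remote destination

with tempfile.TemporaryDirectory() as tmp_dir:
    local_report = os.path.join(tmp_dir, 'report.html')
    AutoURI(local_report).write(html, no_lock=True)     # write locally first
    AutoURI(local_report).cp(uri_report, no_lock=True)  # then copy to the bucket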
Example #3
def get_single_cromwell_metadata_obj(caper_client, args, subcmd):
    if not args.wf_id_or_label:
        raise ValueError('Define at least one metadata JSON file or '
                         'a search query for workflow ID/string label '
                         'if there is a running Caper server.')
    elif len(args.wf_id_or_label) > 1:
        raise ValueError(
            'Multiple files/queries are not allowed for {subcmd}. '
            'Define one metadata JSON file or a search query '
            'for workflow ID/string label.'.format(subcmd=subcmd))

    metadata_file = AutoURI(get_abspath(args.wf_id_or_label[0]))

    if metadata_file.exists:
        metadata = json.loads(metadata_file.read())
    else:
        metadata_objs = caper_client.metadata(
            wf_ids_or_labels=args.wf_id_or_label, embed_subworkflow=True)
        if len(metadata_objs) > 1:
            raise ValueError(
                'Found multiple workflows matching the search query.')
        elif len(metadata_objs) == 0:
            raise ValueError('Found no workflow matching the search query.')
        metadata = metadata_objs[0]

    return CromwellMetadata(metadata)
Example #4
def install_file(f, install_dir, label):
    """Install f locally on install_dir.
    If f is already local then skip it.
    """
    if AbsPath(f).is_valid:
        return AbsPath(f).uri
    logger.info('Installing {label}... {f}'.format(label=label, f=f))
    path = os.path.join(os.path.expanduser(install_dir), AutoURI(f).basename)
    return AutoURI(f).cp(path)
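
A hedged usage sketch; the URL and install directory below are hypothetical:

# Remote files are copied into install_dir; local paths are returned as-is.
cromwell_jar = install_file(
    'https://example.com/cromwell.jar',  # hypothetical remote file
    '~/.caper/jars',                     # hypothetical install directory
    'Cromwell JAR',                      # label used only for logging
)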
Example #5
    def create_file(
        self,
        directory,
        backend=None,
        custom_labels=None,
        str_label=None,
        user=None,
        basename=BASENAME_LABELS,
    ):
        """Create labels JSON file.

        Args:
            directory:
                Directory to create a labels JSON file in.
            backend:
                Backend name to be written to the labels file.
            custom_labels:
                User's labels file to be merged.
            str_label:
                Caper's string label.
                Wildcards ('*' and '?') and ':' are not allowed;
                they will be replaced with '_'.
            user:
                Username. If not defined, the current system user is used.
            basename:
                Basename of labels file.
        """
        template = {}

        if custom_labels:
            s = AutoURI(custom_labels).read()
            merge_dict(template, json.loads(s))

        if backend:
            template[CaperLabels.KEY_CAPER_BACKEND] = backend

        if str_label:
            new_str_label = re.sub(
                RE_ILLEGAL_STR_LABEL_CHRS, SUB_ILLEGAL_STR_LABEL_CHRS, str_label
            )
            if str_label != new_str_label:
                logger.warning(
                    'Found illegal characters in str_label matching with {regex}. '
                    'Replaced with {sub}'.format(
                        regex=RE_ILLEGAL_STR_LABEL_CHRS, sub=SUB_ILLEGAL_STR_LABEL_CHRS
                    )
                )
            template[CaperLabels.KEY_CAPER_STR_LABEL] = new_str_label

        template[CaperLabels.KEY_CAPER_USER] = (
            user if user else pwd.getpwuid(os.getuid())[0]
        )

        labels_file = os.path.join(directory, basename)
        AutoURI(labels_file).write(json.dumps(template, indent=4))

        return labels_file
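
For a call like create_file(directory, backend='gcp', str_label='my*run'), the written labels JSON would look roughly like the dict below. The key strings shown are assumptions inferred from the CaperLabels.KEY_* constant names:

template = {
    'caper-backend': 'gcp',       # from backend= (key string assumed)
    'caper-str-label': 'my_run',  # '*' replaced with '_' (key string assumed)
    'caper-user': 'alice',        # user= or the current system username
}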
Example #6
def test_on_successful_workflow(tmp_path, cromwell, womtool):
    fileobj_stdout = sys.stdout

    make_directory_with_wdls(str(tmp_path / 'successful'))

    # Run Cromwell to get metadata JSON
    c = Cromwell(cromwell=cromwell, womtool=womtool)
    th = c.run(
        wdl=str(tmp_path / 'successful' / 'main.wdl'),
        inputs=str(tmp_path / 'successful' / 'inputs.json'),
        fileobj_stdout=fileobj_stdout,
        cwd=str(tmp_path / 'successful'),
    )
    th.join()
    metadata = th.returnvalue
    assert metadata

    cm = CromwellMetadata(metadata)
    # test all properties
    assert cm.data == metadata
    assert cm.metadata == metadata
    assert CromwellMetadata(metadata).data == metadata
    assert cm.workflow_id == metadata['id']
    assert cm.workflow_status == metadata['status']
    # no failures for successful workflow's metadata
    assert cm.failures is None
    assert cm.calls == metadata['calls']
    assert sorted(call_name for call_name, _, _ in cm.recursed_calls) == sorted(
        ['main.t1', 'sub.t2', 'sub_sub.t3'])

    # test recurse_calls(): test with a simple function
    def fnc(call_name, call, parent_call_names):
        assert call_name in ('main.t1', 'sub.t2', 'sub_sub.t3')
        assert call['executionStatus'] == 'Done'
        if call_name == 'main.t1':
            assert not parent_call_names
        elif call_name == 'sub.t2':
            assert parent_call_names == ('main.sub', )
        elif call_name == 'sub_sub.t3':
            assert parent_call_names == ('main.sub', 'sub.sub_sub')
        else:
            raise ValueError('Wrong call_name: {name}'.format(name=call_name))

    list(cm.recurse_calls(fnc))

    # test write_on_workflow_root()
    m_file_on_root = os.path.join(cm.metadata['workflowRoot'], 'metadata.json')
    u = AutoURI(m_file_on_root)
    u.rm()
    assert not u.exists

    cm.write_on_workflow_root()
    assert os.path.exists(m_file_on_root)
    assert CromwellMetadata(m_file_on_root).metadata == cm.metadata
Example #7
    def create_file(
        self,
        directory,
        backend=None,
        custom_backend_conf=None,
        basename=BASENAME_BACKEND_CONF,
    ):
        """Create a HOCON string and create a backend.conf file.

        Args:
            backend:
                Backend to run a workflow on.
                Default backend will be use if not defined.
            custom_backend_conf:
                User's custom backend conf file to override on
                Caper's auto-generated backend conf.
            basename:
                Basename.
        """
        template = deepcopy(self._template)

        if backend == BACKEND_SGE:
            if self._sge_pe is None:
                raise ValueError(
                    'sge-pe (Sun GridEngine parallel environment) '
                    'is required for backend sge.')
        elif backend == BACKEND_GCP:
            if self._gcp_prj is None:
                raise ValueError('gcp-prj (Google Cloud Platform project) '
                                 'is required for backend gcp.')
            if self._gcp_out_dir is None:
                raise ValueError('gcp-out-dir (gs:// output bucket path) '
                                 'is required for backend gcp.')
        elif backend == BACKEND_AWS:
            if self._aws_batch_arn is None:
                raise ValueError('aws-batch-arn (ARN for AWS Batch) '
                                 'is required for backend aws.')
            if self._aws_region is None:
                raise ValueError('aws-region (AWS region) '
                                 'is required for backend aws.')
            if self._aws_out_dir is None:
                raise ValueError('aws-out-dir (s3:// output bucket path) '
                                 'is required for backend aws.')

        hocon_s = HOCONString.from_dict(
            template, include=CaperBackendConf.BACKEND_CONF_INCLUDE)

        if custom_backend_conf is not None:
            s = AutoURI(custom_backend_conf).read()
            hocon_s.merge(s, update=True)

        final_backend_conf_file = os.path.join(directory, basename)
        AutoURI(final_backend_conf_file).write(str(hocon_s) + '\n')
        return final_backend_conf_file
Example #8
 def __init__(self, wdl):
     """Wraps miniwdl's parse_document().
     """
     u = AutoURI(wdl)
     if not u.exists:
         raise FileNotFoundError(
             'WDL does not exist: wdl={wdl}'.format(wdl=wdl))
     self._wdl = wdl
     self._wdl_contents = AutoURI(wdl).read()
     try:
         self._wdl_doc = parse_document(self._wdl_contents)
     except Exception:
         logger.error('Failed to parse WDL with miniwdl.')
         self._wdl_doc = None
Example #9
def find_valid_uris_in_dict(d, parent=tuple(), list_idx=tuple()):
    """Can recursively parse WDL struct to find valid AbsPath/URL/URIs.
    For example, /somewhere/here/there.txt, s3://bucket1/t.txt, http://...

    Returns a list of tuples (
        dot_delimited_all_parents_string,
        uri,
        tuple_of_nested_shard_indices,
    ).
    """
    files = []
    if isinstance(d, dict):
        for k, v in d.items():
            files.extend(
                find_valid_uris_in_dict(v,
                                        parent=parent + (k, ),
                                        list_idx=list_idx))

    elif isinstance(d, (list, tuple)):
        for i, v in enumerate(d):
            files.extend(
                find_valid_uris_in_dict(v,
                                        parent=parent,
                                        list_idx=list_idx + (i, )))

    elif isinstance(d, str) and AutoURI(d).is_valid:
        files.append(('.'.join(parent), d, list_idx if list_idx else (-1, )))
    return files
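
A worked call on a small nested dict, assuming /tmp/a.txt counts as a valid absolute path (so AutoURI(...).is_valid is True) while a plain word does not:

d = {'t1': {'files': ['/tmp/a.txt', 'not-a-file']}}
find_valid_uris_in_dict(d)
# -> [('t1.files', '/tmp/a.txt', (0,))]
# Parent keys are joined with '.', and the list index becomes the shard tuple.
# A valid URI found outside of any list would get (-1,) as its shard tuple.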
Example #10
    def cleanup(
        self, dry_run=False, num_threads=URIBase.DEFAULT_NUM_THREADS, no_lock=False
    ):
        """Cleans up workflow's root output directory.

        Args:
            dry_run:
                Dry-run mode.
            num_threads:
                For outputs on cloud buckets only.
                Number of threads for deleting individual outputs on cloud buckets in parallel.
                Generates one client per thread. This works like `gsutil -m rm -rf`.
            no_lock:
                No file locking.
        """
        root = self.workflow_root
        if not root:
            logger.error(
                'Workflow\'s root directory cannot be found in metadata JSON. '
                'Cannot clean up outputs.'
            )
            return

        if AbsPath(root).is_valid:
            # num_threads is not available for AbsPath().rmdir()
            AbsPath(root).rmdir(dry_run=dry_run, no_lock=no_lock)
        else:
            AutoURI(root).rmdir(
                dry_run=dry_run, no_lock=no_lock, num_threads=num_threads
            )
Example #11
def find_files_in_dict(d):
    files = []
    for k, v in d.items():
        maybe_files = []
        if isinstance(v, list):
            for i, v_ in enumerate(v):
                if isinstance(v_, str):
                    maybe_files.append((v_, (i, )))
                elif isinstance(v_, list):
                    for j, v__ in enumerate(v_):
                        if isinstance(v__, str):
                            maybe_files.append((v__, (i, j)))
                        elif isinstance(v__, list):
                            # k_ avoids shadowing the outer dict key k,
                            # which is appended to files below
                            for k_, v___ in enumerate(v__):
                                if isinstance(v___, str):
                                    maybe_files.append((v___, (i, j, k_)))
        elif isinstance(v, dict):
            for _, v_ in v.items():
                if isinstance(v_, str):
                    maybe_files.append((v_, (-1, )))
        elif isinstance(v, str):
            maybe_files.append((v, (-1, )))
        for f, shard_idx in maybe_files:
            if AutoURI(f).is_valid:
                files.append((k, f, shard_idx))
    return files
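
A worked call, again assuming the /tmp paths are recognized as valid URIs; note how the shard index tuple encodes the nesting depth and (-1,) marks non-list values:

d = {'t1.out': ['/tmp/a.txt', ['/tmp/b.txt']], 't2.out': '/tmp/c.txt'}
find_files_in_dict(d)
# -> [('t1.out', '/tmp/a.txt', (0,)),
#     ('t1.out', '/tmp/b.txt', (1, 0)),
#     ('t2.out', '/tmp/c.txt', (-1,))]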
Example #12
    def _write_to_file(self, port, hostname=None):
        if not hostname:
            hostname = socket.gethostname()

        logger.info('Server heartbeat thread started.')

        while True:
            try:
                logger.debug(
                    'Writing heartbeat: {hostname}, {port}'.format(
                        hostname=hostname, port=port
                    )
                )
                AutoURI(self._heartbeat_file).write(
                    '{hostname}:{port}'.format(hostname=hostname, port=port)
                )
            except Exception:
                logger.error(
                    'Failed to write to the heartbeat file. {f}'.format(
                        f=self._heartbeat_file
                    )
                )
            cnt = 0
            while cnt < self._interval_update_heartbeat:
                cnt += 1
                if self._stop_it:
                    break
                time.sleep(1)
            if self._stop_it:
                break

        logger.info('Server heartbeat thread ended.')
Example #13
            def troubleshoot_call(call_name, call, parent_call_names):
                """Returns troubleshooting help message.
                """
                nonlocal show_completed_task
                nonlocal show_stdout
                status = call.get('executionStatus')
                shard_index = call.get('shardIndex')
                rc = call.get('returnCode')
                job_id = call.get('jobId')
                stdout = call.get('stdout')
                stderr = call.get('stderr')
                run_start = None
                run_end = None
                for event in call.get('executionEvents', []):
                    if event['description'].startswith('Running'):
                        run_start = event['startTime']
                        run_end = event['endTime']
                        break

                help_msg = ''
                if show_completed_task or status not in ('Done', 'Succeeded'):
                    help_msg += (
                        '\n==== NAME={name}, STATUS={status}, PARENT={p}\n'
                        'SHARD_IDX={shard_idx}, RC={rc}, JOB_ID={job_id}\n'
                        'START={start}, END={end}\n'
                        'STDOUT={stdout}\nSTDERR={stderr}\n'.format(
                            name=call_name,
                            status=status,
                            p=','.join(parent_call_names),
                            start=run_start,
                            end=run_end,
                            shard_idx=shard_index,
                            rc=rc,
                            job_id=job_id,
                            stdout=stdout,
                            stderr=stderr,
                        ))
                    if stderr and AutoURI(stderr).exists:
                        help_msg += 'STDERR_CONTENTS=\n{s}\n'.format(
                            s=AutoURI(stderr).read())
                    if show_stdout and stdout and AutoURI(stdout).exists:
                        help_msg += 'STDOUT_CONTENTS=\n{s}\n'.format(
                            s=AutoURI(stdout).read())

                return help_msg
Example #14
 def __init__(self, qcs, delim='\t'):
     """
     Args:
         qcs:
             list of QC file URIs (path/URL/S3/GCS)
         delim:
             delimiter for output ([TAB] by default)
     """
     self._delim = delim
     self._jsons = []
     for qc in qcs:
         qc = AbsPath.get_abspath_if_exists(qc)
         if not AutoURI(qc).exists:
             logger.error('File does not exist. Skipping... {uri}'.format(uri=qc))
             continue
         s = AutoURI(qc).read()
         j = json.loads(s)
         self._jsons.append(j)
Example #15
 def __init__(self, metadata):
     """Parses metadata JSON (dict) object or file.
     """
     if isinstance(metadata, dict):
         self._metadata = metadata
     elif isinstance(metadata, CromwellMetadata):
         self._metadata = metadata._metadata
     else:
         s = AutoURI(metadata).read()
         self._metadata = json.loads(s)
Example #16
def make_directory_with_failing_wdls(directory, no_sub_wdl=False):
    """
    Run Cromwell with WDLs:
    main + 1 sub (supposed to fail) + 1 sub's sub.

    Returns:
        Created root directory
    """
    main_inputs = os.path.join(directory, 'inputs.json')
    AutoURI(main_inputs).write(json.dumps(MAIN_INPUTS, indent=4))

    main_wdl = os.path.join(directory, 'main.wdl')
    AutoURI(main_wdl).write(MAIN_WDL)

    if not no_sub_wdl:
        sub_wdl = os.path.join(directory, 'sub', 'sub.wdl')
        AutoURI(sub_wdl).write(SUB_WDL_TO_FAIL)

        sub_sub_wdl = os.path.join(directory, 'sub', 'sub', 'sub_sub.wdl')
        AutoURI(sub_sub_wdl).write(SUB_SUB_WDL)
Example #17
    def localize_on_backend_if_modified(self,
                                        f,
                                        backend,
                                        recursive=False,
                                        make_md5_file=False):
        """Wrapper for localize_on_backend.

        If localized file is not modified due to recursive localization,
        then it means that localization for such file was redundant.
        So returns the original file instead of a redundantly localized one.
        We can check if file is modifed or not by looking at their basename.
        Modified localized file has a suffix of the target storage. e.g. .s3.
        """
        f_loc = self.localize_on_backend(f=f,
                                         backend=backend,
                                         recursive=recursive,
                                         make_md5_file=make_md5_file)

        if AutoURI(f).basename == AutoURI(f_loc).basename:
            return f
        return f_loc
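
A hedged illustration of the basename comparison above: a localized copy that was actually modified carries a storage suffix (e.g. .gcs or .s3), so its basename no longer matches the original's. All paths are hypothetical:

from autouri import AutoURI  # assumed import path

orig = '/somewhere/test.json'
untouched = 'gs://loc-bucket/somewhere/test.json'     # localized, contents unchanged
modified = 'gs://loc-bucket/somewhere/test.gcs.json'  # deepcopy rewrote paths inside

AutoURI(orig).basename == AutoURI(untouched).basename  # True  -> return orig
AutoURI(orig).basename == AutoURI(modified).basename   # False -> return modified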
Example #18
 def zip_subworkflows(self, zip_file):
     """Recursively find/zip imported subworkflow WDLs
     This will zip sub-WDLs with relative paths only.
     i.e. URIs are ignored.
     For this (main) workflow, any URI is allowed.
     However, only subworkflows with relative path will be zipped
     since there is no way to make directory structure to zip them.
     Returns:
         Zipped imports file.
         None if no subworkflows recursively found in WDL.
     """
     with TemporaryDirectory() as tmp_d:
         # localize WDL first. If it's already local
         # then its original path is used without localization.
         wdl = AutoURI(self._wdl).localize_on(tmp_d)
         # keep directory structure as they imported
         num_sub_wf_packed = self.__recurse_zip_subworkflows(
             root_zip_dir=tmp_d, root_wdl_dir=AutoURI(wdl).dirname)
         if num_sub_wf_packed:
             shutil.make_archive(AutoURI(zip_file).uri_wo_ext, 'zip', tmp_d)
             return zip_file
Example #19
    def write_on_workflow_root(self, basename=DEFAULT_METADATA_BASENAME):
        """Update metadata JSON file on metadata's output root directory.
        """
        root = self.workflow_root

        if root:
            metadata_file = os.path.join(root, basename)

            AutoURI(metadata_file).write(json.dumps(self._metadata, indent=4) + '\n')
            logger.info('Wrote metadata file. {f}'.format(f=metadata_file))

            return metadata_file
Example #20
def split_list_into_file_and_non_file(lst):
    """Returns tuple of (list of existing files, list of non-file strings)
    """
    files = []
    non_files = []

    for maybe_file in lst:
        if AutoURI(get_abspath(maybe_file)).exists:
            files.append(maybe_file)
        else:
            non_files.append(maybe_file)

    return files, non_files
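
A hedged usage sketch with one existing local file and one workflow ID (values hypothetical):

files, non_files = split_list_into_file_and_non_file(
    ['metadata.json', 'f0a3cba5-2d6e-4a4e-8c1a-0123456789ab'])
# files     -> ['metadata.json']  (exists on disk after get_abspath())
# non_files -> ['f0a3cba5-...']   (kept as a search query for the server)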
Example #21
def get_abspath(path):
    """Get abspath from a string.
    This function is mainly used to make a command line argument an abspath
    since AutoURI module only works with abspath and full URIs
    (e.g. /home/there, gs://here/there).
    For example, "caper run toy.wdl --docker ubuntu:latest".
    AutoURI cannot recognize toy.wdl on CWD as a file path.
    It should be converted to an abspath first.
    To do so, use this function for local file path strings only (e.g. toy.wdl).
    Do not use this function for other non-local-path strings (e.g. --docker).
    """
    if path:
        if not AutoURI(path).is_valid:
            return os.path.abspath(os.path.expanduser(path))
    return path
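
A few hedged calls showing the behavior (the CWD and home directory shown are hypothetical):

get_abspath('toy.wdl')        # -> '/current/working/dir/toy.wdl'
get_abspath('~/toy.wdl')      # -> '/home/user/toy.wdl' (expanduser applied)
get_abspath('gs://bucket/x')  # -> 'gs://bucket/x' (already a valid URI; unchanged)
get_abspath(None)             # -> None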
Example #22
    def localize_on_backend(self,
                            f,
                            backend,
                            recursive=False,
                            make_md5_file=False):
        """Localize a file according to the chosen backend.
        Each backend has its corresponding storage.
            - gcp -> GCS bucket path (starting with gs://)
            - aws -> S3 bucket path (starting with s3://)
            - All others (based on local backend) -> local storage

        If the contents of an input JSON file change due to recursive localization
        (deepcopy), then a new temporary file suffixed with the backend type will be
        written on loc_prefix.
        For example, /somewhere/test.json -> gs://example-tmp-gcs-bucket/somewhere/test.gcs.json

        loc_prefix will be one of the cache directories according to the backend type
            - gcp -> gcp_loc_dir
            - aws -> aws_loc_dir
            - all others (local) -> local_loc_dir

        Args:
            f:
                File to be localized.
            backend:
                Backend to localize file f on.
            recursive:
                Recursive localization (deepcopy).
                All files (if value is valid path/URI string) in JSON/CSV/TSV
                will be localized together with file f.
            make_md5_file:
                Make .md5 file for localized files. This is for local only since
                GCS/S3 bucket paths already include md5 hash information in their metadata.

        Returns:
            localized URI.
        """
        if backend == BACKEND_GCP:
            loc_prefix = self._gcp_loc_dir
        elif backend == BACKEND_AWS:
            loc_prefix = self._aws_loc_dir
        else:
            loc_prefix = self._local_loc_dir

        return AutoURI(f).localize_on(loc_prefix,
                                      recursive=recursive,
                                      make_md5_file=make_md5_file)
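
A hedged usage sketch mirroring the docstring's example values; caper_runner is a hypothetical instance of the class that defines this method:

# Recursively localize an input JSON (and files referenced inside it) for gcp.
loc_uri = caper_runner.localize_on_backend(
    '/somewhere/test.json', backend=BACKEND_GCP, recursive=True)
# -> e.g. 'gs://example-tmp-gcs-bucket/somewhere/test.gcs.json'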
Example #23
        def on_finish():
            nonlocal metadata
            nonlocal fileobj_troubleshoot

            if os.path.exists(metadata):
                json_contents = AutoURI(metadata).read()
                if json_contents:
                    metadata_dict = json.loads(json_contents)
                    cm = CromwellMetadata(metadata_dict)
                    cm.write_on_workflow_root()

                    if cm.workflow_status != 'Succeeded' and fileobj_troubleshoot:
                        # auto-troubleshoot on terminate if workflow is not successful
                        logger.info('Workflow failed. Auto-troubleshooting...')
                        cm.troubleshoot(fileobj=fileobj_troubleshoot)

                    # to make it a return value of the thread after it is done (joined)
                    return metadata_dict
Example #24
    def save_to_file(self):
        html = CrooHtmlReport.HTML

        head = ''
        head += self._file_table.get_html_head_str()
        html = html.replace(CrooHtmlReport.HEAD, head)

        body = ''
        body += self._file_table.get_html_body_str()
        body += self._task_graph.get_html_body_str()
        body += self._ucsc_tracks.get_html_body_str()
        html = html.replace(CrooHtmlReport.BODY, body)

        # write to file and return HTML string
        uri_report = os.path.join(
            self._out_dir,
            CrooHtmlReport.REPORT_HTML.format(workflow_id=self._workflow_id))
        AutoURI(uri_report).write(html)
        return html
Example #25
    def write_on_workflow_root(self, basename=DEFAULT_METADATA_BASENAME):
        """Update metadata JSON file on metadata's output root directory.
        If there is a subworkflow, nest its metadata into main workflow's one

        Args:
            write_subworkflow:
                Write metadata JSON file for subworkflows.
        """
        if 'workflowRoot' in self._metadata:
            root = self._metadata['workflowRoot']
            metadata_file = os.path.join(root, basename)
            AutoURI(metadata_file).write(
                json.dumps(self._metadata, indent=4) + '\n')
            logger.info('Wrote metadata file. {f}'.format(f=metadata_file))
        else:
            metadata_file = None
            workflow_id = self._metadata.get('id')
            logger.warning(
                'Failed to write metadata file. workflowRoot not found. '
                'wf_id={i}'.format(i=workflow_id))
        return metadata_file
Example #26
def get_multi_cromwell_metadata_objs(caper_client, args):
    if not args.wf_id_or_label:
        raise ValueError('Define at least one metadata JSON file or '
                         'a search query for workflow ID/string label '
                         'if there is a running Caper server.')

    files, non_files = split_list_into_file_and_non_file(args.wf_id_or_label)

    all_metadata = []
    for file in files:
        metadata = json.loads(AutoURI(get_abspath(file)).read())
        all_metadata.append(metadata)

    if non_files:
        all_metadata.extend(
            caper_client.metadata(wf_ids_or_labels=non_files,
                                  embed_subworkflow=True))

    if not all_metadata:
        raise ValueError(
            'Found no metadata/workflow matching the search query.')
    return [CromwellMetadata(m) for m in all_metadata]
Example #27
    def validate(
        self,
        wdl,
        inputs=None,
        imports=None,
        cwd=None,
        java_heap_womtool=DEFAULT_JAVA_HEAP_WOMTOOL,
    ):
        """Validate WDL/inputs/imports using Womtool.

        Returns:
            valid:
                Validated or not.
        """
        self.install_womtool()

        wdl_file = AutoURI(wdl)
        if not wdl_file.exists:
            raise FileNotFoundError(
                'WDL file does not exist. wdl={wdl}'.format(wdl=wdl)
            )
        if inputs:
            if not AutoURI(inputs).exists:
                raise FileNotFoundError(
                    'Inputs JSON defined but does not exist. i={i}'.format(i=inputs)
                )

        with tempfile.TemporaryDirectory() as tmp_d:
            if imports:
                if not AutoURI(imports).exists:
                    raise FileNotFoundError(
                        'Imports file defined but does not exist. i={i}'.format(
                            i=imports
                        )
                    )
                wdl_ = os.path.join(tmp_d, wdl_file.basename)
                wdl_file.cp(wdl_)
                shutil.unpack_archive(imports, tmp_d)
            else:
                wdl_ = wdl_file.localize_on(tmp_d)

            cmd = [
                'java',
                '-Xmx{heap}'.format(heap=java_heap_womtool),
                '-jar',
                '-DLOG_LEVEL={lvl}'.format(lvl='INFO'),
                self._womtool,
                'validate',
                wdl_,
            ]
            if inputs:
                cmd += ['-i', AutoURI(inputs).localize_on(tmp_d)]

            logger.info('Validating WDL/inputs/imports with Womtool...')

            stderr = ''

            def on_stderr(s):
                nonlocal stderr
                stderr += s

            th = NBSubprocThread(cmd, cwd=tmp_d, on_stderr=on_stderr, quiet=True)
            th.start()
            th.join()

            if th.returncode:
                logger.error(
                    'RC={rc}\nSTDERR={stderr}\nWomtool validation failed.'.format(
                        rc=th.returncode, stderr=stderr
                    )
                )
                return False
            else:
                logger.info('Womtool validation passed.')
                return True
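
Hedged usage, mirroring the constructor call in Example #6; the jar and WDL paths are hypothetical:

c = Cromwell(cromwell='/path/to/cromwell.jar', womtool='/path/to/womtool.jar')
is_valid = c.validate(wdl='/path/to/main.wdl', inputs='/path/to/inputs.json')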
Example #28
    def server(
        self,
        server_port=DEFAULT_SERVER_PORT,
        server_hostname=None,
        server_heartbeat=None,
        backend_conf=None,
        fileobj_stdout=None,
        embed_subworkflow=False,
        java_heap_cromwell_server=DEFAULT_JAVA_HEAP_CROMWELL_SERVER,
        auto_write_metadata=True,
        on_server_start=None,
        on_status_change=None,
        cwd=None,
        dry_run=False,
    ):
        """Run Cromwell server mode (java -jar cromwell.jar server).
        This is a non-blocking function that returns a Thread object of Cromwell server.
        Howerver, this Thread object has a property status that indicates whether
        the server is started and ready to take submissions.
        Such condition is thread.status == True.

        Args:
            server_port:
                Server port.
            server_hostname:
                Server hostname. If defined then the heartbeat file will be written
                with this hostname instead of socket.gethostname().
            server_heartbeat:
                ServerHeartbeat object to write hostname/port pair into a heartbeat file.
                Then it will be later used by CaperClient to find hostname/port of
                this server.
            backend_conf:
                backend.conf file for Cromwell's Java parameter
                "-Dconfig.file=".
                Default backend defined in this file will be used.
                If no default backend is defined then "Local" (Cromwell's default)
                backend will be used.
            fileobj_stdout:
                File object to write Cromwell's STDOUT on.
            embed_subworkflow:
                This class basically stores/updates metadata.JSON file on
                each workflow's root directory whenever there is status change
                of workflow (or its tasks).
                This flag ensures that any subworkflow's metadata JSON will be
                embedded in main (this) workflow's metadata JSON.
                This is to mimic behavior of Cromwell run mode's -m parameter.
            java_heap_cromwell_server:
                Java heap (java -Xmx) for Cromwell server mode.
            auto_write_metadata:
                Automatic retrieval/writing of metadata.json upon workflow/task's status change.
            on_server_start:
                Callback function called when the server starts.
            on_status_change:
                (Not implemented yet)
                Callback function called while polling.
                function should take 5 args
                    workflow_id:
                        UUID of a workflow
                    workflow_new_status:
                        New status for a workflow. None if no change.
                    task_id:
                        Tuple (task_name, shard_idx) to identify workflow's task.
                    task_new_status:
                        New status for a task, None if no change.
                    metadata:
                        metadata (dict) of a workflow.
            cwd:
                This will be finally passed to subprocess.Popen(cwd=).
            dry_run:
                Dry run.
        Returns:
            th:
                Thread for Cromwell's server mode.
                Returns None if dry_run.
        """
        self.install_cromwell()

        # check if the port is already in use (connect_ex returns 0 on success)
        sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        result = sock.connect_ex((Cromwell.LOCALHOST, server_port))
        sock.close()
        if not result:
            raise PortAlreadyInUseError(
                'Server port {p} is already taken. '
                'Try with a different port'.format(p=server_port)
            )

        # LOG_LEVEL must be >=INFO to catch workflow ID from STDOUT
        cmd = [
            'java',
            '-Xmx{}'.format(java_heap_cromwell_server),
            '-XX:ParallelGCThreads=1',
            '-jar',
            '-DLOG_LEVEL={lvl}'.format(lvl='INFO'),
            '-DLOG_MODE=standard',
            '-Dwebservice.port={port}'.format(port=server_port),
        ]
        if backend_conf:
            cmd += ['-Dconfig.file={}'.format(backend_conf)]
            logger.debug(
                'backend_conf contents:\n{s}'.format(s=AutoURI(backend_conf).read())
            )

        cmd += [self._cromwell, 'server']

        logger.debug('cmd: {cmd}'.format(cmd=' '.join(cmd)))
        if dry_run:
            return

        wm = CromwellWorkflowMonitor(
            server_port=server_port,
            is_server=True,
            embed_subworkflow=embed_subworkflow,
            auto_write_metadata=auto_write_metadata,
            on_server_start=on_server_start,
            on_status_change=on_status_change,
        )

        def on_stdout(stdout):
            """Returns 'server_started' when server is ready to take submissions.
            Return value of this callback function is to update .status
            of an NBSubprocThread object.
            """
            nonlocal fileobj_stdout
            nonlocal wm
            nonlocal server_heartbeat

            if is_fileobj_open(fileobj_stdout):
                fileobj_stdout.write(stdout)
                fileobj_stdout.flush()

            wm.update(stdout)
            if wm.is_server_started():
                if server_heartbeat and not server_heartbeat.is_alive():
                    server_heartbeat.start(port=server_port, hostname=server_hostname)
                return 'server_started'

        def on_finish():
            nonlocal server_heartbeat

            if server_heartbeat:
                server_heartbeat.stop()

        th = NBSubprocThread(
            cmd,
            cwd=cwd,
            on_stdout=on_stdout,
            on_finish=on_finish,
            subprocess_name='Cromwell',
        )
        th.start()

        return th
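
A hedged sketch of starting the server and waiting for readiness; the polling condition relies on thread.status turning truthy, as described in the docstring, and the jar path is hypothetical:

import sys
import time

c = Cromwell(cromwell='/path/to/cromwell.jar')  # hypothetical jar path
th = c.server(server_port=8000, fileobj_stdout=sys.stdout)
while th.is_alive() and not th.status:          # status set by on_stdout above
    time.sleep(1)
# The server is now ready to take submissions on localhost:8000.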
Example #29
def read_json(json_file):
    if json_file:
        json_contents = AutoURI(get_abspath(json_file)).read()
        return json.loads(json_contents)
Example #30
    def submit(
        self,
        wdl,
        backend=None,
        inputs=None,
        options=None,
        labels=None,
        imports=None,
        str_label=None,
        user=None,
        docker=None,
        singularity=None,
        singularity_cachedir=Singularity.DEFAULT_SINGULARITY_CACHEDIR,
        no_build_singularity=False,
        max_retries=CaperWorkflowOpts.DEFAULT_MAX_RETRIES,
        memory_retry_multiplier=CaperWorkflowOpts.DEFAULT_MEMORY_RETRY_MULTIPLIER,
        gcp_monitoring_script=CaperWorkflowOpts.DEFAULT_GCP_MONITORING_SCRIPT,
        ignore_womtool=False,
        no_deepcopy=False,
        hold=False,
        java_heap_womtool=Cromwell.DEFAULT_JAVA_HEAP_WOMTOOL,
        dry_run=False,
        work_dir=None,
    ):
        """Submit a workflow to Cromwell server.

        Args:
            wdl:
                WDL file.
            backend:
                Backend to run a workflow on.
                Choose among Caper's built-in or user's custom backends.
                (aws, gcp, Local, slurm, sge, pbs, ...).
                If not defined then server's default backend will be used.
            inputs:
                Input JSON file.
            options:
                Workflow options JSON file.
            labels:
                Labels JSON file.
            imports:
                imports ZIP file.
            str_label:
                Caper's string label for a workflow,
                which will be written to labels JSON file.
            user:
                Username. If not defined, a username is found from the system.
                This will be written to Cromwell's labels JSON file and will not
                be used elsewhere.
            docker:
                Docker image to run a workflow on.
                This will add a "docker" attribute to the "runtime {}" section
                of all tasks in the WDL.
                This will be overridden by an existing "docker" attribute
                defined in a WDL task's "runtime {}" section.
            singularity:
                Singularity image to run a workflow on.
                To use this, do not define a "docker" attribute in
                any WDL task's "runtime {}" section.
            singularity_cachedir:
                Cache directory for local Singularity images.
                If the shell environment variable SINGULARITY_CACHEDIR is
                defined, then this parameter will be ignored.
            no_build_singularity:
                Do not build a local Singularity image now.
                However, a local Singularity image will eventually be built
                under the directory defined by the env var SINGULARITY_CACHEDIR.
                Therefore, use this flag if you have already built it.
            max_retries:
                Maximum number of retries for a failed task. 0 or None means no retries.
            memory_retry_multiplier:
                Multiplier for the memory retry feature.
                See https://cromwell.readthedocs.io/en/develop/cromwell_features/RetryWithMoreMemory/
                for details.
            ignore_womtool:
                Disable Womtool validation for WDL/input JSON/imports.
            no_deepcopy:
                Disable recursive localization of files defined in input JSON.
                Input JSON file itself will still be localized.
            hold:
                Put a workflow on hold when submitted. This workflow will be on hold until
                it's released. See self.unhold() for details.
            java_heap_womtool:
                Java heap (java -Xmx) for Womtool.
            dry_run:
                Stop before running Java command line for Cromwell.
            work_dir:
                Local temporary directory to store all temporary files.
                Temporary files mean intermediate files used for running Cromwell.
                For example, workflow options file, imports zip file.
                Localized (recursively) data files defined in input JSON
                will NOT be stored here.
                They will be localized on self._local_loc_dir instead.
                If this is not defined, then cache directory self._local_loc_dir will be used.
        """
        wdl_file = AutoURI(wdl)
        if not wdl_file.exists:
            raise FileNotFoundError(
                'WDL does not exist. {wdl}'.format(wdl=wdl))

        if str_label is None and inputs:
            str_label = AutoURI(inputs).basename_wo_ext

        if work_dir is None:
            work_dir = self.create_timestamped_work_dir(
                prefix=wdl_file.basename_wo_ext)

        wdl = wdl_file.localize_on(work_dir)

        if backend is None:
            backend = self._cromwell_rest_api.get_default_backend()

        if inputs:
            # inputs should be localized on corresponding
            # backend's localization directory.
            # check if such loc_dir is defined.
            if self.get_loc_dir(backend) is None:
                raise ValueError(
                    'loc_dir is not defined for your backend. {b}'.format(
                        b=backend))

            maybe_remote_file = self.localize_on_backend_if_modified(
                inputs,
                backend=backend,
                recursive=not no_deepcopy,
                make_md5_file=True)
            inputs = AutoURI(maybe_remote_file).localize_on(work_dir)

        options = self._caper_workflow_opts.create_file(
            directory=work_dir,
            wdl=wdl,
            backend=backend,
            inputs=inputs,
            custom_options=options,
            docker=docker,
            singularity=singularity,
            singularity_cachedir=singularity_cachedir,
            no_build_singularity=no_build_singularity,
            max_retries=max_retries,
            memory_retry_multiplier=memory_retry_multiplier,
            gcp_monitoring_script=gcp_monitoring_script,
        )

        labels = self._caper_labels.create_file(
            directory=work_dir,
            backend=backend,
            custom_labels=labels,
            str_label=str_label,
            user=user,
        )

        wdl_parser = CaperWDLParser(wdl)
        if imports:
            imports = AutoURI(imports).localize_on(work_dir)
        else:
            imports = wdl_parser.create_imports_file(work_dir)

        logger.debug('submit params: wdl={wdl}, imports={imp}, inputs={inp}, '
                     'options={opt}, labels={lbl}, hold={hold}'.format(
                         wdl=wdl,
                         imp=imports,
                         inp=inputs,
                         opt=options,
                         lbl=labels,
                         hold=hold))

        if not ignore_womtool:
            self._cromwell.validate(
                wdl=wdl,
                inputs=inputs,
                imports=imports,
                java_heap_womtool=java_heap_womtool,
            )

        if dry_run:
            return

        r = self._cromwell_rest_api.submit(
            source=wdl,
            dependencies=imports,
            inputs=inputs,
            options=options,
            labels=labels,
            on_hold=hold,
        )
        logger.info('submit: {r}'.format(r=r))
        return r
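
A hedged client-side sketch, assuming a configured instance (here called caper_client_submit) exposing this method; all values are hypothetical:

r = caper_client_submit.submit(
    wdl='/path/to/main.wdl',
    inputs='/path/to/inputs.json',
    backend='gcp',
    str_label='my-test-run',
    hold=False,
)
# r is Cromwell's REST response for the submission (logged above as 'submit: ...').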