Exemplo n.º 1
0
    def get_html_body_str(self):
        """Build the HTML snippet for the UCSC genome browser tracks.

        The track text is written to a TXT file under the output directory,
        and the returned HTML is made of:
            1) A link to the UCSC genome browser carrying the URL-encoded
               track text itself.
            2) A second link carrying the URL-encoded address of the TXT
               file, added only when a public, presigned or mapped URL
               could be obtained for that file.
            3) The plain track text, to be pasted into the custom track
               edit box on the UCSC genome browser.

        Returns:
            The HTML string, or an empty string when no UCSC genome
            database is set or when the track text is None/empty.
        """
        if self._ucsc_genome_db is None:
            return ''
        track_text = self.__make_ucsc_track_txt()
        if track_text in (None, ''):
            return ''

        tracks = CrooHtmlReportUCSCTracks

        # optional query parameter carrying the genome position
        pos_param = ''
        if self._ucsc_genome_pos is not None:
            pos_param = (
                tracks.UCSC_BROWSER_QUERY_POS_PARAM + self._ucsc_genome_pos)

        # Save the track text to a TXT file; a link to this file is offered
        # as a fallback because a long URL may not work.
        txt_uri = os.path.join(
            self._out_dir,
            tracks.UCSC_TRACKS_TXT.format(workflow_id=self._workflow_id))
        txt_file = AutoURI(txt_uri)
        txt_file.write(track_text, no_lock=True)

        # Find an address for the TXT file, depending on where it is stored.
        txt_file_url = None
        if isinstance(txt_file, GCSURI):
            if self._public_gcs:
                txt_file_url = txt_file.get_public_url()
            elif self._use_presigned_url_gcs:
                txt_file_url = txt_file.get_presigned_url(
                    duration=self._duration_presigned_url_gcs,
                    private_key_file=self._gcp_private_key,
                )
        elif isinstance(txt_file, S3URI):
            if self._use_presigned_url_s3:
                txt_file_url = txt_file.get_presigned_url(
                    duration=self._duration_presigned_url_s3)
        elif isinstance(txt_file, AbsPath):
            if self._map_path_to_url:
                txt_file_url = txt_file.get_mapped_url(
                    map_path_to_url=self._map_path_to_url)

        def browser_url(payload):
            # genome browser URL with the URL-encoded payload
            return tracks.UCSC_BROWSER_QUERY_URL.format(
                db=self._ucsc_genome_db,
                extra_param=pos_param,
                encoded=urllib.parse.quote(payload),
            )

        sections = [
            tracks.HTML_TRACK_HUB_LINK.format(
                title='UCSC browser tracks',
                url=browser_url(track_text)),
        ]

        if txt_file_url is not None:
            sections.append(
                tracks.HTML_TRACK_HUB_LINK.format(
                    title=(
                        'UCSC browser tracks '
                        '(if the above link does not work)'),
                    url=browser_url(txt_file_url)))

        sections.append(
            tracks.HTML_TRACK_HUB_TEXT.format(
                title=(
                    'UCSC track hub plain text. '
                    'Paste it directly to custom track edit box '
                    'on UCSC genome browser.'),
                txt=track_text,
            ))

        return ''.join(sections)
Exemplo n.º 2
0
    def organize_output(self):
        """Organize workflow outputs and write the HTML report.

        Pipeline inputs listed in the input definition JSON (if given) are
        added to the report's task graph. Then, for every output in the
        output definition JSON, each matching file of each matching task
        node is copied (or soft-linked) under the output directory when a
        'path' is defined, a URL is resolved for it when possible, and it
        is registered in the report's file table, UCSC tracks and task
        graph according to the 'table', 'ucsc_track' and 'node'/'subgraph'
        definitions. The report is saved at the end.
        """
        report_kwargs = {
            'out_dir': self._out_dir,
            'workflow_id': self._cm.get_workflow_id(),
            'dag': self._task_graph,
            'task_graph_template': self._task_graph_template,
            'public_gcs': self._public_gcs,
            'gcp_private_key': self._gcp_private_key,
            'use_presigned_url_gcs': self._use_presigned_url_gcs,
            'use_presigned_url_s3': self._use_presigned_url_s3,
            'duration_presigned_url_s3': self._duration_presigned_url_s3,
            'duration_presigned_url_gcs': self._duration_presigned_url_gcs,
            'map_path_to_url': self._map_path_to_url,
            'ucsc_genome_db': self._ucsc_genome_db,
            'ucsc_genome_pos': self._ucsc_genome_pos,
        }
        report = CrooHtmlReport(**report_kwargs)

        input_defs = self._input_def_json
        if input_defs is not None:
            for input_name, input_def in input_defs.items():
                node_fmt = input_def.get('node')
                subgraph_fmt = input_def.get('subgraph')

                for _, node in self._task_graph.get_nodes():
                    # a pipeline input is an 'output' node with no task
                    # whose name matches the defined input
                    is_pipeline_input = (
                        node.type == 'output' and node.task_name is None
                        and node.output_name == input_name)
                    if not is_pipeline_input:
                        continue
                    src_uri = node.output_path
                    shard = node.shard_idx

                    if node_fmt is None:
                        continue
                    node_label = Croo.__interpret_inline_exp(
                        node_fmt, src_uri, shard)
                    subgraph_label = None
                    if subgraph_fmt is not None:
                        subgraph_label = Croo.__interpret_inline_exp(
                            subgraph_fmt, src_uri, shard)
                    report.add_to_task_graph(
                        node.output_name,
                        None,
                        shard,
                        src_uri,
                        node_label,
                        subgraph_label,
                    )

        for task_name, output_defs in self._out_def_json.items():
            for output_name, output_def in output_defs.items():
                path_fmt = output_def.get('path')
                table_fmt = output_def.get('table')
                track_fmt = output_def.get('ucsc_track')
                node_fmt = output_def.get('node')
                subgraph_fmt = output_def.get('subgraph')

                # a URL is looked up only if the output is used somewhere
                needs_url = (
                    path_fmt is not None or table_fmt is not None
                    or track_fmt is not None or node_fmt is not None)

                for _, node in self._task_graph.get_nodes():
                    # keep only 'task' nodes that belong to this task
                    is_this_task = (
                        task_name == node.task_name and node.type == 'task')
                    if not is_this_task:
                        continue
                    task_outputs = node.all_outputs
                    shard = node.shard_idx
                    if not task_outputs:
                        continue

                    for out_key, src_uri, _ in task_outputs:
                        if out_key != output_name:
                            continue

                        # Where the file ends up; stays at the source URI
                        # unless it is copied or soft-linked below.
                        final_uri = src_uri
                        if path_fmt is not None:
                            rel_path = Croo.__interpret_inline_exp(
                                path_fmt, src_uri, shard)

                            src = AutoURI(src_uri)
                            dest_path = os.path.join(self._out_dir, rel_path)

                            if not self._soft_link:
                                final_uri = src.cp(
                                    dest_path,
                                    no_checksum=self._no_checksum,
                                    make_md5_file=True,
                                    no_lock=True,
                                )
                            else:
                                dest = AutoURI(dest_path)
                                # link only when both ends are AbsPath;
                                # otherwise the source URI is kept as is
                                if (isinstance(src, AbsPath)
                                        and isinstance(dest, AbsPath)):
                                    src.soft_link(dest_path, force=True)
                                    final_uri = dest_path

                        # get a public/presigned/mapped URL if possible
                        final_url = None
                        if needs_url:
                            final = AutoURI(final_uri)

                            if isinstance(final, GCSURI):
                                if self._public_gcs:
                                    final_url = final.get_public_url()
                                elif self._use_presigned_url_gcs:
                                    gcs_duration = (
                                        self._duration_presigned_url_gcs)
                                    final_url = final.get_presigned_url(
                                        duration=gcs_duration,
                                        private_key_file=self._gcp_private_key,
                                    )
                            elif isinstance(final, S3URI):
                                if self._use_presigned_url_s3:
                                    s3_duration = (
                                        self._duration_presigned_url_s3)
                                    final_url = final.get_presigned_url(
                                        duration=s3_duration)
                            elif isinstance(final, AbsPath):
                                if self._map_path_to_url:
                                    final_url = final.get_mapped_url(
                                        map_path_to_url=self._map_path_to_url)

                        if table_fmt is not None:
                            # add to file table
                            table_entry = Croo.__interpret_inline_exp(
                                table_fmt, src_uri, shard)
                            report.add_to_file_table(
                                final_uri, final_url, table_entry)

                        # a UCSC track is added only when a URL exists
                        if track_fmt is not None and final_url is not None:
                            track_entry = Croo.__interpret_inline_exp(
                                track_fmt, src_uri, shard)
                            report.add_to_ucsc_track(final_url, track_entry)

                        if node_fmt is not None:
                            node_label = Croo.__interpret_inline_exp(
                                node_fmt, src_uri, shard)
                            subgraph_label = None
                            if subgraph_fmt is not None:
                                subgraph_label = Croo.__interpret_inline_exp(
                                    subgraph_fmt, src_uri, shard)
                            # prefer the URL, fall back to the source URI
                            if final_url is not None:
                                node_target = final_url
                            else:
                                node_target = src_uri
                            report.add_to_task_graph(
                                output_name,
                                task_name,
                                shard,
                                node_target,
                                node_label,
                                subgraph_label,
                            )

        # write to html report
        report.save_to_file()