Example #1
    def __init__(self, parser: PipelineSpecificationParser):
        """
        Constructor.

        :param parser: A pipeline specification file parser.
        """
        end_node_pipeline = parser.get_end_node_pipeline()
        files_by_pipeline = parser.get_pipeline_files()
        inputs_by_pipeline = parser.get_pipeline_inputs()
        print(f'end node pipeline: {end_node_pipeline}')
        self.dag_builder = DagBuilder(end_node_pipeline, files_by_pipeline,
                                      inputs_by_pipeline)
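The later examples construct this object as DagManager(parser) and call dag_manager.get_dag_builder() on it. A minimal sketch of how the constructor above could sit inside such a class follows; the class name and the one-line accessor body are inferred from that usage, not copied from the project source.

# Sketch: the constructor above wrapped in the class that the other examples
# use. The class name DagManager and the get_dag_builder() accessor are
# inferred from usage (DagManager(parser) and dag_manager.get_dag_builder()
# in the main() example below); the accessor body is an assumption.
class DagManager:

    def __init__(self, parser: PipelineSpecificationParser):
        # Body as shown above: build a DagBuilder from the parsed specifications.
        end_node_pipeline = parser.get_end_node_pipeline()
        files_by_pipeline = parser.get_pipeline_files()
        inputs_by_pipeline = parser.get_pipeline_inputs()
        self.dag_builder = DagBuilder(end_node_pipeline, files_by_pipeline,
                                      inputs_by_pipeline)

    def get_dag_builder(self) -> DagBuilder:
        # Assumed one-line accessor.
        return self.dag_builder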
Example #2

def main() -> None:
    parser = argparse.ArgumentParser()
    parser.add_argument('--host',
                        required=True,
                        help='Only the hostname of a gRPC URL.')
    parser.add_argument('--port', required=True, help='The port number.')
    parser.add_argument('--specification',
                        required=True,
                        help='A DAG end node pipeline specification path.')
    parser.add_argument('--specifications',
                        required=True,
                        help='A path containing pipeline specification files.')
    args = parser.parse_args()
    host = args.host
    port = int(args.port)
    specification = Path(args.specification)
    specifications = Path(args.specifications)

    print(f'host: {host}')
    print(f'port: {port}')
    print(f'specification: {specification}')
    print(f'specifications: {specifications}')

    client = python_pachyderm.Client(host=host, port=port)

    parser = PipelineSpecificationParser(specification, specifications)
    dag_manager = DagManager(parser)
    dag_builder = dag_manager.get_dag_builder()
    pipeline_names = dag_builder.get_pipeline_names()

    total_upload = 0
    total_download = 0
    total_process = 0
    for pipeline_name in pipeline_names:
        job = data_finder.get_latest_job(client, pipeline_name)
        if job is None:
            print(f'No jobs are available for {pipeline_name}')
        else:
            job_data = data_finder.get_job_run_times(job)
            upload_time = job_data.get('upload')
            download_time = job_data.get('download')
            process_time = job_data.get('process')
            datums_processed = job_data.get('datums_processed')
            print(f'pipeline: {pipeline_name} '
                  f'upload time: {upload_time} sec. '
                  f'download time: {download_time} sec. '
                  f'process time: {process_time} sec. '
                  f'datums processed: {datums_processed}')
            if upload_time is not None:
                total_upload += upload_time
            if download_time is not None:
                total_download += download_time
            if process_time is not None:
                total_process += process_time
    print(f'total upload: {total_upload} sec. '
          f'total download: {total_download} sec. '
          f'total process: {total_process} sec.')
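The function above is a command-line entry point; a typical guard and invocation would look like the sketch below. The script name and argument values in the comment are placeholders, not taken from the original project.

# Hypothetical entry-point guard for the script above.
if __name__ == '__main__':
    main()

# Illustrative command line (script name, host, and paths are placeholders):
#   python job_times.py --host pachd.example.com --port 30650 \
#       --specification specs/end_node_pipeline.json --specifications specs/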
Example #3
def main(end_node_specification: str, specification_dir: str):
    """
    Graph a DAG.

    :param end_node_specification: The end node pipeline specification file.
    :param specification_dir: A directory containing the DAG pipeline specification files.
    """
    parser = PipelineSpecificationParser(Path(end_node_specification), Path(specification_dir))
    manager = DagManager(parser)
    manager.graph_dag()
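Examples #3 through #6 share the same signature and differ only in the DagManager method they call. A direct call or a small argparse wrapper, sketched below with hypothetical flag names and paths, is enough to run any of them; the original CLI wiring is not shown in these snippets.

import argparse

# Hypothetical CLI wiring for the main() functions in Examples #3 to #6.
# The flag names mirror the --specification/--specifications flags of the
# earlier main() example; the values passed here are placeholders.
if __name__ == '__main__':
    cli = argparse.ArgumentParser()
    cli.add_argument('--specification', required=True,
                     help='A DAG end node pipeline specification path.')
    cli.add_argument('--specifications', required=True,
                     help='A path containing pipeline specification files.')
    cli_args = cli.parse_args()
    main(cli_args.specification, cli_args.specifications)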
Example #4
def main(end_node_specification: str, specification_dir: str):
    """
    Update a DAG without reprocessing from the given end node up to the root nodes.

    :param end_node_specification: The end node pipeline specification file.
    :param specification_dir: A directory containing the DAG pipeline specification files.
    """
    parser = PipelineSpecificationParser(Path(end_node_specification), Path(specification_dir))
    manager = DagManager(parser)
    manager.update_dag()
Example #5
def main(end_node_specification: str, specification_dir: str):
    """
    Delete a DAG from the given end node.

    :param end_node_specification: The end node pipeline specification file.
    :param specification_dir: A directory containing the DAG pipeline specification files.
    """
    parser = PipelineSpecificationParser(Path(end_node_specification),
                                         Path(specification_dir))
    manager = DagManager(parser)
    manager.delete_dag()
Example #6
def main(end_node_specification: str, specification_dir: str):
    """
    Update a DAG with reprocessing from the given end node up to the root nodes.
    !!! This script performs the update in a single transaction.
    !!! If it does not complete, be sure to finish the transaction manually using 'pachctl finish transaction'.

    :param end_node_specification: The end node pipeline specification file.
    :param specification_dir: A directory containing the DAG pipeline specification files.
    """
    parser = PipelineSpecificationParser(Path(end_node_specification), Path(specification_dir))
    manager = DagManager(parser)
    manager.update_reprocess_dag()
    def test_parse_json(self):
        parser = PipelineSpecificationParser(self.json_path, self.json_root)
        self.assertEqual(len(parser.get_pipeline_files()), 1)
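The test above relies on self.json_path and self.json_root fixtures that the excerpt does not show. A minimal setUp sketch follows; the attribute names come from the test body, while the test class name and the paths are hypothetical.

import unittest
from pathlib import Path

# Hypothetical fixture setup for the test above. The attribute names match
# the test body; the class name and the paths are placeholders.
class PipelineSpecificationParserTest(unittest.TestCase):

    def setUp(self):
        self.json_root = Path('tests/specifications')
        self.json_path = self.json_root / 'end_node_pipeline.json'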