Example #1
    def test_nested_observe_progress(self):
        observer = MyProgressObserver()
        observer.deactivate()
        observer.activate()

        with observe_progress('computing', 4) as reporter:
            # do something that takes 1 unit
            reporter.worked(1)
            # do something that takes 1 unit
            reporter.worked(1)
            # do something that will take 2 units
            reporter.will_work(2)
            with observe_progress('loading', 4) as nested_reporter:
                # do something that takes 3 units
                nested_reporter.worked(3)
                # do something that takes 1 unit
                nested_reporter.worked(1)

        self.assertEqual([('begin', [('computing', 0.0, False)]),
                          ('update', [('computing', 0.25, False)]),
                          ('update', [('computing', 0.5, False)]),
                          ('begin', [('computing', 0.5, False),
                                     ('loading', 0.0, False)]),
                          ('update', [('computing', 0.875, False),
                                      ('loading', 0.75, False)]),
                          ('update', [('computing', 1.0, False),
                                      ('loading', 1.0, False)]),
                          ('end', [('computing', 1.0, False),
                                   ('loading', 1.0, True)]),
                          ('end', [('computing', 1.0, True)])], observer.calls)
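The tests in these examples rely on a MyProgressObserver helper that the snippets never define. Below is a minimal sketch of what it might look like. It assumes the xcube.util.progress API: a ProgressObserver base class providing activate()/deactivate() plus on_begin/on_update/on_end callbacks, each receiving the stack of ProgressState objects, and a ProgressState exposing label, progress, finished, and a sys.exc_info()-style exc_info tuple. Note also how nested ratios combine in the assertions above: after will_work(2) on the 4-unit 'computing' scope, the nested reporter's 0.75 contributes 2/4 × 0.75 = 0.375, giving the observed 0.875.

import traceback
from typing import Sequence

from xcube.util.progress import ProgressObserver, ProgressState


class MyProgressObserver(ProgressObserver):
    """Records each callback as (event, [(label, progress, finished), ...]);
    with record_errors=True, each state tuple gets a fourth error element."""

    def __init__(self, record_errors: bool = False):
        self.record_errors = record_errors
        self.calls = []

    def on_begin(self, state_stack: Sequence[ProgressState]):
        self._record('begin', state_stack)

    def on_update(self, state_stack: Sequence[ProgressState]):
        self._record('update', state_stack)

    def on_end(self, state_stack: Sequence[ProgressState]):
        self._record('end', state_stack)

    def _record(self, event: str, state_stack: Sequence[ProgressState]):
        states = []
        for s in state_stack:
            state = (s.label, s.progress, s.finished)
            if self.record_errors:
                # Render exc_info as (type name, message, traceback lines),
                # matching the assertions in the exception test below.
                if s.exc_info is not None:
                    exc_type, exc_value, exc_tb = s.exc_info
                    error = (exc_type.__name__, f'{exc_value}',
                             traceback.format_tb(exc_tb))
                else:
                    error = None
                state += (error,)
            states.append(state)
        self.calls.append((event, states))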
Example #2
    def test_nested_observe_progress_with_new_progress_observers(self):
        observer = MyProgressObserver()
        observer.activate()

        nested_observer = MyProgressObserver()

        with observe_progress('computing', 4) as progress_reporter:
            # do something that takes 1 unit
            progress_reporter.worked(1)
            # do something that takes 1 unit
            progress_reporter.worked(1)
            with new_progress_observers(nested_observer):
                with observe_progress('loading', 4) as progress_reporter_2:
                    # do something that takes 3 units
                    progress_reporter_2.worked(3)
                    # do something that takes 1 unit
                    progress_reporter_2.worked(1)

            # do something that takes 2 units
            progress_reporter.worked(2)

        self.assertEqual([('begin', [('computing', 0.0, False)]),
                          ('update', [('computing', 0.25, False)]),
                          ('update', [('computing', 0.5, False)]),
                          ('update', [('computing', 1.0, False)]),
                          ('end', [('computing', 1.0, True)])], observer.calls)

        self.assertEqual([('begin', [('loading', 0.0, False)]),
                          ('update', [('loading', 0.75, False)]),
                          ('update', [('loading', 1.0, False)]),
                          ('end', [('loading', 1.0, True)])],
                         nested_observer.calls)
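Beyond tests, new_progress_observers is how a sub-task's progress gets routed to a separate sink without disturbing the globally activated observers, as the assertions above demonstrate. A minimal usage sketch, reusing ConsoleProgressObserver as seen in Example #9 (the labels and unit counts are illustrative):

from xcube.util.progress import (ConsoleProgressObserver,
                                 new_progress_observers,
                                 observe_progress)

with observe_progress('outer task', 2) as reporter:
    reporter.worked(1)
    # Only the nested scope reports to the console; the outer scope
    # keeps reporting to whatever observers were activated before.
    with new_progress_observers(ConsoleProgressObserver()):
        with observe_progress('sub-task', 1) as sub_reporter:
            sub_reporter.worked(1)
    reporter.worked(1)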
Example #3
    def test_nested_observe_progress_with_exception(self):
        observer = MyProgressObserver(record_errors=True)
        observer.activate()

        try:
            with observe_progress('computing', 10) as reporter:
                # do something that takes 1 unit
                reporter.worked(1)
                # do something that takes 1 unit
                reporter.worked(1)
                # do something that will take 8 units
                reporter.will_work(8)
                with observe_progress('loading', 100) as nested_reporter:
                    # do something that takes 15 units
                    nested_reporter.worked(15)
                    # now - BANG!
                    raise ValueError('Failed to load')
        except ValueError:
            pass
        self.assertEqual(7, len(observer.calls))
        self.assertEqual([
            ('begin', [('computing', 0.0, False, None)]),
            ('update', [('computing', 0.1, False, None)]),
            ('update', [('computing', 0.2, False, None)]),
            ('begin', [('computing', 0.2, False, None),
                       ('loading', 0.0, False, None)]),
            ('update', [('computing', 0.32, False, None),
                        ('loading', 0.15, False, None)]),
        ], observer.calls[0:-2])

        self.assertEqual(2, len(observer.calls[-2]))
        event, states = observer.calls[-2]
        self.assertEqual('end', event)
        self.assertEqual(2, len(states))
        self.assertEqual(4, len(states[0]))
        self.assertEqual(4, len(states[1]))
        self.assertEqual(('computing', 0.32, False), states[0][0:-1])
        self.assertEqual(('loading', 0.15, True), states[1][0:-1])
        error = states[0][-1]
        self.assertIsNone(error)
        error = states[1][-1]
        self.assertIsInstance(error, tuple)
        exc_type, exc_value, exc_traceback = error
        self.assertEqual('ValueError', exc_type)
        self.assertEqual('Failed to load', exc_value)
        self.assertIsInstance(exc_traceback, list)

        self.assertEqual(2, len(observer.calls[-1]))
        event, states = observer.calls[-1]
        self.assertEqual('end', event)
        self.assertEqual(1, len(states))
        self.assertEqual(4, len(states[0]))
        self.assertEqual(('computing', 0.32, True), states[0][0:-1])
        error = states[0][-1]
        self.assertIsInstance(error, tuple)
        exc_type, exc_value, exc_traceback = error
        self.assertEqual('ValueError', exc_type)
        self.assertEqual('Failed to load', exc_value)
        self.assertIsInstance(exc_traceback, list)
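Two details in the expected calls are worth spelling out. First, the nested 'loading' scope maps its 15/100 = 0.15 progress into the 8 units reserved by will_work(8), so 'computing' advances to 0.2 + (8/10) × 0.15 = 0.32. Second, the exception yields two separate 'end' events: one for the inner scope, where 'loading' is finished and carries the error while 'computing' does not, and a final one for the outer scope, where the same error is attached to 'computing'.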
Example #4
    def _generate_cube(self, request: CubeGeneratorRequestLike) \
            -> CubeGeneratorResult:
        request = CubeGeneratorRequest.normalize(request).for_service()

        response = self._submit_gen_request(request)
        cubegen_id, result, _ = \
            self._get_cube_generator_result(response)
        if result is not None:
            return result

        last_worked = 0
        with observe_progress('Generating cube', 100) as cm:
            while True:
                time.sleep(self._progress_period)

                response = requests.get(
                    self.endpoint_op(f'cubegens/{cubegen_id}'),
                    headers=self.auth_headers)
                _, result, progress = \
                    self._get_cube_generator_result(response)
                if result is not None:
                    return result

                if progress is not None and len(progress) > 0:
                    progress_state = progress[0].state
                    total_work = progress_state.total_work
                    progress = progress_state.progress or 0
                    worked = progress * total_work
                    work = 100 * ((worked - last_worked) / total_work)
                    if work > 0:
                        cm.worked(work)
                        last_worked = worked
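The delta logic above converts the service's fractional progress back into its own work units (worked = progress * total_work) and then rescales the increment to the local 100-unit scope (work = 100 * (worked - last_worked) / total_work), so each poll reports only the work done since the previous poll.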
Example #5
    def describe_datasets(self) -> Sequence[DatasetDescriptor]:
        descriptors = []
        with observe_progress('Fetching dataset information',
                              len(self._input_configs)) as progress:
            for input_config in self._input_configs:
                descriptors.append(self._describe_dataset(input_config))
                progress.worked(1)
        return descriptors
Example #6
    def open_cube(self, input_config: InputConfig) -> TransformedCube:
        cube_config = self._cube_config
        cube_params = cube_config.to_dict()
        opener_id = input_config.opener_id
        store_params = input_config.store_params or {}
        open_params = input_config.open_params or {}

        with observe_progress('reading cube', 3) as observer:
            try:
                if input_config.store_id:
                    store_instance = get_data_store_instance(
                        input_config.store_id,
                        store_params=store_params,
                        store_pool=self._store_pool)
                    store = store_instance.store
                    if opener_id is None:
                        opener_id = self._get_opener_id(input_config, store)
                    opener = store
                    open_params = dict(open_params)
                    open_params['opener_id'] = opener_id
                else:
                    opener = new_data_opener(opener_id)
                    open_params = dict(open_params)
                    open_params.update(store_params)

                open_params_schema = opener.get_open_data_params_schema(
                    input_config.data_id)

                dataset_open_params = {
                    k: v
                    for k, v in cube_params.items()
                    if k in open_params_schema.properties
                }

                observer.worked(1)

                dataset = opener.open_data(input_config.data_id, **open_params,
                                           **dataset_open_params)
                observer.worked(1)

            except DataStoreError as dse:
                raise CubeGeneratorError(f'{dse}', status_code=400) from dse

            # Turn dataset into cube and grid_mapping
            try:
                cube, gm, _ = decode_cube(dataset, normalize=True)
            except DatasetIsNotACubeError as e:
                raise CubeGeneratorError(f'{e}') from e
            observer.worked(1)

        if dataset_open_params:
            drop_names = [
                k for k in dataset_open_params.keys()
                if k not in _STEADY_CUBE_CONFIG_NAMES
            ]
            cube_config = cube_config.drop_props(drop_names)

        return cube, gm, cube_config
Example #7
File: combiner.py Project: dcs4cop/xcube
    def combine_cubes(self, t_cubes: Sequence[TransformedCube]) \
            -> TransformedCube:
        cube, gm, _ = t_cubes[0]
        if len(t_cubes) == 1:
            return cube, gm, self._cube_config

        with observe_progress('merging cubes', 1) as observer:
            cube = xr.merge([t_cube[0] for t_cube in t_cubes])
            observer.worked(1)

        return cube, gm, self._cube_config
Example #8
def resample_and_merge_cubes(cubes: List[xr.Dataset],
                             cube_config: CubeConfig) -> xr.Dataset:
    with observe_progress('Resampling cube(s)', len(cubes) + 1) as progress:
        resampled_cubes = []
        for cube in cubes:
            resampled_cube = resample_cube(cube, cube_config)
            resampled_cubes.append(resampled_cube)
            progress.worked(1)
        merged_cube = xr.merge(resampled_cubes) if len(
            resampled_cubes) > 1 else resampled_cubes[0]
        progress.worked(1)
        return merged_cube
Example #9
def main(gen_config_path: str,
         store_configs_path: str = None,
         verbose: bool = False):
    """
    Generator tool for data cubes.

    Creates cube views from one or more cube stores, resamples them to a common grid,
    optionally performs some cube transformation,
    and writes the resulting cube to some target cube store.

    *gen_config_path* is the cube generator configuration. It may be provided as a JSON or YAML file
    (file extensions ".json" or ".yaml"). If the *gen_config_path* argument is omitted, it is expected that
    the cube generator configuration is piped as a JSON string.

    *store_configs_path* is a path to a JSON file with data store configurations. It is a mapping of names to
    configured stores. Entries are dictionaries that have a mandatory "store_id" property which is a name of a
    registered xcube data store. The optional "store_params" property may define data store specific parameters.

    :param gen_config_path: Cube generator configuration. It may be provided as a JSON or YAML file
        (file extensions ".json" or ".yaml"). If the *gen_config_path* argument is omitted, it is expected that
        the cube generator configuration is piped as a JSON string.
    :param store_configs_path: A JSON file that maps store names to parameterized stores.
    :param verbose: Whether to output progress information to stdout.
    """

    store_pool = DataStorePool.from_file(
        store_configs_path) if store_configs_path else DataStorePool()

    gen_config = GenConfig.from_file(gen_config_path, verbose=verbose)

    if gen_config.callback_config:
        ApiProgressCallbackObserver(gen_config.callback_config).activate()
    if verbose:
        ConsoleProgressObserver().activate()

    with observe_progress('Generating cube', 100) as cm:
        cm.will_work(10)
        cubes = open_cubes(gen_config.input_configs,
                           cube_config=gen_config.cube_config,
                           store_pool=store_pool)

        cm.will_work(10)
        cube = resample_and_merge_cubes(cubes,
                                        cube_config=gen_config.cube_config)

        cm.will_work(80)
        data_id = write_cube(cube,
                             output_config=gen_config.output_config,
                             store_pool=store_pool)

    if verbose:
        print('Cube "{}" generated within {:.2f} seconds'.format(
            str(data_id), cm.state.total_time))
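The docstring describes the expected store-configs file, but none of these examples shows one. Here is a hypothetical file being written from Python; the store names, store ids ("s3", "file") and all parameter values are illustrative assumptions, not values taken from xcube:

import json

# A mapping of names to entries with a mandatory "store_id" and
# optional "store_params", as described in the docstring above.
store_configs = {
    "my-input-store": {
        "store_id": "s3",
        "store_params": {"root": "my-bucket/inputs"}
    },
    "my-output-store": {
        "store_id": "file",
        "store_params": {"root": "/data/cubes"}
    }
}

with open("stores.json", "w") as f:
    json.dump(store_configs, f, indent=2)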
Example #10
def open_cubes(input_configs: Sequence[InputConfig],
               cube_config: CubeConfig,
               store_pool: DataStorePool = None):
    cubes = []
    all_cube_params = cube_config.to_dict()
    with observe_progress('Opening input(s)', len(input_configs)) as progress:
        for input_config in input_configs:
            open_params = {}
            opener_id = input_config.opener_id
            if input_config.store_id:
                store_instance = get_data_store_instance(
                    input_config.store_id,
                    store_params=input_config.store_params,
                    store_pool=store_pool)
                store = store_instance.store
                if opener_id is None:
                    opener_ids = store.get_data_opener_ids(
                        data_id=input_config.data_id,
                        type_specifier=TYPE_SPECIFIER_CUBE)
                    if not opener_ids:
                        raise DataStoreError(
                            f'Data store "{input_config.store_id}" does not support data cubes'
                        )
                    opener_id = opener_ids[0]
                opener = store
                open_params.update(opener_id=opener_id,
                                   **input_config.open_params)
            else:
                opener = new_data_opener(opener_id)
                open_params.update(**input_config.store_params,
                                   **input_config.open_params)
            open_params_schema = opener.get_open_data_params_schema(
                input_config.data_id)
            cube_params = {
                k: v
                for k, v in all_cube_params.items()
                if k in open_params_schema.properties
            }
            cube = opener.open_data(input_config.data_id, **open_params,
                                    **cube_params)
            cubes.append(cube)
            progress.worked(1)

    return cubes
Example #11
def transform_cube(t_cube: TransformedCube,
                   transformer: CubeTransformer,
                   label: str = '') -> TransformedCube:
    empty_cube = is_empty_cube(t_cube[0])
    identity = isinstance(transformer, CubeIdentity)
    if not label:
        label = f'{type(transformer).__name__}'
    if identity:
        label += ' (step not applicable)'
    elif empty_cube:
        label += ' (step not applicable, empty cube)'

    with observe_progress(label, 1) as progress:
        if not (identity or empty_cube):
            t_cube = transformer.transform_cube(*t_cube)
            t_cube = strip_cube(t_cube[0]), t_cube[1], t_cube[2]
        progress.worked(1)

    return t_cube
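The call transformer.transform_cube(*t_cube) above implies that a transformer takes and returns the (cube, grid_mapping, cube_config) triple. A sketch of an illustrative transformer with that shape follows; a real one would subclass xcube's CubeTransformer, and the class and parameter names here are made up:

import xarray as xr


class DropVariablesTransformer:
    """Illustrative transformer that drops the named variables from a cube."""

    def __init__(self, var_names):
        self.var_names = var_names

    def transform_cube(self, cube: xr.Dataset, gm, cube_config):
        # errors='ignore' keeps this safe when a variable is absent.
        cube = cube.drop_vars(self.var_names, errors='ignore')
        return cube, gm, cube_config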
Example #12
    def test_observe_progress(self):
        observer = MyProgressObserver()
        observer.activate()

        with observe_progress('computing', 4) as reporter:
            # do something that takes 1 unit
            reporter.worked(1)
            # do something that takes 1 unit
            reporter.worked(1)
            # do something that takes 2 units
            reporter.worked(2)

        self.assertIsInstance(reporter.state, ProgressState)
        self.assertIsInstance(reporter.state.total_time, float)
        self.assertTrue(reporter.state.total_time >= 0.0)

        self.assertEqual([('begin', [('computing', 0.0, False)]),
                          ('update', [('computing', 0.25, False)]),
                          ('update', [('computing', 0.5, False)]),
                          ('update', [('computing', 1.0, False)]),
                          ('end', [('computing', 1.0, True)])], observer.calls)
Example #13
def write_cube(cube: xr.Dataset,
               output_config: OutputConfig,
               store_pool: DataStorePool = None) -> str:
    with observe_progress('Writing output', 1) as progress:
        write_params = dict()
        if output_config.store_id:
            store_instance = get_data_store_instance(output_config.store_id,
                                                     store_params=output_config.store_params,
                                                     store_pool=store_pool)
            writer = store_instance.store
            write_params.update(writer_id=output_config.writer_id, **output_config.write_params)
        else:
            writer = new_data_writer(output_config.writer_id)
            write_params.update(**output_config.store_params, **output_config.write_params)

        # TODO: develop an adapter from Dask callback to ProgressObserver and use it here.
        data_id = writer.write_data(cube,
                                    data_id=output_config.data_id,
                                    replace=output_config.replace or False,
                                    **write_params)
        progress.worked(1)
        return data_id
Example #14
File: generator.py Project: dcs4cop/xcube
    def __generate_cube(self, request: CubeGeneratorRequest) \
            -> CubeGeneratorResult:

        cube_config = request.cube_config \
            if request.cube_config is not None else CubeConfig()

        opener = CubeOpener(cube_config, store_pool=self._store_pool)

        subsetter = CubeSubsetter()
        resampler_xy = CubeResamplerXY()
        resampler_t = CubeResamplerT()
        combiner = CubesCombiner(cube_config)
        rechunker = CubeRechunker()

        code_config = request.code_config
        if code_config is not None:
            code_executor = CubeUserCodeExecutor(code_config)
            post_rechunker = CubeRechunker()
        else:
            code_executor = CubeIdentity()
            post_rechunker = CubeIdentity()

        md_adjuster = CubeMetadataAdjuster()

        cube_writer = CubeWriter(request.output_config,
                                 store_pool=self._store_pool)

        num_inputs = len(request.input_configs)
        # Estimated workload:
        opener_work = 10
        resampler_t_work = 1
        resampler_xy_work = 20
        subsetter_work = 1
        combiner_work = num_inputs
        rechunker_work = 1
        executor_work = 1
        post_rechunker_work = 1
        metadata_adjuster_work = 1
        writer_work = 100  # this is where dask processing takes place
        total_work = (opener_work
                      + subsetter_work
                      + resampler_t_work
                      + resampler_xy_work) * num_inputs \
                     + combiner_work \
                     + rechunker_work \
                     + executor_work \
                     + post_rechunker_work \
                     + metadata_adjuster_work \
                     + writer_work

        t_cubes = []
        with observe_progress('Generating cube', total_work) as progress:
            for input_config in request.input_configs:
                progress.will_work(opener_work)
                t_cube = opener.open_cube(input_config)

                progress.will_work(subsetter_work)
                t_cube = transform_cube(t_cube, subsetter, 'subsetting')

                progress.will_work(resampler_t_work)
                t_cube = transform_cube(t_cube, resampler_t,
                                        'resampling in time')

                progress.will_work(resampler_xy_work)
                t_cube = transform_cube(t_cube, resampler_xy,
                                        'resampling in space')

                t_cubes.append(t_cube)

            progress.will_work(combiner_work)
            t_cube = combiner.combine_cubes(t_cubes)

            progress.will_work(rechunker_work)
            t_cube = transform_cube(t_cube, rechunker, 'rechunking')

            progress.will_work(executor_work)
            t_cube = transform_cube(t_cube, code_executor,
                                    'executing user code')

            progress.will_work(post_rechunker_work)
            t_cube = transform_cube(t_cube, post_rechunker, 'post-rechunking')

            progress.will_work(metadata_adjuster_work)
            t_cube = transform_cube(t_cube, md_adjuster, 'adjusting metadata')

            progress.will_work(writer_work)
            cube, gm, _ = t_cube
            if not is_empty_cube(cube):
                data_id, cube = cube_writer.write_cube(cube, gm)
                self._generated_data_id = data_id
                self._generated_cube = cube
                self._generated_gm = gm
            else:
                self._generated_data_id = None
                self._generated_cube = None
                self._generated_gm = None

        total_time = progress.state.total_time

        if self._generated_data_id is not None:
            return CubeGeneratorResult(status='ok',
                                       status_code=201,
                                       result=CubeReference(data_id=data_id),
                                       message=f'Cube generated successfully'
                                       f' after {total_time:.2f} seconds')
        else:
            return CubeGeneratorResult(
                status='warning',
                status_code=422,
                message=f'An empty cube has been generated'
                f' after {total_time:.2f} seconds.'
                f' No data has been written at all.')