示例#1
0
    def test_nightly_doc_util(self):
        """Check `NightlyDocUtil` answers at builder, config and version level.

        `doc_utils._load_nightly_dict` is patched so the util is built from
        an in-memory status tree instead of reading the given path. In that
        tree, a `True` leaf marks the level that the assertions below expect
        to be reported as nightly.
        """
        root_dir = '/tmp/dummy_dir'

        old_config_versions = {
            '2.0.0': True,  # New version of an already existing config
            '1.0.0': False,
        }
        nightly_status = {
            'dummy_dataset': {'': {'1.0.0': False}},
            'dummy_new_ds': True,
            'dummy_new_config': {
                'new_config': True,
                'old_config': old_config_versions,
            },
        }
        loader_patch = mock.patch.object(
            doc_utils, '_load_nightly_dict', return_value=nightly_status)
        with loader_patch:
            nightly_util = doc_utils.NightlyDocUtil(
                path='/tmp/some/patched/path')

        # One builder per scenario described by `nightly_status`.
        existing_ds = tfds.testing.DummyDataset(data_dir=root_dir)
        new_ds = DummyNewDs(data_dir=root_dir)
        new_config_ds = DummyNewConfig(data_dir=root_dir, config='new_config')
        new_version_ds = DummyNewConfig(data_dir=root_dir, config='old_config')

        # Builder level: only `dummy_new_ds` is a new builder.
        builder_is_nightly = nightly_util.is_builder_nightly
        self.assertFalse(builder_is_nightly(existing_ds))
        self.assertTrue(builder_is_nightly(new_ds))
        self.assertFalse(builder_is_nightly(new_config_ds))
        self.assertFalse(builder_is_nightly(new_version_ds))

        # Config level: only `dummy_new_config/new_config` is a new config.
        config_is_nightly = nightly_util.is_config_nightly
        self.assertFalse(config_is_nightly(existing_ds))
        self.assertFalse(config_is_nightly(new_ds))
        self.assertTrue(config_is_nightly(new_config_ds))
        self.assertFalse(config_is_nightly(new_version_ds))

        # Version level: only `dummy_new_config/old_config/2.0.0` is a new
        # version.
        version_is_nightly = nightly_util.is_version_nightly
        self.assertFalse(version_is_nightly(existing_ds, '1.0.0'))
        self.assertFalse(version_is_nightly(new_ds, 'x.x.x'))
        self.assertFalse(version_is_nightly(new_config_ds, 'x.x.x'))
        self.assertFalse(version_is_nightly(new_version_ds, '1.0.0'))
        self.assertTrue(version_is_nightly(new_version_ds, '2.0.0'))

        # Any level: `dummy_dataset` is the only builder without anything
        # nightly.
        has_nightly = nightly_util.has_nightly
        self.assertFalse(has_nightly(existing_ds))
        self.assertTrue(has_nightly(new_ds))
        self.assertTrue(has_nightly(new_config_ds))
        self.assertTrue(has_nightly(new_version_ds))
示例#2
0
def iter_documentation_builders(
    datasets: Optional[List[str]] = None,
    *,
    doc_util_paths: Optional[doc_utils.DocUtilPaths] = None,
) -> Iterator[BuilderDocumentation]:
    """Yield the documentation of each requested dataset builder.

    Builders are documented concurrently in a thread pool and yielded as
    soon as each one completes, so the output order may differ from the
    order of `datasets`.

    Args:
        datasets: Names of the datasets to document. If None or empty, the
            result of `_all_tfds_datasets()` is used instead.
        doc_util_paths: Additional paths for visualization, dataframe and
            nightly info. Each helper is only created when its own path is
            set. If None, no helper is created.

    Yields:
        The documentation information of each builder. Builders for which
        `_document_single_builder` returns None (filtered) are skipped.
    """
    print('Retrieving the list of builders...')
    datasets = datasets or _all_tfds_datasets()

    # Every helper is optional and stays None unless its path is configured.
    visu_doc_util = None
    df_doc_util = None
    nightly_doc_util = None
    if doc_util_paths is not None:
        if doc_util_paths.fig_base_path:
            visu_doc_util = doc_utils.VisualizationDocUtil(
                base_path=doc_util_paths.fig_base_path,
                base_url=doc_util_paths.fig_base_url,
            )
        if doc_util_paths.df_base_path:
            df_doc_util = doc_utils.DataframeDocUtil(
                base_path=doc_util_paths.df_base_path,
                base_url=doc_util_paths.df_base_url,
            )
        # Gate on `nightly_path` itself: checking `fig_base_path` here would
        # tie the nightly info to the unrelated visualization setting.
        if doc_util_paths.nightly_path:
            nightly_doc_util = doc_utils.NightlyDocUtil(
                path=doc_util_paths.nightly_path,
            )

    document_single_builder_fn = functools.partial(
        _document_single_builder,
        visu_doc_util=visu_doc_util,
        df_doc_util=df_doc_util,
        nightly_doc_util=nightly_doc_util,
    )

    # Document all builders
    print(f'Document {len(datasets)} builders...')
    with futures.ThreadPoolExecutor(
            max_workers=_WORKER_COUNT_DATASETS) as tpool:
        tasks = [
            tpool.submit(document_single_builder_fn, name) for name in datasets
        ]
        for future in tqdm.tqdm(futures.as_completed(tasks), total=len(tasks)):
            # `result()` re-raises any exception raised inside the worker.
            builder_doc = future.result()
            if builder_doc is None:  # Builder filtered
                continue
            tqdm.tqdm.write(
                f'Documentation generated for {builder_doc.name}...')
            yield builder_doc
    print('All builder documentations generated!')