Пример #1
0
def _run():
    """
    Do the work identified by a todo file, or by local files when the
    configuration asks for them.

    The data source handed to the runner depends on the configuration:
      - local files are not in use: a TodoFileDataSource,
      - local files are in use and are cleaned up when stored: a
        DAOLocalFilesDataSource,
      - local files are in use otherwise: None, which leaves the choice
        of data source to rc.run_by_todo.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config, clients, name_builder, metadata_reader = _common()
    if not config.use_local_files:
        files_source = dsc.TodoFileDataSource(config)
    elif config.cleanup_files_when_storing:
        files_source = data_source.DAOLocalFilesDataSource(
            config, clients.data_client, metadata_reader
        )
    else:
        # no explicit source: the runner picks one
        files_source = None
    return rc.run_by_todo(
        config=config,
        clients=clients,
        name_builder=name_builder,
        source=files_source,
        metadata_reader=metadata_reader,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
    )
def test_todo_file():
    """
    Check that TodoFileDataSource.get_work reports two entries for a todo
    file holding two file names followed by an empty line.

    The todo file is created in the test data directory and removed again
    once the checks have run, whether or not they pass.
    """
    todo_fqn = os.path.join(tc.TEST_DATA_DIR, 'todo.txt')
    with open(todo_fqn, 'w') as todo_file:
        # the trailing empty line must not be counted as work
        todo_file.writelines(['file1\n', 'file2\n', '\n'])
    try:
        test_config = mc.Config()
        test_config.work_fqn = todo_fqn
        test_result = dsc.TodoFileDataSource(test_config).get_work()
        assert test_result is not None, 'expect result'
        assert len(test_result) == 2, 'wrong number of files'
    finally:
        if os.path.exists(todo_fqn):
            os.unlink(todo_fqn)
Пример #3
0
def _run():
    """
    Run the pipeline over a listing of file names.

    A todo file with file names is used, even though Gemini provides
    information about existing data referenced by observation ID. When
    local files are in use, or SCRAPE is one of the configured task types,
    the work comes from a ListDirSeparateDataSource instead of the todo
    file.

    :return the value returned by rc.run_by_todo
    """
    init_values = _common_init()
    clients, config, metadata_reader, meta_visitors, name_builder = init_values

    list_directories = (
        config.use_local_files or mc.TaskType.SCRAPE in config.task_types
    )
    if list_directories:
        source_class = dsc.ListDirSeparateDataSource
    else:
        source_class = dsc.TodoFileDataSource
    source = source_class(config)

    return rc.run_by_todo(
        clients=clients,
        config=config,
        metadata_reader=metadata_reader,
        meta_visitors=meta_visitors,
        name_builder=name_builder,
        source=source,
    )
Пример #4
0
def run_by_todo(
    config=None,
    name_builder=None,
    chooser=None,
    command_name=None,
    source=None,
    meta_visitors=None,
    data_visitors=None,
    modify_transfer=None,
    store_transfer=None,
    clients=None,
):
    """A default implementation for using the TodoRunner.

    Every argument is optional. Anything left as None is replaced with a
    default built from the configuration before the runner is created.

    :param config Config instance. When None, a Config is created and its
        get_executors method is called.
    :param name_builder NameBuilder extension that creates an instance of
        a StorageName extension, from an entry from a DataSourceComposable
        listing. When None, a StorageNameInstanceBuilder for
        config.collection is used.
    :param chooser OrganizerChooser, if there's strange rules about file
        naming.
    :param command_name string that represents the specific pipeline
        application name
    :param source DataSource implementation, if there's a special data source.
        When None, a ListDirSeparateDataSource is used if
        config.use_local_files is set, otherwise a TodoFileDataSource.
    :param meta_visitors list of modules with visit methods, that expect
        the metadata of a work file to exist on disk. When None, a new empty
        list is used.
    :param data_visitors list of modules with visit methods, that expect the
        work file to exist on disk. When None, a new empty list is used.
    :param modify_transfer Transfer extension that identifies how to retrieve
        data from a source for modification of CAOM2 metadata. By this time,
        files are usually stored at CADC, so it's probably a CadcTransfer
        instance, but this allows for the case that a file is never stored
        at CADC. Try to guess what this one is.
    :param store_transfer Transfer extension that identifies how to retrieve
        data from a source for storage at CADC, probably an HTTP or FTP site.
        Don't try to guess what this one is.
    :param clients: ClientCollection instance. When None, one is created
        from the config.
    :return the result of the runner's run() combined, with a bitwise OR,
        with the result of its run_retry()
    """
    # Use a fresh list per call: a list literal in the signature would be
    # one shared object, visible to (and mutable by) every call.
    if meta_visitors is None:
        meta_visitors = []
    if data_visitors is None:
        data_visitors = []

    if config is None:
        config = mc.Config()
        config.get_executors()
    _set_logging(config)
    if clients is None:
        clients = cc.ClientCollection(config)

    if name_builder is None:
        name_builder = name_builder_composable.StorageNameInstanceBuilder(
            config.collection)

    if source is None:
        if config.use_local_files:
            source = data_source_composable.ListDirSeparateDataSource(
                config, recursive=config.recurse_data_sources)
        else:
            source = data_source_composable.TodoFileDataSource(config)

    modify_transfer = _set_modify_transfer(modify_transfer, config,
                                           clients.data_client)

    organizer = ec.OrganizeExecutes(
        config,
        command_name,
        meta_visitors,
        data_visitors,
        chooser,
        store_transfer,
        modify_transfer,
        cadc_client=clients.data_client,
        caom_client=clients.metadata_client,
    )

    runner = TodoRunner(config, organizer, name_builder, source)
    result = runner.run()
    # a failure in either the first pass or the retries marks the run
    result |= runner.run_retry()
    runner.report()
    return result
Пример #5
0
 def _reset_for_retry(self, count):
     """Prepare this runner for another retry pass.

     :param count value handed to the config's update_for_retry method
     """
     self._config.update_for_retry(count)
     # each retry logs to a different location, so refresh the organizer
     self._organizer.set_log_location()
     # retries always read their work from a todo file built from the
     # updated config
     retry_source = data_source_composable.TodoFileDataSource(self._config)
     self._data_source = retry_source