Пример #1
0
def vlass_run_single():
    """Ingest a single VLASS file, using values from the command line.

    ``sys.argv[1]`` names the work: it is handed to ``VlassName`` as a file
    name when ``config.features.use_file_names`` is set, and as an
    observation ID otherwise.

    ``sys.argv[2]`` carries the proxy certificate. When
    ``config.features.run_in_airflow`` is set, the value is written to a
    temporary file and the name of that file is used as the proxy location;
    otherwise the value is used directly as the proxy file name.
    """
    import sys
    config = mc.Config()
    config.get_executors()
    config.collection = COLLECTION
    config.working_directory = '/usr/src/app'
    config.use_local_files = False
    config.logging_level = 'INFO'
    config.log_to_file = False
    config.task_types = [mc.TaskType.INGEST]
    config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'
    proxy_file = None
    try:
        if config.features.run_in_airflow:
            # The temporary file has to outlive the run_single call, so it
            # is closed (and thereby removed) explicitly in the finally
            # clause instead of being left to garbage collection.
            proxy_file = tempfile.NamedTemporaryFile()
            mc.write_to_file(proxy_file.name, sys.argv[2])
            config.proxy_fqn = proxy_file.name
        else:
            config.proxy_fqn = sys.argv[2]
        config.stream = 'raw'
        target = sys.argv[1]
        if config.features.use_file_names:
            vlass_name = VlassName(file_name=target)
        else:
            vlass_name = VlassName(obs_id=target)
        ec.run_single(config, vlass_name, APPLICATION,
                      meta_visitors=visitors, data_visitors=None)
    finally:
        if proxy_file is not None:
            proxy_file.close()
Пример #2
0
def test_builder(obs_metadata_mock, tap_client_mock):
    """Check the URIs GemObsIDBuilder produces from a file name entry.

    The builder is exercised with ``supports_latest_client`` both off and
    on, for the INGEST and SCRAPE task types. With ``use_file_names``
    switched off, ``build`` is expected to raise ``mc.CadcException``.
    """
    obs_metadata_mock.side_effect = gem_mocks.mock_get_obs_metadata

    test_config = mc.Config()
    test_config.working_directory = '/test_files'
    test_config.proxy_fqn = os.path.join(
        gem_mocks.TEST_DATA_DIR, 'test_proxy.pem')
    em.init_global(config=test_config)
    test_subject = builder.GemObsIDBuilder(test_config)

    test_entry = 'S20050825S0143.fits'
    for support in (False, True):
        test_config.features.supports_latest_client = support
        test_config.features.use_file_names = True
        for task_type in (mc.TaskType.INGEST, mc.TaskType.SCRAPE):
            test_config.task_types = [task_type]
            built = test_subject.build(test_entry)
            assert built is not None, f'expect a result support {support}'

            # the expected path and thumbnail scheme depend on the client
            if support:
                expected_path = COLLECTION
            else:
                expected_path = ARCHIVE
            expected_file_uri = f'{SCHEME}:{expected_path}/{test_entry}'
            assert built.file_uri == expected_file_uri, 'wrong file uri'

            expected_prev_uri = f'{SCHEME}:{expected_path}/{built.prev}'
            assert built.prev_uri == expected_prev_uri, 'wrong preview uri'

            if support:
                expected_scheme = V_SCHEME
            else:
                expected_scheme = A_SCHEME
            expected_thumb_uri = (
                f'{expected_scheme}:{expected_path}/{built.thumb}')
            assert built.thumb_uri == expected_thumb_uri, 'wrong thumb uri'

        # building from an observation ID is expected to fail
        test_config.task_types = [mc.TaskType.INGEST]
        test_config.features.use_file_names = False
        with pytest.raises(mc.CadcException):
            test_subject.build(test_entry)
Пример #3
0
def _execute_and_check_list_for_validate(ftp_mock, source_list_fqn,
                                         result_count, cache_count):
    """Run scrape.list_for_validate against a mocked FTP host and check it.

    :param ftp_mock: mock whose context-manager result supplies the
        ``listdir`` and ``stat`` calls.
    :param source_list_fqn: YAML file read back after the call.
    :param result_count: expected number of entries in that YAML content.
    :param cache_count: expected number of entries returned by
        ``scrape._read_cache``.
    """
    data_dir = test_main_app.TEST_DATA_DIR
    # seed the directory listing from the checked-in test listing
    shutil.copy(
        os.path.join(data_dir, 'test_source_dir_listing.csv'),
        os.path.join(data_dir, scrape.NEOSSAT_DIR_LIST),
    )

    ftp_session = ftp_mock.return_value.__enter__.return_value
    ftp_session.listdir.side_effect = _list_dirs
    ftp_session.stat.side_effect = _entry_stats

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=data_dir)
    try:
        test_config = mc.Config()
        test_config.get_executors()
        scrape.list_for_validate(test_config)

        listing = mc.read_as_yaml(source_list_fqn)
        assert listing is not None, 'expect a file record'
        assert len(listing) == result_count, 'wrong number of entries'
        expected_entry = f'{MOCK_DIR}/NEOS_SCI_2017213215701_cord.fits'
        assert expected_entry in listing, 'wrong content'

        cached = scrape._read_cache(test_config.working_directory)
        assert cached is not None, 'expected return value'
        assert len(cached) == cache_count, 'wrong number of cached entries'
        expected_cached = f'{MOCK_DIR}/NEOS_SCI_2017213215701.fits'
        assert expected_cached in cached, 'wrong content'
    finally:
        # always undo the os.getcwd patch
        os.getcwd = getcwd_orig
Пример #4
0
def test_provenance_augmentation(dmf_mock, headers_mock, access_mock,
                                 builder_mock, repo_get_mock, test_fqn):
    """Check provenance_augmentation.visit on an expected-observation file.

    The observation read from ``test_fqn`` starts with a non-moving target;
    after the visit the test expects a provenance count of 2, exactly one
    member, and a moving target.
    """
    builder_mock.return_value._get_obs_id.return_value = None
    access_mock.return_value = 'https://localhost'
    rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_config.task_types = [mc.TaskType.VISIT]
    test_observable = mc.Observable(rejected, mc.Metrics(test_config))
    headers_mock.return_value.get_head.side_effect = _get_headers_mock
    dmf_mock.get.side_effect = _get_obs_id_mock
    repo_get_mock.side_effect = _repo_get_mock

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    # the science file name is derived from the expected-observation name
    science_f_name = os.path.basename(test_fqn).replace(
        '.expected.xml', '.fits')
    test_storage_name = GemProcName(entry=science_f_name)
    try:
        test_obs = mc.read_obs_from_file(test_fqn)
        assert not test_obs.target.moving, 'initial conditions moving target'
        test_result = provenance_augmentation.visit(
            test_obs,
            storage_name=test_storage_name,
            working_directory=test_main_app.TEST_DATA_DIR,
            observable=test_observable,
            caom_repo_client=Mock(),
        )
        assert test_result is not None, 'expect a result'
        assert test_result.get('provenance') == 2, 'wrong result'
        assert len(test_obs.members) == 1, 'wrong membership'
        assert test_obs.target.moving, 'should be changed'
    finally:
        os.getcwd = getcwd_orig
Пример #5
0
def test_omm_name():
    """Check the names and URIs produced by OmmBuilder and OmmName.

    The first part builds storage names from a bare file name and from a
    fully-qualified one, and checks the file URI, the source names, and the
    first destination URI. The second part checks the preview and thumbnail
    names (and, for the last case, the observation ID) that OmmName derives
    from a file name.
    """
    test_config = mc.Config()
    test_config.task_types = []
    test_config.use_local_files = True
    test_builder = OmmBuilder(test_config)
    test_name = 'C121212_00001_SCI'
    expected_uri = f'ad:OMM/{test_name}.fits.gz'
    for entry in [test_name, f'/tmp/{test_name}']:
        source_name = f'{entry}.fits'
        test_subject = test_builder.build(source_name)
        assert test_subject.file_uri == expected_uri, 'wrong file uri'
        assert test_subject.source_names == [source_name], \
            'wrong source name'
        assert test_subject.destination_uris[0] == expected_uri, \
            'wrong destination uri'

    # preview and thumbnail names derived from a thumbnail file name
    test_name = 'C121212_sh2-132_J_old_SCIRED'
    file_name = f'{test_name}_prev_256.jpg'
    assert OmmName(file_name=file_name).prev == f'{test_name}_prev.jpg', \
        'wrong preview name'
    assert (OmmName(file_name=file_name).thumb ==
            f'{test_name}_prev_256.jpg'), 'wrong thumbnail name'

    # preview and thumbnail names derived from a science file name
    test_obs_id = 'C121121_J024345.57-021326.4_K'
    test_name = f'{test_obs_id}_SCIRED'
    file_name = f'{test_name}.fits.gz'
    assert OmmName(file_name=file_name).prev == f'{test_name}_prev.jpg', \
        'wrong preview name'
    assert (OmmName(file_name=file_name).thumb ==
            f'{test_name}_prev_256.jpg'), 'wrong thumbnail name'
    assert OmmName(file_name=file_name).obs_id == test_obs_id, \
        'wrong obs id'
Пример #6
0
def _run_state():
    """Run state-based processing, with the bookmark timestamp in the state
    file controlling which files are retrieved from the CSA ftp host.

    Ingestion works from fully-qualified file names on the CSA ftp host,
    because those are difficult to reproduce otherwise.

    :return: the value returned by ``rc.run_by_state``.
    """
    builder = nbc.FileNameBuilder(NEOSSatName)
    config = mc.Config()
    config.get_executors()

    # turn the bookmark into a timestamp for the todo list query
    bookmark = mc.State(config.state_fqn).get_bookmark(NEOS_BOOKMARK)
    start_timestamp = mc.increment_time(bookmark, 0).timestamp()
    todo_list, max_timestamp = scrape.build_todo(
        start_timestamp, config.working_directory, config.state_fqn)

    end_time = datetime.fromtimestamp(max_timestamp)
    source = data_source.IncrementalSource(todo_list)
    store_transfer = tc.FtpTransfer(config.data_source)
    return rc.run_by_state(
        config=config,
        name_builder=builder,
        command_name=APPLICATION,
        bookmark_name=NEOS_BOOKMARK,
        meta_visitors=META_VISITORS,
        data_visitors=DATA_VISITORS,
        end_time=end_time,
        chooser=None,
        source=source,
        store_transfer=store_transfer,
    )
Пример #7
0
def test_store(put_mock):
    """Check that ec.Store fetches the file from its URL and then puts it.

    The transfer mock is expected to be called with the source URL and a
    destination under ``/tmp/<obs_id>/``; the put mock is expected to
    receive the file name as its third positional argument.
    """
    test_config = mc.Config()
    test_config.logging_level = 'ERROR'
    test_config.working_directory = '/tmp'
    test_url = (
        'https://archive-new.nrao.edu/vlass/quicklook/VLASS2.1/'
        'T10t12/VLASS2.1.ql.T10t12.J073401-033000.10.2048.v1/'
        'VLASS2.1.ql.T10t12.J073401-033000.10.2048.v1.I.iter1.image.'
        'pbcor.tt0.rms.subim.fits'
    )
    test_storage_name = VlassName(url=test_url, entry=test_url)
    transferrer = Mock()
    cred_param = Mock()
    cadc_data_client = Mock()
    caom_repo_client = Mock()
    rejected = mc.Rejected('/tmp/rejected.yml')
    observable = mc.Observable(rejected, mc.Metrics(test_config))
    test_subject = ec.Store(
        test_config,
        test_storage_name,
        APPLICATION,
        cred_param,
        cadc_data_client,
        caom_repo_client,
        observable,
        transferrer,
    )
    test_subject.execute(None)

    assert put_mock.called, 'expect a call'
    put_args, _ = put_mock.call_args
    assert put_args[2] == test_storage_name.file_name, 'wrong file name'

    assert transferrer.get.called, 'expect a transfer call'
    get_args, _ = transferrer.get.call_args
    assert get_args[0] == test_url, 'wrong source parameter'
    expected_destination = (
        f'/tmp/{test_storage_name.obs_id}/{test_storage_name.file_name}')
    assert get_args[1] == expected_destination, \
        'wrong destination parameter'
Пример #8
0
def test_look_pull_and_put_v(http_mock, mock_client):
    """Check that mc.look_pull_and_put_v retrieves a file and stores it.

    ``os.stat`` is replaced for the duration of the test so that any file
    appears to have a size of 1234, matching the value the mocked client
    copy returns. The test expects one history entry and no failure entries
    in the metrics after the call.
    """
    stat_orig = os.stat
    os.stat = Mock(return_value=Mock(st_size=1234))
    try:
        test_storage_name = 'cadc:GEMINI/TEST.fits'
        f_name = 'test_f_name.fits'
        url = f'https://localhost/{f_name}'
        test_config = mc.Config()
        test_config.observe_execution = True
        test_metrics = mc.Metrics(test_config)
        mock_client.get_node.side_effect = tc.mock_get_node
        mock_client.copy.return_value = 1234
        assert len(test_metrics.history) == 0, 'initial history conditions'
        assert len(test_metrics.failures) == 0, 'initial failure conditions'
        mc.look_pull_and_put_v(
            test_storage_name,
            f_name,
            tc.TEST_DATA_DIR,
            url,
            mock_client,
            'md5:01234',
            test_metrics,
        )
        test_fqn = os.path.join(tc.TEST_DATA_DIR, f_name)
        # assert_called_with raises its own AssertionError on a mismatch; a
        # trailing ", 'message'" would only build a discarded tuple
        mock_client.copy.assert_called_with(
            test_fqn, destination=test_storage_name)
        http_mock.assert_called_with(url, test_fqn)
        assert len(test_metrics.history) == 1, 'history conditions'
        assert len(test_metrics.failures) == 0, 'failure conditions'
    finally:
        os.stat = stat_orig
Пример #9
0
def test_run_by_builder(data_client_mock, exec_mock, query_endpoint_mock):
    """Check that composable._run processes the entry in the work file.

    A single VLASS file name is written to the configured work file, the
    web log content is seeded, and the run is expected to return 0 and to
    call the execution mock.
    """
    query_endpoint_mock.side_effect = test_scrape._query_endpoint
    data_client_mock.return_value.info.side_effect = _mock_get_file_info
    data_client_mock.return_value.get_head.side_effect = (
        ac.make_headers_from_file)

    exec_mock.return_value = 0

    test_f_name = ('VLASS1.2.ql.T07t13.J083838-153000.10.2048.v1.I.iter1.'
                   'image.pbcor.tt0.subim.fits')

    work_fqn = None
    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    # everything after the patch runs inside the try, so a failure during
    # set-up cannot leave os.getcwd patched for later tests
    try:
        test_config = mc.Config()
        test_config.get_executors()
        work_fqn = test_config.work_fqn
        with open(work_fqn, 'w') as f:
            f.write(f'{test_f_name}\n')

        # the equivalent of calling work.init_web_log()
        scrape.web_log_content['abc'] = 123

        # execution
        test_result = composable._run()
        assert test_result == 0, 'wrong result'
    finally:
        os.getcwd = getcwd_orig
        if work_fqn is not None and os.path.exists(work_fqn):
            os.unlink(work_fqn)

    assert exec_mock.called, 'expect to be called'
    # assert_called_with raises its own AssertionError on a mismatch
    exec_mock.assert_called_with(ANY)
Пример #10
0
def _run_by_public():
    """Process observations that are public, but have no preview artifacts
    in CAOM, or no FITS file in ad.

    Called as gem_run_public. Time-boxing uses timestamps from a state.yml
    file. Call once/day, since data release timestamps have times of
    00:00:00.000.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()
    external_metadata.init_global(config=config)
    run_args = {
        'config': config,
        'name_builder': nbc.FileNameBuilder(gem_name.GemName),
        'command_name': main_app.APPLICATION,
        'bookmark_name': data_source.GEM_BOOKMARK,
        'data_visitors': DATA_VISITORS,
        'end_time': None,
        'chooser': None,
    }
    run_args['source'] = data_source.PublicIncremental(config)
    run_args['meta_visitors'] = _define_meta_visitors(config)
    return rc.run_by_state(**run_args)
Пример #11
0
def test_vault_list_dir_time_box_data_source():
    """Check the time-boxed work list produced by VaultDataSource.

    Two mocked nodes are offered: one dated inside the interval between the
    two execution times, and one dated before it. Only the first is
    expected in the result.
    """
    def _make_node(date, size, uri):
        # bare object carrying the attributes the test sets on a node
        node = type('', (), {})()
        node.props = {
            'date': date,
            'size': size,
        }
        node.uri = uri
        node.isdir = Mock(return_value=False)
        return node

    inside_uri = 'vos://cadc.nrc.ca!vault/goliaths/moc/994898p_moc.fits'
    node1 = _make_node('2020-09-15 19:55:03.067000+00:00', 14, inside_uri)
    node2 = _make_node(
        '2020-09-13 19:55:03.067000+00:00',
        12,
        'vos://cadc.nrc.ca!vault/goliaths/moc/994899p_moc.fits',
    )

    def _glob_mock(ignore_source_directory):
        return [1, 2]

    def _get_node_mock(target):
        return node1 if target == 1 else node2

    test_vos_client = Mock()
    test_vos_client.glob.side_effect = _glob_mock
    test_vos_client.get_node.side_effect = _get_node_mock
    test_config = mc.Config()
    test_config.get_executors()
    test_config.data_sources = ['vos:goliaths/wrong']
    test_subject = dsc.VaultDataSource(test_vos_client, test_config)
    assert test_subject is not None, 'expect a test_subject'

    test_prev_exec_time = datetime(2020, 9, 15, 10, 0, 0, tzinfo=timezone.utc)
    test_exec_time = datetime(2020, 9, 16, 10, 0, 0, tzinfo=timezone.utc)
    test_result = test_subject.get_time_box_work(
        test_prev_exec_time, test_exec_time)
    assert test_result is not None, 'expect a test result'
    assert len(test_result) == 1, 'wrong number of results'

    first = test_result[0]
    assert inside_uri == first.entry_name, 'wrong name result'
    expected_ts = datetime(
        year=2020,
        month=9,
        day=15,
        hour=19,
        minute=55,
        second=3,
        microsecond=67000,
        tzinfo=timezone.utc,
    )
    assert expected_ts == first.entry_ts, 'wrong ts result'
Пример #12
0
def test_vault_list_dir_data_source():
    """Check the work list VaultDataSource builds from a directory listing.

    The mocked listing offers three names and the configuration limits the
    extensions to ``.fits``; a single entry is expected in the result.
    """
    def _query_mock(ignore_source_directory):
        return ['abc.txt', 'abc.fits', '900898p_moc.fits']

    def _make_node(size, uri):
        # bare object carrying the attributes the test sets on a node
        node = type('', (), {})()
        node.props = {
            'size': size,
        }
        node.uri = uri
        return node

    # handed out, in this order, by successive get_node calls
    nodes = [
        _make_node(
            0, 'vos://cadc.nrc.ca!vault/goliaths/wrong/900898p_moc.fits'),
        _make_node(12, 'vos://cadc.nrc.ca!vault/goliaths/moc/abc.fits'),
        _make_node(12, 'vos://cadc.nrc.ca!vault/goliaths/moc/abc.txt'),
    ]

    test_vos_client = Mock()
    test_vos_client.listdir.side_effect = _query_mock
    test_vos_client.get_node.side_effect = nodes
    test_config = mc.Config()
    test_config.get_executors()
    test_config.data_sources = ['vos:goliaths/wrong']
    test_config.data_source_extensions = ['.fits']
    test_subject = dsc.VaultDataSource(test_vos_client, test_config)
    assert test_subject is not None, 'expect a test_subject'
    work = test_subject.get_work()
    assert work is not None, 'expect a test result'
    assert len(work) == 1, 'wrong number of results'
    assert 'vos:goliaths/wrong/abc.fits' in work, 'wrong result'
Пример #13
0
def _run_state():
    """Run state-based processing, with the bookmark timestamp in the state
    file controlling which quicklook files are retrieved from VLASS.

    Ingestion works from URLs, because a URL that contains the phrase
    'QA_REJECTED' is the only way to tell if the attribute 'requirements'
    should be set to 'fail', or not.

    :return: the value returned by ``rc.run_by_state``.
    """
    config = mc.Config()
    config.get_executors()
    bookmark = mc.State(config.state_fqn).get_bookmark(VLASS_BOOKMARK)
    # a way to get a datetime from a string, or maybe a datetime, depending
    # on the execution environment
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)
    run_args = {
        'config': config,
        'bookmark_name': VLASS_BOOKMARK,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
        'end_time': max_date,
    }
    run_args['source'] = data_source.NraoPage(todo_list)
    run_args['name_builder'] = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client)
    return rc.run_by_state(store_transfer=tc.HttpTransfer(), **run_args)
Пример #14
0
def _run():
    """Process the observations listed in a todo file, with a Builder
    supplying the StorageName instances. The Builder matters here, because
    the instrument name needs to be provided to the StorageName constructor.

    :return 0 if successful, -1 if there's any sort of failure. Return status
        is used by airflow for task instance management and reporting.
    """
    config = mc.Config()
    config.get_executors()

    # time_bounds_augmentation and quality_augmentation depend on
    # metadata scraped from the NRAO site, but that only changes if a new
    # file is created, a new version of a file is created, or an old version
    # of a file is replaced. If the pipeline isn't STORE'ing information from
    # the source, files aren't changing, and the related metadata isn't
    # changing, so be polite to the NRAO site, and don't scrape if it's not
    # necessary.
    needs_scrape = all(
        task_type in config.task_types
        for task_type in (mc.TaskType.STORE, mc.TaskType.INGEST)
    )
    if needs_scrape:
        meta_visitors = META_VISITORS
    else:
        meta_visitors = [cleanup_augmentation]

    name_builder = nbc.EntryBuilder(storage_name.VlassName)
    storage_name.set_use_storage_inventory(
        config.features.supports_latest_client)
    run_args = {
        'config': config,
        'name_builder': name_builder,
        'meta_visitors': meta_visitors,
        'data_visitors': DATA_VISITORS,
    }
    return rc.run_by_todo(store_transfer=tc.HttpTransfer(), **run_args)
Пример #15
0
def test_aug_visit_works(query_endpoint_mock, get_mock):
    """Check that time_bounds_augmentation.visit adds time information.

    After the visit, the first chunk of part '0' of the named artifact is
    expected to carry one time bounds sample and an exposure of 234.0.
    """
    get_mock.return_value.__enter__.return_value.raw = test_scrape.WL_INDEX
    query_endpoint_mock.side_effect = test_scrape._query_endpoint
    test_config = mc.Config()
    test_config.get_executors()
    work.init_web_log(mc.State(test_config.state_fqn), test_config)

    f_name = ('VLASS1.2.ql.T07t13.J081828-133000.10.2048.v1.I.iter1.'
              'image.pbcor.tt0.subim.fits')
    test_name = sn.VlassName(file_name=f_name, entry=f_name)
    test_obs = mc.read_obs_from_file(
        os.path.join(TEST_DATA_DIR, f'{test_name.obs_id}.xml'))
    assert test_obs is not None, 'unexpected None'

    test_result = time_bounds_augmentation.visit(
        test_obs,
        working_directory=os.path.join(THIS_DIR, '../../data'),
        cadc_client=Mock(),
    )
    assert test_obs is not None, 'unexpected modification'
    assert test_result is not None, 'should have a result status'
    assert len(test_result) == 1, 'modified artifacts count'
    assert test_result['artifacts'] == 2, 'artifact count'

    artifact = test_obs.planes[test_name.product_id].artifacts[
        test_name.file_uri]
    chunk = artifact.parts['0'].chunks[0]
    assert chunk is not None
    time_info = chunk.time
    assert time_info is not None, 'no time information'
    assert time_info.axis is not None, 'no axis information'
    assert time_info.axis.bounds is not None, 'no bounds information'
    assert len(time_info.axis.bounds.samples) == 1, \
        'wrong amount of bounds info'
    assert chunk.time.exposure == 234.0, 'wrong exposure value'
Пример #16
0
def test_run(run_mock, access_mock):
    """Check that composable._run hands a StorageName to the run mock.

    The test runs from a temporary directory holding a configuration file,
    a proxy file, and a one-line todo file. The storage name passed to the
    run mock is expected to carry the file name from the todo file.
    """
    run_mock.return_value = 0
    access_mock.return_value = 'https://localhost'
    test_f_id = 'test_file_id'
    test_f_name = f'{test_f_id}.fits'
    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    # everything after the patch runs inside the try, so a failure during
    # set-up cannot leave os.getcwd patched for later tests
    try:
        config = mc.Config()
        config.get_executors()
        with TemporaryDirectory(dir=test_main_app.TEST_DATA_DIR) as temp_dir:
            os.chdir(temp_dir)
            try:
                config.working_directory = temp_dir
                config.log_file_directory = f'{temp_dir}/logs'
                config.rejected_directory = f'{temp_dir}/rejected'
                mc.Config.write_to_file(config)

                with open(f'{temp_dir}/test_proxy.pem', 'w') as f:
                    f.write('test content')
                with open(f'{temp_dir}/todo.txt', 'w') as f:
                    f.write(test_f_name)

                # execution
                test_result = composable._run()
                assert test_result == 0, 'wrong return value'
                assert run_mock.called, 'should have been called'
                args, kwargs = run_mock.call_args
                test_storage = args[0]
                assert isinstance(test_storage, mc.StorageName), \
                    type(test_storage)
                assert test_storage.file_name == test_f_name, \
                    'wrong file name'
                assert test_storage.source_names[0] == test_f_name, \
                    'wrong fname on disk'
            finally:
                # leave the temporary directory before it is removed
                os.chdir(test_main_app.TEST_DATA_DIR)
    finally:
        os.getcwd = getcwd_orig
Пример #17
0
def _common_init():
    """Create the objects shared by the Gemini pipeline entry points.

    The metadata reader is chosen from the configuration: a file-based
    reader when local files are in use or SCRAPE is a task type, a storage
    client reader when VISIT is the only task type, and the default reader
    otherwise. The file-based case also replaces the default visitor list.

    :return: tuple of (clients, config, metadata_reader, meta_visitors,
        name_builder)
    """
    config = mc.Config()
    config.get_executors()
    clients = GemClientCollection(config)

    # one endpoint session for Gemini, one for the filter metadata service
    gemini_session = mc.get_endpoint_session()
    provenance_finder = gemini_metadata.ProvenanceFinder(
        config, clients.query_client, gemini_session)
    svofps_session = mc.get_endpoint_session()
    filter_cache = svofps.FilterMetadataCache(svofps_session)
    clients.gemini_session = gemini_session
    clients.svo_session = svofps_session

    local_or_scrape = (
        config.use_local_files or mc.TaskType.SCRAPE in config.task_types
    )
    if local_or_scrape:
        metadata_reader = gemini_metadata.GeminiFileMetadataReader(
            gemini_session, provenance_finder, filter_cache)
        meta_visitors = [
            fits2caom2_augmentation,
            preview_augmentation,
            cleanup_augmentation,
        ]
    else:
        meta_visitors = META_VISITORS
        if config.task_types == [mc.TaskType.VISIT]:
            metadata_reader = gemini_metadata.GeminiStorageClientReader(
                clients.data_client,
                gemini_session,
                provenance_finder,
                filter_cache,
            )
        else:
            metadata_reader = gemini_metadata.GeminiMetadataReader(
                gemini_session, provenance_finder, filter_cache)

    reader_lookup = gemini_metadata.GeminiMetadataLookup(metadata_reader)
    reader_lookup.reader = metadata_reader
    name_builder = builder.GemObsIDBuilder(
        config, metadata_reader, reader_lookup)
    return clients, config, metadata_reader, meta_visitors, name_builder
Пример #18
0
def _run_by_state():
    """Run state-based processing, with the bookmark timestamp in the state
    file controlling which quicklook files are retrieved from VLASS.

    Ingestion works from URLs, because a URL that contains the phrase
    'QA_REJECTED' is the only way to tell if the attribute 'requirements'
    should be set to 'fail', or not.

    :return: the value returned by ``rc.run_by_state``.
    """
    config = mc.Config()
    config.get_executors()
    bookmark = mc.State(config.state_fqn).get_bookmark(VLASS_BOOKMARK)
    # a way to get a datetime from a string, or maybe a datetime, depending
    # on the execution environment
    start_time = mc.increment_time(bookmark, 0)
    todo_list, max_date = scrape.build_file_url_list(start_time)
    if len(todo_list) > 0:
        # the web log is only initialized when there is work to do
        work.init_web_log(mc.State(config.state_fqn), config)
    # still make all subsequent calls if len == 0, for consistent reporting
    run_args = {
        'config': config,
        'command_name': sn.APPLICATION,
        'bookmark_name': VLASS_BOOKMARK,
        'meta_visitors': META_VISITORS,
        'data_visitors': DATA_VISITORS,
        'end_time': max_date,
    }
    run_args['source'] = data_source.NraoPage(todo_list)
    run_args['name_builder'] = builder.VlassInstanceBuilder(config)
    return rc.run_by_state(store_transfer=tc.HttpTransfer(), **run_args)
Пример #19
0
def test_transfer_fails_fits_check():
    """Check the clean-up performed when a retrieved file fails its check.

    The mocked copy places a broken FITS file at the destination. With
    ``cleanup_files_when_storing`` set, the test expects the source to be
    moved to the configured failure destination.
    """
    # Mock(autospec=True) only sets an attribute named 'autospec' on the
    # mock; it does not apply any spec, so a plain Mock is equivalent
    vos_client_mock = Mock()

    def mock_copy(ignore_src, ignore_dst, send_md5=True):
        copyfile('/test_files/broken.fits', '/tmp/broken.fits')

    vos_client_mock.copy.side_effect = mock_copy

    test_config = mc.Config()
    test_config.cleanup_files_when_storing = True
    test_config.cleanup_failure_destination = 'vos:goliaths/dao_test/failure'

    test_subject = transfer.VoFitsCleanupTransfer(vos_client_mock, test_config)
    assert test_subject is not None, 'expect ctor to work'
    test_subject.observable = Mock()

    test_source = 'vos:goliaths/dao_test/broken.fits'
    test_destination = '/tmp/broken.fits'
    test_subject.get(test_source, test_destination)

    # assert_called_with raises its own AssertionError on a mismatch; a
    # trailing ", 'message'" would only build a discarded tuple
    assert vos_client_mock.copy.called, 'expect copy call'
    vos_client_mock.copy.assert_called_with(
        test_source, test_destination, send_md5=True)
    assert vos_client_mock.move.called, 'expect move call'
    vos_client_mock.move.assert_called_with(
        test_source, 'vos:goliaths/dao_test/failure/broken.fits')
Пример #20
0
def test_config():
    """Build a Config for the OMM collection that points at the test data.

    Any failure, success, or retry file left at the configured locations is
    removed before the configuration is handed back.

    :return: the populated ``mc.Config`` instance.
    """
    data_dir = tc.TEST_DATA_DIR
    test_config = mc.Config()
    test_config.working_directory = tc.THIS_DIR
    test_config.collection = 'OMM'
    test_config.netrc_file = os.path.join(data_dir, 'test_netrc')
    test_config.work_file = 'todo.txt'
    test_config.logging_level = 'DEBUG'
    test_config.log_file_directory = data_dir
    test_config.failure_fqn = f'{data_dir}/fail.txt'
    test_config.failure_log_file_name = 'fail.txt'
    test_config.retry_fqn = f'{data_dir}/retry.txt'
    test_config.retry_file_name = 'retry.txt'
    test_config.success_fqn = f'{data_dir}/good.txt'
    test_config.success_log_file_name = 'good.txt'
    test_config.rejected_fqn = f'{data_dir}/rejected.yml'
    test_config.progress_fqn = f'{data_dir}/progress.txt'
    test_config.resource_id = 'ivo://cadc.nrc.ca/sc2repo'
    test_config.features.run_in_airflow = False
    test_config.features.use_file_names = False
    test_config._report_fqn = (
        f'{test_config.log_file_directory}/test_report.txt')
    test_config.stream = 'TEST'

    # start from a clean slate: drop files left behind by earlier runs
    stale_files = (
        test_config.failure_fqn,
        test_config.success_fqn,
        test_config.retry_fqn,
    )
    for f_name in filter(os.path.exists, stale_files):
        os.unlink(f_name)
    return test_config
def test_storage_time_box_query(query_mock):
    """Check the time-boxed work list built by QueryTimeBoxDataSource.

    The mocked query returns a three-row table; the test expects three
    results, the first of which is named after the first row.
    """
    def _mock_query(arg1, arg2):
        return Table.read(
            'fileName,ingestDate\n'
            'NEOS_SCI_2015347000000_clean.fits,2019-10-23T16:27:19.000\n'
            'NEOS_SCI_2015347000000.fits,2019-10-23T16:27:27.000\n'
            'NEOS_SCI_2015347002200_clean.fits,2019-10-23T16:27:33.000\n'.split(
                '\n'
            ),
            format='csv',
        )

    query_mock.side_effect = _mock_query
    getcwd_orig = os.getcwd
    tap_client_ctor_orig = CadcTapClient.__init__
    os.getcwd = Mock(return_value=tc.TEST_DATA_DIR)
    CadcTapClient.__init__ = Mock(return_value=None)
    # everything after the patches runs inside the try, so a failure while
    # reading the configuration cannot leave them in place for later tests
    try:
        test_config = mc.Config()
        test_config.get_executors()
        utc_now = datetime.utcnow()
        prev_exec_date = utc_now - timedelta(seconds=3600)
        exec_date = utc_now - timedelta(seconds=1800)
        test_subject = dsc.QueryTimeBoxDataSource(test_config)
        test_result = test_subject.get_time_box_work(prev_exec_date, exec_date)
        assert test_result is not None, 'expect result'
        assert len(test_result) == 3, 'wrong number of results'
        assert (
            test_result[0].entry_name == 'NEOS_SCI_2015347000000_clean.fits'
        ), 'wrong results'
    finally:
        os.getcwd = getcwd_orig
        CadcTapClient.__init__ = tap_client_ctor_orig
def test_provenance_augmentation(obs_id_mock, repo_get_mock, headers_mock,
                                 test_fqn):
    """Check provenance_augmentation.visit on an expected-observation file.

    The observation read from ``test_fqn`` starts with a non-moving target;
    after the visit the test expects a provenance count of 2, exactly one
    member, and a moving target.
    """
    rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(rejected, mc.Metrics(test_config))
    headers_mock.side_effect = _get_headers_mock
    repo_get_mock.side_effect = _repo_get_mock
    obs_id_mock.side_effect = _get_obs_id_mock

    getcwd_orig = os.getcwd
    os.getcwd = Mock(return_value=test_main_app.TEST_DATA_DIR)
    try:
        test_obs = mc.read_obs_from_file(test_fqn)
        assert not test_obs.target.moving, 'initial conditions moving target'
        # the science file name is derived from the expected-observation name
        science_file = os.path.basename(test_fqn).replace(
            '.expected.xml', '.fits')
        test_result = provenance_augmentation.visit(
            test_obs,
            science_file=science_file,
            working_directory=test_main_app.TEST_DATA_DIR,
            observable=test_observable,
            caom_repo_client=Mock(),
        )
        assert test_result is not None, 'expect a result'
        assert test_result.get('provenance') == 2, 'wrong result'
        assert len(test_obs.members) == 1, 'wrong membership'
        assert test_obs.target.moving, 'should be changed'
    finally:
        os.getcwd = getcwd_orig
Пример #23
0
def run_query():
    """
    Process every entry returned from a time-boxed ad query, then exit the
    interpreter with the accumulated status.

    :param sys.argv[1] the timestamp for the > comparison in the time-boxed
        query
    :param sys.argv[2] the timestamp for the <= comparison in the time-boxed
        query

    The exit status is 0 if successful, -1 if there's any sort of failure.
    It is used by airflow for task instance management and reporting.
    """
    prev_exec_date, exec_date = sys.argv[1], sys.argv[2]

    config = mc.Config()
    config.get()
    config.stream = 'default'

    file_list = mc.read_file_list_from_archive(
        config, APPLICATION, prev_exec_date, exec_date)
    # drop the timestamps, keeping only the program name
    sys.argv = sys.argv[:1]

    exit_status = 0
    if len(file_list) > 0:
        # the work file carries one entry per line
        mc.write_to_file(config.work_fqn, '\n'.join(file_list))
        exit_status |= ec.run_by_file(
            GemName,
            APPLICATION,
            COLLECTION,
            config.proxy_fqn,
            meta_visitors,
            data_visitors,
            archive=ARCHIVE,
        )
    sys.exit(exit_status)
Пример #24
0
def test_builder(file_info_mock, header_mock):
    """
    Check the URIs and obs id that GemObsIDBuilder.build produces.

    Each test file name is built once per task type (INGEST and SCRAPE),
    and the file, preview and thumbnail URIs are compared to the values
    composed from SCHEME / V_SCHEME and COLLECTION.

    :param file_info_mock mock whose side effect is replaced by
        gem_mocks.mock_get_obs_metadata
    :param header_mock mock that is accepted but not used in the body
    """
    file_info_mock.side_effect = gem_mocks.mock_get_obs_metadata

    config = mc.Config()
    config.working_directory = '/test_files'
    config.proxy_fqn = os.path.join(
        gem_mocks.TEST_DATA_DIR, 'test_proxy.pem')
    reader = gemini_metadata.GeminiMetadataReader(Mock(), Mock(), Mock())
    lookup = gemini_metadata.GeminiMetadataLookup(reader)
    subject = builder.GemObsIDBuilder(config, reader, lookup)

    task_types = (mc.TaskType.INGEST, mc.TaskType.SCRAPE)
    for entry in ('S20050825S0143.fits', 'TX20131117_raw.3002.fits'):
        for task_type in task_types:
            # The builder holds a reference to config, so updating the task
            # types here affects the next build call.
            config.task_types = [task_type]
            built = subject.build(entry)
            assert built is not None, 'expect a result'
            assert built.file_uri == f'{SCHEME}:{COLLECTION}/{entry}', \
                'wrong file uri'
            assert built.prev_uri == \
                f'{SCHEME}:{COLLECTION}/{built.prev}', 'wrong preview uri'
            assert built.thumb_uri == \
                f'{V_SCHEME}:{COLLECTION}/{built.thumb}', 'wrong thumb uri'
            assert built.obs_id is not None, 'expect an obs id'
Пример #25
0
def test_storage_name():
    """
    Check DraoSTName naming and the file names it finds for an obs id.

    The first half asserts the attributes of a DraoSTName created from
    'RN43.json'. The second half makes sure the four expected tar files
    exist in TEST_FILES_DIR (creating placeholders when missing) and checks
    that DraoSTName.get_f_names returns exactly that sorted list.
    """
    expected_f_names = sorted([
        'DRAO_ST_CGPS_RN43_20180715T1450_C21.tar.gz',
        'DRAO_ST_CGPS_RN43_20180715T1450_C74.tar.gz',
        'DRAO_ST_CGPS_RN43_20180715T1450_RAW.tar.gz',
        'DRAO_ST_CGPS_RN43_20180715T1450_S21.tar.gz',
    ])

    subject = draost_name.DraoSTName(fname_on_disk='RN43.json')
    assert subject.is_valid(), 'should be valid'
    assert subject.obs_id == 'RN43', 'wrong obs_id'
    assert subject.product_id is None, 'not maintained by pipeline'
    assert subject.file_uri is None, 'not maintained by pipeline'
    assert subject.lineage is None, 'not maintained by pipeline'

    config = mc.Config()
    config.get_executors()
    config.working_directory = TEST_FILES_DIR

    # Create a placeholder for every expected file that is not on disk yet.
    for name in expected_f_names:
        fqn = f'{TEST_FILES_DIR}/{name}'
        if os.path.exists(fqn):
            continue
        with open(fqn, 'w') as handle:
            handle.write('test content')

    found = draost_name.DraoSTName.get_f_names(
        subject.obs_id, config.working_directory)
    assert found == expected_f_names, 'two ways to name'
Пример #26
0
def test_store(put_mock):
    """
    Check that ec.Store transfers the source file and then puts it.

    The Store executor is built around mocked clients and a mocked
    transferrer. After execute, the test inspects the positional arguments
    of the put call and of the transferrer 'get' call.

    :param put_mock mock that records the put call; presumably supplied by
        a patch decorator that is not visible here
    """
    test_config = mc.Config()
    test_config.logging_level = 'ERROR'
    test_config.working_directory = '/tmp'
    test_fqn = '/users/OpenData_DonneesOuvertes/pub/NEOSSAT/ASTRO/2019/' \
               '268/NEOS_SCI_2019268004930_clean.fits'
    test_storage_name = NEOSSatName(file_name=test_fqn, entry=test_fqn)
    transferrer = Mock()
    cred_param = Mock()
    cadc_data_client = Mock()
    caom_repo_client = Mock()
    observable = mc.Observable(mc.Rejected('/tmp/rejected.yml'),
                               mc.Metrics(test_config))
    test_subject = ec.Store(test_config, test_storage_name, APPLICATION,
                            cred_param, cadc_data_client, caom_repo_client,
                            observable, transferrer)
    test_subject.execute(None)

    # The third positional argument of the put call is the file name.
    assert put_mock.called, 'expect a call'
    put_args, _ = put_mock.call_args
    assert put_args[2] == test_storage_name.file_name, 'wrong file name'

    # The transfer goes from the original location into a per-observation
    # directory under the working directory.
    assert transferrer.get.called, 'expect a transfer call'
    get_args, _ = transferrer.get.call_args
    assert get_args[0] == test_fqn, 'wrong source parameter'
    assert get_args[1] == f'/tmp/{test_storage_name.obs_id}/' \
                          f'{test_storage_name.file_name}', \
        'wrong destination parameter'
Пример #27
0
def test_preview_augment_delete_preview():
    """
    Check that a preview artifact for a non-existent file is removed.

    The plane starts with a preview artifact, but the preview file is
    listed under 'no_preview' in the rejected content, so the visitor is
    expected to remove that artifact from the CAOM observation: the plane
    goes from two artifacts to one and the visitor reports one artifact
    change.
    """
    product_id = 'S20080610S0045'
    obs = mc.read_obs_from_file(
        os.path.join(TEST_DATA_DIR, 'GS-2008A-C-5-35-002.fits.xml'))
    assert len(obs.planes[product_id].artifacts) == 2, 'initial condition'

    rejected = mc.Rejected('/tmp/nonexistent')
    rejected.content = {
        'bad_metadata': [],
        'no_preview': [
            'S20080610S0043.jpg',
            'S20080610S0041.jpg',
            'S20080610S0044.jpg',
            'S20080610S0045.jpg',
        ],
    }
    observable = mc.Observable(rejected, mc.Metrics(mc.Config()))

    result = preview_augmentation.visit(
        obs,
        working_directory=TEST_DATA_DIR,
        cadc_client=None,
        stream='stream',
        observable=observable,
    )
    assert result is not None, 'expect a result'
    assert result['artifacts'] == 1, 'wrong result'
    assert len(obs.planes[product_id].artifacts) == 1, 'post condition'
Пример #28
0
def test_pull_v_augmentation(put_mock, http_mock):
    """
    Check that pull_v_augmentation.visit fetches the file and stores it.

    Every artifact URI in the observation is first set to a single test
    URI. After the visit, the test checks the arguments of the HTTP fetch
    and of the put call, and that neither the observation metadata nor the
    number of artifacts changed.

    :param put_mock mock that records the put call; presumably supplied by
        a patch decorator that is not visible here
    :param http_mock mock that records the HTTP fetch; presumably supplied
        by a patch decorator that is not visible here
    """
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'
    test_uri = f'{SCHEME}:{COLLECTION}/{TEST_PRODUCT_ID}.fits'
    for plane in obs.planes.values():
        for artifact in plane.artifacts.values():
            artifact.uri = test_uri

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    cadc_client_mock = Mock()
    visit_kwargs = {'working_directory': TEST_DATA_DIR,
                    'cadc_client': cadc_client_mock,
                    'observable': test_observable}

    result = pull_v_augmentation.visit(obs, **visit_kwargs)
    test_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
    test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'
    # assert_called_with raises AssertionError itself on a mismatch; it
    # takes no failure message.
    http_mock.assert_called_with(test_url, test_prev)
    assert put_mock.called, 'put mock not called'
    put_args, _ = put_mock.call_args
    assert put_args[1] == TEST_DATA_DIR, 'wrong working dir'
    assert put_args[2] == f'{TEST_PRODUCT_ID}.fits', 'wrong file name'
    assert put_args[3] == test_uri, 'wrong storage name'
    assert result is not None, 'expect a result'
    assert result['observation'] == 0, 'no updated metadata'
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, \
        'no new artifacts'
Пример #29
0
def test_pull_augmentation():
    """
    Check that pull_augmentation.visit fetches the file and stores it.

    http_get and data_put from caom2pipe.manage_composable are patched for
    the duration of the visit. The test then checks the arguments of the
    HTTP fetch, that the put happened, and that neither the observation
    metadata nor the number of artifacts changed.
    """
    obs = mc.read_obs_from_file(TEST_OBS_FILE)
    obs.planes[TEST_PRODUCT_ID].data_release = datetime.utcnow()
    assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, 'initial condition'

    test_rejected = mc.Rejected(REJECTED_FILE)
    test_config = mc.Config()
    test_observable = mc.Observable(test_rejected, mc.Metrics(test_config))
    cadc_client_mock = Mock()
    visit_kwargs = {'working_directory': TEST_DATA_DIR,
                    'cadc_client': cadc_client_mock,
                    'stream': 'stream',
                    'observable': test_observable}

    with patch('caom2pipe.manage_composable.http_get') as http_mock, \
            patch('caom2pipe.manage_composable.data_put') as ad_put_mock:
        # NOTE(review): this makes data_get *return* the exception instance,
        # and only on the object obtained by calling cadc_client_mock. If
        # the intent was for data_get to raise, side_effect on
        # cadc_client_mock.data_get would be needed - confirm.
        cadc_client_mock.return_value.data_get.return_value = mc.CadcException(
            'test')
        # no scheme from cadc client
        cadc_client_mock.get_file_info.return_value = {'md5sum': '1234'}
        result = pull_augmentation.visit(obs, **visit_kwargs)
        test_url = f'{pull_augmentation.FILE_URL}/{TEST_PRODUCT_ID}.fits'
        test_prev = f'{TEST_DATA_DIR}/{TEST_PRODUCT_ID}.fits'
        # assert_called_with raises AssertionError itself on a mismatch; it
        # takes no failure message.
        http_mock.assert_called_with(test_url, test_prev)
        assert ad_put_mock.called, 'ad put mock not called'
        assert result is not None, 'expect a result'
        assert result['observation'] == 0, 'no updated metadata'
        assert len(obs.planes[TEST_PRODUCT_ID].artifacts) == 1, \
            'no new artifacts'
def test_data_visit_params():
    """
    Check the keyword arguments ec.DataVisit hands to a data visitor.

    A DataVisit executor is built for a MODIFY task with mocked clients,
    and its single mocked visitor is expected to be called with the
    per-observation working directory '/tmp/abc' and the storage name.

    The working directory is removed before the test, so it starts from a
    clean state, and again afterwards, whatever the outcome.
    """
    # Local import, matching the function-scope imports used elsewhere in
    # this file.
    import shutil

    test_wd = '/tmp/abc'
    # Remove leftovers from an earlier run. rmtree also copes with a
    # directory that still has content, which os.rmdir does not.
    if os.path.isdir(test_wd):
        shutil.rmtree(test_wd)
    elif os.path.exists(test_wd):
        os.unlink(test_wd)

    storage_name = mc.StorageName(
        obs_id='abc',
        fname_on_disk='abc.fits.gz',
        source_names=['vos:DAO/incoming/abc.fits.gz'],
        destination_uris=['ad:TEST/abc.fits.gz'],
    )

    test_config = mc.Config()
    test_config.task_types = [mc.TaskType.MODIFY]
    test_config.working_directory = '/tmp'
    test_config.logging_level = 'DEBUG'

    # Plain mocks: 'autospec' is an argument of patch / create_autospec.
    # Passed to Mock() it only sets an attribute with that name and does not
    # restrict the mock in any way.
    test_cadc_client = Mock()
    test_caom_client = Mock()
    test_caom_client.read.side_effect = _read_obs2
    data_visitor = Mock()
    test_data_visitors = [data_visitor]
    test_observable = Mock()
    test_transferrer = Mock()

    try:
        test_config.use_local_files = False
        test_subject = ec.DataVisit(
            test_config,
            storage_name,
            test_cadc_client,
            test_caom_client,
            test_data_visitors,
            test_config.task_types[0],
            test_observable,
            test_transferrer,
        )
        assert test_subject is not None, 'broken ctor'
        test_subject.execute(context=None)
        assert data_visitor.visit.called, 'expect visit call'
        # assert_called_with raises AssertionError itself on a mismatch; it
        # takes no failure message.
        data_visitor.visit.assert_called_with(
            ANY,
            working_directory='/tmp/abc',
            storage_name=storage_name,
            log_file_directory=None,
            cadc_client=ANY,
            caom_repo_client=ANY,
            stream=None,
            observable=ANY,
        )
        data_visitor.visit.reset_mock()
    finally:
        # os.listdir returns bare names, so unlinking them directly would
        # resolve against the current directory instead of test_wd. Remove
        # the whole tree by its path.
        if os.path.isdir(test_wd):
            shutil.rmtree(test_wd)