def test_reload_missing_remote_directory(intake_server):
    """The server copes with its catalog directory vanishing and reappearing.

    With the directory gone the served catalog is empty; once a catalog file
    is recreated, the server's periodic refresh picks the entry back up.
    """
    # Best-effort cleanup: ignore_errors replaces the original bare
    # ``except: pass``, which silently swallowed *every* exception.
    shutil.rmtree(TMP_DIR, ignore_errors=True)
    time.sleep(1)

    catalog = Catalog(intake_server)
    assert_items_equal(list(catalog), [])

    # Recreate the directory with a single catalog file.
    os.mkdir(TMP_DIR)
    with open(os.path.join(TMP_DIR, YAML_FILENAME), 'w') as f:
        f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
''')
    # Allow the server's refresh cycle to notice the new file.
    time.sleep(2)
    assert_items_equal(list(catalog), ['use_example1'])

    shutil.rmtree(TMP_DIR, ignore_errors=True)
def listing(args):
    """Print each entry of the catalog at ``args.uri``.

    With ``args.full`` set, detailed info is printed via
    ``print_entry_info``; otherwise just the entry name.
    """
    catalog = Catalog(args.uri)
    # Iterate the catalog directly: wrapping it in list() only built a
    # throwaway list of the same names (it is never mutated here).
    for entry in catalog:
        if args.full:
            print_entry_info(catalog, entry)
        else:
            print(entry)
def invoke(self, args):
    """Read every cache-enabled entry of the catalog so its data is cached.

    Entries without a cache specification are instantiated but not read.
    """
    catalog = Catalog(args.uri)
    # Iterate the catalog directly; the original list() wrapper built a
    # throwaway list (the catalog is not mutated inside the loop).
    for entry in catalog:
        s = catalog[entry]()
        if s.cache:
            print("Caching for entry %s" % entry)
            s.read()
def test_read(intake_server):
    """A full read of 'entry1' equals the concatenation of its two CSVs."""
    catalog = Catalog(intake_server)
    source = catalog['entry1'].get()

    here = os.path.dirname(__file__)
    expected_df = pd.concat(
        [pd.read_csv(os.path.join(here, name))
         for name in ('entry1_1.csv', 'entry1_2.csv')])
    meta = expected_df[:0]

    info = source.discover()
    assert info['datashape'] is None
    assert info['dtype'] == {
        k: str(v) for k, v in meta.dtypes.to_dict().items()
    }
    assert info['npartitions'] == 2
    # Row count of CSV input is unknown until read.
    assert info['shape'] == (None, 3)

    md = source.metadata.copy()
    md.pop('catalog_dir', None)
    assert md == dict(foo='bar', bar=[1, 2, 3], cache=[])

    assert expected_df.equals(source.read())
def test_read_direct(intake_server):
    """Reading 'entry1_part' with part='2' yields only the second CSV."""
    catalog = Catalog(intake_server)
    source = catalog['entry1_part'].get(part='2')

    here = os.path.dirname(__file__)
    expected_df = pd.read_csv(os.path.join(here, 'entry1_2.csv'))
    meta = expected_df[:0]

    info = source.discover()
    assert info['datashape'] is None
    assert info['dtype'] == {
        k: str(v) for k, v in meta.dtypes.to_dict().items()
    }
    assert info['npartitions'] == 1
    # CSV row count is not known ahead of time.
    assert info['shape'] == (None, 3)

    # Metadata is reported both in discover() and on the source itself.
    info_md = info['metadata'].copy()
    info_md.pop('catalog_dir', None)
    assert info_md == {'bar': [2, 4, 6], 'foo': 'baz', 'cache': []}

    source_md = source.metadata.copy()
    source_md.pop('catalog_dir', None)
    assert source_md == dict(foo='baz', bar=[2, 4, 6], cache=[])

    assert source.description == 'entry1 part'
    assert expected_df.equals(source.read())
def test_reload_updated_config(intake_server_with_config):
    """Adding a source to the catalog file shows up after the refresh."""
    catalog = Catalog(intake_server_with_config)
    assert list(catalog) == ['use_example1']

    # Overwrite the served catalog file with one extra entry.
    config_path = os.path.join(TMP_DIR, YAML_FILENAME)
    with open(config_path, 'w') as f:
        f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
  use_example1_1:
    description: example1 other
    driver: example1
    args: {}
''')
    # Give the server's refresh cycle time to reload the file.
    time.sleep(2)
    assert_items_equal(list(catalog), ['use_example1', 'use_example1_1'])
def test_secret_auth(intake_server_with_auth):
    """A client holding the correct secret can list and read the catalog."""
    client_auth = SecretClientAuth(secret='test_secret')
    catalog = Catalog(intake_server_with_auth, auth=client_auth)
    assert list(catalog) == ['example']
    # Reading must also be permitted, not just listing.
    catalog.example.read()
def test_union_catalog():
    """Building a Catalog from a list of YAML files merges their entries."""
    path = os.path.dirname(__file__)
    uri1 = os.path.join(path, 'catalog_union_1.yml')
    uri2 = os.path.join(path, 'catalog_union_2.yml')

    # A list of URIs constructs a union catalog over all of them.
    union_cat = Catalog([uri1, uri2])

    assert_items_equal(list(union_cat),
                       ['entry1', 'entry1_part', 'use_example1'])

    # describe() exposes the declared user parameters and access policy.
    assert union_cat.entry1_part.describe() == {
        'container': 'dataframe',
        'user_parameters': [
            {
                'name': 'part',
                'description': 'part of filename',
                'default': '1',
                'type': 'str',
                'allowed': ['1', '2'],
            }
        ],
        'description': 'entry1 part',
        'direct_access': 'allow'
    }

    # describe_open() resolves templates into concrete driver arguments.
    desc_open = union_cat.entry1_part.describe_open()
    assert desc_open['args']['urlpath'].endswith('entry1_1.csv')
    del desc_open['args']['urlpath']  # Full path will be system dependent
    desc_open['args']['metadata'].pop('catalog_dir')
    assert 'csv' in str(desc_open.pop('plugin'))
    assert desc_open == {
        'args': {'metadata': {'bar': [2, 4, 6], 'cache': [], 'foo': 'baz'}},
        'description': 'entry1 part',
        'direct_access': 'allow',
        'metadata': {'bar': [2, 4, 6], 'cache': [], 'foo': 'baz'},
    }

    # Implied creation of data source
    assert union_cat.entry1.container == 'dataframe'
    md = union_cat.entry1.metadata
    md.pop('catalog_dir')
    assert md == dict(foo='bar', bar=[1, 2, 3], cache=[])

    # Use default parameters in explict creation of data source
    assert union_cat.entry1_part().container == 'dataframe'

    # Specify parameters in creation of data source
    assert union_cat.entry1_part(part='2').container == 'dataframe'
def test_catalog_file_removal(temp_catalog_file):
    """Deleting a catalog file empties the glob-based catalog on refresh."""
    watch_dir = os.path.dirname(temp_catalog_file)
    cat = Catalog(watch_dir + '/*')
    assert set(cat) == {'a', 'b'}

    os.remove(temp_catalog_file)
    # Give the periodic catalog refresh time to notice the deletion.
    time.sleep(1.5)
    assert set(cat) == set()
def test_remote_env(intake_server):
    """Env-var templates resolve on the server unless getenv is disabled."""
    import os
    os.environ['INTAKE_TEST'] = 'client'

    catalog = Catalog(intake_server)

    # remote_env expands on the server side ...
    with pytest.raises(Exception) as excinfo:
        catalog.remote_env.get()
    assert 'path-server' in str(excinfo.value)

    # ... while local_env expands on the client side.
    with pytest.raises(Exception) as excinfo:
        catalog.local_env.get()
    assert 'path-client' in str(excinfo.value)

    # getenv=False stops the *client* from expanding environment variables,
    # so the raw template name leaks into the error.
    catalog = Catalog(intake_server, getenv=False)
    with pytest.raises(Exception) as excinfo:
        catalog.local_env.get()
    assert 'INTAKE_TEST' in str(excinfo.value)
def build_app(catalog_uri, zmq_address=None):
    """Create the Qt application showing a CentralWidget over the catalog.

    The main window is attached to the returned QApplication so it is not
    garbage-collected while the app runs.
    """
    catalog = Catalog(catalog_uri)
    app = QApplication([b'Bluesky Browser'])
    window = QMainWindow()
    app.main_window = window
    widget = CentralWidget(catalog=catalog,
                           zmq_address=zmq_address,
                           menuBar=window.menuBar)
    window.setCentralWidget(widget)
    window.show()
    return app
def test_filter():
    """Catalog.filter keeps only the entries matching the predicate."""
    from intake.catalog.base import Catalog
    dropped = LocalCatalogEntry(name='trial', description='get this back',
                                driver='csv')
    kept = LocalCatalogEntry(name='trial', description='pass this through',
                             driver='csv')
    cat = Catalog.from_dict({'trial1': dropped, 'trial2': kept},
                            name='mycat')

    filtered = cat.filter(lambda e: 'pass' in e._description)
    assert list(filtered) == ['trial2']
    # The surviving entry is the very same object, not a copy.
    assert filtered.trial2 is kept
def test_dict_adddel():
    """Entries can be added via __setitem__ and removed via pop."""
    from intake.catalog.base import Catalog
    entry = LocalCatalogEntry(name='trial', description='get this back',
                              driver='csv')
    cat = Catalog.from_dict({'trial': entry}, name='mycat')
    assert 'trial' in cat

    # Add the same entry object under a second key.
    cat['trial2'] = entry
    assert list(cat) == ['trial', 'trial2']

    # Remove the original key; the alias survives untouched.
    cat.pop('trial')
    assert list(cat) == ['trial2']
    assert cat['trial2'] is entry
def get(self, **user_parameters):
    """Instantiate the nested catalog for this entry, caching the result.

    The first call builds a Catalog from this entry's sub-entries; later
    calls return the same object. The built catalog is wired back to the
    parent via ``cat``/``catalog_object`` (idempotent reassignment).
    """
    if not self._default_source:
        self._default_source = Catalog.from_dict(
            entries=self.entries,
            name=self.name,
            metadata=self.metadata,
            description=self.description,
            **user_parameters)
    self._default_source.cat = self._catalog
    self._default_source.catalog_object = self._catalog
    return self._default_source
def test_dict_save():
    """A dict-built catalog survives a save/open round trip."""
    from intake.catalog.base import Catalog
    target = os.path.join(tempfile.mkdtemp(), 'mycat.yaml')
    entry = LocalCatalogEntry(name='trial', description='get this back',
                              driver='csv')
    original = Catalog.from_dict({'trial': entry}, name='mycat')
    original.save(target)

    reloaded = open_catalog(target)
    assert 'trial' in reloaded
    assert reloaded.name == 'mycat'
    assert reloaded.trial._driver == 'csv'
def test_read_partition(intake_server):
    """Partitions of 'entry1' can be read individually and out of order."""
    catalog = Catalog(intake_server)
    source = catalog.entry1.get()

    # Read in reverse order to show partitions are independent.
    second = source.read_partition(1)
    first = source.read_partition(0)

    here = os.path.dirname(__file__)
    assert pd.read_csv(os.path.join(here, 'entry1_1.csv')).equals(first)
    assert pd.read_csv(os.path.join(here, 'entry1_2.csv')).equals(second)
def test_union_catalog():
    """Building a Catalog from a list of YAML files merges their entries."""
    path = os.path.dirname(__file__)
    uri1 = os.path.join(path, 'catalog_union_1.yml')
    uri2 = os.path.join(path, 'catalog_union_2.yml')

    # A list of URIs constructs a union catalog over all of them.
    union_cat = Catalog([uri1, uri2])

    assert_items_equal(list(union_cat),
                       ['entry1', 'entry1_part', 'use_example1'])

    # Compare describe() key-by-key so extra keys it may carry don't fail
    # the test.
    expected = {
        'name': 'entry1_part',
        'container': 'dataframe',
        'user_parameters': [{
            'name': 'part',
            'description': 'part of filename',
            'default': '1',
            'type': 'str',
            'allowed': ['1', '2'],
        }],
        'description': 'entry1 part',
        'direct_access': 'allow'
    }
    for k in expected:
        assert union_cat.entry1_part.describe()[k] == expected[k]

    # Implied creation of data source
    assert union_cat.entry1.container == 'dataframe'
    md = union_cat.entry1._metadata
    assert md == dict(foo='bar', bar=[1, 2, 3])

    # Use default parameters in explict creation of data source
    assert union_cat.entry1_part().container == 'dataframe'

    # Specify parameters in creation of data source
    assert union_cat.entry1_part(part='2').container == 'dataframe'
def test_read_chunks(intake_server):
    """Chunked reads of 'entry1' concatenate back to the full dataframe."""
    catalog = Catalog(intake_server)
    source = catalog.entry1.get()

    chunks = list(source.read_chunked())
    assert len(chunks) == 2

    here = os.path.dirname(__file__)
    expected = pd.concat(
        [pd.read_csv(os.path.join(here, name))
         for name in ('entry1_1.csv', 'entry1_2.csv')])
    assert expected.equals(pd.concat(chunks))
def test_pickle(intake_server):
    """A remote data source still reads correctly after a pickle round trip."""
    catalog = Catalog(intake_server)
    source = catalog.entry1.get()

    restored = pickle.loads(pickle.dumps(source, pickle.HIGHEST_PROTOCOL))
    df = restored.read()

    here = os.path.dirname(__file__)
    expected = pd.concat(
        [pd.read_csv(os.path.join(here, name))
         for name in ('entry1_1.csv', 'entry1_2.csv')])
    assert expected.equals(df)
def test_dir(intake_server):
    """Tab-completion surfaces only the pages/entries fetched so far.

    With pagination enabled, ``dir()`` / key-completions must warn that the
    listing is incomplete and only show cached names; once everything is
    loaded (or pagination is off) no warning is emitted.
    """
    PAGE_SIZE = 2
    catalog = Catalog(intake_server, page_size=PAGE_SIZE)
    # Nothing fetched yet: both caches empty, listing incomplete.
    assert len(catalog._entries._page_cache) == 0
    assert len(catalog._entries._direct_lookup_cache) == 0
    assert not catalog._entries.complete
    with pytest.warns(UserWarning, match="Tab-complete"):
        key_completions = catalog._ipython_key_completions_()
    with pytest.warns(UserWarning, match="Tab-complete"):
        dir_ = dir(catalog)
    # __dir__ triggers loading the first page.
    assert len(catalog._entries._page_cache) == 2
    assert len(catalog._entries._direct_lookup_cache) == 0
    assert not catalog._entries.complete
    assert set(key_completions) == set(['use_example1', 'nested'])
    assert 'metadata' in dir_  # a normal attribute
    assert 'use_example1' in dir_  # an entry from the first page
    assert 'arr' not in dir_  # an entry we haven't cached yet

    # Trigger fetching one specific name.
    catalog['arr']
    with pytest.warns(UserWarning, match="Tab-complete"):
        dir_ = dir(catalog)
    with pytest.warns(UserWarning, match="Tab-complete"):
        key_completions = catalog._ipython_key_completions_()
    assert 'metadata' in dir_
    assert 'arr' in dir_  # an entry cached via direct access
    assert 'arr' in key_completions

    # Load everything.
    list(catalog)
    assert catalog._entries.complete
    # Once complete, no warning may be raised any more.
    with pytest.warns(None) as record:
        assert set(catalog) == set(catalog._ipython_key_completions_())
        assert set(catalog).issubset(set(dir(catalog)))
    assert len(record) == 0

    # Load without pagination (with also loads everything).
    catalog = Catalog(intake_server, page_size=None)
    assert catalog._entries.complete
    with pytest.warns(None) as record:
        assert set(catalog) == set(catalog._ipython_key_completions_())
        assert set(catalog).issubset(set(dir(catalog)))
    assert len(record) == 0
def test_reload_missing_local_directory(tempdir):
    """A glob catalog over an empty directory picks up files created later."""
    catalog = Catalog(tempdir + '/*')
    assert_items_equal(list(catalog), [])

    # Drop a catalog file into the watched directory.
    with open(os.path.join(tempdir, YAML_FILENAME), 'w') as f:
        f.write('''
plugins:
  source:
    - module: intake.catalog.tests.example1_source
    - dir: '{{ CATALOG_DIR }}/example_plugin_dir'
sources:
  use_example1:
    description: example1 source plugin
    driver: example1
    args: {}
''')
    # Wait for the catalog's refresh to notice the new file.
    time.sleep(1)
    assert 'use_example1' in catalog
def test_getitem_and_getattr(intake_server):
    """Item and attribute access agree; missing names raise the right error."""
    catalog = Catalog(intake_server)
    catalog['arr']

    with pytest.raises(KeyError):
        catalog['doesnotexist']
    with pytest.raises(KeyError):
        catalog['_doesnotexist']
    with pytest.raises(KeyError):
        # 'metadata' is an *attribute* of Catalog, never an item.
        catalog['metadata']

    catalog.arr       # alias for catalog['arr']
    catalog.metadata  # an ordinary attribute, not an entry
    with pytest.raises(AttributeError):
        catalog.doesnotexit
    with pytest.raises(AttributeError):
        catalog._doesnotexit

    # Both access styles return the very same entry object.
    assert catalog.arr is catalog['arr']
    assert isinstance(catalog.arr, RemoteCatalogEntry)
    assert isinstance(catalog.metadata, (dict, type(None)))
def test_reload_updated_directory(intake_server_with_config):
    """A second catalog file appearing in the config dir adds its entries."""
    catalog = Catalog(intake_server_with_config)
    before = list(catalog)
    assert 'example2' not in before

    # Write a new catalog file next to the existing one.
    extra_file = os.path.join(TMP_DIR, 'intake_test_catalog2.yml')
    with open(extra_file, 'w') as f:
        f.write('''
sources:
  example2:
    description: source 2
    driver: csv
    args:
      urlpath: none
''')
    # Allow the server's refresh cycle to pick up the new file.
    time.sleep(2)
    assert_items_equal(list(catalog), ['example2'] + before)
def test_info_describe(intake_server):
    """describe() reports container, description and direct-access policy."""
    catalog = Catalog(intake_server)
    assert_items_equal(list(catalog),
                       ['use_example1', 'nested', 'entry1', 'entry1_part',
                        'remote_env', 'local_env', 'text', 'arr'])

    expected = {
        'container': 'dataframe',
        'description': 'entry1 full',
        'name': 'entry1',
        'direct_access': 'forbid',
        'user_parameters': [],
    }
    assert catalog['entry1'].describe() == expected
    # entry1_part, by contrast, permits direct access.
    assert catalog['entry1_part'].describe()['direct_access'] == 'allow'
def main(argv=None):
    """Entry point for the Intake catalog server CLI.

    Parses command-line arguments, builds a Catalog from the given YAML
    file(s), and serves it over HTTP via Tornado until interrupted.

    Parameters
    ----------
    argv : list of str, optional
        Command-line arguments; defaults to ``sys.argv``.
    """
    from intake.config import conf
    from intake import Catalog
    if argv is None:
        argv = sys.argv

    parser = argparse.ArgumentParser(description='Intake Catalog Server')
    parser.add_argument('-p', '--port', type=int, default=conf['port'],
                        help='port number for server to listen on')
    parser.add_argument('--sys-exit-on-sigterm', action='store_true',
                        help='internal flag used during unit testing to ensure '
                             '.coverage file is written')
    parser.add_argument('catalog_args', metavar='FILE', type=str, nargs='+',
                        help='Name of catalog YAML file')
    args = parser.parse_args(argv[1:])

    # Used by the test suite so coverage data is flushed on SIGTERM.
    if args.sys_exit_on_sigterm:
        signal.signal(signal.SIGTERM, call_exit_on_sigterm)

    logger.info('Creating catalog from:')
    for arg in args.catalog_args:
        logger.info(' - %s' % arg)

    # A single file is passed as a bare string; multiple files as a list
    # (which Catalog treats as a union catalog).
    catargs = args.catalog_args
    catargs = catargs[0] if len(catargs) == 1 else catargs
    logger.info("catalog_args: %s" % catargs)

    catalog = Catalog(catargs)
    logger.info('Entries:' + ','.join(list(catalog)))

    logger.info('Listening on port %d' % args.port)

    server = IntakeServer(catalog)
    app = server.make_app()
    # Drop cached open sources that have been idle for an hour.
    server.start_periodic_functions(close_idle_after=3600.0)

    app.listen(args.port)
    # Blocks until the IOLoop is stopped.
    tornado.ioloop.IOLoop.current().start()
def test_datetime(intake_server):
    """Datetime user parameters round-trip through describe()."""
    catalog = Catalog(intake_server)
    info = catalog["datetime"].describe()
    print(info)

    expected = {
        'name': 'datetime',
        'container': 'dataframe',
        'description': 'datetime parameters',
        'direct_access': 'forbid',
        'user_parameters': [{
            'name': 'time',
            'description': 'some time',
            'type': 'datetime',
            'default': pd.Timestamp('1970-01-01 00:00:00')
        }],
        'metadata': {},
    }
    # Compare key-by-key so unrelated extra keys in info don't fail.
    for key, value in expected.items():
        assert info[key] == value
def test_pagination(intake_server):
    """Remote catalog entries are fetched lazily, page by page.

    Tracks the two internal caches (page cache and direct-lookup cache)
    through a sequence of accesses to confirm exactly when each fills.
    """
    PAGE_SIZE = 2
    catalog = Catalog(intake_server, page_size=PAGE_SIZE)
    # Fresh catalog: nothing fetched yet.
    assert len(catalog._entries._page_cache) == 0
    assert len(catalog._entries._direct_lookup_cache) == 0

    # Trigger fetching one specific name.
    catalog['arr']
    assert len(catalog._entries._page_cache) == 0
    assert len(catalog._entries._direct_lookup_cache) == 1

    # Using `in` on a Catalog should not iterate.
    'arr' in catalog
    assert len(catalog._entries._page_cache) == 0
    assert len(catalog._entries._direct_lookup_cache) == 1

    # Trigger fetching just one full page.
    list(zip(range(PAGE_SIZE), catalog))
    assert len(catalog._entries._page_cache) == PAGE_SIZE
    assert len(catalog._entries._direct_lookup_cache) == 1

    # Trigger fetching all pages by list-ifying.
    list(catalog)
    assert len(catalog._entries._page_cache) > PAGE_SIZE
    assert len(catalog._entries._direct_lookup_cache) == 1

    # Now direct lookup by name should be free because everything is cached.
    catalog['text']
    assert len(catalog._entries._direct_lookup_cache) == 1
def get_app(self):
    """Build a Tornado application serving the local test catalog."""
    cat = Catalog(catalog_file)
    self.server = IntakeServer(cat)
    return self.server.make_app()
def invoke(self, args):
    """Print discovery info for the named entry of the catalog at args.uri."""
    catalog = Catalog(args.uri)
    source = catalog[args.name].get()
    # The source is a context manager; close it after discovery.
    with source as f:
        print(f.discover())
def test_environment_evaluation(intake_server):
    """Accessing an env-templated remote entry succeeds with the var set."""
    catalog = Catalog(intake_server)
    import os
    os.environ['INTAKE_TEST'] = 'client'
    # Entry lookup alone must not raise while the variable is present.
    entry = catalog['remote_env']