def from_yaml(fh, ignore=(ValueError, NotImplementedError), followlinks=True,
              hidden=False, relative_to_yaml_dir=False):
    """Construct a dictionary of resources from a YAML specification.

    Parameters
    ----------
    fh : file
        File object referring to the YAML specification of resources to load.
        Its ``name`` attribute is only consulted when ``relative_to_yaml_dir``
        is true.
    ignore : tuple of Exception, optional
        Ignore these exceptions when loading files found under a directory
        source (forwarded to ``data_spider``).
    followlinks : bool, optional
        Follow symbolic links (forwarded to ``data_spider``).
    hidden : bool, optional
        Load hidden files (forwarded to ``data_spider``).
    relative_to_yaml_dir: bool, optional, default False
        Load paths relative to yaml file's directory.  Default is to load
        relative to process' CWD.

    Returns
    -------
    dict
        A dictionary mapping top level keys in a YAML file to resources.
        An empty YAML document yields an empty dictionary.

    Raises
    ------
    ValueError
        If an entry of the specification has no ``source`` key.

    See Also
    --------
    data_spider : Traverse a directory tree for resources
    """
    resources = {}

    # Resolve the YAML file's directory only when it is actually needed, so
    # that file-like objects without a ``name`` attribute keep working in the
    # default (CWD-relative) mode.
    yaml_dir = None
    if relative_to_yaml_dir:
        yaml_dir = os.path.dirname(os.path.abspath(fh.name))

    # NOTE(review): ``yaml.load`` without an explicit ``Loader`` can construct
    # arbitrary Python objects from tagged YAML, and recent PyYAML releases
    # require the ``Loader`` argument.  Consider ``yaml.safe_load`` if the
    # specifications never rely on Python-specific tags.
    spec = yaml.load(fh.read())
    if spec is None:
        # An empty document parses to None; treat it as "no resources".
        spec = {}

    for name, info in spec.items():
        with pushd(yaml_dir if relative_to_yaml_dir else os.getcwd()):
            try:
                source = info.pop('source')
            except KeyError:
                raise ValueError(
                    'source key not found for data source named %r' % name)

            # Import any modules the entry asks for before loading it.
            for mod in info.pop('imports', []):
                importlib.import_module(mod)

            # Expand ``~`` *before* the directory test; testing the raw
            # string would send ``~/some/dir`` down the non-directory branch.
            expanded = os.path.expanduser(source)
            if os.path.isdir(expanded):
                resources[name] = data_spider(expanded,
                                              ignore=ignore,
                                              followlinks=followlinks,
                                              hidden=hidden,
                                              extra_kwargs=info)
            else:
                # Remaining keys of the entry become keyword arguments.
                resources[name] = bz_data(source, **info)
    return resources
def from_yaml(fh, ignore=(ValueError, NotImplementedError), followlinks=True,
              hidden=False, relative_to_yaml_dir=False):
    """Construct a dictionary of resources from a YAML specification.

    Parameters
    ----------
    fh : file
        File object referring to the YAML specification of resources to load.
        Its ``name`` attribute is only consulted when ``relative_to_yaml_dir``
        is true.
    ignore : tuple of Exception, optional
        Ignore these exceptions when loading files found under a directory
        source (forwarded to ``data_spider``).
    followlinks : bool, optional
        Follow symbolic links (forwarded to ``data_spider``).
    hidden : bool, optional
        Load hidden files (forwarded to ``data_spider``).
    relative_to_yaml_dir: bool, optional, default False
        Load paths relative to yaml file's directory.  Default is to load
        relative to process' CWD.

    Returns
    -------
    dict
        A dictionary mapping top level keys in a YAML file to resources.
        An empty YAML document yields an empty dictionary.

    Raises
    ------
    ValueError
        If an entry of the specification has no ``source`` key.

    See Also
    --------
    data_spider : Traverse a directory tree for resources
    """
    resources = {}

    # The directory of the YAML file is only looked up on demand: reading
    # ``fh.name`` eagerly would fail for file-like objects that have no name
    # even though the default mode never uses it.
    if relative_to_yaml_dir:
        yaml_dir = os.path.dirname(os.path.abspath(fh.name))
    else:
        yaml_dir = None

    # NOTE(review): ``yaml.load`` without an explicit ``Loader`` can construct
    # arbitrary Python objects from tagged YAML, and recent PyYAML releases
    # require the ``Loader`` argument.  Consider ``yaml.safe_load`` if the
    # specifications never rely on Python-specific tags.
    spec = yaml.load(fh.read())
    if spec is None:
        # Empty document: nothing to load.
        spec = {}

    for name, info in spec.items():
        with pushd(yaml_dir if relative_to_yaml_dir else os.getcwd()):
            try:
                source = info.pop('source')
            except KeyError:
                raise ValueError('source key not found for data source named %r' % name)

            # Modules listed under ``imports`` are imported for their side
            # effects before the source is opened.
            for mod in info.pop('imports', []):
                importlib.import_module(mod)

            # ``~`` must be expanded before asking whether the source is a
            # directory, otherwise ``~/dir`` is never recognised as one.
            expanded = os.path.expanduser(source)
            if os.path.isdir(expanded):
                resources[name] = data_spider(expanded,
                                              ignore=ignore,
                                              followlinks=followlinks,
                                              hidden=hidden,
                                              extra_kwargs=info)
            else:
                # Whatever is left in the entry is passed as keyword arguments.
                resources[name] = bz_data(source, **info)
    return resources
def _spider(resource_path, ignore, followlinks, hidden, extra_kwargs):
    """Recursively load everything beneath ``resource_path`` into a nested dict.

    Parameters
    ----------
    resource_path : str
        Directory whose entries are listed and loaded.
    ignore : tuple of Exception
        Unpacked into ``ignoring`` around every ``bz_data`` call (presumably
        suppressing those exception types -- confirm against ``ignoring``).
    followlinks : bool
        When false, entries that are symbolic links are skipped.
    hidden : bool
        When false, entries whose name starts with ``os.curdir`` are skipped.
    extra_kwargs : dict or None
        Keyword arguments passed to ``bz_data`` for every non-directory entry.

    Returns
    -------
    dict
        Maps each entry's base name to the loaded resource, or to a nested
        dictionary for a subdirectory.  Subdirectories that produce an empty
        dictionary are left out.
    """
    loader_kwargs = extra_kwargs or {}
    collected = {}

    for entry in os.listdir(resource_path):
        full_path = os.path.join(resource_path, entry)
        label = os.path.basename(full_path)

        # Guard clauses: hidden entries first, then unwanted symlinks.
        if not hidden and label.startswith(os.curdir):
            continue
        if not followlinks and os.path.islink(full_path):
            continue

        if not os.path.isdir(full_path):
            with ignoring(*ignore):
                collected[label] = bz_data(full_path, **loader_kwargs)
            continue

        subtree = _spider(full_path,
                          ignore=ignore,
                          followlinks=followlinks,
                          hidden=hidden,
                          extra_kwargs=extra_kwargs)
        # Keep only directories that actually yielded something.
        if subtree:
            collected[label] = subtree

    return collected
def from_yaml(path, ignore=(ValueError, NotImplementedError), followlinks=True,
              hidden=False):
    """Construct a dictionary of resources from a YAML specification.

    Parameters
    ----------
    path : file
        Open file object (anything with a ``read()`` method) containing the
        YAML specification of resources to load.  Despite the parameter
        name, this is not a path string: the contents are obtained with
        ``path.read()``.
    ignore : tuple of Exception, optional
        Ignore these exceptions when loading files found under a directory
        source (forwarded to ``data_spider``)
    followlinks : bool, optional
        Follow symbolic links (forwarded to ``data_spider``)
    hidden : bool, optional
        Load hidden files (forwarded to ``data_spider``)

    Returns
    -------
    dict
        A dictionary mapping top level keys in a YAML file to resources.
        An empty YAML document yields an empty dictionary.

    Raises
    ------
    ValueError
        If an entry of the specification has no ``source`` key.

    See Also
    --------
    data_spider : Traverse a directory tree for resources
    """
    resources = {}

    # NOTE(review): ``yaml.load`` without an explicit ``Loader`` can construct
    # arbitrary Python objects from tagged YAML, and recent PyYAML releases
    # require the ``Loader`` argument.  Consider ``yaml.safe_load`` if the
    # specifications never rely on Python-specific tags.
    spec = yaml.load(path.read())
    if spec is None:
        # An empty document parses to None; treat it as "no resources".
        spec = {}

    for name, info in spec.items():
        try:
            source = info.pop('source')
        except KeyError:
            raise ValueError('source key not found for data source named %r' % name)

        # Import any modules the entry asks for before loading it.
        for mod in info.pop('imports', []):
            importlib.import_module(mod)

        # Expand ``~`` *before* the directory test; testing the raw string
        # would send ``~/some/dir`` down the non-directory branch.
        expanded = os.path.expanduser(source)
        if os.path.isdir(expanded):
            resources[name] = data_spider(expanded,
                                          ignore=ignore,
                                          followlinks=followlinks,
                                          hidden=hidden,
                                          extra_kwargs=info)
        else:
            # Remaining keys of the entry become keyword arguments.
            resources[name] = bz_data(source, **info)
    return resources
def create_index(uri, column_name_or_names, name=None, **kwargs):
    """Open ``uri`` with ``bz_data`` and create an index on the result.

    Parameters
    ----------
    uri
        Passed to ``bz_data`` together with ``**kwargs`` to obtain the
        resource to index.
    column_name_or_names
        Forwarded unchanged to ``create_index`` for the opened resource.
    name : optional
        Forwarded as the ``name`` keyword of the inner ``create_index`` call.
    **kwargs
        Extra keyword arguments for ``bz_data`` only.

    Returns
    -------
    The object returned by ``bz_data(uri, **kwargs)``.
    """
    resource = bz_data(uri, **kwargs)
    # NOTE(review): this calls ``create_index`` again with the opened
    # resource; presumably a dispatch decorator outside this view routes that
    # call to a different implementation -- confirm.
    create_index(resource, column_name_or_names, name=name)
    return resource