def get_database(config=None, name='library'):
    """Return a new `LibraryDb`, constructed from a configuration.

    :param config: a `RunConfig` object. If None, one is obtained by calling
        `get_runconfig()` with no arguments.
    :param name: key of the entry to use from the `database` configuration
        group.
    :rtype: a `LibraryDb` object
    :raises ConfigurationError: if the configuration has no `library` value,
        or no `database.<name>` entry.
    """
    import tempfile

    if config is None:
        config = get_runconfig()

    if not config.library:
        raise ConfigurationError("Didn't get library configuration value")

    root_dir = config.filesystem.get('root_dir', tempfile.gettempdir())

    db_config = config.database.get(name)

    # Validate the entry before touching its attributes; otherwise a missing
    # entry surfaces as an AttributeError on `.dbname` rather than as the
    # ConfigurationError below.
    if not db_config:
        raise ConfigurationError("Didn't get database.{} configuration value".format(name))

    # The configured dbname may contain a '{root}' placeholder.
    db_config.dbname = db_config.dbname.format(root=root_dir)

    database = LibraryDb(**db_config)
    database.create()  # creates if does not exist.

    return database
def _get_library(config=None, name='default'):
    """Build a `Library` from the `library.<name>` configuration entry.

    :param config: run configuration; when None, `get_runconfig()` is called.
    :param name: library configuration key; None is treated as 'default'.
    :raises Exception: if the `library.<name>` entry is missing or empty.
    """
    name = 'default' if name is None else name
    config = get_runconfig() if config is None else config

    lib_config = config.library.get(name, False)
    if not lib_config:
        raise Exception("Failed to get library.{} config key ".format(name))

    cache = Filesystem(config).get_cache(lib_config.filesystem, config)
    database = get_database(config, name=lib_config.database)
    remote = lib_config.get('remote', None)

    return Library(cache=cache, database=database, remote=remote)
def test_compression(self): from databundles.run import get_runconfig from databundles.cache import new_cache from databundles.util import temp_file_name, md5_for_file, copy_file_or_flo rc = get_runconfig((os.path.join(self.bundle_dir,'test-run-config.yaml'),RunConfig.USER_CONFIG)) comp_cache = new_cache(rc.filesystem('compressioncache')) test_file_name = 'test_file' fn = temp_file_name() print 'orig file ', fn with open(fn,'wb') as f: for i in range(1000): f.write("{:03d}:".format(i)) cf = comp_cache.put(fn, test_file_name) with open(cf) as stream: from databundles.util.sgzip import GzipFile stream = GzipFile(stream) uncomp_cache = new_cache(rc.filesystem('fscache')) uncomp_stream = uncomp_cache.put_stream('decomp') copy_file_or_flo(stream, uncomp_stream) uncomp_stream.close() dcf = uncomp_cache.get('decomp') self.assertEquals(md5_for_file(fn), md5_for_file(dcf))
def setUp(self): import testbundle.bundle, shutil, os self.bundle_dir = os.path.dirname(testbundle.bundle.__file__) self.rc = get_runconfig((os.path.join(self.bundle_dir,'source-test-config.yaml'), os.path.join(self.bundle_dir,'bundle.yaml'), RunConfig.USER_CONFIG)) self.copy_or_build_bundle() bundle = Bundle() print "Deleting: {}".format(self.rc.group('filesystem').root_dir) databundles.util.rm_rf(self.rc.group('filesystem').root_dir) bdir = os.path.join(self.rc.group('sourcerepo')['dir'],'testbundle') pats = shutil.ignore_patterns('build', 'build-save','*.pyc', '.git','.gitignore','.ignore','__init__.py') print "Copying test dir tree to ", bdir shutil.copytree(bundle.bundle_dir, bdir, ignore=pats) # Import the bundle file from the directory from databundles.run import import_file import imp rp = os.path.realpath(os.path.join(bdir, 'bundle.py')) mod = import_file(rp) dir_ = os.path.dirname(rp) self.bundle = mod.Bundle(dir_) print self.bundle.bundle_dir
def setUp(self):
    """Load the database test configuration, then set up the test bundle.

    Calls `copy_or_build_bundle()` and stores a fresh `Bundle()` on
    `self.bundle`.
    """
    import testbundle.bundle

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    config_files = (
        os.path.join(self.bundle_dir, 'database-test-config.yaml'),
        os.path.join(self.bundle_dir, 'bundle.yaml'),
    )
    self.rc = get_runconfig(config_files)

    self.copy_or_build_bundle()

    self.bundle = Bundle()
def setUp(self):
    """Load the client test configuration, then set up the test bundle.

    The configuration is read from `client-test-config.yaml`, `bundle.yaml`
    and `RunConfig.USER_CONFIG`, in that order.
    """
    import testbundle.bundle

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    config_files = (
        os.path.join(self.bundle_dir, 'client-test-config.yaml'),
        os.path.join(self.bundle_dir, 'bundle.yaml'),
        RunConfig.USER_CONFIG,
    )
    self.rc = get_runconfig(config_files)

    self.copy_or_build_bundle()

    self.bundle = Bundle()
def test_caches(self): '''Basic test of put(), get() and has() for all cache types''' from functools import partial from databundles.run import get_runconfig, RunConfig from databundles.filesystem import Filesystem from databundles.cache import new_cache from databundles.util import md5_for_file from databundles.bundle import DbBundle self.start_server() # For the rest-cache #fn = '/tmp/1mbfile' #with open(fn, 'wb') as f: # f.write('.'*(1024)) fn = self.bundle.database.path # Opening the file might run the database updates in # database.sqlite._on_connect_update_schema, which can affect the md5. b = DbBundle(fn) md5 = md5_for_file(fn) print "MD5 {} = {}".format(fn, md5) rc = get_runconfig((os.path.join(self.bundle_dir,'test-run-config.yaml'),RunConfig.USER_CONFIG)) for i, fsname in enumerate(['fscache', 'limitedcache', 'compressioncache','cached-s3', 'cached-compressed-s3', 'rest-cache']): #'compressioncache', config = rc.filesystem(fsname) cache = new_cache(config) print '---', fsname, cache identity = self.bundle.identity relpath = identity.cache_key r = cache.put(fn, relpath,identity.to_meta(md5=md5)) r = cache.get(relpath) if not r.startswith('http'): self.assertTrue(os.path.exists(r), str(cache)) self.assertTrue(cache.has(relpath, md5=md5)) cache.remove(relpath, propagate=True) self.assertFalse(os.path.exists(r), str(cache)) self.assertFalse(cache.has(relpath)) cache = new_cache(rc.filesystem('s3cache-noupstream')) r = cache.put(fn, 'a')
def setUp(self): import testbundle.bundle self.bundle_dir = os.path.dirname(testbundle.bundle.__file__) self.rc = get_runconfig((os.path.join(self.bundle_dir,'warehouse-test-config.yaml'), os.path.join(self.bundle_dir,'bundle.yaml'))) self.copy_or_build_bundle() self.bundle = Bundle() print "Deleting: {}".format(self.rc.group('filesystem').root_dir) databundles.util.rm_rf(self.rc.group('filesystem').root_dir)
def test_runconfig(self):
    '''Check that the RunConfig expands the library configuration'''
    from databundles.run import get_runconfig, RunConfig

    config_file = os.path.join(self.bundle_dir, 'test-run-config.yaml')
    rc = get_runconfig((config_file, RunConfig.USER_CONFIG))

    library_cfg = rc.library('library1')

    self.assertEquals('database1', library_cfg['database']['_name'])

    # Walk down the chain of upstream filesystems one level at a time, so
    # each level is only looked up once the previous assertion has passed.
    fs_level1 = library_cfg['filesystem']
    self.assertEquals('filesystem1', fs_level1['_name'])

    fs_level2 = fs_level1['upstream']
    self.assertEquals('filesystem2', fs_level2['_name'])

    fs_level3 = fs_level2['upstream']
    self.assertEquals('filesystem3', fs_level3['_name'])

    self.assertEquals('devtest.sandiegodata.org', fs_level3['account']['_name'])
def x_test_remote(self): from databundles.run import RunConfig from databundles.library import new_library rc = get_runconfig((os.path.join(self.bundle_dir,'server-test-config.yaml'),RunConfig.USER_CONFIG)) config = rc.library('default') library = new_library(config) print library.remote print library.remote.last_upstream() print library.cache print library.cache.last_upstream()
def setUp(self): import testbundle.bundle self.bundle_dir = os.path.dirname(testbundle.bundle.__file__) self.rc = get_runconfig((os.path.join(self.bundle_dir,'library-test-config.yaml'), os.path.join(self.bundle_dir,'bundle.yaml')) ) self.copy_or_build_bundle() self.bundle = Bundle() print "Deleting: {}".format(self.rc.filesystem.root_dir) Test.rm_rf(self.rc.filesystem.root_dir)
def _get_library(config=None, name='default'):
    """Build a `Library` from the `library.<name>` configuration entry.

    The entry's optional 'remote' value selects the remote: a string
    starting with 'http' is used as the URL of a `Rest` client, any other
    string is used as the name of a filesystem configuration, and a falsy
    value means no remote.

    :param config: run configuration; when None, `get_runconfig()` is called.
    :param name: library configuration key; None is treated as 'default'.
    :raises Exception: if the `library.<name>` entry is missing or empty, or
        if 'remote' is set to something that is not a string.
    """
    from databundles.filesystem import Filesystem

    name = 'default' if name is None else name
    config = get_runconfig() if config is None else config

    lib_config = config.library.get(name, False)
    if not lib_config:
        raise Exception("Failed to get library.{} config key ".format(name))

    cache = Filesystem(config).get_cache(lib_config.filesystem, config)
    database = get_database(config, name=lib_config.database)

    remote = None
    remote_name = lib_config.get('remote', None)

    if remote_name:
        from databundles.client.rest import Rest

        if not isinstance(remote_name, basestring):
            raise Exception("Deprecated")

        if remote_name.startswith('http'):
            # It is a URL, and it points to an API that will be used directly.
            remote = Rest(remote_name, config.group('accounts'))
        else:
            # It is the name of a filesystem configuration.
            remote = Filesystem._get_cache(config.filesystem, remote_name)

    require_upload = lib_config.get('require-upload', False)

    return Library(
        cache=cache,
        database=database,
        remote=remote,
        require_upload=require_upload,
        host=lib_config.get('host', 'localhost'),
        port=lib_config.get('port', 80),
    )
def get_client(rc=None, name=None):
    """Return a `Ckan` client built from the `catalog.<name>` configuration.

    :param rc: run configuration; when None, `get_runconfig()` is called.
    :param name: catalog configuration key; None is treated as 'default'.
    :raises ConfigurationError: if looking up the catalog group, the named
        entry, or its `url` / `key` values raises any exception.
    """
    from databundles.dbexceptions import ConfigurationError

    rc = get_runconfig() if rc is None else rc
    name = 'default' if name is None else name

    try:
        # The group lookup is kept for its failure mode: it raises when the
        # 'catalog' group cannot be obtained.
        rc.group('catalog')
        entry = rc.catalog.get(name)
        url, key = entry.url, entry.key
    except Exception as e:
        message = ("Failed to get configuration for catalog.{0}.url or "
                   "catalog.{0}.key: {1}")
        raise ConfigurationError(message.format(name, e))

    return Ckan(url, key)
def setUp(self):
    """Setup a library that will download bundles from production, freshly for every run"""
    import testbundle.bundle
    from databundles.util import rm_rf

    self.bundle_dir = os.path.dirname(testbundle.bundle.__file__)

    config_files = (
        os.path.join(self.bundle_dir, 'client-test-config.yaml'),
        os.path.join(self.bundle_dir, 'bundle.yaml'),
    )
    self.rc = get_runconfig(config_files)

    self.copy_or_build_bundle()

    self.bundle = Bundle()

    # Removal of the filesystem root is currently disabled:
    #print "Deleting: {}".format(self.rc.filesystem.root_dir)
    #rm_rf(self.rc.filesystem.root_dir)

    self.library = get_library(self.rc, 'production', reset=True)
def main():
    """Command-line entry point for the bulkloader.

    Parses the command line, loads the run configuration from the selected
    config path(s) and hands `run` to `daemonize` along with the parsed
    arguments and the configuration.
    """
    import argparse
    from databundles.run import get_runconfig
    from databundles.util import daemonize

    parser = argparse.ArgumentParser(
        prog='bulkloader',
        description='Bulkloader, version {}'.format(__version__))

    #parser.add_argument('command', nargs=1, help='Create a new bundle')

    # Configuration selection and verbosity.
    parser.add_argument('-c', '--config', default=None, action='append',
                        help="Path to a run config file")
    parser.add_argument('-v', '--verbose', default=None, action='append',
                        help="Be verbose")
    parser.add_argument('--single-config', default=False, action="store_true",
                        help="Load only the config file specified")

    # Daemon control.
    parser.add_argument('-d', '--daemonize', default=False, action="store_true",
                        help="Run as a daemon")
    parser.add_argument('-k', '--kill', default=False, action="store_true",
                        help="With --daemonize, kill the running daemon process")
    parser.add_argument('-L', '--unlock', default=False, action="store_true",
                        help="Reclaim lockfile if it is locked")
    parser.add_argument('-g', '--group', default=None,
                        help="Set group for daemon operation")
    parser.add_argument('-u', '--user', default=None,
                        help="Set user for daemon operation")
    parser.add_argument('-t', '--test', default=False, action="store_true",
                        help="Run the test version of the server")
    parser.add_argument('-D', '--dir', default='/var/run/bulkloader',
                        help='Directory to create fifos')

    args = parser.parse_args()

    config_paths = args.config

    if args.single_config:
        if config_paths is None or len(config_paths) > 1:
            raise Exception("--single_config can only be specified with one -c")
        # With --single-config the one-element list is passed through as is.
        rc_path = config_paths
    elif config_paths is not None and len(config_paths) == 1:
        # A lone -c is unwrapped and passed as a plain path.
        rc_path = config_paths.pop()
    else:
        rc_path = config_paths

    rc = get_runconfig(rc_path)

    daemonize(run, args, rc, prog_name='bulkloader')
def __init__(self, root_dir):
    '''Load the bundle.yaml file and create a config object

    If the 'id' value is not set in the yaml file, it will be created and the
    file will be re-written.

    :param root_dir: directory expected to contain `bundle.yaml`.
    :raises ConfigurationError: if `bundle.yaml` does not exist under
        `root_dir` after loading (and any rewrite) has finished.
    '''
    super(BundleFileConfig, self).__init__()

    self.root_dir = root_dir
    self.local_file = os.path.join(self.root_dir, 'bundle.yaml')

    self._run_config = get_runconfig(self.local_file)

    # If there is no id field, create it immediately and
    # write the configuration back out.
    if not self._run_config.identity.get('id', False):
        from databundles.identity import DatasetNumber
        self._run_config.identity.id = str(DatasetNumber())
        self.rewrite()

    # NOTE(review): this check runs after the load and the optional rewrite,
    # so it only fires if the file is still absent at this point. Confirm
    # whether it was meant to run before get_runconfig().
    if not os.path.exists(self.local_file):
        # Name the missing file; the message previously ended at the colon.
        raise ConfigurationError("Can't find bundle config file: {}".format(self.local_file))
def get_cache(fsname):
    """Return a new cache built from the `fsname` filesystem configuration.

    The run configuration is read from `test-run-config.yaml` in
    `self.bundle_dir` plus `RunConfig.USER_CONFIG`; `self` is taken from the
    enclosing scope.
    """
    config_file = os.path.join(self.bundle_dir, 'test-run-config.yaml')
    rc = get_runconfig((config_file, RunConfig.USER_CONFIG))

    return new_cache(rc.filesystem(fsname))
def main():
    """Command-line entry point for the databundles management interface.

    Builds an argparse parser with the 'bundle', 'library', 'ckan',
    'install' and 'test' commands, each with its own sub-commands, resolves
    the run-config path(s) from -c / --single-config, loads the run
    configuration (skipped for 'install') and calls the handler registered
    for the selected command as `handler(args, rc)`.
    """
    import argparse

    parser = argparse.ArgumentParser(prog='python -mdatabundles', description='Databundles {}. Management interface for databundles, libraries and repositories. '.format(__version__))

    #parser.add_argument('command', nargs=1, help='Create a new bundle')

    # Global options, accepted before the command name.
    parser.add_argument('-c','--config', default=None, action='append', help="Path to a run config file")
    parser.add_argument('-v','--verbose', default=None, action='append', help="Be verbose")
    parser.add_argument('--single-config', default=False,action="store_true", help="Load only the config file specified")

    cmd = parser.add_subparsers(title='commands', help='command help')

    #
    # Bundle Command
    #
    bundle_p = cmd.add_parser('bundle', help='Create a new bundle')
    bundle_p.set_defaults(command='bundle')
    asp = bundle_p.add_subparsers(title='Bundle commands', help='Commands for maniplulating bundles')

    sp = asp.add_parser('new', help='Create a new bundle')
    sp.set_defaults(subcommand='new')
    sp.set_defaults(revision='1') # Needed in Identity.name_parts
    sp.add_argument('-s','--source', required=True, help='Source, usually a domain name')
    sp.add_argument('-d','--dataset', required=True, help='Name of the dataset')
    sp.add_argument('-b','--subset', nargs='?', default=None, help='Name of the subset')
    sp.add_argument('-v','--variation', default='orig', help='Name of the variation')
    sp.add_argument('-c','--creator', required=True, help='Id of the creator')
    # NOTE(review): no action= is given, so argparse's default 'store' action
    # applies and --dry-run expects a value. Confirm whether a boolean flag
    # (action='store_true') was intended.
    sp.add_argument('-n','--dry-run', default=False, help='Dry run')
    sp.add_argument('args', nargs=argparse.REMAINDER) # Get everything else.

    #
    # Library Command
    #
    lib_p = cmd.add_parser('library', help='Manage a library')
    lib_p.set_defaults(command='library')
    asp = lib_p.add_subparsers(title='library commands', help='command help')
    lib_p.add_argument('-n','--name', default='default', help='Select a different name for the library')

    sp = asp.add_parser('push', help='Push new library files')
    sp.set_defaults(subcommand='push')
    sp.add_argument('-w','--watch', default=False,action="store_true", help='Check periodically for new files.')
    sp.add_argument('-f','--force', default=False,action="store_true", help='Push all files')

    sp = asp.add_parser('server', help='Run the library server')
    sp.set_defaults(subcommand='server')
    sp.add_argument('-d','--daemonize', default=False, action="store_true", help="Run as a daemon")
    sp.add_argument('-k','--kill', default=False, action="store_true", help="With --daemonize, kill the running daemon process")
    sp.add_argument('-g','--group', default=None, help="Set group for daemon operation")
    sp.add_argument('-u','--user', default=None, help="Set user for daemon operation")

    # The four state flags below all store into the same 'file_state' dest.
    sp = asp.add_parser('files', help='Print out files in the library')
    sp.set_defaults(subcommand='files')
    sp.add_argument('-a','--all', default='all',action="store_const", const='all', dest='file_state', help='Print all files')
    sp.add_argument('-n','--new', default=False,action="store_const", const='new', dest='file_state', help='Print new files')
    sp.add_argument('-p','--pushed', default=False,action="store_const", const='pushed', dest='file_state', help='Print pushed files')
    sp.add_argument('-u','--pulled', default=False,action="store_const", const='pulled', dest='file_state', help='Print pulled files')

    sp = asp.add_parser('new', help='Create a new library')
    sp.set_defaults(subcommand='new')

    # NOTE(review): the help text is the same as for 'files'; presumably a
    # copy/paste leftover -- confirm what 'drop' does before rewording.
    sp = asp.add_parser('drop', help='Print out files in the library')
    sp.set_defaults(subcommand='drop')

    sp = asp.add_parser('clean', help='Remove all entries from the library database')
    sp.set_defaults(subcommand='clean')

    sp = asp.add_parser('purge', help='Remove all entries from the library database and delete all files')
    sp.set_defaults(subcommand='purge')

    sp = asp.add_parser('rebuild', help='Rebuild the library database from the files in the library')
    sp.set_defaults(subcommand='rebuild')

    sp = asp.add_parser('info', help='Display information about the library')
    sp.set_defaults(subcommand='info')

    sp = asp.add_parser('get', help='Search for the argument as a bundle or partition name or id. Possible download the file from the remote library')
    sp.set_defaults(subcommand='get')
    sp.add_argument('term', type=str,help='Query term')
    sp.add_argument('-o','--open', default=False, action="store_true", help='Open the database with sqlites')

    sp = asp.add_parser('find', help='Search for the argument as a bundle or partition name or id')
    sp.set_defaults(subcommand='find')
    sp.add_argument('term', type=str,help='Query term')

    sp = asp.add_parser('listremote', help='List the datasets stored on the remote')
    sp.set_defaults(subcommand='listremote')

    #
    # ckan Command
    #
    lib_p = cmd.add_parser('ckan', help='Access a CKAN repository')
    lib_p.set_defaults(command='ckan')
    lib_p.add_argument('-n','--name', default='default', help='Select the configuration name for the repository')
    asp = lib_p.add_subparsers(title='CKAN commands', help='Access a CKAN repository')

    sp = asp.add_parser('package', help='Dump a package by name, as json or yaml')
    sp.set_defaults(subcommand='package')
    sp.add_argument('term', type=str,help='Query term')
    # -y and -j are mutually exclusive and both write 'use_json'.
    group = sp.add_mutually_exclusive_group()
    group.add_argument('-y', '--yaml', default=True, dest='use_json', action='store_false')
    group.add_argument('-j', '--json', default=True, dest='use_json', action='store_true')

    #
    # Install Command
    #
    lib_p = cmd.add_parser('install', help='Install configuration files')
    lib_p.set_defaults(command='install')
    asp = lib_p.add_subparsers(title='Install', help='Install configuration files')

    sp = asp.add_parser('config', help='Install the global configuration')
    sp.set_defaults(subcommand='config')
    sp.add_argument('-p', '--print', dest='prt', default=False, action='store_true', help='Print, rather than save, the config file')
    sp.add_argument('-f', '--force', default=False, action='store_true', help="Force using the default config; don't re-use the xisting config")
    sp.add_argument('-r', '--root', default=None, help="Set the root dir")
    sp.add_argument('-R', '--remote', default=None, help="Url of remote library")

    #
    # Test Command
    #
    lib_p = cmd.add_parser('test', help='Test and debugging')
    lib_p.set_defaults(command='test')
    asp = lib_p.add_subparsers(title='Test commands', help='command help')

    sp = asp.add_parser('config', help='Dump the configuration')
    sp.set_defaults(subcommand='config')
    # NOTE(review): `group` is still the mutually exclusive group created for
    # 'ckan package' above, so -v/--version is registered on that parser, not
    # on 'test config'. Confirm whether `sp.add_argument` was intended.
    group.add_argument('-v', '--version', default=False, action='store_true', help='Display module version')

    args = parser.parse_args()

    # Resolve what is handed to get_runconfig(): the list of -c values as
    # given, or the bare path when exactly one -c was passed without
    # --single-config.
    if args.single_config:
        if args.config is None or len(args.config) > 1:
            raise Exception("--single_config can only be specified with one -c")
        else:
            rc_path = args.config
    elif args.config is not None and len(args.config) == 1:
        rc_path = args.config.pop()
    else:
        rc_path = args.config

    # Dispatch table: command name -> handler function.
    funcs = {
        'bundle': bundle_command,
        'library':library_command,
        'test':test_command,
        'install':install_command,
        'ckan':ckan_command
    }

    f = funcs.get(args.command, False)

    # The install command runs without a loaded run configuration.
    if f != install_command:
        rc = get_runconfig(rc_path)
    else:
        rc = None

    if not f:
        print "Error: No command: "+args.command
    else:
        f(args, rc)
def main():
    """Command-line entry point for the databundles management interface.

    Builds an argparse parser (the library, warehouse and source commands
    are registered by their own `*_parser` helpers; ckan, install, remote,
    bq and test are defined inline), resolves the run-config path(s) from
    -c / --single-config, loads a client and a server run configuration
    (both skipped for 'install'), sets the module-level `logger`, and calls
    the handler registered for the selected command as
    `handler(args, rc, src)`.
    """
    import argparse
    from .library import library_command #@UnresolvedImport
    from .warehouse import warehouse_command, warehouse_parser #@UnresolvedImport
    from .remote import remote_command #@UnresolvedImport
    from .library import library_parser
    from test import test_command #@UnresolvedImport
    from install import install_command #@UnresolvedImport
    from ckan import ckan_command #@UnresolvedImport
    from source import source_command, source_parser #@UnresolvedImport

    parser = argparse.ArgumentParser(prog='python -mdatabundles', description='Databundles {}. Management interface for databundles, libraries and repositories. '.format(__version__))

    #parser.add_argument('command', nargs=1, help='Create a new bundle')

    # Global options, accepted before the command name.
    parser.add_argument('-c','--config', default=None, action='append', help="Path to a run config file")
    parser.add_argument('-v','--verbose', default=None, action='append', help="Be verbose")
    parser.add_argument('--single-config', default=False,action="store_true", help="Load only the config file specified")

    cmd = parser.add_subparsers(title='commands', help='command help')

    #
    # library Command
    #
    library_parser(cmd)

    #
    # warehouse Command
    #
    warehouse_parser(cmd)

    #
    # ckan Command
    #
    lib_p = cmd.add_parser('ckan', help='Access a CKAN repository')
    lib_p.set_defaults(command='ckan')
    lib_p.add_argument('-n','--name', default='default', help='Select the configuration name for the repository')
    asp = lib_p.add_subparsers(title='CKAN commands', help='Access a CKAN repository')

    sp = asp.add_parser('package', help='Dump a package by name, as json or yaml')
    sp.set_defaults(subcommand='package')
    sp.add_argument('term', type=str,help='Query term')
    # -y and -j are mutually exclusive and both write 'use_json'.
    group = sp.add_mutually_exclusive_group()
    group.add_argument('-y', '--yaml', default=True, dest='use_json', action='store_false')
    group.add_argument('-j', '--json', default=True, dest='use_json', action='store_true')

    #
    # Install Command
    #
    lib_p = cmd.add_parser('install', help='Install configuration files')
    lib_p.set_defaults(command='install')
    asp = lib_p.add_subparsers(title='Install', help='Install configuration files')

    #
    # Config Command
    #
    sp = asp.add_parser('config', help='Install the global configuration')
    sp.set_defaults(subcommand='config')
    sp.add_argument('-p', '--print', dest='prt', default=False, action='store_true', help='Print, rather than save, the config file')
    sp.add_argument('-f', '--force', default=False, action='store_true', help="Force using the default config; don't re-use the xisting config")
    sp.add_argument('-r', '--root', default=None, help="Set the root dir")
    sp.add_argument('-R', '--remote', default=None, help="Url of remote library")

    # The source command registers its own sub-parser.
    source_parser(cmd)

    #
    # Remote Command
    #
    lib_p = cmd.add_parser('remote', help='Access the remote library')
    lib_p.set_defaults(command='remote')
    asp = lib_p.add_subparsers(title='remote commands', help='Access the remote library')
    lib_p.add_argument('-n','--name', default='default', help='Select a different name for the library, from which the remote is located')
    # -s and -c are mutually exclusive and both write 'is_server'.
    group = lib_p.add_mutually_exclusive_group()
    group.add_argument('-s', '--server', default=False, dest='is_server', action='store_true', help = 'Select the server configuration')
    group.add_argument('-c', '--client', default=False, dest='is_server', action='store_false', help = 'Select the client configuration')

    sp = asp.add_parser('info', help='Display the remote configuration')
    sp.set_defaults(subcommand='info')
    sp.add_argument('term', nargs='?', type=str,help='Name or ID of the bundle or partition to print information for')

    sp = asp.add_parser('list', help='List remote files')
    sp.set_defaults(subcommand='list')
    sp.add_argument('-m','--meta', default=False, action='store_true', help="Force fetching metadata for remotes that don't provide it while listing, like S3")
    sp.add_argument('datasets', nargs=argparse.REMAINDER)

    sp = asp.add_parser('find', help='Search for the argument as a bundle or partition name or id')
    sp.set_defaults(subcommand='find')
    sp.add_argument('term', type=str, nargs=argparse.REMAINDER,help='Query term')

    #
    # BigQuery
    #
    # NOTE(review): 'bq' is registered here but has no entry in `funcs`
    # below, so selecting it ends in the "No command" error branch. Confirm
    # whether a handler is missing.
    lib_p = cmd.add_parser('bq', help='BigQuery administration')
    lib_p.set_defaults(command='bq')
    asp = lib_p.add_subparsers(title='Bigquerry Commands', help='command help')

    sp = asp.add_parser('cred', help='Setup access credentials')
    sp.set_defaults(subcommand='cred')

    sp = asp.add_parser('list', help='List datasets')
    sp.set_defaults(subcommand='list')

    #
    # Test Command
    #
    lib_p = cmd.add_parser('test', help='Test and debugging')
    lib_p.set_defaults(command='test')
    asp = lib_p.add_subparsers(title='Test commands', help='command help')

    sp = asp.add_parser('config', help='Dump the configuration')
    sp.set_defaults(subcommand='config')
    # NOTE(review): `group` is still the mutually exclusive group created on
    # the 'remote' parser above, so -v/--version is registered there, not on
    # 'test config'. Confirm whether `sp.add_argument` was intended.
    group.add_argument('-v', '--version', default=False, action='store_true', help='Display module version')

    sp = asp.add_parser('spatialite', help='Test spatialite configuration')
    sp.set_defaults(subcommand='spatialite')

    args = parser.parse_args()

    # Resolve what is handed to get_runconfig(): the list of -c values as
    # given, or the bare path when exactly one -c was passed without
    # --single-config.
    if args.single_config:
        if args.config is None or len(args.config) > 1:
            raise Exception("--single_config can only be specified with one -c")
        else:
            rc_path = args.config
    elif args.config is not None and len(args.config) == 1:
        rc_path = args.config.pop()
    else:
        rc_path = args.config

    # Dispatch table: command name -> handler function.
    funcs = {
        'library':library_command,
        'warehouse':warehouse_command,
        'remote':remote_command,
        'test':test_command,
        'install':install_command,
        'ckan':ckan_command,
        'source': source_command,
    }

    f = funcs.get(args.command, False)

    # The install command runs without loaded run configurations; every
    # other command gets a default one and one loaded with is_server=True.
    if f != install_command:
        rc = get_runconfig(rc_path)
        src = get_runconfig(rc_path, is_server = True)
    else:
        rc = None
        src = None

    # Rebind the module-level logger to one named "<command>.<subcommand>".
    global logger
    logger = get_logger("{}.{}".format(args.command,args.subcommand ))
    logger.setLevel(logging.INFO)

    if not f:
        err("Error: No command: "+args.command)
    else:
        try:
            f(args, rc, src)
        except KeyboardInterrupt:
            # Ctrl-C ends the command with a short message, not a traceback.
            prt('\nExiting...')
            pass