def _resolve_ref(self, ref, location=None):
    from ambry.identity import Identity

    ip = Identity.classify(ref)

    # NOTE: `datasets` is not defined in this excerpt; it is presumably
    # produced by the enclosing resolver's lookup for `ip`.
    return ip, {k: Identity.from_dict(ds) for k, ds in datasets.items()}
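# For context on the `ip` value returned by _resolve_ref above:
# Identity.classify parses a reference into an object carrying `isa`,
# `sname`, `vname`, and `version` attributes, as test_split below
# exercises. A minimal sketch, grounded in those assertions:

from ambry.identity import Identity

ip_example = Identity.classify('source.com-foobar-orig-1.2.3')
assert ip_example.sname == 'source.com-foobar-orig'
assert ip_example.vname == 'source.com-foobar-orig-1.2.3'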
def post_partition(did, pid, library):
    from ambry.identity import Identity
    from ambry.util import md5_for_file

    b = library.get(did)

    if not b:
        raise exc.NotFound("No bundle found for id {}".format(did))

    payload = request.json
    identity = Identity.from_dict(payload['identity'])

    p = b.partitions.get(pid)

    if not p:
        raise exc.NotFound("No partition for {} in dataset {}".format(pid, did))

    if pid not in set([identity.id_, identity.vid]):
        raise exc.Conflict(
            "Partition address '{}' doesn't match payload id '{}'".format(pid, identity.vid))

    library.database.add_remote_file(identity)

    return identity.dict
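# A minimal client-side sketch for the partition endpoint above. The host
# URL is a placeholder and the use of `requests` is an assumption; the
# /datasets/<did>/partitions/<pid> layout mirrors the remote API chain in
# put() below, and the payload shape follows the handler's checks.

import json
import requests

from ambry.identity import Name, DatasetNumber, Identity

name = Name(source='source.com', dataset='foobar', variation='orig', version='0.0.1')
dident = Identity(name, DatasetNumber(10000, 1, assignment_class='registered'))
pident = dident.as_partition(7)

r = requests.post(
    'http://localhost:8080/datasets/{}/partitions/{}'.format(dident.vid, pident.vid),
    data=json.dumps({'identity': pident.dict}),
    headers={'Content-Type': 'application/json'})
r.raise_for_status()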
def post_dataset(did, library):
    """Accept a payload that describes a bundle in the remote.

    Download the bundle from the remote and install it.
    """
    from ambry.identity import Identity

    identity = Identity.from_dict(request.json)

    if not identity.md5:
        raise exc.BadRequest("The identity must have the md5 value set")

    if did not in set([identity.id_, identity.vid]):
        raise exc.Conflict(
            "Dataset address '{}' doesn't match payload id '{}'".format(did, identity.vid))

    # Need to go directly to the remote, not library.get(), because the
    # dataset hasn't been loaded yet.
    db_path = library.load(identity.cache_key, identity.md5)

    if not db_path:
        logger.error("Failed to get {} from cache while posting dataset".format(identity.cache_key))
        logger.error("  cache  = {}".format(library.cache))
        logger.error("  remote = {}".format(library.upstream))
        raise exc.NotFound("Didn't get bundle file for cache key {}".format(identity.cache_key))

    logger.debug("Loading {} for identity {}".format(db_path, identity))

    b = library.load(identity.cache_key, identity.md5)

    return b.identity.dict
def dependencies(self):
    """Return a set of dependencies for the source packages."""
    from collections import defaultdict
    import os

    from ambry.identity import Identity
    from ambry.run import import_file

    if not self._dependencies:
        depset = defaultdict(set)

        for root, _, files in os.walk(self.dir_):
            if 'bundle.yaml' in files:
                rp = os.path.realpath(os.path.join(root, 'bundle.py'))
                mod = import_file(rp)

                bundle = mod.Bundle(root)
                deps = bundle.library.dependencies

                for _, v in deps.items():
                    # Parse the name to strip the revision.
                    ident = Identity.parse_name(v)
                    depset[bundle.identity.name].add(ident.name)

        self._dependencies = depset

    return dict(self._dependencies.items())
def init_dataset_number(self):
    from ambry.identity import Identity, DatasetNumber, NumberServer

    try:
        ns = NumberServer(**self.group('numbers'))
        ds = ns.next()
    except Exception as e:
        from ..util import get_logger
        logger = get_logger(__name__)
        logger.error("Failed to get number from number server; need to use self-assigned number: {}"
                     .format(e.message))
        raise

    self.identity['id'] = str(ds)

    ident = Identity.from_dict(self.identity)
    ident._on = ds.rev(self.identity.revision)

    self.rewrite(identity=ident.ident_dict, names=ident.names_dict)
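# For reference, the split between ident_dict and names_dict passed to
# rewrite() above is pinned down by test_identity below: names_dict holds
# the derived names, ident_dict the constituent fields. A small sketch:

from ambry.identity import Name, DatasetNumber, Identity

_ident = Identity(Name(source='source.com', dataset='foobar', variation='orig', version='0.0.1'),
                  DatasetNumber(10000, 1, assignment_class='registered'))

assert 'fqname' in _ident.names_dict and 'dataset' not in _ident.names_dict
assert 'dataset' in _ident.ident_dict and 'fqname' not in _ident.ident_dict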
def dependencies(self): """Return a set of dependencies for the source packages.""" from collections import defaultdict import os from ambry.identity import Identity from ambry.run import import_file if not self._dependencies: depset = defaultdict(set) for root, _, files in os.walk(self.dir_): if 'bundle.yaml' in files: rp = os.path.realpath(os.path.join(root, 'bundle.py')) mod = import_file(rp) bundle = mod.Bundle(root) deps = bundle.library.dependencies for _, v in deps.items(): ident = Identity.parse_name(v) # Remove revision # print "XXX {:50s} {:30s} {}".format(v, ident.name, # ident.to_dict()) depset[bundle.identity.name].add(ident.name) self._dependencies = depset return dict(self._dependencies.items())
def push_cb(expect, action, metadata, time):
    import json
    from ambry.identity import Identity

    self.assertIn(action, expect)

    # The identity arrives as a JSON-encoded string in the metadata.
    identity = Identity.from_dict(json.loads(metadata['identity']))

    print action, identity.cache_key
def test_identity_from_dict(self):
    name = Name(source='source.com', dataset='foobar', variation='orig', version='0.0.1')
    dataset_number = DatasetNumber(10000, 1, assignment_class='registered')

    oident = Identity(name, dataset_number)
    opident = oident.as_partition(7)

    idict = oident.dict
    pidict = opident.dict

    ident = Identity.from_dict(idict)
    self.assertIsInstance(ident, Identity)
    self.assertEqual(ident.fqname, oident.fqname)

    ident = Identity.from_dict(pidict)
    self.assertEqual('source.com/foobar-orig-0.0.1', ident.cache_key)
def bundle_deps(self, name, reverse=False):
    """Dependencies for a particular bundle."""
    from ambry.identity import Identity

    ident = Identity.parse_name(name)
    name = ident.name

    out = []
    all_deps = self.dependencies

    if reverse:
        out = set()

        def reverse_set(name):
            o = set()
            for k, v in all_deps.items():
                if name in v:
                    o.add(k)
            return o

        deps = reverse_set(name)

        while len(deps):
            out.update(deps)

            next_deps = set()
            for name in deps:
                next_deps.update(reverse_set(name))

            deps = next_deps

        out = list(out)

    else:
        deps = all_deps.get(ident.name, [])

        while len(deps) > 0:
            out += deps

            next_deps = []
            for d in deps:
                if d in all_deps:
                    next_deps += all_deps[d]

            deps = next_deps

    final = []
    for n in reversed(out):
        if n not in final:
            final.append(n)

    return final
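# A standalone sketch of the forward traversal in bundle_deps, using a
# hypothetical dependency map: 'a' depends on 'b', which depends on 'c'.
# The result lists dependencies deepest-first, de-duplicated.

_all_deps = {'a': ['b'], 'b': ['c']}

_out, _deps = [], _all_deps.get('a', [])
while _deps:
    _out += _deps
    _deps = [d2 for d in _deps if d in _all_deps for d2 in _all_deps[d]]

_final = []
for _n in reversed(_out):
    if _n not in _final:
        _final.append(_n)

assert _final == ['c', 'b']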
def test_assignment(self):
    from ambry.identity import Identity

    t1 = Top(yaml.load(self.yaml_config))

    self.assertEqual(self.yaml_config.strip(' \n'), t1.dump().strip(' \n'))

    idnt = Identity.from_dict(dict(t1.identity))

    idd = idnt.ident_dict
    idd['variation'] = 'v2'

    t1.identity = idd

    self.assertEqual('v2', t1.identity.variation)
def put(self, metadata):
    """Post dataset or partition metadata to the remote."""
    import json
    from ambry.identity import Identity

    # The identity arrives as a JSON-encoded string; decode it in place.
    metadata['identity'] = json.loads(metadata['identity'])

    identity = Identity.from_dict(metadata['identity'])

    if identity.is_bundle:
        r = self.remote.datasets(identity.vid).post(metadata)
        raise_for_status(r)
    else:
        r = self.remote.datasets(identity.as_dataset.vid).partitions(identity.vid).post(metadata)
        raise_for_status(r)

    return r
def new_from_bundle_config(self, config):
    """Create a new bundle, or link to an existing one, based on the
    identity in config data.

    :param config: A dict form of a bundle.yaml file
    :return:
    """
    from ambry.identity import Identity

    identity = Identity.from_dict(config['identity'])

    ds = self._db.dataset(identity.vid, exception=False)

    if not ds:
        ds = self._db.new_dataset(**identity.dict)

    b = Bundle(ds, self)

    b.commit()
    b.state = Bundle.STATES.NEW
    b.set_last_access(Bundle.STATES.NEW)

    # b.set_file_system(source_url=self._fs.source(ds.name),
    #                   build_url=self._fs.build(ds.name))

    return b
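# Hypothetical dict form of a bundle.yaml 'identity' section, as consumed
# by new_from_bundle_config above. Field names and values are taken from
# Identity.dict in test_identity below; real files may carry more keys.

_config = {
    'identity': {
        'id': 'd002Bi',
        'vid': 'd002Bi001',
        'source': 'source.com',
        'dataset': 'foobar',
        'variation': 'orig',
        'version': '0.0.1',
        'revision': 1,  # assumed; the vid suffix '001' implies revision 1
        'name': 'source.com-foobar-orig',
        'vname': 'source.com-foobar-orig-0.0.1',
        'cache_key': 'source.com/foobar-orig-0.0.1',
    }
}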
def source_build(args, l, st, rc):
    """Build a single bundle, or a set of bundles in a directory.

    The build process will build all dependencies for each bundle before
    building the bundle.
    """
    from ambry.identity import Identity
    from ..source.repository import new_repository

    repo = new_repository(rc.sourcerepo(args.name))

    dir_ = None
    name = None

    if args.dir:
        if os.path.exists(args.dir):
            dir_ = args.dir
            name = None
        else:
            name = args.dir
            try:
                Identity.parse_name(name)
            except:
                fatal("Argument '{}' must be either a bundle name or a directory".format(name))
                return

    if not dir_:
        dir_ = rc.sourcerepo.dir

    def build(bundle_dir):
        from ambry.library import new_library

        # Import the bundle file from the directory
        bundle_class = load_bundle(bundle_dir)
        bundle = bundle_class(bundle_dir)

        l = new_library(rc.library(args.library_name))

        if l.get(bundle.identity.vid) and not args.force:
            prt("{} Bundle is already in library", bundle.identity.name)
            return
        elif bundle.is_built and not args.force and not args.clean:
            prt("{} Bundle is already built", bundle.identity.name)
            return
        else:
            if args.dryrun:
                prt("{} Would build but in dry run ", bundle.identity.name)
                return

            repo.bundle = bundle

            if args.clean:
                bundle.clean()
                # Re-create after cleaning is important for something ...
                bundle = bundle_class(bundle_dir)

            prt("{} Building ", bundle.identity.name)

            if not bundle.run_prepare():
                fatal("{} Prepare failed", bundle.identity.name)

            if not bundle.run_build():
                fatal("{} Build failed", bundle.identity.name)

        if args.install and not args.dryrun:
            if not bundle.run_install(force=True):
                fatal('{} Install failed', bundle.identity.name)

    build_dirs = {}

    # Find all of the dependencies for the named bundle, and make those first.
    for root, _, files in os.walk(rc.sourcerepo.dir):
        if 'bundle.yaml' in files:
            bundle_class = load_bundle(root)
            bundle = bundle_class(root)
            build_dirs[bundle.identity.name] = root

    if name:
        deps = repo.bundle_deps(name)
        deps.append(name)
    else:
        deps = []

        # Walk the subdirectory for the files to build, and
        # add all of their dependencies
        for root, _, files in os.walk(dir_):
            if 'bundle.yaml' in files:
                bundle_class = load_bundle(root)
                bundle = bundle_class(root)

                for dep in repo.bundle_deps(bundle.identity.name):
                    if dep not in deps:
                        deps.append(dep)

                deps.append(bundle.identity.name)

    for n in deps:
        try:
            dir_ = build_dirs[n]
        except KeyError:
            fatal("Failed to find directory for bundle {}".format(n))

        prt('')
        prt("{} Building in {}".format(n, dir_))

        build(dir_)
def test_split(self):
    name = Name(source='source.com', dataset='foobar', version='1.2.3')
    dn = DatasetNumber(10000, 1, assignment_class='registered')

    # NOTE: version is entered as 1.2.3, but will be changed to 1.2.1
    # because the last digit is overridden by the revision.
    ident = Identity(name, dn)

    ip = Identity.classify(name)
    self.assertEqual(Name, ip.isa)
    self.assertIsNone(ip.version)

    ip = Identity.classify(ident.name)
    self.assertEqual(Name, ip.isa)
    self.assertIsNone(ip.on)
    self.assertEqual(ident.sname, ip.name)
    self.assertIsNone(ip.version)

    ip = Identity.classify(ident.vname)
    self.assertEqual(Name, ip.isa)
    self.assertIsNone(ip.on)
    self.assertEqual(ident.vname, ip.name)
    self.assertEqual(ident._name.version, str(ip.version))

    ip = Identity.classify(ident.fqname)
    self.assertEqual(DatasetNumber, ip.isa)
    self.assertEqual(ident.vname, ip.name)
    # NOTE: this assertion is a tautology in the original; it presumably
    # meant to compare ip.on against the identity's number.
    self.assertEqual(str(ip.on), str(ip.on))

    ip = Identity.classify(ident.vid)
    self.assertEqual(DatasetNumber, ip.isa)

    ip = Identity.classify(ident.id_)
    self.assertEqual(DatasetNumber, ip.isa)

    ip = Identity.classify(dn)
    self.assertEqual(DatasetNumber, ip.isa)

    ip = Identity.classify(dn.as_partition(10))
    self.assertEqual(PartitionNumber, ip.isa)

    ip = Identity.classify('source.com-foobar-orig')
    self.assertIsNone(ip.version)
    self.assertEqual('source.com-foobar-orig', ip.sname)
    self.assertIsNone(ip.vname)

    ip = Identity.classify('source.com-foobar-orig-1.2.3')
    self.assertIsInstance(ip.version, Version)
    self.assertEqual('source.com-foobar-orig', ip.sname)
    self.assertEqual('source.com-foobar-orig-1.2.3', ip.vname)

    ip = Identity.classify('source.com-foobar-orig->=1.2.3')
    self.assertIsInstance(ip.version, Spec)
    self.assertEqual('source.com-foobar-orig', ip.sname)
    self.assertIsNone(ip.vname)

    ip = Identity.classify('source.com-foobar-orig-==1.2.3')
    self.assertIsInstance(ip.version, Spec)
    self.assertEqual('source.com-foobar-orig', ip.sname)
    self.assertIsNone(ip.vname)
def test_identity(self):
    name = Name(source='source.com', dataset='foobar', version='0.0.1', variation='orig')
    dn = DatasetNumber(10000, 1, assignment_class='registered')
    ident = Identity(name, dn)

    self.assertEqual('d002Bi', ident.id_)
    self.assertEqual('d002Bi001', ident.vid)
    self.assertEqual('source.com-foobar-orig', str(ident.name))
    self.assertEqual('source.com-foobar-orig-0.0.1', ident.vname)
    self.assertEqual('source.com-foobar-orig-0.0.1~d002Bi001', ident.fqname)

    self.assertEqual('source.com/foobar-orig-0.0.1', ident.path)
    self.assertEqual('source.com/foobar-orig', ident.source_path)
    self.assertEqual('source.com/foobar-orig-0.0.1', ident.cache_key)

    self.assertEqual('source.com-foobar-orig-0.0.1', ident.name.dict['vname'])

    self.assertEqual({'id', 'vid', 'revision', 'name', 'vname', 'cache_key',
                      'variation', 'dataset', 'source', 'version'},
                     set(ident.dict.keys()))

    self.assertIn('fqname', ident.names_dict)
    self.assertIn('vname', ident.names_dict)
    self.assertNotIn('dataset', ident.names_dict)

    self.assertIn('dataset', ident.ident_dict)
    self.assertNotIn('fqname', ident.ident_dict)

    # Clone to get a PartitionIdentity
    pi = ident.as_partition(7)
    self.assertEqual('source.com-foobar-orig-0.0.1~p002Bi007001', pi.fqname)

    pi = ident.as_partition(8, time='time', space='space', format='geo')
    self.assertEqual('source.com-foobar-orig-time-space-geo-0.0.1~p002Bi008001', pi.fqname)

    # PartitionIdentity
    part_name = PartitionName(time='time', space='space', format='geo', **name.dict)
    pn = PartitionNumber(dn, 500)

    ident = PartitionIdentity(part_name, pn)

    expected_keys = set(['id', 'vid', 'revision', 'cache_key', 'name', 'vname', 'space',
                         'format', 'variation', 'dataset', 'source', 'version', 'time'])
    self.assertEqual(expected_keys, set(ident.dict.keys()))

    self.assertEqual('p002Bi084', ident.id_)
    self.assertEqual('p002Bi084001', ident.vid)
    self.assertEqual('source.com-foobar-orig-time-space-geo', str(ident.name))
    self.assertEqual('source.com-foobar-orig-time-space-geo-0.0.1', ident.vname)
    self.assertEqual('source.com-foobar-orig-time-space-geo-0.0.1~p002Bi084001', ident.fqname)

    self.assertEqual('source.com/foobar-orig-0.0.1/geo/time-space', ident.path)
    self.assertEqual('source.com/foobar-orig-0.0.1/geo/time-space', ident.cache_key)

    # Updating partition names that were partially specified
    pnq = PartitionNameQuery(time='time', space='space', format='hdf')

    # Partitions, converting to datasets
    ident = Identity(name, dn)
    pi = ident.as_partition(8, time='time', space='space', format='geo')
    self.assertEqual('source.com-foobar-orig-time-space-geo-0.0.1~p002Bi008001', pi.fqname)

    iid = pi.as_dataset()
    self.assertEqual(ident.fqname, iid.fqname)