def __init__(self, **kwargs):
    """Populate the record's fields from keyword arguments and derive missing ids.

    The object id is looked up under ``oid``, falling back to ``id`` and then
    ``id_`` when the earlier key is absent.  ``location`` defaults to
    ``self.LOCATION.LIBRARY``; every other recognised field defaults to None.

    After the fields are copied:

    * with no id, a new ``DatasetNumber`` is created from the revision and
      both ``vid`` and ``id_`` are taken from it;
    * with an id but no ``vid``, the ``vid`` is built by parsing the id and
      applying the revision;
    * a missing ``cache_key`` is taken from ``self.identity.cache_key``.

    :raises ValueError: if the supplied id cannot be parsed.
    """
    self.id_ = kwargs.get("oid", kwargs.get("id", kwargs.get("id_", None)))
    self.vid = kwargs.get("vid", None)
    self.location = kwargs.get("location", self.LOCATION.LIBRARY)

    # Remaining fields are plain copies, assigned in their original order.
    for field in ("name", "vname", "fqname", "cache_key", "source",
                  "dataset", "subset", "variation", "btime", "bspace",
                  "creator", "revision", "version"):
        setattr(self, field, kwargs.get(field, None))

    if not self.id_:
        dn = DatasetNumber(None, self.revision)
        self.vid = str(dn)
        self.id_ = str(dn.rev(None))
    elif not self.vid:
        try:
            self.vid = str(ObjectNumber.parse(self.id_).rev(self.revision))
        except ValueError as e:
            # Parenthesised form prints the same text on Python 2 and still
            # parses on Python 3.
            print(repr(self))
            # str(e) instead of e.message: the attribute is gone on Python 3.
            raise ValueError('Could not parse id value; ' + str(e))

    if self.cache_key is None:
        self.cache_key = self.identity.cache_key

    # Sanity check: the versioned id must start with 'd'.
    assert self.vid[0] == 'd'
def test_parse_other(self):
    """Check the string form of a dataset number and of a GeneralNumber1 built on it."""
    dataset_number = DatasetNumber(100, 5, 'authoritative')
    self.assertEqual('d01C005', str(dataset_number))

    expected_general = 'G01C001Z005'

    general_text = str(GeneralNumber1('G', dataset_number, 123))
    self.assertEqual(expected_general, general_text)

    # Feed the string form back through the parser, passing 'other1' as the
    # second argument, and compare the parse result with the same string.
    reparsed = ObjectNumber.parse(
        str(GeneralNumber1('G', dataset_number, 123)), 'other1')
    self.assertEqual(expected_general, reparsed)
def setUp(self):
    """Run the parent set-up, then prepare the DSN and a list of dataset numbers."""
    super(Test, self).setUp()
    self.dsn = 'sqlite://'

    # Build the strings for DatasetNumber(1, 1) .. DatasetNumber(9, 9) so that
    # tests can refer to a dataset number by a single list index.
    numbers = []
    for n in range(1, 10):
        numbers.append(str(DatasetNumber(n, n)))
    self.dn = numbers
def __init__(self, *args, **kwargs):
    """Initialise the dataset record and fill in any identity fields left empty.

    After delegating to the parent constructor:

    * a ``vid`` without an ``id`` yields both ``revision`` and ``id`` from the
      parsed ``vid``;
    * with no ``id`` at all, a new ``DatasetNumber`` supplies ``vid`` and ``id``;
    * an ``id`` without a ``vid`` has the ``vid`` built from the parsed id and
      the revision;
    * a falsy ``revision`` becomes 1;
    * ``cache_key``, ``name``, ``vname``, ``fqname`` and ``version`` are taken
      from ``self.identity`` when not already set.

    :raises ValueError: if ``id`` cannot be parsed as an object number.
    """
    super(Dataset, self).__init__(*args, **kwargs)

    if self.vid and not self.id:
        # Parse once and reuse the result for both derived fields.
        parsed_vid = ObjectNumber.parse(self.vid)
        self.revision = parsed_vid.revision
        self.id = str(parsed_vid.rev(None))

    if not self.id:
        dn = DatasetNumber(None, self.revision)
        self.vid = str(dn)
        self.id = str(dn.rev(None))
    elif not self.vid:
        try:
            self.vid = str(ObjectNumber.parse(self.id).rev(self.revision))
        except ValueError as e:
            # str(e) instead of e.message: the attribute is gone on Python 3
            # and would turn this into an AttributeError.
            raise ValueError('Could not parse id value; ' + str(e))

    if not self.revision:
        self.revision = 1

    if self.cache_key is None:
        self.cache_key = self.identity.name.cache_key

    # Fill the name-like fields from the identity when they were not supplied.
    if not self.name:
        self.name = str(self.identity.name)

    if not self.vname:
        self.vname = str(self.identity.vname)

    if not self.fqname:
        self.fqname = str(self.identity.fqname)

    if not self.version:
        self.version = str(self.identity.version)

    # Sanity check: the versioned id must start with 'd'.
    assert self.vid[0] == 'd'
def setUp(self):
    """Reset per-test state, build the dataset numbers and prepare the test database."""
    self.db = None
    self._library = None

    super(TestBase, self).setUp()

    # Strings for DatasetNumber(1, 1) .. DatasetNumber(9, 9), so that tests can
    # refer to a dataset number by a single list index.
    self.dn = [str(DatasetNumber(i, i)) for i in range(1, 10)]

    # Will be populated if someone calls the library() method.
    self._library = None

    test_class = self.__class__
    if test_class._is_postgres:
        PostgreSQLTestBase._create_postgres_test_db(
            test_db_dsn=test_class.library_test_dsn)
def test_id(self):
    """Check string forms of dataset, table, column and partition numbers."""
    number = 1000000
    revision = 100

    # Dataset number without and with a revision.
    self.assertEqual('d000004c92', str(DatasetNumber(number)))

    dn = DatasetNumber(number, revision)
    self.assertEqual('d000004c9201C', str(dn))
    self.assertEqual('d000004c9201C', str(ObjectNumber.parse(str(dn))))

    # A table number derived from the revisioned dataset number.
    tn = TableNumber(dn, 1)
    self.assertEqual('t000004c920101C', str(tn))
    self.assertEqual('t000004c920101C', str(ObjectNumber.parse(str(tn))))

    # Removing and then re-applying a revision.
    unrevisioned_table = tn.rev(None)
    self.assertEqual('t000004c9201', str(unrevisioned_table))
    self.assertEqual('t000004c9201004', str(unrevisioned_table.rev(4)))

    # Other assignment classes: the same number is rendered at a different
    # width for each class.
    largest = 62 ** 3 - 1
    expected_by_class = (
        ('authoritative', 'dZZZ'),
        ('registered', 'd00ZZZ'),
        ('unregistered', 'd0000ZZZ'),
        ('self', 'd000000ZZZ'),
    )
    for assignment_class, expected in expected_by_class:
        dn = DatasetNumber(largest, None, assignment_class)
        self.assertEqual(expected, str(dn))

    # dn is now the 'self' class number; derive the child numbers from it.
    tn = TableNumber(dn, 2)
    self.assertEqual('t000000ZZZ02', str(tn))

    cn = ColumnNumber(tn, 3)
    self.assertEqual('c000000ZZZ02003', str(cn))

    pn = dn.as_partition(5)
    self.assertEqual('p000000ZZZ005', str(pn))
def test_increment(self):
    """Check that ObjectNumber.increment advances the revision by one each time."""
    start = DatasetNumber(1000000, 100)
    self.assertEqual('d000004c9201C', str(start))

    # Two successive increments of the dataset number.
    once = ObjectNumber.increment(start)
    self.assertEqual(101, once.revision)

    twice = ObjectNumber.increment(once)
    self.assertEqual(102, twice.revision)

    # A table number built on the incremented dataset number reports the
    # same revision, and can be incremented as well.
    table = TableNumber(twice, 1)
    self.assertEqual(102, table.revision)
    self.assertEqual('t000004c920101E', str(table))

    next_table = ObjectNumber.increment(table)
    self.assertEqual(103, next_table.revision)
def test_identity_from_dict(self):
    """Check that Identity.from_dict rebuilds identities from their dict forms."""
    name = Name(
        source='source.com',
        dataset='foobar',
        variation='orig',
        version='0.0.1')
    dataset_number = DatasetNumber(10000, 1, assignment_class='registered')

    original = Identity(name, dataset_number)
    original_partition = original.as_partition(7)

    dataset_dict = original.dict
    partition_dict = original_partition.dict

    # Dataset identity: the rebuilt object keeps the fully qualified name.
    rebuilt = Identity.from_dict(dataset_dict)
    self.assertIsInstance(rebuilt, Identity)
    self.assertEqual(rebuilt.fqname, original.fqname)

    # Partition identity dict: check the resulting cache key.
    rebuilt = Identity.from_dict(partition_dict)
    self.assertEqual('source.com/foobar-orig-0.0.1', rebuilt.cache_key)
def test_dataset_basic(self):
    """Basic operations on datasets"""
    library = self.library(use_proto=False)
    db = library.database

    try:
        # Creating and conflicts: a second dataset with an already used vid
        # must be rejected.
        first_vid, second_vid = self.dn[0], self.dn[1]
        db.new_dataset(vid=first_vid, source='source', dataset='dataset')
        db.new_dataset(vid=second_vid, source='source', dataset='dataset')

        with self.assertRaises(ConflictError):
            db.new_dataset(vid=first_vid, source='source', dataset='dataset')

        dn = DatasetNumber(100)

        def vid_at(revision):
            # String form of dn at the given revision (None for no revision).
            return str(dn.rev(revision))

        # Looking up by vid returns that exact revision; looking up by the
        # unrevisioned id returns the highest revision stored so far.
        for revision in (5, 1, 3, 4):
            db.new_dataset(vid=vid_at(revision), source='a', dataset='dataset')

        found = db.dataset(vid_at(5))
        self.assertEqual(vid_at(5), found.vid)

        found = db.dataset(vid_at(3))
        self.assertEqual(vid_at(3), found.vid)

        found = db.dataset(vid_at(None))
        self.assertEqual(vid_at(5), found.vid)

        db.new_dataset(vid=vid_at(6), source='a', dataset='dataset')

        found = db.dataset(vid_at(None))
        self.assertEqual(vid_at(6), found.vid)
    finally:
        db.close()
def test_split(self):
    """Check how Identity.classify categorises names, numbers and version specs."""
    name = Name(source='source.com', dataset='foobar', version='1.2.3')
    dn = DatasetNumber(10000, 1, assignment_class='registered')

    # NOTE, version is entered as 1.2.3, but will be changed to 1.2.1 b/c
    # last digit is overridden by revision
    ident = Identity(name, dn)

    # Name objects and unversioned names.
    parsed = Identity.classify(name)
    self.assertEqual(Name, parsed.isa)
    self.assertIsNone(parsed.version)

    parsed = Identity.classify(ident.name)
    self.assertEqual(Name, parsed.isa)
    self.assertIsNone(parsed.on)
    self.assertEqual(ident.sname, parsed.name)
    self.assertIsNone(parsed.version)

    # Versioned name.
    parsed = Identity.classify(ident.vname)
    self.assertEqual(Name, parsed.isa)
    self.assertIsNone(parsed.on)
    self.assertEqual(ident.vname, parsed.name)
    self.assertEqual(ident._name.version, str(parsed.version))

    # Fully qualified name.
    parsed = Identity.classify(ident.fqname)
    self.assertEqual(DatasetNumber, parsed.isa)
    self.assertEqual(ident.vname, parsed.name)
    # NOTE(review): this compares parsed.on with itself, so it can never
    # fail; presumably it was meant to compare against an expected value.
    self.assertEqual(str(parsed.on), str(parsed.on))

    # Object numbers, given as strings or as objects.
    number_cases = (
        (ident.vid, DatasetNumber),
        (ident.id_, DatasetNumber),
        (dn, DatasetNumber),
        (dn.as_partition(10), PartitionNumber),
    )
    for value, expected_class in number_cases:
        parsed = Identity.classify(value)
        self.assertEqual(expected_class, parsed.isa)

    # Plain strings: (input, expected version class or None, expected vname or None).
    string_cases = (
        ('source.com-foobar-orig', None, None),
        ('source.com-foobar-orig-1.2.3', Version,
         'source.com-foobar-orig-1.2.3'),
        ('source.com-foobar-orig->=1.2.3', Spec, None),
        ('source.com-foobar-orig-==1.2.3', Spec, None),
    )
    for text, version_class, expected_vname in string_cases:
        parsed = Identity.classify(text)

        if version_class is None:
            self.assertIsNone(parsed.version)
        else:
            self.assertIsInstance(parsed.version, version_class)

        self.assertEqual('source.com-foobar-orig', parsed.sname)

        if expected_vname is None:
            self.assertIsNone(parsed.vname)
        else:
            self.assertEqual(expected_vname, parsed.vname)
def test_identity(self):
    """Check the names, ids, paths and dict keys of dataset and partition identities."""
    name = Name(
        source='source.com',
        dataset='foobar',
        version='0.0.1',
        variation='orig')
    dn = DatasetNumber(10000, 1, assignment_class='registered')

    # Dataset identity
    ident = Identity(name, dn)

    self.assertEqual('d002Bi', ident.id_)
    self.assertEqual('d002Bi001', ident.vid)
    self.assertEqual('source.com-foobar-orig', str(ident.name))
    self.assertEqual('source.com-foobar-orig-0.0.1', ident.vname)
    self.assertEqual('source.com-foobar-orig-0.0.1~d002Bi001', ident.fqname)
    self.assertEqual('source.com/foobar-orig-0.0.1', ident.path)
    self.assertEqual('source.com/foobar-orig', ident.source_path)
    self.assertEqual('source.com/foobar-orig-0.0.1', ident.cache_key)

    self.assertEqual('source.com-foobar-orig-0.0.1', ident.name.dict['vname'])

    dataset_keys = {
        'id', 'vid', 'revision', 'name', 'vname', 'cache_key',
        'variation', 'dataset', 'source', 'version',
    }
    self.assertEqual(dataset_keys, set(ident.dict.keys()))

    # names_dict and ident_dict hold disjoint kinds of keys.
    names = ident.names_dict
    self.assertIn('fqname', names)
    self.assertIn('vname', names)
    self.assertNotIn('dataset', names)

    idents = ident.ident_dict
    self.assertIn('dataset', idents)
    self.assertNotIn('fqname', idents)

    # Clone to get a PartitionIdentity
    partition_ident = ident.as_partition(7)
    self.assertEqual(
        'source.com-foobar-orig-0.0.1~p002Bi007001', partition_ident.fqname)

    partition_ident = ident.as_partition(
        8, time='time', space='space', format='geo')
    self.assertEqual(
        'source.com-foobar-orig-time-space-geo-0.0.1~p002Bi008001',
        partition_ident.fqname)

    # PartitionIdentity
    part_name = PartitionName(
        time='time', space='space', format='geo', **name.dict)
    pn = PartitionNumber(dn, 500)

    ident = PartitionIdentity(part_name, pn)

    partition_keys = {
        'id', 'vid', 'revision', 'cache_key', 'name', 'vname', 'space',
        'format', 'variation', 'dataset', 'source', 'version', 'time',
    }
    self.assertEqual(partition_keys, set(ident.dict.keys()))

    self.assertEqual('p002Bi084', ident.id_)
    self.assertEqual('p002Bi084001', ident.vid)
    self.assertEqual('source.com-foobar-orig-time-space-geo', str(ident.name))
    self.assertEqual(
        'source.com-foobar-orig-time-space-geo-0.0.1', ident.vname)
    self.assertEqual(
        'source.com-foobar-orig-time-space-geo-0.0.1~p002Bi084001',
        ident.fqname)
    self.assertEqual(
        'source.com/foobar-orig-0.0.1/geo/time-space', ident.path)
    self.assertEqual(
        'source.com/foobar-orig-0.0.1/geo/time-space', ident.cache_key)

    # Updating partition names that were partially specified.
    # The query object is only constructed here; its result is not used.
    PartitionNameQuery(time='time', space='space', format='hdf')

    # Partitions, converting to datasets
    ident = Identity(name, dn)
    partition_ident = ident.as_partition(
        8, time='time', space='space', format='geo')
    self.assertEqual(
        'source.com-foobar-orig-time-space-geo-0.0.1~p002Bi008001',
        partition_ident.fqname)

    as_dataset = partition_ident.as_dataset()
    self.assertEqual(ident.fqname, as_dataset.fqname)
def test_id(self):
    """Check string forms of dataset, table, column and partition numbers."""
    dataset_value = 1000000
    dataset_revision = 100

    # Without a revision
    plain = DatasetNumber(dataset_value)
    self.assertEqual('d000004c92', str(plain))

    # With a revision, including a round trip through the parser
    revisioned = DatasetNumber(dataset_value, dataset_revision)
    self.assertEqual('d000004c9201C', str(revisioned))
    self.assertEqual(
        'd000004c9201C', str(ObjectNumber.parse(str(revisioned))))

    # Table number built on the revisioned dataset number
    table = TableNumber(revisioned, 1)
    self.assertEqual('t000004c920101C', str(table))
    self.assertEqual(
        't000004c920101C', str(ObjectNumber.parse(str(table))))

    # Dropping the revision, then applying a new one
    bare_table = table.rev(None)
    self.assertEqual('t000004c9201', str(bare_table))
    self.assertEqual('t000004c9201004', str(bare_table.rev(4)))

    # Other assignment classes
    top = 62**3 - 1

    authoritative = DatasetNumber(top, None, 'authoritative')
    self.assertEqual('dZZZ', str(authoritative))

    registered = DatasetNumber(top, None, 'registered')
    self.assertEqual('d00ZZZ', str(registered))

    unregistered = DatasetNumber(top, None, 'unregistered')
    self.assertEqual('d0000ZZZ', str(unregistered))

    self_assigned = DatasetNumber(top, None, 'self')
    self.assertEqual('d000000ZZZ', str(self_assigned))

    # Child numbers derived from the self-assigned dataset number
    table = TableNumber(self_assigned, 2)
    self.assertEqual('t000000ZZZ02', str(table))

    column = ColumnNumber(table, 3)
    self.assertEqual('c000000ZZZ02003', str(column))

    partition = self_assigned.as_partition(5)
    self.assertEqual('p000000ZZZ005', str(partition))
def number(self, assignment_class=None, namespace='d'):
    """
    Return a new number.

    :param assignment_class: Determines the length of the number. Possible values are 'authority' (3 characters),
        'registered' (5), 'unregistered' (7) and 'self' (9). Self assigned numbers are random and acquired locally,
        while the other assignment classes use the number server defined in the configuration. If None,
        then look in the number server configuration for one of the class keys, trying 'self',
        'unregistered', 'registered' and 'authority' in that order.

    :param namespace: The namespace character, the first character in the number. Can be one of 'd', 'x' or 'b'.
        NOTE(review): this body never reads ``namespace``.
    :return: The new number, as a string.
    :raises ConfigurationError: if an explicit assignment class other than 'self' is requested and there
        is no number server configuration, or the configuration has no key for that class.
    """
    if assignment_class == 'self':
        # When 'self' is explicit, don't look for number server config
        return str(DatasetNumber())

    elif assignment_class is None:
        try:
            nsconfig = self.services['numbers']
        except ConfigurationError:
            # A missing configuration is equivalent to 'self'
            self.logger.error(
                'No number server configuration; returning self assigned number'
            )
            return str(DatasetNumber())

        # NOTE(review): if none of the '<class>-key' entries is present, the
        # loop ends with assignment_class == 'authority' and the key lookup
        # below raises KeyError.
        for assignment_class in ('self', 'unregistered', 'registered',
                                 'authority'):
            if assignment_class + '-key' in nsconfig:
                break

        # For the case where the number configuration references a self-assigned key
        if assignment_class == 'self':
            return str(DatasetNumber())

    else:
        try:
            nsconfig = self.services['numbers']
        except ConfigurationError:
            raise ConfigurationError('No number server configuration')

        if assignment_class + '-key' not in nsconfig:
            raise ConfigurationError(
                'Assignment class {} not number server config'.format(
                    assignment_class))

    # Building the request settings cannot raise HTTPError, so keep it
    # outside the try block.
    key = nsconfig[assignment_class + '-key']
    config = {
        'key': key,
        'host': nsconfig['host'],
        'port': nsconfig.get('port', 80)
    }

    try:
        ns = NumberServer(**config)
        n = str(next(ns))
        self.logger.info('Got number from number server: {}'.format(n))
    except HTTPError as e:
        # Include the error in the message; formatting the exception itself
        # avoids e.message, which is not available on Python 3.
        self.logger.error(
            'Failed to get number from number server for key: {}: {}'.format(
                key, e))
        self.logger.error(
            'Using self-generated number. There is no problem with this, '
            'but they are longer than centrally generated numbers.')
        n = str(DatasetNumber())

    return n
def get_next(redis, assignment_class=None, space=''):
    """Allocate the next number for the requesting client, subject to rate limiting.

    The client is identified by ``request.remote_addr`` and its rate-limit
    state is kept in the ``next:<ip>`` and ``delay:<ip>`` keys.  When the
    request carries an ``access_key`` query value, the assignment class is
    read from the ``assignment_class:<access_key>`` key and replaces the
    argument.

    :param redis: Client object providing ``get``, ``incr``, ``sadd`` and
        ``pipeline``.
    :param assignment_class: Assignment class to use when the request has no
        access key.
    :param space: Number space. An empty value produces a ``DatasetNumber``;
        any other value produces a ``TopNumber``. The space only prefixes the
        storage keys when it is listed in ``NUMBER_SPACES``.
    :return: dict with keys ``ok``, ``number``, ``assignment_class``, ``wait``,
        ``safe_wait``, ``nxt`` and ``delay``.
    :raises exc.NotAuthorized: if no assignment class could be determined.
    :raises exc.TooManyRequests: if ``request_delay`` reports the request is
        not yet allowed.
    """
    from time import time

    from ambry.identity import DatasetNumber, TopNumber

    delay_factor = 2

    ip = str(request.remote_addr)
    now = time()

    next_key = "next:" + ip
    delay_key = "delay:" + ip

    if space and space in NUMBER_SPACES:
        spacestr = space + ':'
    else:
        spacestr = ''

    #
    # The assignment class determines how long the resulting number will be,
    # which namespace the number is drawn from, and whether the user is rate limited.
    # The assignment_class: key is assigned and set externally.
    #
    access_key = request.query.access_key

    if access_key:
        assignment_class_key = "assignment_class:" + access_key
        assignment_class = redis.get(assignment_class_key)

    if not assignment_class:
        raise exc.NotAuthorized(
            'Use an access key to gain access to this service')

    #
    # These are the keys that store values, so they need to be augmented with the number space.
    # For backwards compatibility, the 'd' space is empty, but the other spaces have strings.
    #

    # The number space depends on the assignment class.
    number_key = "dataset_number:" + spacestr + assignment_class
    authallocated_key = "allocated:" + spacestr + assignment_class

    # Keep track of allocations by IP
    ipallocated_key = "allocated:" + spacestr + ip

    nxt = redis.get(next_key)
    delay = redis.get(delay_key)

    # Adjust rate limiting based on assignment class
    if assignment_class == 'authoritative':
        # Reset the stored state before it is handed to request_delay.
        nxt, delay = now - 1, 0
    elif assignment_class == 'registered':
        delay_factor = 1.1

    ok, since, nxt, delay, wait, safe = request_delay(nxt, delay, delay_factor)

    # Queue both updates on the pipeline and send them together; the
    # original opened a pipeline but issued the commands on the client.
    with redis.pipeline() as pipe:
        pipe.set(next_key, nxt)
        pipe.set(delay_key, delay)
        pipe.execute()

    global_logger.info(
        "ip={} ok={} since={} nxt={} delay={} wait={} safe={}".format(
            ip, ok, since, nxt, delay, wait, safe))

    if not ok:
        raise exc.TooManyRequests(
            " Access will resume in {} seconds".format(wait))

    number = redis.incr(number_key)

    if not space:
        dn = DatasetNumber(number, None, assignment_class)
    else:
        dn = TopNumber(space, number, None, assignment_class)

    # Store the string form rather than handing the client a project object.
    dn_str = str(dn)
    redis.sadd(ipallocated_key, dn_str)
    redis.sadd(authallocated_key, dn_str)

    return dict(ok=ok,
                number=dn_str,
                assignment_class=assignment_class,
                wait=wait,
                safe_wait=safe,
                nxt=nxt,
                delay=delay)