Exemplo n.º 1
0
Arquivo: orm.py Projeto: kball/ambry
    def __init__(self, dataset, **kwargs):
        """Build a record that belongs to ``dataset``.

        Args:
            dataset: Owning dataset record; its ``id_`` and ``vid`` are copied
                to ``d_id`` and ``d_vid``. A ``d_id`` passed in ``kwargs`` is
                ignored because the dataset's value always wins.
            **kwargs: Optional initial values for ``id`` (or ``id_``),
                ``name``, ``vname``, ``fqname``, ``cache_key``,
                ``sequence_id``, ``space``, ``time``, ``t_id``, ``grain``,
                ``format``, ``segment`` and ``data``. Missing keys default to
                ``None``.

        Raises:
            AssertionError: If ``cache_key`` is missing, or does not end with
                the extension returned for ``format``.
        """
        self.id_ = kwargs.get("id", kwargs.get("id_", None))
        self.name = kwargs.get("name", None)
        self.vname = kwargs.get("vname", None)
        self.fqname = kwargs.get("fqname", None)
        self.cache_key = kwargs.get("cache_key", None)
        self.sequence_id = kwargs.get("sequence_id", None)
        self.space = kwargs.get("space", None)
        self.time = kwargs.get("time", None)
        self.t_id = kwargs.get("t_id", None)
        self.grain = kwargs.get('grain', None)
        self.format = kwargs.get('format', None)
        self.segment = kwargs.get('segment', None)
        self.data = kwargs.get('data', None)

        self.d_id = dataset.id_
        self.d_vid = dataset.vid

        # See before_insert for setting self.vid and self.id_

        if self.t_id:
            # The table vid is the table id at the dataset's revision.
            don = ObjectNumber.parse(self.d_vid)
            ton = ObjectNumber.parse(self.t_id)
            self.t_vid = str(ton.rev(don.revision))

        assert self.cache_key is not None

        # Debugging check: the cache key must carry the format's extension.
        from partition import extension_for_format_name

        ext = extension_for_format_name(self.format)

        assert self.cache_key.endswith(ext)
Exemplo n.º 2
0
def deref_tc_ref(ref):
    """Resolve a table or column reference into object numbers.

    Args:
        ref: A table or column id or vid.

    Returns:
        A ``(b, t, c)`` tuple for the dataset, the table and the column.
        ``c`` is ``None`` when the parsed number has no ``as_table``
        attribute. ``b`` is a string when ``ref`` already carries a revision,
        and the dataset object number when the revision had to be looked up.
    """
    from ambry.identity import ObjectNumber

    on = ObjectNumber.parse(ref)

    b = str(on.as_dataset)

    try:
        c = on
        t = on.as_table
    except AttributeError:
        # No ``as_table``: treat the number itself as the table.
        t = on
        c = None

    if not on.revision:
        # The table does not have a revision, so we need to get one, just get the
        # latest one
        from . import renderer

        tm = renderer().doc_cache.table_version_map()

        # Use the next() builtin; the ``.next()`` iterator method exists only
        # on Python 2.
        t_vid = next(reversed(sorted(tm.get(str(t)))))

        t = ObjectNumber.parse(t_vid)
        b = t.as_dataset

        if c:
            c = c.rev(t.revision)

    return b, t, c
Exemplo n.º 3
0
    def update_number(target):
        """Recompute ``vid``, ``id`` and ``d_vid`` of ``target`` from its table vid.

        Args:
            target: Record to update; ``t_vid`` and ``sequence_id`` are read.
        """
        ton = ObjectNumber.parse(target.t_vid)
        con = ColumnNumber(ton, target.sequence_id)

        target.vid = str(con)
        # The unversioned column number is the id. The earlier assignment of
        # the table id to ``target.id`` was immediately overwritten, so it is
        # dropped.
        target.id = str(con.rev(None))
        target.d_vid = str(ton.as_dataset)
Exemplo n.º 4
0
    def update_number(target):
        """Recompute ``vid``, ``id`` and ``d_vid`` of ``target`` from its table vid.

        Args:
            target: Record to update; ``t_vid`` and ``sequence_id`` are read.
        """
        ton = ObjectNumber.parse(target.t_vid)
        con = ColumnNumber(ton, target.sequence_id)

        target.vid = str(con)
        # The unversioned column number is the id. The earlier assignment of
        # the table id to ``target.id`` was immediately overwritten, so it is
        # dropped.
        target.id = str(con.rev(None))
        target.d_vid = str(ton.as_dataset)
Exemplo n.º 5
0
    def test_id(self):
        """Check the string forms of dataset, table, column and partition numbers."""
        number = 1000000
        revision = 100

        unversioned = DatasetNumber(number)
        self.assertEqual('d000004c92', str(unversioned))

        versioned = DatasetNumber(number, revision)
        self.assertEqual('d000004c9201C', str(versioned))

        # Parsing the string form must give back the same string.
        self.assertEqual('d000004c9201C',
                         str(ObjectNumber.parse(str(versioned))))

        table_number = TableNumber(versioned, 1)
        self.assertEqual('t000004c920101C', str(table_number))
        self.assertEqual('t000004c920101C',
                         str(ObjectNumber.parse(str(table_number))))

        # Dropping the revision, then setting a new one.
        bare_table = table_number.rev(None)
        self.assertEqual('t000004c9201', str(bare_table))
        self.assertEqual('t000004c9201004', str(bare_table.rev(4)))

        # Other assignment classes
        expected_by_class = (
            ('authoritative', 'dZZZ'),
            ('registered', 'd00ZZZ'),
            ('unregistered', 'd0000ZZZ'),
            ('self', 'd000000ZZZ'),
        )

        for assignment_class, expected in expected_by_class:
            dn = DatasetNumber(62**3 - 1, None, assignment_class)
            self.assertEqual(expected, str(dn))

        # ``dn`` is now the 'self' assigned number from the last iteration.
        tn = TableNumber(dn, 2)
        self.assertEqual('t000000ZZZ02', str(tn))

        cn = ColumnNumber(tn, 3)
        self.assertEqual('c000000ZZZ02003', str(cn))

        pn = dn.as_partition(5)
        self.assertEqual('p000000ZZZ005', str(pn))
Exemplo n.º 6
0
    def test_id(self):
        """Check the string forms of dataset, table, column and partition numbers."""
        number = 1000000
        revision = 100

        unversioned = DatasetNumber(number)
        self.assertEqual('d000004c92', str(unversioned))

        versioned = DatasetNumber(number, revision)
        self.assertEqual('d000004c9201C', str(versioned))

        # Parsing the string form must give back the same string.
        self.assertEqual('d000004c9201C',
                         str(ObjectNumber.parse(str(versioned))))

        table_number = TableNumber(versioned, 1)
        self.assertEqual('t000004c920101C', str(table_number))
        self.assertEqual('t000004c920101C',
                         str(ObjectNumber.parse(str(table_number))))

        # Dropping the revision, then setting a new one.
        bare_table = table_number.rev(None)
        self.assertEqual('t000004c9201', str(bare_table))
        self.assertEqual('t000004c9201004', str(bare_table.rev(4)))

        # Other assignment classes
        expected_by_class = (
            ('authoritative', 'dZZZ'),
            ('registered', 'd00ZZZ'),
            ('unregistered', 'd0000ZZZ'),
            ('self', 'd000000ZZZ'),
        )

        for assignment_class, expected in expected_by_class:
            dn = DatasetNumber(62 ** 3 - 1, None, assignment_class)
            self.assertEqual(expected, str(dn))

        # ``dn`` is now the 'self' assigned number from the last iteration.
        tn = TableNumber(dn, 2)
        self.assertEqual('t000000ZZZ02', str(tn))

        cn = ColumnNumber(tn, 3)
        self.assertEqual('c000000ZZZ02003', str(cn))

        pn = dn.as_partition(5)
        self.assertEqual('p000000ZZZ005', str(pn))
Exemplo n.º 7
0
    def table(self, ref):
        """ Looks up a table by object number or by name.

        Args:
            ref (str): id, vid (versioned id) or name of the table

        Raises:
            NotFoundError: if no table matches the given ref.

        Returns:
            orm.Table

        """

        try:
            dataset_number = ObjectNumber.parse(ref).as_dataset
            # Could do it in one SQL query, but going through the dataset is easier.
            found = self._db.dataset(dataset_number).table(ref)

        except NotObjectNumberError:
            # Not an object number: match on the table name and take the
            # row with the highest vid.
            by_name = self.database.session.query(Table)
            by_name = by_name.filter(Table.name == str(ref))
            by_name = by_name.order_by(Table.vid.desc())

            found = by_name.first()

        if found:
            return found

        raise NotFoundError("No table for ref: '{}'".format(ref))
Exemplo n.º 8
0
    def find_remote_bundle(self, ref, try_harder=None):
        """
        Locate a bundle, by any reference, among the configured remotes. The routine will only look in the cache
        directory lists stored in the remotes, which must be updated to be current.

        :param ref: A bundle or partition reference, vid, or name
        :param try_harder: If the reference isn't found, try parsing for an object id, or subsets of the name
        :return: (remote,vname) or (None,None) if the ref is not found
        :raises NotImplementedError: if the object-number fallback is reached; it is not implemented
        """
        from ambry.identity import ObjectNumber

        remote, vid = self._find_remote_bundle(ref)

        if remote:
            return (remote, vid)

        if try_harder:

            # NOTE(review): this parses ``vid`` from the failed lookup, not ``ref``.
            # Confirm which one was intended before implementing the branch below.
            on = ObjectNumber.parse(vid)

            if on:
                # Lookup through the dataset number is not implemented yet.
                raise NotImplementedError()

            # Try subsets of a name, assuming it is a name: drop trailing
            # '-' separated parts, keeping at least three of them.
            parts = ref.split('-')

            for i in range(len(parts) - 1, 2, -1):
                remote, vid = self._find_remote_bundle('-'.join(parts[:i]))

                if remote:
                    return (remote, vid)
        return (None, None)
Exemplo n.º 9
0
    def install(self, ref, table_name=None, index_columns=None, logger=None):
        """ Finds a table or partition by reference and installs it to warehouse db.

        Args:
            ref (str): id or vid of a table, or id, vid (versioned id), name or
                vname (versioned name) of a partition.
            table_name: passed to the backend when installing a partition.
            index_columns: passed to the backend when installing a partition.
            logger: passed to the backend.

        Returns:
            The result of the backend's ``install_table`` (for tables) or
            ``install`` (for partitions).

        """

        # Only the parse decides the branch. Keeping the install calls outside
        # the try means their own errors are never mistaken for "not a table".
        try:
            is_table = isinstance(ObjectNumber.parse(ref), TableNumber)
        except NotObjectNumberError:
            is_table = False

        if is_table:
            table = self._library.table(ref)
            connection = self._backend._get_connection()
            return self._backend.install_table(connection, table, logger=logger)

        # Anything that is not a table number is assumed to be a partition.
        partition = self._library.partition(ref)
        connection = self._backend._get_connection()

        return self._backend.install(
            connection, partition, table_name=table_name, index_columns=index_columns,
            logger=logger)
Exemplo n.º 10
0
    def search(self, search_phrase, limit=None):
        """Search for datasets and attach the library bundle to each result.

        Results whose bundle is not in the library are dropped. The returned
        list is ordered by descending ``score``.
        """
        from ambry.identity import ObjectNumber
        from ambry.orm.exc import NotFoundError
        from ambry.library.search_backends.base import SearchTermParser

        # Because of the split between searching for partitions and bundles, some terms don't behave right.
        # The source term should be a limit on everything, but it isn't part of the partition doc,
        # so we check for it here.
        terms = SearchTermParser().parse(search_phrase)

        matches = []

        for hit in self.search_datasets(search_phrase, limit):
            # Fall back to the dataset of the first partition when the hit has no vid.
            hit.vid = hit.vid or ObjectNumber.parse(next(iter(hit.partitions))).as_dataset

            try:
                hit.bundle = self.library.bundle(hit.vid)

                source_ok = 'source' not in terms or terms['source'] in hit.bundle.dataset.source
                if source_ok:
                    matches.append(hit)
            except NotFoundError:
                continue

        matches.sort(key=lambda hit: hit.score, reverse=True)
        return matches
Exemplo n.º 11
0
Arquivo: orm.py Projeto: kball/ambry
 def before_update(mapper, conn, target):
     '''Fill in a missing column id from the table id and the column's
     sequence id. Nothing is changed when ``target.id_`` is already set.'''

     if target.id_ is not None:
         return

     column_number = ColumnNumber(ObjectNumber.parse(target.t_id), target.sequence_id)
     target.id_ = str(column_number)
Exemplo n.º 12
0
Arquivo: orm.py Projeto: kball/ambry
    def __init__(self, table, **kwargs):
        """Build a column that belongs to ``table``.

        ``sequence_id`` defaults to one more than the number of columns the
        table already has. Every other field defaults to ``None``, except
        ``is_primary_key``, which defaults to ``False`` and is passed through
        ``_clean_flag``.

        Raises:
            ValueError: if no ``name`` is given.
        """

        self.sequence_id = kwargs.get("sequence_id", len(table.columns) + 1)

        for field in ("name", "altname"):
            setattr(self, field, kwargs.get(field, None))

        self.is_primary_key = _clean_flag(kwargs.get("is_primary_key", False))

        # The last entry, table_name, is not stored. It is only for
        # building the schema, linking the columns to tables.
        for field in ("datatype", "size", "precision", "width", "sql",
                      "flags", "description", "keywords", "measure", "units",
                      "universe", "scale", "data", "table_name"):
            setattr(self, field, kwargs.get(field, None))

        if not self.name:
            raise ValueError('Column must have a name')

        self.t_id = table.id_
        self.t_vid = table.vid

        column_number = ColumnNumber(ObjectNumber.parse(table.vid), self.sequence_id)
        self.vid = str(column_number)
        # NOTE(review): this sets ``id`` while the table side reads ``id_`` -- confirm
        # which attribute the column id is meant to live on.
        self.id = str(column_number.rev(None))
Exemplo n.º 13
0
 def test_parse_other(self):
     """Check the string form of a ``GeneralNumber1`` and parsing it as 'other1'."""
     dataset_number = DatasetNumber(100, 5, 'authoritative')
     self.assertEqual('d01C005', str(dataset_number))

     expected = 'G01C001Z005'
     self.assertEqual(expected, str(GeneralNumber1('G', dataset_number, 123)))

     reparsed = ObjectNumber.parse(
         str(GeneralNumber1('G', dataset_number, 123)), 'other1')
     self.assertEqual(expected, reparsed)
Exemplo n.º 14
0
Arquivo: orm.py Projeto: kball/ambry
    def __init__(self, **kwargs):
        """Build a dataset record from keyword arguments.

        The id is taken from ``oid``, ``id`` or ``id_``, in that order. When
        no id is given, a new ``DatasetNumber`` is created and both ``vid``
        and ``id_`` come from it. When an id is given without a ``vid``, the
        vid is the id at ``revision``. ``location`` defaults to
        ``self.LOCATION.LIBRARY``; other fields default to ``None``. A
        missing ``cache_key`` is taken from ``self.identity``.

        Raises:
            ValueError: if the given id cannot be parsed.
            AssertionError: if the resulting vid does not start with 'd'.
        """
        self.id_ = kwargs.get("oid", kwargs.get("id", kwargs.get("id_", None)))
        self.vid = kwargs.get("vid", None)
        self.location = kwargs.get("location", self.LOCATION.LIBRARY)
        self.name = kwargs.get("name", None)
        self.vname = kwargs.get("vname", None)
        self.fqname = kwargs.get("fqname", None)
        self.cache_key = kwargs.get("cache_key", None)
        self.source = kwargs.get("source", None)
        self.dataset = kwargs.get("dataset", None)
        self.subset = kwargs.get("subset", None)
        self.variation = kwargs.get("variation", None)
        self.btime = kwargs.get("btime", None)
        self.bspace = kwargs.get("bspace", None)
        self.creator = kwargs.get("creator", None)
        self.revision = kwargs.get("revision", None)
        self.version = kwargs.get("version", None)

        if not self.id_:
            dn = DatasetNumber(None, self.revision)
            self.vid = str(dn)
            self.id_ = str(dn.rev(None))
        elif not self.vid:
            try:
                self.vid = str(ObjectNumber.parse(self.id_).rev(self.revision))
            except ValueError as e:
                # print() with one argument and str(e) behave the same on
                # Python 2 and 3; the print statement and ``e.message`` are
                # Python 2-only.
                print(repr(self))
                raise ValueError('Could not parse id value; ' + str(e))

        if self.cache_key is None:
            self.cache_key = self.identity.cache_key

        assert self.vid[0] == 'd'
Exemplo n.º 15
0
    def search(self, search_phrase, limit=None):
        """Search for datasets and attach the library bundle to each result.

        Results whose bundle is not in the library are dropped. The returned
        list is ordered by descending ``score``.
        """
        from ambry.identity import ObjectNumber
        from ambry.orm.exc import NotFoundError
        from ambry.library.search_backends.base import SearchTermParser

        # Because of the split between searching for partitions and bundles, some terms don't behave right.
        # The source term should be a limit on everything, but it isn't part of the partition doc,
        # so we check for it here.
        terms = SearchTermParser().parse(search_phrase)

        matches = []

        for hit in self.search_datasets(search_phrase, limit):
            # Fall back to the dataset of the first partition when the hit has no vid.
            hit.vid = hit.vid or ObjectNumber.parse(
                next(iter(hit.partitions))).as_dataset

            try:
                hit.bundle = self.library.bundle(hit.vid)

                source_ok = ('source' not in terms
                             or terms['source'] in hit.bundle.dataset.source)
                if source_ok:
                    matches.append(hit)
            except NotFoundError:
                continue

        matches.sort(key=lambda hit: hit.score, reverse=True)
        return matches
Exemplo n.º 16
0
def tc_obj(ref):
    """Return the doc-cache entry for a table or column reference.

    Returns ``None`` when the reference cannot be dereferenced, or when the
    column cannot be looked up in the table entry.
    """

    doc_cache = renderer().doc_cache

    try:
        _, table_on, column_on = deref_tc_ref(ref)
    except NotFoundError:
        return None

    try:
        table = doc_cache.table(str(table_on))
    except NotFoundError:
        # This can happen when the table reference has a version id in it, and that version is not available.
        # So, try it again without the version
        unversioned = ObjectNumber.parse(str(table_on)).rev(None)
        table = doc_cache.table(str(unversioned))

    if not column_on:
        return table

    try:
        # Columns are keyed by the column number at revision 0.
        return table['columns'][str(column_on.rev(0))]
    except (KeyError, TypeError):
        return None
Exemplo n.º 17
0
    def find_remote_bundle(self, ref, try_harder=None):
        """
        Locate a bundle, by any reference, among the configured remotes. The routine will only look in the cache
        directory lists stored in the remotes, which must be updated to be current.

        :param ref: A bundle or partition reference, vid, or name
        :param try_harder: If the reference isn't found, try parsing for an object id, or subsets of the name
        :return: (remote,vname) or (None,None) if the ref is not found
        :raises NotImplementedError: if the object-number fallback is reached; it is not implemented
        """
        from ambry.identity import ObjectNumber

        remote, vid = self._find_remote_bundle(ref)

        if remote:
            return (remote, vid)

        if try_harder:

            # NOTE(review): this parses ``vid`` from the failed lookup, not ``ref``.
            # Confirm which one was intended before implementing the branch below.
            on = ObjectNumber.parse(vid)

            if on:
                # Lookup through the dataset number is not implemented yet.
                raise NotImplementedError()

            # Try subsets of a name, assuming it is a name: drop trailing
            # '-' separated parts, keeping at least three of them.
            parts = ref.split('-')

            for i in range(len(parts) - 1, 2, -1):
                remote, vid = self._find_remote_bundle('-'.join(parts[:i]))

                if remote:
                    return (remote, vid)
        return (None, None)
Exemplo n.º 18
0
    def table(self, ref):
        """ Looks up a table by object number or by name.

        Args:
            ref (str): id, vid (versioned id) or name of the table

        Raises:
            NotFoundError: if no table matches the given ref.

        Returns:
            orm.Table

        """

        try:
            dataset_number = ObjectNumber.parse(ref).as_dataset
            # Could do it in one SQL query, but going through the dataset is easier.
            found = self._db.dataset(dataset_number).table(ref)

        except NotObjectNumberError:
            # Not an object number: match on the table name and take the
            # row with the highest vid.
            by_name = self.database.session.query(Table)
            by_name = by_name.filter(Table.name == str(ref))
            by_name = by_name.order_by(Table.vid.desc())

            found = by_name.first()

        if found:
            return found

        raise NotFoundError("No table for ref: '{}'".format(ref))
Exemplo n.º 19
0
    def test_returns_identity_of_dataset_found_by_dataset_number(self):
        """Fetching by a parsed dataset number returns the matching dataset."""
        ds1 = DatasetFactory()
        ds_number = ObjectNumber.parse(ds1.vid)
        self.assertIsInstance(ds_number, DatasetNumber)

        ret = self.sqlite_db.get(ds_number)
        # assertEqual: the ``assertEquals`` alias was removed in Python 3.12.
        self.assertEqual(ret.vid, ds1.vid)
Exemplo n.º 20
0
    def install(self, ref, table_name=None, index_columns=None, logger=None):
        """ Finds a table or partition by reference and installs it to warehouse db.

        Args:
            ref (str): id or vid of a table, or id, vid (versioned id), name or
                vname (versioned name) of a partition.
            table_name: passed to the backend when installing a partition.
            index_columns: passed to the backend when installing a partition.
            logger: passed to the backend.

        Returns:
            The result of the backend's ``install_table`` (for tables) or
            ``install`` (for partitions).

        """

        # Only the parse decides the branch. Keeping the install calls outside
        # the try means their own errors are never mistaken for "not a table".
        try:
            is_table = isinstance(ObjectNumber.parse(ref), TableNumber)
        except NotObjectNumberError:
            is_table = False

        if is_table:
            table = self._library.table(ref)
            connection = self._backend._get_connection()
            return self._backend.install_table(connection,
                                               table,
                                               logger=logger)

        # Anything that is not a table number is assumed to be a partition.
        partition = self._library.partition(ref)
        connection = self._backend._get_connection()

        return self._backend.install(connection,
                                     partition,
                                     table_name=table_name,
                                     index_columns=index_columns,
                                     logger=logger)
Exemplo n.º 21
0
    def update_id(self, sequence_id=None, force=True):
        """Set the sequence id, if one is given, and rebuild the ids derived from it.

        This often needs to be done after an IntegrityError in a
        multiprocessing run. ``id`` and ``d_id``, and ``vid``, are each
        rebuilt when missing or when ``force`` is true.
        """
        from ..identity import ObjectNumber

        if sequence_id:
            self.sequence_id = sequence_id

        assert self.d_vid

        if self.id is None or force:
            # Unversioned ids come from the dataset number without a revision.
            unversioned_dataset = ObjectNumber.parse(self.d_vid).rev(None)
            self.d_id = str(unversioned_dataset)
            self.id = str(TableNumber(unversioned_dataset, self.sequence_id))

        if self.vid is None or force:
            versioned_dataset = ObjectNumber.parse(self.d_vid)
            self.vid = str(TableNumber(versioned_dataset, self.sequence_id))
Exemplo n.º 22
0
Arquivo: orm.py Projeto: kball/ambry
 def before_update(mapper, conn, target):
     '''Fill in a missing table id from the dataset id and the table's
     sequence number. Raises TypeError when given a Column.'''
     if isinstance(target, Column):
         raise TypeError('Got a column instead of a table')

     if target.id_ is not None:
         return

     table_number = TableNumber(ObjectNumber.parse(target.d_id), target.sequence_id)
     target.id_ = str(table_number)
Exemplo n.º 23
0
    def test_returns_identity_of_dataset_found_by_partition_number(self):
        """Fetching by a parsed partition number returns the owning dataset."""
        ds1 = DatasetFactory()
        partition1 = PartitionFactory(dataset=ds1)
        part_number = ObjectNumber.parse(partition1.vid)
        self.assertIsInstance(part_number, PartitionNumber)

        ret = self.sqlite_db.get(part_number)
        # assertEqual: the ``assertEquals`` alias was removed in Python 3.12.
        self.assertEqual(ret.vid, ds1.vid)
        self.assertIn(partition1.vid, ret.partitions)
Exemplo n.º 24
0
    def update_id(self, sequence_id=None, force=True):
        """Set the sequence id, if one is given, and rebuild the ids derived from it.

        This often needs to be done after an IntegrityError in a
        multiprocessing run. ``id`` and ``d_id``, and ``vid``, are each
        rebuilt when missing or when ``force`` is true.
        """
        from ..identity import ObjectNumber

        if sequence_id:
            self.sequence_id = sequence_id

        assert self.d_vid

        if self.id is None or force:
            # Unversioned ids come from the dataset number without a revision.
            unversioned_dataset = ObjectNumber.parse(self.d_vid).rev(None)
            self.d_id = str(unversioned_dataset)
            self.id = str(TableNumber(unversioned_dataset, self.sequence_id))

        if self.vid is None or force:
            versioned_dataset = ObjectNumber.parse(self.d_vid)
            self.vid = str(TableNumber(versioned_dataset, self.sequence_id))
Exemplo n.º 25
0
def deref_tc_ref(ref):
    """Resolve a table or column reference into object numbers.

    Args:
        ref: A table or column id or vid.

    Returns:
        A ``(b, t, c)`` tuple for the dataset, the table and the column.
        ``c`` is ``None`` when the parsed number has no ``as_table``
        attribute. ``b`` is a string when ``ref`` already carries a revision,
        and the dataset object number when the revision had to be looked up.

    Raises:
        NotFoundError: if a revision has to be looked up and the table is not
            in the table version map.
    """
    from ambry.identity import ObjectNumber
    from ambry.dbexceptions import NotFoundError

    on = ObjectNumber.parse(ref)

    b = str(on.as_dataset)

    try:
        c = on
        t = on.as_table
    except AttributeError:
        # No ``as_table``: treat the number itself as the table.
        t = on
        c = None

    if not on.revision:
        # The table does not have a revision, so we need to get one, just get the
        # latest one
        from . import renderer

        tm = renderer().doc_cache.table_version_map()

        if str(t) not in tm:
            # This happens when the the referenced table is in a bundle that is not installed,
            # often because it is private or restricted
            raise NotFoundError('Table {} not in table_version_map'.format(str(t)))

        # Use the next() builtin; the ``.next()`` iterator method exists only
        # on Python 2.
        t_vid = next(reversed(sorted(tm.get(str(t)))))

        t = ObjectNumber.parse(t_vid)
        b = t.as_dataset

        if c:
            c = c.rev(t.revision)

    return b, t, c
Exemplo n.º 26
0
Arquivo: orm.py Projeto: kball/ambry
    def set_ids(self, sequence_id):
        """Store ``sequence_id`` and rebuild ``vid``, ``id_`` and ``fqname`` from it."""
        from identity import Identity

        self.sequence_id = sequence_id

        partition_number = PartitionNumber(ObjectNumber.parse(self.d_vid),
                                           self.sequence_id)

        self.vid = str(partition_number)
        # The id is the same number with the revision removed.
        self.id_ = str(partition_number.rev(None))
        self.fqname = Identity._compose_fqname(self.vname, self.vid)
Exemplo n.º 27
0
    def resolve_object_number(self, ref):
        """Resolve a variety of object numbers to a dataset number.

        ``ref`` may be an ``ObjectNumber`` or anything ``ObjectNumber.parse``
        accepts.
        """
        on = ref if isinstance(ref, ObjectNumber) else ObjectNumber.parse(ref)
        return on.as_dataset
Exemplo n.º 28
0
    def test_increment(self):
        """Check that ``ObjectNumber.increment`` raises the revision by one."""
        base = DatasetNumber(1000000, 100)
        self.assertEqual('d000004c9201C', str(base))

        once = ObjectNumber.increment(base)
        self.assertEqual(101, once.revision)

        twice = ObjectNumber.increment(once)
        self.assertEqual(102, twice.revision)

        # A table number takes the revision of its dataset number.
        table_number = TableNumber(twice, 1)
        self.assertEqual(102, table_number.revision)
        self.assertEqual('t000004c920101E', str(table_number))

        bumped_table = ObjectNumber.increment(table_number)
        self.assertEqual(103, bumped_table.revision)
Exemplo n.º 29
0
    def resolve_object_number(self, ref):
        """Resolve a variety of object numbers to a dataset number.

        ``ref`` may be an ``ObjectNumber`` or anything ``ObjectNumber.parse``
        accepts.
        """
        on = ref if isinstance(ref, ObjectNumber) else ObjectNumber.parse(ref)
        return on.as_dataset
Exemplo n.º 30
0
    def test_increment(self):
        """Check that ``ObjectNumber.increment`` raises the revision by one."""
        base = DatasetNumber(1000000, 100)
        self.assertEqual('d000004c9201C', str(base))

        once = ObjectNumber.increment(base)
        self.assertEqual(101, once.revision)

        twice = ObjectNumber.increment(once)
        self.assertEqual(102, twice.revision)

        # A table number takes the revision of its dataset number.
        table_number = TableNumber(twice, 1)
        self.assertEqual(102, table_number.revision)
        self.assertEqual('t000004c920101E', str(table_number))

        bumped_table = ObjectNumber.increment(table_number)
        self.assertEqual(103, bumped_table.revision)
Exemplo n.º 31
0
def substitute_vids(library, statement):
    """ Replace all of the references to tables and partitions with their vids.

    This is a bit of a hack -- it ought to work with the parser, but instead it just looks for
    common SQL tokens that indicate an identifier: the token that follows ``from``, ``join``,
    ``materialize`` or ``install`` is treated as the identifier.

    :param library: object whose ``table(ref)`` and ``partition(ref)`` methods look up the identifiers.
    :param statement: an sqlstatement. String, or an object with a ``to_unicode()`` method.
    :return: tuple: new_statement, set of table vids, set of partition vids.
    """
    from ambry.identity import ObjectNumber, TableNumber, NotObjectNumberError
    from ambry.orm.exc import NotFoundError

    try:
        stmt_str = statement.to_unicode()
    except AttributeError:
        stmt_str = statement

    tokens = iter(stmt_str.strip(';').split())

    new_parts = []

    tables = set()
    partitions = set()

    for token in tokens:
        new_parts.append(token)

        if token.lower() not in ('from', 'join', 'materialize', 'install'):
            continue

        ident = next(tokens, None)
        if ident is None:
            # The keyword was the last token; there is no identifier to replace.
            break
        ident = ident.strip(';')

        # Classify the identifier itself. The keyword token is never an
        # object number, so parsing it would always pick the partition branch.
        try:
            is_table = isinstance(ObjectNumber.parse(ident), TableNumber)
        except NotObjectNumberError:
            # Do not care about other references. Assume partition.
            is_table = False

        try:
            if is_table:
                vid = library.table(ident).vid
                tables.add(vid)
            else:
                vid = library.partition(ident).vid
                partitions.add(vid)
        except NotFoundError:
            # Ok, maybe it is just a normal identifier...
            vid = ident

        new_parts.append(vid)

    return ' '.join(new_parts).strip(), tables, partitions
Exemplo n.º 32
0
def partition_path(b, p=None):
    """Return the HTML path of a partition page.

    Args:
        b: Bundle reference, or the partition reference when ``p`` is omitted.
        p: Partition reference. When ``None``, ``b`` is used as the partition
            and the bundle is the dataset of its parsed object number.

    Raises:
        AttributeError: if the parsed number has no ``as_dataset``.
    """
    if p is None:
        from ambry.identity import ObjectNumber

        p = b
        # An AttributeError here propagates unchanged. The old handler only
        # set ``b`` and re-raised, so it had no effect.
        b = str(ObjectNumber.parse(p).as_dataset)

    return "/bundles/{}/partitions/{}.html".format(resolve(b), resolve(p))
Exemplo n.º 33
0
def substitute_vids(library, statement):
    """ Replace all of the references to tables and partitions with their vids.

    This is a bit of a hack -- it ought to work with the parser, but instead it just looks for
    common SQL tokens that indicate an identifier: the token that follows ``from``, ``join``,
    ``materialize`` or ``install`` is treated as the identifier.

    :param library: object whose ``table(ref)`` and ``partition(ref)`` methods look up the identifiers.
    :param statement: an sqlstatement. String, or an object with a ``to_unicode()`` method.
    :return: tuple: new_statement, set of table vids, set of partition vids.
    """
    from ambry.identity import ObjectNumber, TableNumber, NotObjectNumberError
    from ambry.orm.exc import NotFoundError

    try:
        stmt_str = statement.to_unicode()
    except AttributeError:
        stmt_str = statement

    tokens = iter(stmt_str.strip(';').split())

    new_parts = []

    tables = set()
    partitions = set()

    for token in tokens:
        new_parts.append(token)

        if token.lower() not in ('from', 'join', 'materialize', 'install'):
            continue

        ident = next(tokens, None)
        if ident is None:
            # The keyword was the last token; there is no identifier to replace.
            break
        ident = ident.strip(';')

        # Classify the identifier itself. The keyword token is never an
        # object number, so parsing it would always pick the partition branch.
        try:
            is_table = isinstance(ObjectNumber.parse(ident), TableNumber)
        except NotObjectNumberError:
            # Do not care about other references. Assume partition.
            is_table = False

        try:
            if is_table:
                vid = library.table(ident).vid
                tables.add(vid)
            else:
                vid = library.partition(ident).vid
                partitions.add(vid)
        except NotFoundError:
            # Ok, maybe it is just a normal identifier...
            vid = ident

        new_parts.append(vid)

    return ' '.join(new_parts).strip(), tables, partitions
Exemplo n.º 34
0
    def __init__(self, *args, **kwargs):
        """Build a dataset and fill in the ids and names that were not given.

        After the base initializer runs: the id and revision are taken from
        ``vid`` when only the vid is set; a new ``DatasetNumber`` is created
        when there is no id; the vid is built from the id and revision when
        only the id is set. A missing revision becomes 1. ``cache_key``,
        ``name``, ``vname``, ``fqname`` and ``version`` are taken from
        ``self.identity`` when empty.

        Raises:
            ValueError: if the id cannot be parsed.
            AssertionError: if the resulting vid does not start with 'd'.
        """

        super(Dataset, self).__init__(*args, **kwargs)

        if self.vid and not self.id:
            self.revision = ObjectNumber.parse(self.vid).revision
            self.id = str(ObjectNumber.parse(self.vid).rev(None))

        if not self.id:
            dn = DatasetNumber(None, self.revision)
            self.vid = str(dn)
            self.id = str(dn.rev(None))
        elif not self.vid:
            try:
                self.vid = str(ObjectNumber.parse(self.id).rev(self.revision))
            except ValueError as e:
                # str(e) works on Python 2 and 3; ``e.message`` does not exist
                # on Python 3 and would hide this error behind an AttributeError.
                raise ValueError('Could not parse id value; ' + str(e))

        if not self.revision:
            self.revision = 1

        if self.cache_key is None:
            self.cache_key = self.identity.name.cache_key

        if not self.name:
            self.name = str(self.identity.name)

        if not self.vname:
            self.vname = str(self.identity.vname)

        if not self.fqname:
            self.fqname = str(self.identity.fqname)

        if not self.version:
            self.version = str(self.identity.version)

        assert self.vid[0] == 'd'
Exemplo n.º 35
0
        def augment(path, o, parent=None):
            """Descend recursively, collecting one flat dict per dict found.

            Every dict reached gets ``path``, ``path_num`` and ``parent`` keys
            added in place. Its non-list, non-dict values are copied into a
            new dict that is appended after the entries of its children.
            """
            from ambry.identity import ObjectNumber

            # Each path step contributes its index as a two-character,
            # zero-padded base62 string.
            encoded = ''.join(ObjectNumber.base62_encode(step[1]).zfill(2) for step in path)
            path_num = ObjectNumber.base62_decode(encoded)

            collected = []

            if isinstance(o, dict):

                for index, (key, value) in enumerate(o.items(), 1):
                    collected.extend(augment(path + [(key, index)], value, key))

                o['path'] = tuple(path)
                o['path_num'] = path_num
                o['parent'] = parent or 'root'

                flat = {k: v for k, v in o.items() if not isinstance(v, (list, dict))}
                collected.append(flat)

            elif isinstance(o, list):
                # List items keep the parent of the list itself.
                for index, value in enumerate(o, 1):
                    collected.extend(augment(path + [(index, index)], value, parent))

            return collected
Exemplo n.º 36
0
def partition_path(b, p=None):
    """Return the HTML path of a partition page, or None for a non object number.

    Args:
        b: Used as the partition reference when ``p`` is omitted.
        p: Partition reference; defaults to ``b``.

    Returns:
        The path string, or ``None`` when the reference raises
        ``NotObjectNumberError``.

    Raises:
        AttributeError: if the parsed number has no ``as_dataset``.
    """
    if p is None:
        p = b

    # NOTE(review): the bundle is always taken from the partition number, so an
    # explicitly passed ``b`` is ignored when ``p`` is given -- confirm this is intended.
    try:
        b = str(ObjectNumber.parse(p).as_dataset)
    except NotObjectNumberError:
        return None

    return '/bundles/{}/partitions/{}.html'.format(resolve(b), resolve(p))
Exemplo n.º 37
0
def incver(o, prop_names):
    """Return a new object of the same class with the named properties incremented.

    Every mapped attribute of ``o`` is visited. ``None`` values are copied as
    ``None``, attributes named in ``prop_names`` are passed through
    ``ObjectNumber.increment`` and stored as strings, and other values are
    copied unless they are mapped objects themselves.
    """
    from ambry.identity import ObjectNumber

    values = {}

    for attr in o.__mapper__.attrs:
        key = attr.key
        current = getattr(o, key)

        if current is None:
            values[key] = None
        elif key in prop_names:
            values[key] = str(ObjectNumber.increment(current))
        elif not hasattr(current, '__mapper__'):
            # Only copy values, never objects
            values[key] = current

    return o.__class__(**values)
Exemplo n.º 38
0
def incver(o, prop_names):
    """Copy a mapped object, bumping the version numbers of the named properties.

    For each attribute in ``o.__mapper__.attrs``: a None value stays None, a
    value whose key is in ``prop_names`` becomes
    ``str(ObjectNumber.increment(value))``, and other values are copied as-is
    unless they carry a ``__mapper__`` themselves (those are skipped).

    :param o: mapped object to copy.
    :param prop_names: names of the properties to increment.
    :return: a new instance of ``o.__class__`` built from the collected values.
    """
    from ambry.identity import ObjectNumber

    copied = {}

    for prop in o.__mapper__.attrs:
        name = prop.key
        value = getattr(o, name)

        if value is None:
            copied[name] = None
        elif name in prop_names:
            copied[name] = str(ObjectNumber.increment(value))
        elif not hasattr(value, '__mapper__'):
            # Only copy values, never objects
            copied[name] = value

    return o.__class__(**copied)
Exemplo n.º 39
0
    def _set_ids(self, force=False):
        """Derive ``vid`` and ``id`` from the dataset vid and the sequence id.

        The ids are computed when ``vid`` is not yet set, or always when
        ``force`` is true. A falsy ``data`` attribute is replaced by an empty dict.

        :param force: recompute the ids even if ``vid`` is already set.
        :raises DatabaseError: if ``sequence_id`` is not set.
        """
        if not self.sequence_id:
            from .exc import DatabaseError

            raise DatabaseError('Sequence ID must be set before insertion')

        if not self.vid or force:
            assert bool(self.d_vid)
            assert bool(self.sequence_id)

            dataset_number = ObjectNumber.parse(self.d_vid)
            assert dataset_number.revision

            partition_number = dataset_number.as_partition(self.sequence_id)

            # vid carries the dataset's revision; id is the same number without one.
            self.vid = str(partition_number.rev(dataset_number.revision))
            self.id = str(partition_number.rev(None))

        if not self.data:
            self.data = {}
Exemplo n.º 40
0
    def partition(self, ref, localize=False):
        """ Look up a partition by reference and return it wrapped by its bundle.

        The reference is first parsed as an object number and resolved through
        its dataset. If it is not an object number, it is matched against
        partition names and versioned names, taking the highest vid.

        :param ref: A partition reference
        :param localize: If True, call localize() on the wrapped partition. Defaults to False
        :raises: NotFoundError: if ref is empty or no partition matches it.
        :return: the partition as returned by the bundle's wrap_partition().
        """

        if not ref:
            raise NotFoundError("No partition for empty ref")

        try:
            number = ObjectNumber.parse(ref)

            # Could be done in one SQL query, but going through the dataset is easier.
            dataset = self._db.dataset(number.as_dataset)

            # The refresh is required because in some places the dataset is loaded
            # without the partitions, and if that persists, we won't have partitions
            # in it until it is refreshed.
            self.database.session.refresh(dataset)

            found = dataset.partition(ref)

        except NotObjectNumberError:
            query = self.database.session.query(Partition)
            by_name = or_(Partition.name == str(ref), Partition.vname == str(ref))

            found = query.filter(by_name).order_by(Partition.vid.desc()).first()

        if not found:
            raise NotFoundError("No partition for ref: '{}'".format(ref))

        bundle = self.bundle(found.d_vid)
        wrapped = bundle.wrap_partition(found)

        if localize:
            wrapped.localize()

        return wrapped
Exemplo n.º 41
0
    def partition(self, ref, localize=False):
        """ Find a partition by ref and convert it to a bundle partition.

        An object-number ref is resolved through its dataset; any other ref is
        compared with the name and vname columns, and the match with the
        greatest vid is used.

        :param ref: A partition reference
        :param localize: If True, call localize() on the result before returning it. Defaults to False
        :raises: NotFoundError: if ref is empty or nothing matches it.
        :return: the partition wrapped by the bundle that owns it.
        """

        if not ref:
            raise NotFoundError("No partition for empty ref")

        try:
            dataset_number = ObjectNumber.parse(ref).as_dataset

            # Could be done in one SQL query, but this is easier.
            ds = self._db.dataset(dataset_number)

            # In some places the dataset is loaded without its partitions; refresh
            # it so they are present before searching.
            self.database.session.refresh(ds)

            match = ds.partition(ref)

        except NotObjectNumberError:
            candidates = self.database.session.query(Partition)
            name_filter = or_(Partition.name == str(ref), Partition.vname == str(ref))
            candidates = candidates.filter(name_filter)
            candidates = candidates.order_by(Partition.vid.desc())

            match = candidates.first()

        if not match:
            raise NotFoundError("No partition for ref: '{}'".format(ref))

        owner = self.bundle(match.d_vid)
        result = owner.wrap_partition(match)

        if localize:
            result.localize()

        return result
Exemplo n.º 42
0
Arquivo: orm.py Projeto: kball/ambry
    def __init__(self,dataset, **kwargs):
        """Initialise the record from keyword values and its owning dataset.

        The plain fields are taken from ``kwargs`` (None when absent). The
        dataset ids are copied from ``dataset``, and ``vid`` / ``id_`` are built
        from a TableNumber made of the dataset's number and ``sequence_id``.
        A non-empty name is passed through ``mangle_name()``, and
        ``init_on_load()`` is called last.

        :param dataset: object providing ``id_`` and ``vid``.
        :param kwargs: optional sequence_id, name, vname, altname, description,
            universe, keywords and data values.
        """
        plain_fields = ("sequence_id", "name", "vname", "altname",
                        "description", "universe", "keywords", "data")

        for field in plain_fields:
            setattr(self, field, kwargs.get(field))

        self.d_id = dataset.id_
        self.d_vid = dataset.vid

        dataset_number = ObjectNumber.parse(dataset.vid)
        table_number = TableNumber(dataset_number, self.sequence_id)

        # vid is the number as built; id_ is the same number with the revision cleared.
        self.vid = str(table_number)
        self.id_ = str(table_number.rev(None))

        if self.name:
            self.name = self.mangle_name(self.name)

        self.init_on_load()
Exemplo n.º 43
0
    def put_manifest(self, m, f):
        """Build the cache document for a manifest and store it.

        WARNING! This method must be run after all of the bundles are already
        cached, or at least the bundles used in this manifest.

        The document starts from ``m.dict`` and gains the keys 'file', 'text',
        'partitions', 'tables', 'bundles', 'installed_table_names' and 'views'.

        :param m: manifest; its ``dict``, ``str()`` form and ``uid`` are used.
        :param f: file record; ``f.dict`` is embedded under 'file' and supplies
            the 'partitions' and 'tables' vid lists.
        :return: the result of ``self.put()`` for the manifest's relative path.
        :raises KeyError: if a cached bundle lacks a partition or table that the
            manifest refers to.
        """

        from ambry.identity import ObjectNumber

        d = m.dict
        d['file'] = f.dict
        d['text'] = str(m)

        # Update the partitions to include bundle references,
        # then add bundle information.

        # Partition vid -> vid of the dataset (bundle) it belongs to.
        partitions = {pvid: str(ObjectNumber.parse(pvid).as_dataset)
                      for pvid in f.dict.get('partitions', [])}

        d["partitions"] = partitions

        def table_entry(tvid):
            # Cached table record without its 'columns', plus an empty
            # 'installed_names' list. Built without concatenating items() to a
            # list, which fails on Python 3 where items() is a view.
            entry = {k: v for k, v in self.get_table(tvid).items() if k != 'columns'}
            entry['installed_names'] = []
            return entry

        d['tables'] = {tvid: table_entry(tvid) for tvid in f.dict.get('tables', [])}

        d['bundles'] = {vid: self.get_bundle(vid)
                        for vid in partitions.values()}

        for vid, b in d['bundles'].items():
            b['installed_partitions'] = [
                pvid for pvid, pbvid in partitions.items() if vid == pbvid]

        # Generate entries for the tables, using the names that they are installed with. These tables aren't
        # necessarily installed; this maps the installation names to vids if they
        # are installed.

        installed_table_names = {}

        def inst_table_entry(b, p, t):
            return dict(
                t_vid=t['vid'],
                t_name=t['name'],
                p_vid=p['vid'],
                p_vname=p['vname'],
                b_vid=b['identity']['vid'],
                b_vname=b['identity']['vname']
            )

        # NOTE(review): the entries built here are never stored, so
        # installed_table_names is always empty. The walk is kept because it
        # raises KeyError when a bundle is missing a referenced partition or
        # table. Confirm the intended key before populating the map.
        # One pass over the partitions is enough; the work does not depend on
        # which bundle an enclosing loop would be visiting.
        for pvid, bvid in partitions.items():
            bundle = d['bundles'][bvid]
            part = bundle['partitions'][pvid]
            for tvid in part['table_vids']:
                inst_table_entry(bundle, part, bundle['tables'][tvid])

        d['installed_table_names'] = installed_table_names

        # Collect the views and mviews

        views = {}

        for s in d['sections']:
            if s['tag'] in ('view', 'mview'):
                content = s.get('content', {})
                views[s['args']] = dict(
                    tag=s['tag'],
                    tc_names=content.get('tc_names'),
                    html=content.get('html'),
                    text=content.get('text'),
                )

        d['views'] = views

        return self.put(self.manifest_relpath(m.uid), d)
Exemplo n.º 44
0
    def before_insert(mapper, conn, target):
        """Set ``target.d_vid`` to the dataset number derived from ``target.c_vid``.

        NOTE(review): the (mapper, conn, target) signature looks like a
        SQLAlchemy mapper event listener; confirm where it is registered.

        :param mapper: unused.
        :param conn: unused.
        :param target: object whose ``c_vid`` is parsed and whose ``d_vid`` is set.
        """
        parsed = ObjectNumber.parse(target.c_vid)
        dataset_number = parsed.as_dataset

        target.d_vid = str(dataset_number)
Exemplo n.º 45
0
    def add_column(self, name, update_existing=False, **kwargs):
        """
        Add a column to the table, or update an existing one.

        :param name: Name of the new or existing column.
        :param update_existing: If True, alter existing column values. Defaults to False
        :param kwargs: Attribute values to set on the column. Keys starting with
            '_' and the keys 't_vid', 'name' and 'sequence_id' are ignored.
            'data' is merged over the column's existing data; a missing or None
            'data' merges nothing.
        :return: a Column object
        :raises AttributeError: if setting one of the keyword values on the column fails.
        """
        from ..identity import ColumnNumber

        try:
            c = self.column(name)
            extant = True

            if not update_existing:
                return c

        except NotFoundError:
            # New columns are numbered after the ones already on the table.
            sequence_id = len(self.columns) + 1

            c = Column(t_vid=self.vid,
                       sequence_id=sequence_id,
                       vid=str(
                           ColumnNumber(ObjectNumber.parse(self.vid),
                                        sequence_id)),
                       name=name,
                       datatype='str')
            extant = False

        # Update possibly existing data; incoming keys override existing ones.
        # ``or {}`` keeps an explicit data=None from failing on None.items().
        existing_data = list(c.data.items()) if c.data else []
        incoming_data = list((kwargs.get('data') or {}).items())
        c.data = dict(existing_data + incoming_data)

        # Keys that are fixed at construction time or handled separately above.
        protected = ['t_vid', 'name', 'sequence_id', 'data']

        for key, value in list(kwargs.items()):

            if key[0] != '_' and key not in protected:

                # Don't update the type if the user has specified a custom type
                if key == 'datatype' and not c.type_is_builtin():
                    continue

                # Don't change a datatype if the value is set and the new value is unknown
                if key == 'datatype' and value == 'unknown' and c.datatype:
                    continue

                # Don't overwrite a description with an empty value
                if key == 'description' and not value:
                    continue

                try:
                    setattr(c, key, value)
                except AttributeError:
                    raise AttributeError(
                        "Column record has no attribute {}".format(key))

            # An empty string for is_primary_key means "not a primary key".
            if key == 'is_primary_key' and isinstance(value,
                                                      str) and len(value) == 0:
                value = False
                setattr(c, key, value)

        # If the id column has a description and the table does not, add it to
        # the table.
        if c.name == 'id' and c.is_primary_key and not self.description:
            self.description = c.description

        if not extant:
            self.columns.append(c)

        return c
Exemplo n.º 46
0
    def put_manifest(self, m, f):
        """Build the cache document for a manifest and store it.

        WARNING! This method must be run after all of the bundles are already
        cached, or at least the bundles used in this manifest.

        The document starts from ``m.dict`` and gains the keys 'file', 'text',
        'partitions', 'tables', 'bundles', 'installed_table_names' and 'views'.

        :param m: manifest; its ``dict``, ``str()`` form and ``uid`` are used.
        :param f: file record; ``f.dict`` is embedded under 'file' and supplies
            the 'partitions' and 'tables' vid lists.
        :return: the result of ``self.put()`` for the manifest's relative path.
        :raises KeyError: if a cached bundle lacks a partition or table that the
            manifest refers to.
        """

        from ambry.identity import ObjectNumber

        d = m.dict
        d['file'] = f.dict
        d['text'] = str(m)

        # Update the partitions to include bundle references,
        # then add bundle information.

        # Partition vid -> vid of the dataset (bundle) it belongs to.
        partitions = {pvid: str(ObjectNumber.parse(pvid).as_dataset)
                      for pvid in f.dict.get('partitions', [])}

        d["partitions"] = partitions

        def table_entry(tvid):
            # Cached table record without its 'columns', plus an empty
            # 'installed_names' list. Built without concatenating items() to a
            # list, which fails on Python 3 where items() is a view.
            entry = {k: v for k, v in self.get_table(tvid).items() if k != 'columns'}
            entry['installed_names'] = []
            return entry

        d['tables'] = {tvid: table_entry(tvid) for tvid in f.dict.get('tables', [])}

        d['bundles'] = {vid: self.get_bundle(vid)
                        for vid in partitions.values()}

        for vid, b in d['bundles'].items():
            b['installed_partitions'] = [
                pvid for pvid, pbvid in partitions.items() if vid == pbvid]

        # Generate entries for the tables, using the names that they are installed with. These tables aren't
        # necessarily installed; this maps the installation names to vids if they
        # are installed.

        installed_table_names = {}

        def inst_table_entry(b, p, t):
            return dict(
                t_vid=t['vid'],
                t_name=t['name'],
                p_vid=p['vid'],
                p_vname=p['vname'],
                b_vid=b['identity']['vid'],
                b_vname=b['identity']['vname']
            )

        # NOTE(review): the entries built here are never stored, so
        # installed_table_names is always empty. The walk is kept because it
        # raises KeyError when a bundle is missing a referenced partition or
        # table. Confirm the intended key before populating the map.
        # One pass over the partitions is enough; the work does not depend on
        # which bundle an enclosing loop would be visiting.
        for pvid, bvid in partitions.items():
            bundle = d['bundles'][bvid]
            part = bundle['partitions'][pvid]
            for tvid in part['table_vids']:
                inst_table_entry(bundle, part, bundle['tables'][tvid])

        d['installed_table_names'] = installed_table_names

        # Collect the views and mviews

        views = {}

        for s in d['sections']:
            if s['tag'] in ('view', 'mview'):
                content = s.get('content', {})
                views[s['args']] = dict(
                    tag=s['tag'],
                    tc_names=content.get('tc_names'),
                    html=content.get('html'),
                    text=content.get('text'),
                )

        d['views'] = views

        return self.put(self.manifest_relpath(m.uid), d)
Exemplo n.º 47
0
 def test_parse_other(self):
     dn = DatasetNumber(100, 5, 'authoritative')
     self.assertEqual('d01C005', str(dn))
     self.assertEqual('G01C001Z005', str(GeneralNumber1('G', dn, 123)))
     self.assertEqual('G01C001Z005', ObjectNumber.parse(str(GeneralNumber1('G', dn, 123)), 'other1'))