def create_table(self, table_name, columns, ctypes, keys):
        """
        This method is used to create new table in the database.

        Input:
        Database configuration object and table name

        Output:
        New table created
        """
        
        #before creating a new table check if that table exists
        self.table_name = table_name
        table_flag = self.check_if_table_exists(table_name)
        if table_flag:
            print('Table already exists in the database. No need to create a new table')
        else:
            # create a new table since it does not exist
            print('Table - %s does not exist. Create a new table' % table_name)
            #get the description of the table
            table_columns = self.get_table_desc(columns, ctypes, keys)
            try:
                new_table = Table(
                        self.table_name,
                        self.metadata,
                        *table_columns
                        )
                #create new table                         
                new_table.create(checkfirst = True)
                print "Table '%s' created"%self.table_name

            except Exception:
                print('Error while creating the table %s' % self.table_name)
                raise
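
The pattern above relies on SQLAlchemy's checkfirst flag to make creation idempotent. A minimal self-contained sketch of the same idea in plain SQLAlchemy (engine URL, table and column names are illustrative, not from the source):

from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine

engine = create_engine('sqlite://')          # in-memory database for the demo
metadata = MetaData()
employees = Table('employees', metadata,
                  Column('id', Integer, primary_key=True),
                  Column('name', String(255)))
# checkfirst=True turns CREATE TABLE into a no-op when the table exists
employees.create(engine, checkfirst=True)
employees.create(engine, checkfirst=True)    # safe to call a second time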
Example #2
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    rbac = Table('rbac',
                 meta,
                 Column('id',
                        UUID(),
                        default=utils.generate_uuid,
                        primary_key=True),
                 Column('created_at',
                        DateTime,
                        default=lambda: timeutils.utcnow()),
                 Column('updated_at',
                        DateTime,
                        onupdate=lambda: timeutils.utcnow()),
                 Column('project_id', UUID(), nullable=False, unique=False),
                 Column('object_id', UUID(), nullable=False, unique=False),
                 Column('target_tenant', UUID(), nullable=False, unique=False),
                 Column('rbac_action',
                        String(255),
                        nullable=False,
                        unique=False),
                 mysql_engine='InnoDB',
                 mysql_charset='utf8')

    rbac.create(checkfirst=True)
Example #3
    def test_clone_table_adds_or_deletes_columns(self):
        meta = MetaData()
        meta.bind = self.engine

        table = Table('dummy',
                      meta,
                      Column('id', String(36), primary_key=True,
                             nullable=False),
                      Column('A', Boolean, default=False)
                      )
        table.create()

        newcols = [
            Column('B', Boolean, default=False),
            Column('C', String(255), default='foobar')
        ]
        ignorecols = [
            table.c.A.name
        ]
        new_table = migrate_utils.clone_table('new_dummy', table, meta,
                                              newcols=newcols,
                                              ignorecols=ignorecols)

        col_names = [c.name for c in new_table.columns]

        self.assertEqual(3, len(col_names))
        self.assertIsNotNone(new_table.c.B)
        self.assertIsNotNone(new_table.c.C)
        self.assertNotIn('A', col_names)
Example #4
    def test_clone_table_adds_or_deletes_columns(self):
        meta = MetaData()
        meta.bind = self.engine

        table = Table(
            'dummy', meta,
            Column('id', String(36), primary_key=True, nullable=False),
            Column('A', Boolean, default=False))
        table.create()

        newcols = [
            Column('B', Boolean, default=False),
            Column('C', String(255), default='foobar')
        ]
        ignorecols = [table.c.A.name]
        new_table = migrate_utils.clone_table('new_dummy',
                                              table,
                                              meta,
                                              newcols=newcols,
                                              ignorecols=ignorecols)

        col_names = [c.name for c in new_table.columns]

        self.assertEqual(3, len(col_names))
        self.assertIsNotNone(new_table.c.B)
        self.assertIsNotNone(new_table.c.C)
        self.assertNotIn('A', col_names)
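
clone_table itself comes from the project under test (heat's migrate_utils), so the sketch below is only a rough, self-contained illustration of the idea it exercises (drop the ignored columns, append the new ones), written in plain SQLAlchemy with invented names:

from sqlalchemy import Boolean, Column, MetaData, String, Table

meta = MetaData()
src = Table('dummy', meta,
            Column('id', String(36), primary_key=True, nullable=False),
            Column('A', Boolean, default=False))

ignorecols = {'A'}
newcols = [Column('B', Boolean, default=False),
           Column('C', String(255), default='foobar')]
kept = [Column(c.name, c.type, primary_key=c.primary_key, nullable=c.nullable)
        for c in src.columns if c.name not in ignorecols]
clone = Table('new_dummy', MetaData(), *(kept + newcols))
assert [c.name for c in clone.columns] == ['id', 'B', 'C']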
Example #5
def upgrade(migrate_engine):
    """Create share_snapshots table."""
    meta = MetaData()
    meta.bind = migrate_engine

    shares = Table('shares', meta, autoload=True)
    share_snapshots = Table(
        'share_snapshots', meta,
        Column('created_at', DateTime),
        Column('updated_at', DateTime),
        Column('deleted_at', DateTime),
        Column('deleted', Boolean),
        Column('id', String(length=36), primary_key=True, nullable=False),
        Column('user_id', String(length=255)),
        Column('project_id', String(length=255)),
        Column('share_id', String(36), ForeignKey('shares.id'),
               nullable=False),
        Column('size', Integer),
        Column('status', String(length=255)),
        Column('progress', String(length=255)),
        Column('display_name', String(length=255)),
        Column('display_description', String(length=255)),
        Column('share_size', Integer),
        Column('share_proto', String(length=255)),
        Column('export_location', String(255)),
        mysql_engine='InnoDB')

    try:
        share_snapshots.create()
    except Exception:
        LOG.error(_("Table %r not created!"), share_snapshots)
        raise
Example #6
    def create_table(self, table_name, columns, ctypes, keys):
        """
        This method is used to create new table in the database.

        Input:
        Database configuration object and table name

        Output:
        New table created
        """

        #before creating a new table check if that table exists
        self.table_name = table_name
        table_flag = self.check_if_table_exists(table_name)
        if table_flag:
            print('Table already exists in the database. No need to create a new table')
        else:
            # create a new table since it does not exist
            print('Table - %s does not exist. Create a new table' % table_name)
            #get the description of the table
            table_columns = self.get_table_desc(columns, ctypes, keys)
            try:
                new_table = Table(self.table_name, self.metadata,
                                  *table_columns)
                #create new table
                new_table.create(checkfirst=True)
                print "Table '%s' created" % self.table_name

            except Exception:
                print('Error while creating the table %s' % self.table_name)
                raise
Example #7
    def create_table(self,
                     table_name,
                     primary_id='id',
                     primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column
        unless specified via optional parameter primary_id, which will be used
        as the primary key of the table. Automatic id is set to be an
        auto-incrementing integer, while the type of custom primary_id can be a
        String or an Integer as specified with primary_type flag. The default
        length of String is 255. The caller can specify the length.
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='String')
            # custom length of String
            table4 = db.create_table('population4', primary_id='race', primary_type='String(50)')
        """
        table_name = self._valid_table_name(table_name)
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            match = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
            if match:
                if match.group(1) == 'Integer':
                    auto_flag = False
                    if primary_id == 'id':
                        auto_flag = True
                    col = Column(primary_id,
                                 Integer,
                                 primary_key=True,
                                 autoincrement=auto_flag)
                elif not match.group(3):
                    col = Column(primary_id, String(255), primary_key=True)
                else:
                    len_string = int(match.group(3)[1:-1])
                    len_string = min(len_string, 255)
                    col = Column(primary_id,
                                 String(len_string),
                                 primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'String'.")

            table = SQLATable(table_name, self.metadata, schema=self.schema)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
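
For reference, this is how the primary_type regex above decomposes its input; the snippet is runnable as-is:

import re

pattern = r'^(Integer)$|^(String)(\(\d+\))?$'
m = re.match(pattern, 'String(50)')
assert m.group(2) == 'String' and m.group(3) == '(50)'
assert int(m.group(3)[1:-1]) == 50          # the declared length
assert re.match(pattern, 'Integer').group(1) == 'Integer'
assert re.match(pattern, 'Float') is None   # rejected, raises DatasetException above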
Example #8
 def add_aspect(self, name, *columns):
     table = Table(
         name, self.metadata,
         Column('id', types.Integer, ForeignKey("entity.id")),
         *columns)
     table.create()
     self.aspects[name] = [x.name for x in columns]
     self.aspect_names = set(self.aspects.keys())
Example #9
		def wrap(fn):
			table_definition = TableDefinition()
			fn(table_definition)
			table = Table(name, self.meta)
			for attrname in table_definition.fields.keys():
				args, kw = table_definition.fields[attrname]
				table.append_column(Column(attrname, *args, **kw))
			table.create()
Example #10
 def _create_table(self, table_name):
     table_name = validate_name(table_name)
     log.debug("Creating table: %s on %r" % (table_name, self.engine))
     table = Table(table_name, self.meta)
     col = Column(ID_COLUMN, Integer, primary_key=True)
     table.append_column(col)
     table.create(self.engine)
     return table
Example #12
def create_table(engine, table_name):
    log.debug("Creating table: %s on %r" % (table_name, engine))
    table = Table(table_name, engine._metadata)
    col = Column('id', Integer, primary_key=True)
    table.append_column(col)
    table.create(engine)
    TABLES[engine][table_name] = table
    return table
Example #13
 def __init__(self, registry=None):
     self.registry = registry
     self.engine = create_engine('sqlite://')
     self.metadata = MetaData(self.engine)
     entity = Table(
         'entity', self.metadata,
         Column('id', types.Integer, primary_key=True))
     entity.create()
     self.aspects = propdict()
Example #14
def create_table(engine, table_name):
    with lock:
        log.debug("Creating table: %s on %r" % (table_name, engine))
        table = Table(table_name, engine._metadata)
        col = Column("id", Integer, primary_key=True)
        table.append_column(col)
        table.create(engine)
        engine._tables[table_name] = table
        return table
Example #15
def create_table(engine, table_name):
    with lock:
        log.debug("Creating table: %s on %r" % (table_name, engine))
        table = Table(table_name, engine._metadata)
        col = Column('id', Integer, primary_key=True)
        table.append_column(col)
        table.create(engine)
        engine._tables[table_name] = table
        return table
Example #16
    def test_insert_values(self, engine, connection):
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer))
        table.drop(checkfirst=True)
        table.create()
        connection.execute(table.insert([{'a': 1}, {'a': 2}]))

        result = table.select().execute().fetchall()
        expected = [(1, ), (2, )]
        self.assertEqual(result, expected)
Example #17
    def create_table(self, table_name, primary_id='id', primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column
        unless specified via optional parameter primary_id, which will be used
        as the primary key of the table. Automatic id is set to be an
        auto-incrementing integer, while the type of custom primary_id can be a
        String or an Integer as specified with primary_type flag. The default
        length of String is 255. The caller can specify the length.
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='String')
            # custom length of String
            table4 = db.create_table('population4', primary_id='race', primary_type='String(50)')
        """
        table_name = self._valid_table_name(table_name)
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            match = re.match(r'^(Integer)$|^(String)(\(\d+\))?$', primary_type)
            if match:
                if match.group(1) == 'Integer':
                    auto_flag = False
                    if primary_id == 'id':
                        auto_flag = True
                    col = Column(primary_id, Integer, primary_key=True, autoincrement=auto_flag)
                elif not match.group(3):
                    col = Column(primary_id, String(255), primary_key=True)
                else:
                    len_string = int(match.group(3)[1:-1])
                    len_string = min(len_string, 255)
                    col = Column(primary_id, String(len_string), primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'String'.")

            table = SQLATable(table_name, self.metadata, schema=self.schema)
            table.append_column(col)
            # always add date_added and date_updated to tables
            table.append_column(Column('created', DateTime, default=datetime.utcnow))
            table.append_column(Column('updated', DateTime, default=datetime.utcnow, onupdate=datetime.utcnow))

            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #18
	def wrap(fn):
		table_definition = OrderedProperties()
		fn(table_definition)
		table = Table(table_name, g.db_meta)
		for attrname in table_definition.keys():
			value = table_definition[attrname]
			if isinstance(value, Column):
				table.append_column(value)
			elif isinstance(value, Constraint):
				table.append_constraint(value)
		table.create(g.db_engine)
Example #19
    def test_insert_select(self, engine, connection):
        one_row = Table('one_row', MetaData(bind=engine), autoload=True)
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer))
        table.drop(checkfirst=True)
        table.create()
        connection.execute(table.insert().from_select(['a'], one_row.select()))

        result = table.select().execute().fetchall()
        expected = [(1, )]
        self.assertEqual(result, expected)
Example #20
 def add_aspect(self, name, *columns):
     table = Table(
         name, self.metadata,
         Column('id', types.Integer, ForeignKey("entity.id")),
         *columns)
     table.create()
     self.aspects[name] = propdict(
         ((x.name, x) for x in columns),
         _name=name,
         _table=table)
     self._aspect_names = set(self.aspects.keys())
Example #21
    def test_insert_values(self, engine, connection):
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer),
                      schema='pyhive_test_database')
        table.drop(checkfirst=True)
        table.create()
        connection.execute(table.insert([{'a': 1}, {'a': 2}]))

        result = table.select().execute().fetchall()
        expected = [(1,), (2,)]
        self.assertEqual(result, expected)
Example #22
    def create_table(cls, name, columns_data, pkey):
        columns = [
            Column(col_name, col_type, primary_key=(col_name == pkey))
            for col_name, col_type in columns_data.items()
        ]

        table = Table(name, cls._metadata, *columns)
        table.create(bind=cls._ctx.engine)
        base = automap_base(metadata=cls._metadata)
        base.prepare()

        return getattr(base.classes, name)
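
The automap call at the end follows SQLAlchemy's documented pattern for mapping tables already present in a MetaData. A self-contained sketch (table and column names invented for the demo):

from sqlalchemy import Column, Integer, MetaData, String, Table, create_engine
from sqlalchemy.ext.automap import automap_base

engine = create_engine('sqlite://')
metadata = MetaData()
Table('person', metadata,
      Column('id', Integer, primary_key=True),
      Column('name', String(50)))
metadata.create_all(engine)

Base = automap_base(metadata=metadata)
Base.prepare()                      # maps the tables already in metadata
Person = Base.classes.person        # ORM class generated from the table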
Example #23
    def test_migrate_data(self):
        meta = MetaData(bind=self.engine)

        # create TableA
        table_a = Table('TableA',
                        meta,
                        Column('id', Integer, primary_key=True),
                        Column('first', String(8), nullable=False),
                        Column('second', Integer))
        table_a.create()

        # update it with sample data
        values = [
            {'id': 1, 'first': 'a'},
            {'id': 2, 'first': 'b'},
            {'id': 3, 'first': 'c'}
        ]

        for value in values:
            self.engine.execute(table_a.insert(values=value))

        # create TableB similar to TableA, except column 'second'
        table_b = Table('TableB',
                        meta,
                        Column('id', Integer, primary_key=True),
                        Column('first', String(8), nullable=False))
        table_b.create()

        # migrate data
        migrate_utils.migrate_data(self.engine,
                                   table_a,
                                   table_b,
                                   ['second'])

        # validate table_a is dropped
        self.assertFalse(self.engine.dialect.has_table(
            self.engine.connect(),
            'TableA'),
            'Data migration failed to drop source table')

        # validate table_b is updated with data from table_a
        table_b_rows = list(table_b.select().execute())
        self.assertEqual(3,
                         len(table_b_rows),
                         "Data migration failed")
        table_b_values = []
        for row in table_b_rows:
            table_b_values.append({'id': row.id,
                                   'first': row.first})

        self.assertEqual(values,
                         table_b_values,
                         "Data migration failed with invalid data copy")
Example #24
def load_raw(eng, handle, dummy_separator='\v', table_name=None):
    """
    Uses SQLalchemy (and optionally psycopg2) to load raw data into a
    one-column table. Returns SQLAlchemy Table object

    Parameters
    ==========
    eng: required
        Must be a SQLAlchemy engine object.
    
    handle: required
        Must be a file-like object. I.e., returned by pyhcup.parser._open().

    dummy_separator: required (default: "\v")
        Must be a character not found in the data to be loaded. The
        psycopg2.cursor.copy_from() method uses a horizontal tab ('\t')
        by default, but I am afraid of occasional horizontal tabs in
        HCUP and PUDF data that we just haven't found yet.
        
    table_name: optional (default: None)
        Table name for the load. Will be generated automatically if not provided.

    """

    # get the filename sans extension for use in making a table name
    base_filename = os.path.split(handle.name)[-1].split('.')[0]

    # make a timestamp in YYYYMMDDhhmmss format
    # will be used as part of the table name
    now = datetime.datetime.now()
    timestamp = now.strftime('%Y%m%d%H%M%S')

    if table_name is None:
        table_name = '%s_%s_raw' % (base_filename, timestamp)

    # lowercase, else SQLAlchemy will fail to reflect later (seems to coerce case-sensitive)
    table_name = table_name.lower()

    # proceed to table creation
    table = Table(table_name, MetaData(), Column('line', Text()))
    table.create(bind=eng)

    if eng.driver == 'psycopg2':  # use Postgres COPY FROM
        conn = eng.raw_connection()
        cursor = conn.cursor()  # acquire a cursor from the connection object
        # load the data using psycopg2.cursor.copy_from() method
        cursor.copy_from(handle, '"%s"' % table_name, sep=dummy_separator)
        conn.commit()
        conn.close()
    else:  # fall back to line-by-line insert
        data = [{'line': l.strip()} for l in handle]
        eng.execute(table.insert(), data)

    return table
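
A hedged usage sketch for load_raw; the engine URL and file name below are placeholders, not from the source:

from sqlalchemy import create_engine

eng = create_engine('postgresql+psycopg2://user:pass@localhost/hcup')
with open('CORE_2012.asc') as handle:       # hypothetical raw data file
    raw_table = load_raw(eng, handle)
print(raw_table.name)                       # e.g. 'core_2012_<timestamp>_raw'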
Example #25
 def build_database(self):
     self.connect(test=False)
     if not os.path.isfile(self.name):
         sess = self.get_session()
         meta = MetaData()
         bt = Table('LabTable', meta,
                     Column('id', Integer, primary_key=True),
                     Column('labnumber', Integer),
                     Column('aliquot', Integer),
                     Column('collection_path', String(200)),
                     Column('repository_path', String(200)),
                     Column('create_date', DateTime))
         bt.create(sess.bind)
Example #26
    def test_insert_select(self, engine, connection):
        one_row = Table('one_row', MetaData(bind=engine), autoload=True)
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer),
                      schema='pyhive_test_database')
        table.drop(checkfirst=True)
        table.create()
        connection.execute('SET mapred.job.tracker=local')
        # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
        connection.execute(table.insert().from_select(['a'], one_row.select()))

        result = table.select().execute().fetchall()
        expected = [(1,)]
        self.assertEqual(result, expected)
Example #28
 def build_database(self):
     self.connect(test=False)
     if not os.path.isfile(self.name):
         sess = self.get_session()
         meta = MetaData()
         bt = Table('LabTable', meta, Column('id',
                                             Integer,
                                             primary_key=True),
                    Column('labnumber', Integer),
                    Column('aliquot', Integer),
                    Column('collection_path', String(200)),
                    Column('repository_path', String(200)),
                    Column('create_date', DateTime))
         bt.create(sess.bind)
Example #29
class TableHandler(object):

    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """

    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database.
        """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c)
                     for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table
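
The update-then-insert logic in _upsert can be exercised standalone. A minimal self-contained rendition against the legacy 1.x SQLAlchemy API used throughout these examples (table and data invented for the demo):

from sqlalchemy import (Column, Integer, MetaData, String, Table, and_,
                        create_engine)

engine = create_engine('sqlite://')
meta = MetaData()
people = Table('people', meta,
               Column('id', Integer, primary_key=True),
               Column('email', String(255)),
               Column('name', String(255)))
meta.create_all(engine)

def upsert(bind, table, data, unique_columns):
    # update the row matching the unique columns; insert if nothing matched
    key = and_(*[table.c[c] == data.get(c) for c in unique_columns])
    if bind.execute(table.update(key, data)).rowcount == 0:
        return bind.execute(table.insert(data)).inserted_primary_key[0]
    return bind.execute(table.select(key)).fetchone()['id']

pk = upsert(engine, people, {'email': 'ann@example.org', 'name': 'Ann'},
            ['email'])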
Example #30
    def test_migrate_data(self):
        meta = MetaData(bind=self.engine)

        # create TableA
        table_a = Table('TableA', meta, Column('id', Integer,
                                               primary_key=True),
                        Column('first', String(8), nullable=False),
                        Column('second', Integer))
        table_a.create()

        # update it with sample data
        values = [{
            'id': 1,
            'first': 'a'
        }, {
            'id': 2,
            'first': 'b'
        }, {
            'id': 3,
            'first': 'c'
        }]

        for value in values:
            self.engine.execute(table_a.insert(values=value))

        # create TableB similar to TableA, except column 'second'
        table_b = Table('TableB', meta, Column('id', Integer,
                                               primary_key=True),
                        Column('first', String(8), nullable=False))
        table_b.create()

        # migrate data
        migrate_utils.migrate_data(self.engine, table_a, table_b, ['second'])

        # validate table_a is dropped
        self.assertFalse(
            self.engine.dialect.has_table(self.engine.connect(), 'TableA'),
            'Data migration failed to drop source table')

        # validate table_b is updated with data from table_a
        table_b_rows = list(table_b.select().execute())
        self.assertEqual(3, len(table_b_rows), "Data migration failed")
        table_b_values = []
        for row in table_b_rows:
            table_b_values.append({'id': row.id, 'first': row.first})

        self.assertEqual(values, table_b_values,
                         "Data migration failed with invalid data copy")
Example #31
    def create_table(self, table_name, columns):
        """Create a table called table_name in the set database with the given
        schema (a dictionary of field_name:field_type).
        Note that table constraints are not added.
        """
        self.reflect()
        if self.table_exists(table_name): return

        kwargs = {}
        if self.protocol_manager.uses_schemas:
            kwargs = {'schema': self.database_name}

        new_table = Table(table_name, self.metadata, *columns, **kwargs)

        new_table.create(checkfirst=True)
        return new_table
Example #32
 def test_nullable_reflection(self):
     t = Table(
         "t",
         self.metadata,
         Column("a", Integer, nullable=True),
         Column("b", Integer, nullable=False),
     )
     t.create()
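     # the reflected names are expected upper-cased below, so this test
     # presumably targets a case-insensitive dialect (e.g. Oracle/Firebird)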
     eq_(
         dict((col["name"], col["nullable"])
              for col in inspect(self.metadata.bind).get_columns("t")),
         {
             "A": True,
             "B": False
         },
     )
Example #33
    def create_table(self,
                     table_name,
                     primary_id='id',
                     primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column 
        unless specified via optional parameter primary_id, which will be used 
        as the primary key of the table. Automatic id is set to be an 
        auto-incrementing integer, while the type of custom primary_id can be a 
        Text or an Integer as specified with primary_type flag. 
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call. 

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='Text')
        """
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            if primary_type == 'Integer':
                auto_flag = False
                if primary_id == 'id':
                    auto_flag = True
                col = Column(primary_id,
                             Integer,
                             primary_key=True,
                             autoincrement=auto_flag)
            elif primary_type == 'Text':
                col = Column(primary_id, Text, primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'Text'.")

            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #34
def dte_load(eng, handle, table_name):
    """
    Uses SQLAlchemy (and optionally psycopg2) to load DaysToEvent data from a csv file into a database table.
    Uses default schema; no support for specifying schema inside this function.
    Returns the row count of the newly created table.

    Parameters
    ==========
    eng: required
        Must be a SQLAlchemy engine object.
    
    handle: required
        Must be a file-like object, e.g. one returned by open(path).
    
    table_name: required
        Table name for the load.
    """
    
    table = Table(table_name, MetaData(),
        Column('key', BigInteger),
        Column('visitlink', BigInteger),
        Column('daystoevent', BigInteger)
    )
    table.create(eng, checkfirst=True)

    if eng.driver == 'psycopg2':  # use Postgres COPY FROM
        conn = eng.raw_connection()
        cursor = conn.cursor()  # acquire a cursor from the connection object
        cp_sql = "COPY %s FROM STDIN DELIMITER ',' CSV HEADER;" % (table_name)
        cursor.copy_expert(cp_sql, handle)
        conn.commit()
        conn.close()
    else:  # fall back to generic bulk insert
        data = []
        for line in handle:
            l = [int(x.strip()) for x in line.strip().split(',')]
            data.append({
                'key': l[0],
                'visitlink': l[1],
                'daystoevent': l[2]
            })
        eng.execute(table.insert(), data) 
    
    row_count = eng.execute(select([func.count()]).select_from(table)).fetchone()[0]
    
    return row_count
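
A hedged usage sketch for dte_load as well; the URL and CSV path are placeholders:

from sqlalchemy import create_engine

eng = create_engine('postgresql+psycopg2://user:pass@localhost/hcup')
with open('daystoevent.csv') as handle:     # hypothetical CSV with header row
    count = dte_load(eng, handle, 'dte_2012')
print('loaded %d rows' % count)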
Example #35
 def test_lots_of_types(self, engine, connection):
     # take type list from sqlalchemy.types
     types = [
         'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
         'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
         'BOOLEAN', 'SMALLINT', 'DATE', 'TIME',
         'String', 'Integer', 'SmallInteger',
         'Numeric', 'Float', 'DateTime', 'Date', 'Time', 'LargeBinary',
         'Boolean', 'Unicode', 'UnicodeText',
     ]
     cols = []
     for i, t in enumerate(types):
         cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
     table = Table('test_table', MetaData(bind=engine), *cols)
     table.drop(checkfirst=True)
     table.create()
     table.drop()
Example #36
class TableHandler(object):
    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """
    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database.
        """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c) for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table
Example #37
    def create_table(self, table_name, columns):
        """Create a table called table_name in the set database with the given
        schema (a dictionary of field_name:field_type).
        Note that table constraints are not added.
        """
        self.reflect()
        if self.table_exists(table_name):
            return

        kwargs = {}
        if self.protocol_manager.uses_schemas:
            kwargs = {"schema": self.database_name}

        new_table = Table(table_name, self.metadata, *columns, **kwargs)

        new_table.create(checkfirst=True)
        return new_table
Example #38
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    keys = Enum(name='key', *ZONE_ATTRIBUTE_KEYS)

    domain_attributes_table = Table(
        'domain_attributes', meta,
        Column('id', UUID(), default=utils.generate_uuid, primary_key=True),
        Column('version', Integer(), default=1, nullable=False),
        Column('created_at', DateTime, default=lambda: timeutils.utcnow()),
        Column('updated_at', DateTime, onupdate=lambda: timeutils.utcnow()),

        Column('key', keys),
        Column('value', String(255), nullable=False),
        Column('domain_id', UUID(), nullable=False),

        UniqueConstraint('key', 'value', 'domain_id',
                         name='unique_attributes'),
        ForeignKeyConstraint(['domain_id'], ['domains.id'],
                             ondelete='CASCADE'),

        mysql_engine='INNODB',
        mysql_charset='utf8'
    )

    domains_table = Table('domains', meta, autoload=True)
    types = Enum(name='types', metadata=meta, *ZONE_TYPES)
    types.create()

    # Add type and transferred_at to domains
    type_ = Column('type', types, default='PRIMARY', server_default='PRIMARY')
    transferred_at = Column('transferred_at', DateTime, default=None)

    type_.create(domains_table, populate_default=True)
    transferred_at.create(domains_table, populate_default=True)

    domain_attributes_table.create()

    dialect = migrate_engine.url.get_dialect().name
    if dialect.startswith('sqlite'):
        constraint = UniqueConstraint(
            'name', 'deleted', name='unique_domain_name', table=domains_table)

        # Add missing unique index
        constraint.create()
Example #39
    def create_table(self, table_name):
        """
        Creates a new table. The new table will automatically have an `id` column, which is
        set to be an auto-incrementing integer as the primary key of the table.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')
        """
        with self.lock:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            col = Column('id', Integer, primary_key=True)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
Example #40
 def linktab(self):
     if not hasattr(self, '_linktab'):
         if self.engine.has_table(self.linktab_name):
             self._linktab = Table(self.linktab_name, self.meta,
                                   autoload=True)
         else:
             table = Table(self.linktab_name, self.meta)
             col = Column('view', Unicode, index=True)
             table.append_column(col)
             col = Column('serial', Unicode(40))
             table.append_column(col)
             col = Column('key', Unicode, index=True)
             table.append_column(col)
             col = Column('fingerprint', Unicode(255), index=True)
             table.append_column(col)
             table.create(self.engine)
             self._linktab = table
     return self._linktab
Example #41
def upgrade(migrate_engine):
    meta.bind = migrate_engine

    status_enum = Enum(name='service_statuses', metadata=meta, *SERVICE_STATES)
    status_enum.create()

    service_status_table = Table('service_statuses', meta,
        Column('id', UUID(), default=utils.generate_uuid, primary_key=True),
        Column('created_at', DateTime),
        Column('updated_at', DateTime),

        Column('service_name', String(40), nullable=False),
        Column('hostname', String(255), nullable=False),
        Column('heartbeated_at', DateTime, nullable=True),
        Column('status', status_enum, nullable=False),
        Column('stats', Text, nullable=False),
        Column('capabilities', Text, nullable=False),
    )
    service_status_table.create()
Example #42
    def test_clone_table_retains_constraints(self):
        meta = MetaData()
        meta.bind = self.engine
        parent = Table('parent',
                       meta,
                       Column('id', String(36), primary_key=True,
                              nullable=False),
                       Column('A', Integer),
                       Column('B', Integer),
                       Column('C', Integer,
                              CheckConstraint('C>100', name="above 100")),
                       Column('D', Integer, unique=True),

                       UniqueConstraint('A', 'B', name='uix_1')
                       )
        parent.create()

        child = Table('child',
                      meta,
                      Column('id', String(36),
                             ForeignKey('parent.id', name="parent_ref"),
                             primary_key=True,
                             nullable=False),
                      Column('A', Boolean, default=False)
                      )
        child.create()

        ignorecols = [
            parent.c.D.name,
        ]

        new_parent = migrate_utils.clone_table('new_parent', parent, meta,
                                               ignorecols=ignorecols)
        new_child = migrate_utils.clone_table('new_child', child, meta)

        self.assertTrue(_has_constraint(new_parent.constraints,
                                        UniqueConstraint, 'uix_1'))
        self.assertTrue(_has_constraint(new_parent.c.C.constraints,
                                        CheckConstraint, 'above 100'))
        self.assertTrue(_has_constraint(new_child.constraints,
                                        ForeignKeyConstraint, 'parent_ref'))
Example #43
    def create_table(self, table_name, primary_id='id', primary_type='Integer'):
        """
        Creates a new table. The new table will automatically have an `id` column 
        unless specified via optional parameter primary_id, which will be used 
        as the primary key of the table. Automatic id is set to be an 
        auto-incrementing integer, while the type of custom primary_id can be a 
        Text or an Integer as specified with primary_type flag. 
        The caller will be responsible for the uniqueness of manual primary_id.

        This custom id feature is only available via direct create_table call. 

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')

            # custom id and type
            table2 = db.create_table('population2', 'age')
            table3 = db.create_table('population3', primary_id='race', primary_type='Text')
        """
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            if primary_type == 'Integer':
                auto_flag = False
                if primary_id == 'id':
                    auto_flag = True
                col = Column(primary_id, Integer, primary_key=True, autoincrement=auto_flag)
            elif primary_type == 'Text':
                col = Column(primary_id, Text, primary_key=True)
            else:
                raise DatasetException(
                    "The primary_type has to be either 'Integer' or 'Text'.")

            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #44
    def create_table(self, table_name):
        """
        Creates a new table. The new table will automatically have an `id` column, which is
        set to be an auto-incrementing integer as the primary key of the table.

        Returns a :py:class:`Table <dataset.Table>` instance.
        ::

            table = db.create_table('population')
        """
        self._acquire()
        try:
            log.debug("Creating table: %s on %r" % (table_name, self.engine))
            table = SQLATable(table_name, self.metadata)
            col = Column('id', Integer, primary_key=True)
            table.append_column(col)
            table.create(self.engine)
            self._tables[table_name] = table
            return Table(self, table)
        finally:
            self._release()
Example #45
    def test_clone_table_swaps_columns(self):
        meta = MetaData()
        meta.bind = self.engine

        table = Table("dummy1",
                      meta,
                      Column('id', String(36), primary_key=True,
                             nullable=False),
                      Column('A', Boolean, default=False),
                      )
        table.create()

        swapcols = {
            'A': Column('A', Integer, default=1),
        }

        new_table = migrate_utils.clone_table('swap_dummy', table, meta,
                                              swapcols=swapcols)

        self.assertIsNotNone(new_table.c.A)
        self.assertEqual(Integer, type(new_table.c.A.type))
Example #46
    def test_clone_table_ignores_constraints(self):
        meta = MetaData()
        meta.bind = self.engine
        table = Table(
            'constraints_check', meta,
            Column('id', String(36), primary_key=True, nullable=False),
            Column('A', Integer), Column('B', Integer),
            Column('C', Integer, CheckConstraint('C>100', name="above 100")),
            UniqueConstraint('A', 'B', name='uix_1'))
        table.create()

        ignorecons = [
            'uix_1',
        ]

        new_table = migrate_utils.clone_table('constraints_check_tmp',
                                              table,
                                              meta,
                                              ignorecons=ignorecons)
        self.assertFalse(
            _has_constraint(new_table.constraints, UniqueConstraint, 'uix_1'))
Example #47
 def test_lots_of_types(self, engine, connection):
     # Presto doesn't have raw CREATE TABLE support, so we only test hive
     # take type list from sqlalchemy.types
     types = [
         'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
         'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
         'BOOLEAN', 'SMALLINT', 'DATE', 'TIME',
         'String', 'Integer', 'SmallInteger',
         'Numeric', 'Float', 'DateTime', 'Date', 'Time', 'Binary',
         'Boolean', 'Unicode', 'UnicodeText',
     ]
     cols = []
     for i, t in enumerate(types):
         cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
     cols.append(Column('hive_date', HiveDate))
     cols.append(Column('hive_decimal', HiveDecimal))
     cols.append(Column('hive_timestamp', HiveTimestamp))
     table = Table('test_table', MetaData(bind=engine), *cols, schema='pyhive_test_database')
     table.drop(checkfirst=True)
     table.create()
     connection.execute('SET mapred.job.tracker=local')
     connection.execute('USE pyhive_test_database')
     big_number = 10 ** 10 - 1
     connection.execute("""
     INSERT OVERWRITE TABLE test_table
     SELECT
         1, "a", "a", "a", "a", "a", 0.1,
         0.1, 0.1, 0, 0, "a", "a",
         false, 1, 0, 0,
         "a", 1, 1,
         0.1, 0.1, 0, 0, 0, "a",
         false, "a", "a",
         0, %d, 123 + 2000
     FROM default.one_row
     """, big_number)
     row = connection.execute(table.select()).fetchone()
     self.assertEqual(row.hive_date, datetime.date(1970, 1, 1))
     self.assertEqual(row.hive_decimal, decimal.Decimal(big_number))
     self.assertEqual(row.hive_timestamp, datetime.datetime(1970, 1, 1, 0, 0, 2, 123))
     table.drop()
Example #49
    def test_clone_table_retains_constraints(self):
        meta = MetaData()
        meta.bind = self.engine
        parent = Table(
            'parent', meta,
            Column('id', String(36), primary_key=True, nullable=False),
            Column('A', Integer), Column('B', Integer),
            Column('C', Integer, CheckConstraint('C>100', name="above 100")),
            Column('D', Integer, unique=True),
            UniqueConstraint('A', 'B', name='uix_1'))
        parent.create()

        child = Table(
            'child', meta,
            Column('id',
                   String(36),
                   ForeignKey('parent.id', name="parent_ref"),
                   primary_key=True,
                   nullable=False), Column('A', Boolean, default=False))
        child.create()

        ignorecols = [
            parent.c.D.name,
        ]

        new_parent = migrate_utils.clone_table('new_parent',
                                               parent,
                                               meta,
                                               ignorecols=ignorecols)
        new_child = migrate_utils.clone_table('new_child', child, meta)

        self.assertTrue(
            _has_constraint(new_parent.constraints, UniqueConstraint, 'uix_1'))
        self.assertTrue(
            _has_constraint(new_parent.c.C.constraints, CheckConstraint,
                            'above 100'))
        self.assertTrue(
            _has_constraint(new_child.constraints, ForeignKeyConstraint,
                            'parent_ref'))
Example #50
    def create_table(self, schema_name, table_name, columns_configuration,
                     drop_first):
        metadata = MetaData()

        table = Table(table_name, metadata, schema=schema_name)

        for column_configuration in columns_configuration:
            table.append_column(
                self.create_column(column_configuration["destination"]))

        table.append_column(
            Column(
                Providers.AuditColumnsNames.TIMESTAMP,
                DateTime(timezone=True),
                server_default=func.now(),
            ))

        table.append_column(
            Column(
                Providers.AuditColumnsNames.IS_DELETED,
                Boolean,
                server_default="f",
                default=False,
            ))

        table.append_column(
            Column(Providers.AuditColumnsNames.CHANGE_VERSION, BigInteger))

        if drop_first:
            self.logger.debug(f"Dropping table {schema_name}.{table_name}")
            table.drop(self.target_db, checkfirst=True)
            self.logger.debug(f"Dropped table {schema_name}.{table_name}")

        self.logger.debug(f"Creating table {schema_name}.{table_name}")
        table.create(self.target_db, checkfirst=False)
        self.logger.debug(f"Created table {schema_name}.{table_name}")

        return
Example #51
    def test_clone_table_swaps_columns(self):
        meta = MetaData()
        meta.bind = self.engine

        table = Table(
            "dummy1",
            meta,
            Column('id', String(36), primary_key=True, nullable=False),
            Column('A', Boolean, default=False),
        )
        table.create()

        swapcols = {
            'A': Column('A', Integer, default=1),
        }

        new_table = migrate_utils.clone_table('swap_dummy',
                                              table,
                                              meta,
                                              swapcols=swapcols)

        self.assertIsNotNone(new_table.c.A)
        self.assertEqual(Integer, type(new_table.c.A.type))
Example #52
    def test_clone_table_ignores_constraints(self):
        meta = MetaData()
        meta.bind = self.engine
        table = Table('constraints_check',
                      meta,
                      Column('id', String(36), primary_key=True,
                             nullable=False),
                      Column('A', Integer),
                      Column('B', Integer),
                      Column('C', Integer,
                             CheckConstraint('C>100', name="above 100")),

                      UniqueConstraint('A', 'B', name='uix_1')
                      )
        table.create()

        ignorecons = [
            'uix_1',
        ]

        new_table = migrate_utils.clone_table('constraints_check_tmp', table,
                                              meta, ignorecons=ignorecons)
        self.assertFalse(_has_constraint(new_table.constraints,
                                         UniqueConstraint, 'uix_1'))
Example #53
def postgresql_create_table(metadata, engine, columns, object_name):
    table = Table(object_name, metadata, *columns, extend_existing=True)
    table.create(engine, checkfirst=True)
    logging.info('created table in database: %s', object_name)
Example #54
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    columns = []

    create = True

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self, name, connection, columns, label=None):

        super(SQLTable, self).__init__()

        self.sa_table = None
        self.sa_metadata = None

        self.name = name
        self.connection = connection

        self.label = label if label else name

        self.columns = columns or []
        for col in columns:
            col.sqltable = self

    def _get_sa_type(self, column):

        if (column.type == "Integer"):
            return Integer
        elif (column.type == "String"):
            #if (column.length is None): column.length = 128
            return Unicode(length=128)
        elif (column.type == "Float"):
            return Float
        elif (column.type == "Boolean"):
            return Boolean
        elif (column.type == "AutoIncrement"):
            return Integer
        elif (column.type == "Date"):
            return Date
        elif (column.type == "Time"):
            return Time
        elif (column.type == "DateTime"):
            return DateTime
        elif (column.type == "Binary"):
            return Binary
        else:
            raise Exception("Invalid data type (%s): %s" % (column, column.type))

    def finalize(self, ctx):

        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info("SQLTable Totals  ins/upd/sel: %d/%d/%d " %
                            (SQLTable._inserts, SQLTable._updates, SQLTable._selects))

        if (self._inserts + self._selects > 0):
            logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                            (self.name, self._inserts, self._updates, self._selects))
        if (self._unicode_errors > 0):
            logger.warning("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" %
                           (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)

        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):

        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))

        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        self._selects = 0
        self._inserts = 0
        self._updates = 0
        self._unicode_errors = 0

        # Drop?

        columns_ex = []
        for column in self.columns:

            logger.debug("Adding column to %s: %s" % (self, column))

            column.sqltable = self

            # Check for duplicate names
            if (column.name in columns_ex):
                raise ETLConfigurationException("Duplicate column name '%s' in %s" % (column.name, self))

            columns_ex.append(column.name)

            # Configure column
            if isinstance(column, SQLColumnFK):
                if column.fk_sqlcolumn.sqltable.sa_table is None:
                    logger.warning("Column %s foreign key %s table (%s) has not been defined in backend (ignoring).", column, column.fk_sqlcolumn, column.fk_sqlcolumn.sqltable)
                    continue

                self.sa_table.append_column(Column(column.name,
                                                   self._get_sa_type(column),
                                                   ForeignKey(column.fk_sqlcolumn.sqltable.sa_table.columns[column.fk_sqlcolumn.name]),
                                                   primary_key=column.pk,
                                                   nullable=column.nullable,
                                                   autoincrement=(True if column.type == "AutoIncrement" else False)))
            else:
                self.sa_table.append_column(Column(column.name,
                                                   self._get_sa_type(column),
                                                   primary_key=column.pk,
                                                   nullable=column.nullable,
                                                   autoincrement=(True if column.type == "AutoIncrement" else False)))

        # Check schema:

        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # TODO:? Extend?  (unsafe, allow read-only connections and make them default?)
        # TODO:? Delete columns (unsafe, allow read-only connections and make them default?)

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none is defined.
        """

        if self._pk is False:
            pk_cols = []
            for col in self.columns:
                if col.pk:
                    pk_cols.append(col)

            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None

        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)

        return and_(*clauses)

    def _rowtodict(self, row):

        d = {}
        for column in self.columns:
            #print column
            d[column.name] = getattr(row, column.name)

        return d

    def _find(self, ctx, attribs):

        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxys from SqlAlchemy
            yield self._rowtodict(r)


    def lookup(self, ctx, attribs, find_function=None):

        logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs))

        if (len(attribs.keys()) == 0):
            raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name)

        find_function = find_function or self._find
        rows = find_function(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row))
        return row

    def upsert(self, ctx, data, keys=()):
        """
        Upsert checks if the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """

        # TODO: Check for AutoIncrement in keys, shall not be used

        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk.name] = data[pk.name]

        # Do lookup
        if len(qfilter) > 0:

            row = self.lookup(ctx, qfilter)

            if (row):
                # Check row is identical
                for c in self.columns:
                    if c.type != "AutoIncrement":
                        v1 = row[c.name]
                        v2 = data[c.name]
                        if c.type == "Date":
                            v1 = row[c.name].strftime('%Y-%m-%d')
                            v2 = data[c.name].strftime('%Y-%m-%d')
                        if (isinstance(v1, str) or isinstance(v2, str)):
                            if (not isinstance(v1, str)): v1 = str(v1)
                            if (not isinstance(v2, str)): v2 = str(v2)
                        if (v1 != v2):
                            if (c.name not in self._lookup_changed_fields):
                                logger.warn("%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c.name, v1, v2))
                                #self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id

    def _prepare_row(self, ctx, data):

        row = {}

        for column in self.columns:
            if column.type != "AutoIncrement":
                try:
                    row[column.name] = data[column.name]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data))

                # Checks
                if (column.type == "String") and (not isinstance(row[column.name], str)):
                    self._unicode_errors = self._unicode_errors + 1
                    if (ctx.debug):
                        logger.warn("Unicode column %r received non-unicode string: %r " % (column.name, row[column.name]))

        return row

    def insert(self, ctx, data):

        row = self._prepare_row(ctx, data)

        logger.debug("Inserting in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.insert(row))

        pk = self.pk(ctx)
        if pk:
            row[pk.name] = res.inserted_primary_key[0]

        self._inserts = self._inserts + 1
        SQLTable._inserts = SQLTable._inserts + 1

        return row

    def update(self, ctx, data, keys=()):

        row = self._prepare_row(ctx, data)

        # Automatically calculate lookup if necessary
        pk = None
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk.name] = data[pk.name]

        logger.debug("Updating in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.update(self._attribsToClause(qfilter), row))

        self._updates = self._updates + 1
        SQLTable._updates = SQLTable._updates + 1

        if pk is not None:
            return row
        else:
            return None
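
For reference, a standalone sketch (not from the source project) of the filter-building pattern used by _attribsToClause() and _find() above, in plain SQLAlchemy 1.x; the table and values are hypothetical:

from sqlalchemy import MetaData, Table, Column, Integer, String, and_, select

meta = MetaData()
people = Table('people', meta,
               Column('id', Integer, primary_key=True),
               Column('country', String(2)))

def attribs_to_clause(table, attribs):
    # Lists/tuples become IN (...) clauses, scalars become equality tests,
    # and everything is combined with AND -- the same logic as the method above.
    clauses = []
    for k, v in attribs.items():
        if isinstance(v, (list, tuple)):
            clauses.append(table.c[k].in_(v))
        else:
            clauses.append(table.c[k] == v)
    return and_(*clauses)

query = select([people]).where(attribs_to_clause(people, {'country': ['ES', 'FR']}))
print(query)  # SELECT ... FROM people WHERE people.country IN (:country_1, :country_2)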
Example #55
from sqlalchemy import create_engine, MetaData, Table, Column
from sqlalchemy.types import Unicode

engine = create_engine('postgresql://python@localhost/')

loader = PostgresLoader(engine)

testmodule = loader.load_module('testmodule')

metadata = MetaData(bind=engine)

table = Table('testtable', metadata,
        Column('test', Unicode),
        Column('test2', Unicode))

table.drop(checkfirst=True)
table.create(checkfirst=True)

for i in range(20):
    table.insert({'test': 'test%d' % i, 'test2': 'test%d' % i}).execute()

print(engine.execute(testmodule.pyconcat(table.c.test, table.c.test2)).fetchall())

statement = """
CREATE TRIGGER mytrigger
BEFORE INSERT
ON %s
FOR EACH ROW EXECUTE PROCEDURE %s();
"""

engine.execute(statement % (table.name, testmodule.nullifying_trigger.__name__))
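
For clarity, the statement the engine receives after the % substitution above renders as follows (using the 'testtable' table and the nullifying_trigger function defined earlier in this example):

rendered = statement % (table.name, testmodule.nullifying_trigger.__name__)
print(rendered)
# CREATE TRIGGER mytrigger
# BEFORE INSERT
# ON testtable
# FOR EACH ROW EXECUTE PROCEDURE nullifying_trigger();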
Example #56
class Table(object):
    def __init__(self, db, schema, table, columns=None):
        self.db = db
        self.schema = schema
        self.name = table
        self.engine = create_engine(db.url)
        self.metadata = MetaData(schema=schema)
        self.metadata.bind = self.engine
        # http://docs.sqlalchemy.org/en/rel_1_0/core/metadata.html
        # if provided columns (SQLAlchemy columns), create the table
        if table:
            if columns:
                self.table = SQLATable(
                    table, self.metadata, *columns, schema=self.schema
                )
                self.table.create()
            # otherwise just load from db
            else:
                self.table = SQLATable(
                    table, self.metadata, schema=self.schema, autoload=True
                )
            self.indexes = dict((i.name, i) for i in self.table.indexes)
            self._is_dropped = False
        else:
            self._is_dropped = True
            self.table = None

    @property
    def _normalized_columns(self):
        return list(map(normalize_column_name, self.columns))

    @property
    def columns(self):
        """Return list of all columns in table
        """
        return list(self.table.columns.keys())

    @property
    def sqla_columns(self):
        """Return all columns in table as sqlalchemy column types
        """
        return self.table.columns

    @property
    def column_types(self):
        """Return a dict mapping column name to type for all columns in table
        """
        column_types = {}
        for c in self.sqla_columns:
            column_types[c.name] = c.type
        return column_types

    @property
    def primary_key(self):
        """Return a list of columns making up the primary key constraint
        """
        return [c.name for c in self.table.primary_key]

    @property
    def op(self):
        ctx = MigrationContext.configure(self.engine.connect())
        return Operations(ctx)

    def _valid_table_name(self, table_name):
        """Check if the table name is obviously invalid.
        """
        if table_name is None or not len(table_name.strip()):
            raise ValueError("Invalid table name: %r" % table_name)
        return table_name.strip()

    def _update_table(self, table_name):
        self.metadata = MetaData(schema=self.schema)
        self.metadata.bind = self.engine
        return SQLATable(table_name, self.metadata, schema=self.schema)

    def add_primary_key(self, column="id"):
        """Add primary key constraint to specified column
        """
        if not self.primary_key:
            sql = """ALTER TABLE {s}.{t}
                     ADD PRIMARY KEY ({c})
                  """.format(
                s=self.schema, t=self.name, c=column
            )
            self.db.execute(sql)

    def drop(self):
        """Drop the table from the database
        """
        if self._is_dropped is False:
            self.table.drop(self.engine)
        self._is_dropped = True

    def _check_dropped(self):
        if self._is_dropped:
            raise DatasetException(
                "the table has been dropped. this object should not be used again."
            )

    def _args_to_clause(self, args):
        clauses = []
        for k, v in args.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.table.c[k].in_(v))
            else:
                clauses.append(self.table.c[k] == v)
        return and_(*clauses)

    def create_column(self, name, type):
        """
        Explicitly create a new column ``name`` of a specified type.
        ``type`` must be a `SQLAlchemy column type <http://docs.sqlalchemy.org/en/rel_0_8/core/types.html>`_.
        ::

            table.create_column('created_at', sqlalchemy.DateTime)
        """
        self._check_dropped()
        if normalize_column_name(name) not in self._normalized_columns:
            self.op.add_column(self.table.name, Column(name, type), self.table.schema)
            self.table = self._update_table(self.table.name)

    def drop_column(self, name):
        """
        Drop the column ``name``
        ::

            table.drop_column('created_at')
        """
        self._check_dropped()
        if name in list(self.table.columns.keys()):
            self.op.drop_column(self.table.name, name, schema=self.schema)
            self.table = self._update_table(self.table.name)

    def create_index(self, columns, name=None, index_type="btree"):
        """
        Create an index to speed up queries on a table.
        If no ``name`` is given a random name is created.
        ::
            table.create_index(['name', 'country'])
        """
        self._check_dropped()
        if not name:
            sig = "||".join(columns + [index_type])
            # This is a work-around for a bug in <=0.6.1 which would create
            # indexes based on hash() rather than a proper hash.
            key = abs(hash(sig))
            name = "ix_%s_%s" % (self.table.name, key)
            if name in self.indexes:
                return self.indexes[name]
            key = sha1(sig.encode("utf-8")).hexdigest()[:16]
            name = "ix_%s_%s" % (self.table.name, key)
        if name in self.indexes:
            return self.indexes[name]
        # self.db._acquire()
        columns = [self.table.c[col] for col in columns]
        idx = Index(name, *columns, postgresql_using=index_type)
        idx.create(self.engine)
        # finally:
        #    self.db._release()
        self.indexes[name] = idx
        return idx

    def create_index_geom(self, column="geom"):
        """Shortcut to create index on geometry
        """
        self.create_index([column], index_type="gist")

    def distinct(self, *columns, **_filter):
        """
        Return all rows of a table, removing rows with duplicate values in ``columns``.
        Internally this creates a `DISTINCT statement <http://www.w3schools.com/sql/sql_distinct.asp>`_.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        self._check_dropped()
        qargs = []
        try:
            columns = [self.table.c[c] for c in columns]
            for col, val in _filter.items():
                qargs.append(self.table.c[col] == val)
        except KeyError:
            return []

        q = expression.select(
            columns,
            distinct=True,
            whereclause=and_(*qargs),
            order_by=[c.asc() for c in columns],
        )
        # if just looking at one column, return a simple list
        if len(columns) == 1:
            return itertools.chain.from_iterable(self.engine.execute(q))
        # otherwise return specified row_type
        else:
            return ResultIter(self.engine.execute(q), row_type=self.db.row_type)

    def insert(self, row):
        """
        Add a row (type: dict) by inserting it into the table.
        Columns must exist.
        ::
            data = dict(title='I am a banana!')
            table.insert(data)
        Returns the inserted row's primary key.
        """
        self._check_dropped()
        res = self.engine.execute(self.table.insert(row))
        if len(res.inserted_primary_key) > 0:
            return res.inserted_primary_key[0]

    def insert_many(self, rows, chunk_size=1000):
        """
        Add many rows at a time, which is significantly faster than adding
        them one by one. By default the rows are processed in chunks of
        1000 per commit, unless you specify a different ``chunk_size``.
        See :py:meth:`insert() <dataset.Table.insert>` for details on
        the other parameters.
        ::
            rows = [dict(name='Dolly')] * 10000
            table.insert_many(rows)
        """

        def _process_chunk(chunk):
            self.table.insert().execute(chunk)

        self._check_dropped()

        chunk = []
        for i, row in enumerate(rows, start=1):
            chunk.append(row)
            if i % chunk_size == 0:
                _process_chunk(chunk)
                chunk = []
        if chunk:
            _process_chunk(chunk)

    def rename(self, name):
        """Rename the table
        """
        sql = """ALTER TABLE {s}.{t} RENAME TO {name}
              """.format(
            s=self.schema, t=self.name, name=name
        )
        self.engine.execute(sql)
        self.table = SQLATable(name, self.metadata, schema=self.schema, autoload=True)

    def find_one(self, **kwargs):
        """
        Works just like :py:meth:`find() <dataset.Table.find>` but returns one result, or None.
        ::
            row = table.find_one(country='United States')
        """
        kwargs["_limit"] = 1
        iterator = self.find(**kwargs)
        try:
            return next(iterator)
        except StopIteration:
            return None

    def _args_to_order_by(self, order_by):
        if order_by[0] == "-":
            return self.table.c[order_by[1:]].desc()
        else:
            return self.table.c[order_by].asc()

    def find(
        self,
        _limit=None,
        _offset=0,
        _step=5000,
        order_by="id",
        return_count=False,
        **_filter
    ):
        """
        Performs a simple search on the table. Simply pass keyword arguments as ``filter``.
        ::
            results = table.find(country='France')
            results = table.find(country='France', year=1980)
        Using ``_limit``::
            # just return the first 10 rows
            results = table.find(country='France', _limit=10)
        You can sort the results by single or multiple columns. Append a minus sign
        to the column name for descending order::
            # sort results by a column 'year'
            results = table.find(country='France', order_by='year')
            # return all rows sorted by multiple columns (by year in descending order)
            results = table.find(order_by=['country', '-year'])
        By default :py:meth:`find() <dataset.Table.find>` will break the
        query into chunks of ``_step`` rows to prevent huge tables
        from being loaded into memory at once.
        For more complex queries, please use :py:meth:`db.query()`
        instead."""
        self._check_dropped()
        if not isinstance(order_by, (list, tuple)):
            order_by = [order_by]
        order_by = [
            o
            for o in order_by
            if (o.startswith("-") and o[1:] or o) in self.table.columns
        ]
        order_by = [self._args_to_order_by(o) for o in order_by]

        args = self._args_to_clause(_filter)

        # query total number of rows first
        count_query = alias(
            self.table.select(whereclause=args, limit=_limit, offset=_offset),
            name="count_query_alias",
        ).count()
        rp = self.engine.execute(count_query)
        total_row_count = rp.fetchone()[0]
        if return_count:
            return total_row_count

        if _limit is None:
            _limit = total_row_count

        if _step is None or _step is False or _step == 0:
            _step = total_row_count

        if total_row_count > _step and not order_by:
            _step = total_row_count
            log.warning(
                "query cannot be broken into smaller sections because it is unordered"
            )

        queries = []

        for i in count():
            qoffset = _offset + (_step * i)
            qlimit = min(_limit - (_step * i), _step)
            if qlimit <= 0:
                break
            queries.append(
                self.table.select(
                    whereclause=args, limit=qlimit, offset=qoffset, order_by=order_by
                )
            )
        return ResultIter(
            (self.engine.execute(q) for q in queries), row_type=self.db.row_type
        )

    def count(self, **_filter):
        """
        Return the count of results for the given filter set
        (same filter options as with ``find()``).
        """
        return self.find(return_count=True, **_filter)

    def __getitem__(self, item):
        """
        This is an alias for distinct which allows the table to be queried using
        square bracket syntax.
        ::
            # Same as distinct:
            print(list(table['year']))
        """
        if not isinstance(item, tuple):
            item = (item,)
        return self.distinct(*item)

    def all(self):
        """
        Returns all rows of the table as simple dictionaries. This is simply a shortcut
        to *find()* called with no arguments.
        ::
            rows = table.all()"""
        return self.find()

    def __iter__(self):
        """
        Allows for iterating over all rows in the table without explicitly
        calling :py:meth:`all() <dataset.Table.all>`.
        ::
            for row in table:
                print(row)
        """
        return self.all()

    def __repr__(self):
        return "<Table(%s)>" % self.table.name
Example #57
class Table(object):
    """Represents a table in a database and exposes common operations."""
    PRIMARY_DEFAULT = 'id'

    def __init__(self,
                 database,
                 table_name,
                 primary_id=None,
                 primary_type=None,
                 auto_create=False):
        """Initialise the table from database schema."""
        self.db = database
        self.name = normalize_table_name(table_name)
        self._table = None
        self._indexes = []
        self._primary_id = primary_id
        self._primary_type = primary_type
        self._auto_create = auto_create

    @property
    def exists(self):
        """Check to see if the table currently exists in the database."""
        if self._table is not None:
            return True
        return self.name in self.db

    @property
    def table(self):
        """Get a reference to the table, which may be reflected or created."""
        if self._table is None:
            self._sync_table(())
        return self._table

    @property
    def columns(self):
        """Get a listing of all columns that exist in the table."""
        if not self.exists:
            return []
        return self.table.columns.keys()

    def has_column(self, column):
        """Check if a column with the given name exists on this table."""
        return normalize_column_name(column) in self.columns

    def insert(self, row, ensure=None, types=None):
        """Add a ``row`` dict by inserting it into the table.

        If ``ensure`` is set, any keys of the row that are not
        table columns will be created automatically.

        During column creation, ``types`` will be checked for a key
        matching the name of a column to be created, and the given
        SQLAlchemy column type will be used. Otherwise, the type is
        guessed from the row value, defaulting to a simple unicode
        field.
        ::

            data = dict(title='I am a banana!')
            table.insert(data)

        Returns the inserted row's primary key.
        """
        row = self._sync_columns(row, ensure, types=types)
        res = self.db.executable.execute(self.table.insert(row))
        if len(res.inserted_primary_key) > 0:
            return res.inserted_primary_key[0]
        return True

    def insert_ignore(self, row, keys, ensure=None, types=None):
        """Add a ``row`` dict into the table if the row does not exist.

        If rows with matching ``keys`` already exist, no change is made.

        Setting ``ensure`` results in automatically creating missing columns,
        i.e., keys of the row that are not table columns.

        During column creation, ``types`` will be checked for a key
        matching the name of a column to be created, and the given
        SQLAlchemy column type will be used. Otherwise, the type is
        guessed from the row value, defaulting to a simple unicode
        field.
        ::

            data = dict(id=10, title='I am a banana!')
            table.insert_ignore(data, ['id'])
        """
        row = self._sync_columns(row, ensure, types=types)
        if self._check_ensure(ensure):
            self.create_index(keys)
        args, _ = self._keys_to_args(row, keys)
        if self.count(**args) == 0:
            return self.insert(row, ensure=False)
        return False

    def insert_many(self, rows, chunk_size=1000, ensure=None, types=None):
        """Add many rows at a time.

        This is significantly faster than adding them one by one. Per default
        the rows are processed in chunks of 1000 per commit, unless you specify
        a different ``chunk_size``.

        See :py:meth:`insert() <dataset.Table.insert>` for details on
        the other parameters.
        ::

            rows = [dict(name='Dolly')] * 10000
            table.insert_many(rows)
        """
        chunk = []
        for row in rows:
            row = self._sync_columns(row, ensure, types=types)
            chunk.append(row)
            if len(chunk) == chunk_size:
                self.table.insert().execute(chunk)
                chunk = []

        if len(chunk):
            self.table.insert().execute(chunk)

    def update(self, row, keys, ensure=None, types=None, return_count=False):
        """Update a row in the table.

        The update is managed via the set of column names stated in ``keys``:
        they will be used as filters for the data to be updated, using the
        values in ``row``.
        ::

            # update all entries with id matching 10, setting their title columns
            data = dict(id=10, title='I am a banana!')
            table.update(data, ['id'])

        If keys in ``row`` update columns not present in the table, they will
        be created based on the settings of ``ensure`` and ``types``, matching
        the behavior of :py:meth:`insert() <dataset.Table.insert>`.
        """
        row = self._sync_columns(row, ensure, types=types)
        args, row = self._keys_to_args(row, keys)
        clause = self._args_to_clause(args)
        if not len(row):
            return self.count(clause)
        stmt = self.table.update(whereclause=clause, values=row)
        rp = self.db.executable.execute(stmt)
        if rp.supports_sane_rowcount():
            return rp.rowcount
        if return_count:
            return self.count(clause)

    def upsert(self, row, keys, ensure=None, types=None):
        """An UPSERT is a smart combination of insert and update.

        If rows with matching ``keys`` exist they will be updated, otherwise a
        new row is inserted in the table.
        ::

            data = dict(id=10, title='I am a banana!')
            table.upsert(data, ['id'])
        """
        row = self._sync_columns(row, ensure, types=types)
        if self._check_ensure(ensure):
            self.create_index(keys)
        row_count = self.update(row, keys, ensure=False, return_count=True)
        if row_count == 0:
            return self.insert(row, ensure=False)
        return True

    def delete(self, *clauses, **filters):
        """Delete rows from the table.

        Keyword arguments can be used to add column-based filters. The filter
        criterion will always be equality:
        ::

            table.delete(place='Berlin')

        If no arguments are given, all records are deleted.
        """
        if not self.exists:
            return False
        clause = self._args_to_clause(filters, clauses=clauses)
        stmt = self.table.delete(whereclause=clause)
        rp = self.db.executable.execute(stmt)
        return rp.rowcount > 0

    def _reflect_table(self):
        """Load the tables definition from the database."""
        with self.db.lock:
            try:
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema,
                                        autoload=True)
            except NoSuchTableError:
                pass

    def _threading_warn(self):
        if self.db.in_transaction and threading.active_count() > 1:
            warnings.warn(
                "Changing the database schema inside a transaction "
                "in a multi-threaded environment is likely to lead "
                "to race conditions and synchronization issues.",
                RuntimeWarning)

    def _sync_table(self, columns):
        """Lazy load, create or adapt the table structure in the database."""
        if self._table is None:
            # Load an existing table from the database.
            self._reflect_table()
        if self._table is None:
            # Create the table with an initial set of columns.
            if not self._auto_create:
                raise DatasetException("Table does not exist: %s" % self.name)
            # Keep the lock scope small because this is run very often.
            with self.db.lock:
                self._threading_warn()
                self._table = SQLATable(self.name,
                                        self.db.metadata,
                                        schema=self.db.schema)
                if self._primary_id is not False:
                    # This can go wrong on a DBMS like MySQL or SQLite, where
                    # a table cannot be created without any columns.
                    primary_id = self._primary_id or self.PRIMARY_DEFAULT
                    primary_type = self._primary_type or Types.integer
                    increment = primary_type in [Types.integer, Types.bigint]
                    column = Column(primary_id,
                                    primary_type,
                                    primary_key=True,
                                    autoincrement=increment)
                    self._table.append_column(column)
                for column in columns:
                    if not column.name == self._primary_id:
                        self._table.append_column(column)
                self._table.create(self.db.executable, checkfirst=True)
        elif len(columns):
            with self.db.lock:
                self._threading_warn()
                for column in columns:
                    if not self.has_column(column.name):
                        self.db.op.add_column(self.name, column,
                                              self.db.schema)
                self._reflect_table()

    def _sync_columns(self, row, ensure, types=None):
        """Create missing columns (or the table) prior to writes.

        If automatic schema generation is disabled (``ensure`` is ``False``),
        this will remove any keys from the ``row`` for which there is no
        matching column.
        """
        columns = self.columns
        ensure = self._check_ensure(ensure)
        types = types or {}
        types = {normalize_column_name(k): v for (k, v) in types.items()}
        out = {}
        sync_columns = []
        for name, value in row.items():
            name = normalize_column_name(name)
            if ensure and name not in columns:
                _type = types.get(name)
                if _type is None:
                    _type = self.db.types.guess(value)
                sync_columns.append(Column(name, _type))
                columns.append(name)
            if name in columns:
                out[name] = value
        self._sync_table(sync_columns)
        return out

    def _check_ensure(self, ensure):
        if ensure is None:
            return self.db.ensure_schema
        return ensure

    def _args_to_clause(self, args, clauses=()):
        clauses = list(clauses)
        for column, value in args.items():
            if not self.has_column(column):
                clauses.append(false())
            elif isinstance(value, (list, tuple)):
                clauses.append(self.table.c[column].in_(value))
            else:
                clauses.append(self.table.c[column] == value)
        return and_(*clauses)

    def _args_to_order_by(self, order_by):
        orderings = []
        for ordering in ensure_tuple(order_by):
            if ordering is None:
                continue
            column = ordering.lstrip('-')
            if column not in self.table.columns:
                continue
            if ordering.startswith('-'):
                orderings.append(self.table.c[column].desc())
            else:
                orderings.append(self.table.c[column].asc())
        return orderings

    def _keys_to_args(self, row, keys):
        keys = ensure_tuple(keys)
        keys = [normalize_column_name(k) for k in keys]
        # keys = [self.has_column(k) for k in keys]
        row = row.copy()
        args = {k: row.pop(k) for k in keys if k in row}
        return args, row

    def create_column(self, name, type):
        """Create a new column ``name`` of a specified type.
        ::

            table.create_column('created_at', db.types.datetime)
        """
        name = normalize_column_name(name)
        if self.has_column(name):
            log.debug("Column exists: %s" % name)
            return
        self._sync_table((Column(name, type), ))

    def create_column_by_example(self, name, value):
        """
        Explicitly create a new column ``name`` with a type that is appropriate
        to store the given example ``value``.  The type is guessed in the same
        way as for the insert method with ``ensure=True``.
        ::

            table.create_column_by_example('length', 4.2)

        If a column of the same name already exists, no action is taken, even
        if it is not of the type we would have created.
        """
        type_ = self.db.types.guess(value)
        self.create_column(name, type_)

    def drop_column(self, name):
        """Drop the column ``name``.
        ::
            table.drop_column('created_at')
        """
        if self.db.engine.dialect.name == 'sqlite':
            raise RuntimeError("SQLite does not support dropping columns.")
        name = normalize_column_name(name)
        with self.db.lock:
            if not self.exists or not self.has_column(name):
                log.debug("Column does not exist: %s", name)
                return

            self._threading_warn()
            self.db.op.drop_column(self.table.name, name, self.table.schema)
            self._reflect_table()

    def drop(self):
        """Drop the table from the database.

        Deletes both the schema and all the contents within it.
        """
        with self.db.lock:
            if self.exists:
                self._threading_warn()
                self.table.drop(self.db.executable, checkfirst=True)
                self._table = None

    def has_index(self, columns):
        """Check if an index exists to cover the given ``columns``."""
        if not self.exists:
            return False
        columns = set([normalize_column_name(c) for c in columns])
        if columns in self._indexes:
            return True
        for column in columns:
            if not self.has_column(column):
                return False
        indexes = self.db.inspect.get_indexes(self.name, schema=self.db.schema)
        for index in indexes:
            if columns == set(index.get('column_names', [])):
                self._indexes.append(columns)
                return True
        return False

    def create_index(self, columns, name=None, **kw):
        """Create an index to speed up queries on a table.

        If no ``name`` is given a random name is created.
        ::

            table.create_index(['name', 'country'])
        """
        columns = [normalize_column_name(c) for c in ensure_tuple(columns)]
        with self.db.lock:
            if not self.exists:
                raise DatasetException("Table has not been created yet.")

            for column in columns:
                if not self.has_column(column):
                    return

            if not self.has_index(columns):
                self._threading_warn()
                name = name or index_name(self.name, columns)
                columns = [self.table.c[c] for c in columns]
                idx = Index(name, *columns, **kw)
                idx.create(self.db.executable)

    def find(self, *_clauses, **kwargs):
        """Perform a simple search on the table.

        Simply pass keyword arguments as ``filter``.
        ::

            results = table.find(country='France')
            results = table.find(country='France', year=1980)

        Using ``_limit``::

            # just return the first 10 rows
            results = table.find(country='France', _limit=10)

        You can sort the results by single or multiple columns. Append a minus
        sign to the column name for descending order::

            # sort results by a column 'year'
            results = table.find(country='France', order_by='year')
            # return all rows sorted by multiple columns (descending by year)
            results = table.find(order_by=['country', '-year'])

        To perform complex queries with advanced filters or to perform
        aggregation, use :py:meth:`db.query() <dataset.Database.query>`
        instead.
        """
        if not self.exists:
            return []

        _limit = kwargs.pop('_limit', None)
        _offset = kwargs.pop('_offset', 0)
        order_by = kwargs.pop('order_by', None)
        _streamed = kwargs.pop('_streamed', False)
        _step = kwargs.pop('_step', QUERY_STEP)
        if _step is False or _step == 0:
            _step = None

        order_by = self._args_to_order_by(order_by)
        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = self.table.select(whereclause=args,
                                  limit=_limit,
                                  offset=_offset)
        if len(order_by):
            query = query.order_by(*order_by)

        conn = self.db.executable
        if _streamed:
            conn = self.db.engine.connect()
            conn = conn.execution_options(stream_results=True)

        return ResultIter(conn.execute(query),
                          row_type=self.db.row_type,
                          step=_step)

    def find_one(self, *args, **kwargs):
        """Get a single result from the table.

        Works just like :py:meth:`find() <dataset.Table.find>` but returns one
        result, or ``None``.
        ::

            row = table.find_one(country='United States')
        """
        if not self.exists:
            return None

        kwargs['_limit'] = 1
        kwargs['_step'] = None
        resiter = self.find(*args, **kwargs)
        try:
            for row in resiter:
                return row
        finally:
            resiter.close()

    def count(self, *_clauses, **kwargs):
        """Return the count of results for the given filter set."""
        # NOTE: this does not have support for limit and offset since I can't
        # see how this is useful. Still, there might be compatibility issues
        # with people using these flags. Let's see how it goes.
        if not self.exists:
            return 0

        args = self._args_to_clause(kwargs, clauses=_clauses)
        query = select([func.count()], whereclause=args)
        query = query.select_from(self.table)
        rp = self.db.executable.execute(query)
        return rp.fetchone()[0]

    def __len__(self):
        """Return the number of rows in the table."""
        return self.count()

    def distinct(self, *args, **_filter):
        """Return all the unique (distinct) values for the given ``columns``.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        if not self.exists:
            return []

        filters = []
        for column, value in _filter.items():
            if not self.has_column(column):
                raise DatasetException("No such column: %s" % column)
            filters.append(self.table.c[column] == value)

        columns = []
        for column in args:
            if isinstance(column, ClauseElement):
                filters.append(column)
            else:
                if not self.has_column(column):
                    raise DatasetException("No such column: %s" % column)
                columns.append(self.table.c[column])

        if not len(columns):
            return []

        q = expression.select(columns,
                              distinct=True,
                              whereclause=and_(*filters),
                              order_by=[c.asc() for c in columns])
        return self.db.query(q)

    # Legacy methods for running find queries.
    all = find

    def __iter__(self):
        """Return all rows of the table as simple dictionaries.

        Allows for iterating over all rows in the table without explicitly
        calling :py:meth:`find() <dataset.Table.find>`.
        ::

            for row in table:
                print(row)
        """
        return self.find()

    def __repr__(self):
        """Get table representation."""
        return '<Table(%s)>' % self.table.name
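
This class mirrors the public API of the dataset library; assuming that context, a typical session looks like the following (illustrative values, in-memory SQLite):

import dataset

db = dataset.connect('sqlite:///:memory:')
table = db['people']                                   # table is created lazily
table.insert(dict(name='Dolly', year=1980))
table.upsert(dict(name='Dolly', year=1981), ['name'])  # update matched by key
print(table.find_one(name='Dolly')['year'])            # -> 1981
print(len(table))                                      # -> 1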