Example #1
    def test_insert_select(self, engine, connection):
        one_row = Table('one_row', MetaData(bind=engine), autoload=True)
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer))
        table.drop(checkfirst=True)
        table.create()
        connection.execute(table.insert().from_select(['a'], one_row.select()))

        result = table.select().execute().fetchall()
        expected = [(1, )]
        self.assertEqual(result, expected)
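
Note: the bound-MetaData (`MetaData(bind=engine)`), `autoload=True`, and implicit-execution (`table.select().execute()`) idioms used throughout these examples are SQLAlchemy 1.x only; all three were removed in SQLAlchemy 2.0. A minimal sketch of the same round-trip in 2.0 style, assuming an existing `engine`:

from sqlalchemy import Column, Integer, MetaData, Table, select

metadata = MetaData()
insert_test = Table('insert_test', metadata, Column('a', Integer))

with engine.begin() as conn:  # `engine` is assumed to exist already
    # autoload_with replaces autoload=True for reflection
    one_row = Table('one_row', metadata, autoload_with=conn)
    insert_test.drop(conn, checkfirst=True)
    insert_test.create(conn)
    conn.execute(insert_test.insert().from_select(['a'], select(one_row)))
    result = conn.execute(select(insert_test)).fetchall()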
Example #2
    def test_insert_select(self, engine, connection):
        one_row = Table('one_row', MetaData(bind=engine), autoload=True)
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer),
                      schema='pyhive_test_database')
        table.drop(checkfirst=True)
        table.create()
        connection.execute('SET mapred.job.tracker=local')
        # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
        connection.execute(table.insert().from_select(['a'], one_row.select()))

        result = table.select().execute().fetchall()
        expected = [(1,)]
        self.assertEqual(result, expected)
Example #3
def test_engine_with_dataset(engine_using_test_dataset):
    rows = engine_using_test_dataset.execute('SELECT * FROM sample_one_row').fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS

    table_one_row = Table('sample_one_row', MetaData(bind=engine_using_test_dataset), autoload=True)
    rows = table_one_row.select().execute().fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED

    table_one_row = Table('test_pybigquery.sample_one_row', MetaData(bind=engine_using_test_dataset), autoload=True)
    rows = table_one_row.select().execute().fetchall()
    # verify that we are pulling from the specifically-named dataset,
    # instead of pulling from the default dataset of the engine (which
    # does not have this table at all)
    assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED
Example #4
    def test_insert_select(self, engine, connection):
        one_row = Table('one_row', MetaData(bind=engine), autoload=True)
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer),
                      schema='pyhive_test_database')
        table.drop(checkfirst=True)
        table.create()
        connection.execute('SET mapred.job.tracker=local')
        # NOTE(jing) I'm stuck on a version of Hive without INSERT ... VALUES
        connection.execute(table.insert().from_select(['a'], one_row.select()))

        result = table.select().execute().fetchall()
        expected = [(1,)]
        self.assertEqual(result, expected)
Example #5
    def test_reflect_select(self, engine, connection):
        """reflecttable should be able to fill in a table from the name"""
        one_row_complex = Table('one_row_complex',
                                MetaData(bind=engine),
                                autoload=True)
        self.assertEqual(len(one_row_complex.c), 14)
        self.assertIsInstance(one_row_complex.c.string, Column)
        row = one_row_complex.select().execute().fetchone()
        self.assertEqual(list(row), _ONE_ROW_COMPLEX_CONTENTS)

        # TODO some of these types could be filled in better
        self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
        self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.bigint.type, types.BigInteger)
        self.assertIsInstance(one_row_complex.c.float.type, types.Float)
        self.assertIsInstance(one_row_complex.c.double.type, types.Float)
        self.assertIsInstance(one_row_complex.c.string.type, types.String)
        self.assertIsInstance(one_row_complex.c.timestamp.type,
                              types.TIMESTAMP)
        self.assertIsInstance(one_row_complex.c.binary.type, types.String)
        self.assertIsInstance(one_row_complex.c.array.type, types.String)
        self.assertIsInstance(one_row_complex.c.map.type, types.String)
        self.assertIsInstance(one_row_complex.c.struct.type, types.String)
        self.assertIsInstance(one_row_complex.c.decimal.type, types.DECIMAL)
Example #6
 def test_one_row_complex_null(self, engine, connection):
     one_row_complex_null = Table('one_row_complex_null',
                                  MetaData(bind=engine),
                                  autoload=True)
     rows = one_row_complex_null.select().execute().fetchall()
     self.assertEqual(len(rows), 1)
     self.assertEqual(list(rows[0]), [None] * len(rows[0]))
Example #7
    def test_reflect_select(self, engine, connection):
        """reflecttable should be able to fill in a table from the name"""
        one_row_complex = Table('one_row_complex',
                                MetaData(bind=engine),
                                autoload=True)
        self.assertEqual(len(one_row_complex.c), 15)
        self.assertIsInstance(one_row_complex.c.string, Column)
        rows = one_row_complex.select().execute().fetchall()
        self.assertEqual(len(rows), 1)
        self.assertEqual(list(rows[0]), _ONE_ROW_COMPLEX_CONTENTS)

        try:
            from sqlalchemy.types import BigInteger
        except ImportError:
            from sqlalchemy.databases.mysql import MSBigInteger as BigInteger

        # TODO some of these types could be filled in better
        self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
        self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.bigint.type, BigInteger)
        self.assertIsInstance(one_row_complex.c.float.type, types.Float)
        self.assertIsInstance(one_row_complex.c.double.type, types.Float)
        self.assertIsInstance(one_row_complex.c.string.type, types.String)
        self.assertIsInstance(one_row_complex.c.timestamp.type, HiveTimestamp)
        self.assertIsInstance(one_row_complex.c.binary.type, types.String)
        self.assertIsInstance(one_row_complex.c.array.type, types.String)
        self.assertIsInstance(one_row_complex.c.map.type, types.String)
        self.assertIsInstance(one_row_complex.c.struct.type, types.String)
        self.assertIsInstance(one_row_complex.c.union.type, types.String)
        self.assertIsInstance(one_row_complex.c.decimal.type, HiveDecimal)
Example #8
 def test_reflect_select(self, engine, connection):
     """reflecttable should be able to fill in a table from the name"""
     one_row_complex = Table('one_row_complex',
                             MetaData(bind=engine),
                             autoload=True)
     # Presto ignores the union and decimal columns
     self.assertEqual(len(one_row_complex.c), 15 - 2)
     self.assertIsInstance(one_row_complex.c.string, Column)
     rows = one_row_complex.select().execute().fetchall()
     self.assertEqual(len(rows), 1)
     self.assertEqual(
         list(rows[0]),
         [
             True,
             127,
             32767,
             2147483647,
             9223372036854775807,
             0.5,
             0.25,
             'a string',
             0,
             '123',
             '[1,2]',
             '{"1":2,"3":4}',  # Presto converts all keys to strings so that they're valid JSON
             '{"a":1,"b":2}',
             #'{0:1}',
             #0.1,
         ])
Example #9
def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    records_table = Table('records', meta, autoload=True)

    # Add the hash column, start with allowing NULLs
    hash_column = Column('hash', String(32), nullable=True, default=None,
                         unique=True)
    hash_column.create(records_table, unique_name='unique_record')

    sync_domains = []

    # Fill out the hash values. We need to do this in a way that lets us track
    # which domains need to be re-synced, so having the DB do this directly
    # won't work.
    for record in records_table.select().execute():
        try:
            records_table.update()\
                         .where(records_table.c.id == record.id)\
                         .values(hash=_build_hash(record))\
                         .execute()
        except IntegrityError:
            if record.domain_id not in sync_domains:
                sync_domains.append(record.domain_id)
                LOG.warn("Domain '%s' needs to be synchronised" %
                         record.domain_id)

            records_table.delete()\
                         .where(records_table.c.id == record.id)\
                         .execute()

    # Finally, the column should not be nullable.
    records_table.c.hash.alter(nullable=False)
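
`Column.create()`, `Column.alter()`, and the implicit `.execute()` calls in this migration come from the sqlalchemy-migrate package rather than SQLAlchemy core. A rough Alembic sketch of just the schema steps, reusing the `unique_record` constraint name from above (the backfill loop in the middle would stay essentially the same):

import sqlalchemy as sa
from alembic import op

def upgrade():
    # Add the hash column, initially nullable so existing rows can be backfilled
    op.add_column('records', sa.Column('hash', sa.String(32), nullable=True))
    op.create_unique_constraint('unique_record', 'records', ['hash'])
    # ... backfill hashes and delete duplicates here, as in the loop above ...
    op.alter_column('records', 'hash', nullable=False)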
Example #10
    def test_reflect_select(self, engine, connection):
        """reflecttable should be able to fill in a table from the name"""
        one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
        self.assertEqual(len(one_row_complex.c), 15)
        self.assertIsInstance(one_row_complex.c.string, Column)
        rows = one_row_complex.select().execute().fetchall()
        self.assertEqual(len(rows), 1)
        self.assertEqual(list(rows[0]), _ONE_ROW_COMPLEX_CONTENTS)

        try:
            from sqlalchemy.types import BigInteger
        except ImportError:
            from sqlalchemy.databases.mysql import MSBigInteger as BigInteger

        # TODO some of these types could be filled in better
        self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
        self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.bigint.type, BigInteger)
        self.assertIsInstance(one_row_complex.c.float.type, types.Float)
        self.assertIsInstance(one_row_complex.c.double.type, types.Float)
        self.assertIsInstance(one_row_complex.c.string.type, types.String)
        self.assertIsInstance(one_row_complex.c.timestamp.type, HiveTimestamp)
        self.assertIsInstance(one_row_complex.c.binary.type, types.String)
        self.assertIsInstance(one_row_complex.c.array.type, types.String)
        self.assertIsInstance(one_row_complex.c.map.type, types.String)
        self.assertIsInstance(one_row_complex.c.struct.type, types.String)
        self.assertIsInstance(one_row_complex.c.union.type, types.String)
        self.assertIsInstance(one_row_complex.c.decimal.type, HiveDecimal)
Example #11
def downgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    tasks_table = Table('tasks', meta, autoload=True)
    task_info_table = Table('task_info', meta, autoload=True)

    for col_name in TASKS_MIGRATE_COLUMNS:
        column = Column(col_name, Text())
        column.create(tasks_table)

    task_info_records = task_info_table.select().execute().fetchall()

    for task_info in task_info_records:
        values = {
            'input': task_info.input,
            'result': task_info.result,
            'message': task_info.message
        }

        tasks_table\
            .update(values=values)\
            .where(tasks_table.c.id == task_info.task_id)\
            .execute()

    drop_tables([task_info_table])
Example #12
def downgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    tasks_table = Table('tasks', meta, autoload=True)
    task_info_table = Table('task_info', meta, autoload=True)

    for col_name in TASKS_MIGRATE_COLUMNS:
        column = Column(col_name, Text())
        column.create(tasks_table)

    task_info_records = task_info_table.select().execute().fetchall()

    for task_info in task_info_records:
        values = {
            'input': task_info.input,
            'result': task_info.result,
            'message': task_info.message
        }

        tasks_table\
            .update(values=values)\
            .where(tasks_table.c.id == task_info.task_id)\
            .execute()

    drop_tables([task_info_table])
Example #13
 def test_reflect_select(self, engine, connection):
     """reflecttable should be able to fill in a table from the name"""
     one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
     # Presto ignores the union and decimal columns
     self.assertEqual(len(one_row_complex.c), 15 - 2)
     self.assertIsInstance(one_row_complex.c.string, Column)
     rows = one_row_complex.select().execute().fetchall()
     self.assertEqual(len(rows), 1)
     self.assertEqual(list(rows[0]), [
         True,
         127,
         32767,
         2147483647,
         9223372036854775807,
         0.5,
         0.25,
         'a string',
         '1970-01-01 00:00:00.000',
         '123',
         [1, 2],
         {"1": 2, "3": 4},  # Presto converts all keys to strings so that they're valid JSON
         [1, 2],  # struct is returned as a list of elements
         #'{0:1}',
         #0.1,
     ])
Example #14
def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    records_table = Table('records', meta, autoload=True)

    # Add the hash column, start with allowing NULLs
    hash_column = Column('hash', String(32), nullable=True, default=None,
                         unique=True)
    hash_column.create(records_table, unique_name='unique_record')

    sync_domains = []

    # Fill out the hash values. We need to do this in a way that lets us track
    # which domains need to be re-synced, so having the DB do this directly
    # won't work.
    for record in records_table.select().execute():
        try:
            records_table.update()\
                         .where(records_table.c.id == record.id)\
                         .values(hash=_build_hash(record))\
                         .execute()
        except IntegrityError:
            if record.domain_id not in sync_domains:
                sync_domains.append(record.domain_id)
                LOG.warn(_LW("Domain '%s' needs to be synchronised") %
                         record.domain_id)

            records_table.delete()\
                         .where(records_table.c.id == record.id)\
                         .execute()

    # Finally, the column should not be nullable.
    records_table.c.hash.alter(nullable=False)
Example #15
def check_ckan_login(request):
    """ Connect to a specified CKAN database via SQLAlchemy and 
    try to find the user that is authenticating. 
    """
    db_uri = app.config.get('CKAN_DB_URI')
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    if 'Authorization' in request.headers:
        authorization = request.headers.get('Authorization')
        authorization = authorization.split(' ', 1)[-1]
        user, password = authorization.decode('base64').split(':', 1)
        engine = create_engine(db_uri, poolclass=NullPool)
        meta = MetaData()
        meta.bind = engine
        table = Table('user', meta, autoload=True)
        results = engine.execute(table.select(table.c.name == user))
        # TODO: check for multiple matches, never trust ckan.
        record = results.first()
        if record is not None and check_hashed_password(
                password, record['password']):
            return user
        raise WebstoreException('Invalid username or password!',
                                None,
                                state='error',
                                code=401)
    return None
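
`authorization.decode('base64')` relies on the Python 2 `str.decode` codec, which no longer exists in Python 3. A sketch of the same credential parsing with the `base64` module (`parse_basic_auth` is a hypothetical helper name):

import base64

def parse_basic_auth(header_value):
    # 'Basic dXNlcjpwYXNz' -> ('user', 'pass')
    encoded = header_value.split(' ', 1)[-1]
    decoded = base64.b64decode(encoded).decode('utf-8')
    user, password = decoded.split(':', 1)
    return user, password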
Example #16
    def copy_table(self,
                   table_to_copy,
                   database_in,
                   database_out,
                   new_table_name=None,
                   use_chunking=True,
                   memory_in_gigabytes=2):

        in_metadata = database_in.metadata
        out_metadata = database_out.metadata

        in_table = Table(table_to_copy, in_metadata, autoload=True)

        out_table = in_table.tometadata(out_metadata)
        if new_table_name is not None:
            out_table.rename(new_table_name)
        out_metadata.create_all()

        if use_chunking:
            memory_in_bytes = memory_in_gigabytes * 1024**3  # 1 GiB = 1024**3 bytes

            row_size = self._row_byte_size(in_table)
            chunk_size = int(.1 * memory_in_bytes / row_size)

            num_rows = in_table.count().execute().fetchone()[0]
            num_inserted_rows = 0

            while num_inserted_rows < num_rows:
                qry = in_table.select().offset(num_inserted_rows).limit(
                    chunk_size)

                result = qry.execute()
                data = [
                    dict((col.key, x[col.name]) for col in in_table.c)
                    for x in result
                ]

                out_table.insert().execute(*data)
                num_inserted_rows += len(data)

        else:
            qry = in_table.select()
            data = [
                dict((col.key, x[col.name]) for col in in_table.c)
                for x in qry.execute()
            ]
            out_table.insert().execute(*data)
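
One caveat on the chunking above: `SELECT ... OFFSET n LIMIT m` without an ORDER BY has no guaranteed row order, so chunks can skip or repeat rows, and each OFFSET re-scans from the start. A sketch of the same copy over a single streamed result instead, assuming SQLAlchemy 1.4+, reflected `in_table`/`out_table`, and engines `engine_in`/`engine_out`:

with engine_in.connect().execution_options(stream_results=True) as conn_in, \
        engine_out.begin() as conn_out:
    result = conn_in.execute(in_table.select())
    while True:
        chunk = result.fetchmany(10000)  # rows per INSERT batch
        if not chunk:
            break
        conn_out.execute(out_table.insert(), [dict(row._mapping) for row in chunk])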
Example #17
    def test_create_view(self):
        """
        Test the creation of a view

        construct a view that:
            1) joins two tables together
            2) sums one column
            3) assigns a new label to a column
            4) groups by user
        """

        url = connection_string()
        db = db_connect_impl(DatabaseTablesV0, url, False)

        t1 = db.tables.TestTable1
        t2 = db.tables.TestTable2
        query = select([
                t1.c.user_id,
                func.sum(t1.c.size).label('usage'),
                t2.c.quota,
        ]).select_from(t2) \
            .where(t1.c.user_id == t2.c.user_id) \
            .group_by(t1.c.user_id)

        db.engine.execute(CreateView('v_usage_v0', query))

        db.metadata.bind = db.engine
        db.metadata.create_all()

        view = Table('v_usage_v0', db.metadata, Column('user_id', String),
                     Column('usage', Integer), Column('quota', Integer))

        # insert 3 users with different data
        for i, user_id in enumerate(["user0", "user1", "user2"]):

            db.tables.TestTable1.insert().values({
                'user_id': user_id,
                'size': 5 * (i + 1) + 100
            }).execute()

            db.tables.TestTable1.insert().values({
                'user_id': user_id,
                'size': 7 * (i + 1) + 100
            }).execute()

            db.tables.TestTable1.insert().values({
                'user_id': user_id,
                'size': 9 * (i + 1) + 100
            }).execute()

            db.tables.TestTable2.insert().values({
                'user_id': user_id,
                'quota': 400 * (i + 1)
            }).execute()

        print(dir(view))
        for row in view.select().where(view.c.user_id == 'user0').execute():
            self.assertEqual(row.usage, 321)
            self.assertEqual(row.quota, 400)
Example #18
 def test_reflect_select(self, engine, connection):
     """reflecttable should be able to fill in a table from the name"""
     one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
     self.assertEqual(len(one_row_complex.c), 15)
     self.assertIsInstance(one_row_complex.c.string, Column)
     rows = one_row_complex.select().execute().fetchall()
     self.assertEqual(len(rows), 1)
     self.assertEqual(list(rows[0]), _ONE_ROW_COMPLEX_CONTENTS)
Example #19
 def test_reflect_select(self, engine, connection):
     """reflecttable should be able to fill in a table from the name"""
     one_row_complex = Table('one_row_complex', MetaData(bind=engine), autoload=True)
     self.assertEqual(len(one_row_complex.c), 15)
     self.assertIsInstance(one_row_complex.c.string, Column)
     rows = one_row_complex.select().execute().fetchall()
     self.assertEqual(len(rows), 1)
     self.assertEqual(list(rows[0]), _ONE_ROW_COMPLEX_CONTENTS)
Example #20
 def test_reserved_words(self, engine, connection):
     """Hive uses backticks"""
     # Use keywords for the table/column name
     fake_table = Table('select', MetaData(bind=engine), Column('map', sqlalchemy.types.String))
     query = str(fake_table.select(fake_table.c.map == 'a'))
     self.assertIn('`select`', query)
     self.assertIn('`map`', query)
     self.assertNotIn('"select"', query)
     self.assertNotIn('"map"', query)
Example #21
 def test_reserved_words(self, engine, connection):
     """Presto uses double quotes, not backticks"""
     # Use keywords for the table/column name
     fake_table = Table('select', MetaData(bind=engine), Column('current_timestamp', String))
     query = str(fake_table.select(fake_table.c.current_timestamp == 'a'))
     self.assertIn('"select"', query)
     self.assertIn('"current_timestamp"', query)
     self.assertNotIn('`select`', query)
     self.assertNotIn('`current_timestamp`', query)
Example #22
 def test_reserved_words(self, engine, connection):
     """Hive uses backticks"""
     # Use keywords for the table/column name
     fake_table = Table('select', MetaData(bind=engine), Column('map', sqlalchemy.types.String))
     query = str(fake_table.select(fake_table.c.map == 'a'))
     self.assertIn('`select`', query)
     self.assertIn('`map`', query)
     self.assertNotIn('"select"', query)
     self.assertNotIn('"map"', query)
Example #23
    def test_insert_values(self, engine, connection):
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer))
        table.drop(checkfirst=True)
        table.create()
        connection.execute(table.insert([{'a': 1}, {'a': 2}]))

        result = table.select().execute().fetchall()
        expected = [(1, ), (2, )]
        self.assertEqual(result, expected)
Example #24
 def query(self):
     query = self.config.get('query')
     if not query:
         table_name = self.config.get('table')
         table = Table(table_name, self.meta, autoload=True)
         query = table.select()
     else:
         query = sql_text(query)
     log.info("Query: %s", query)
     return query
Example #25
 def test_reserved_words(self, engine, connection):
     """Presto uses double quotes, not backticks"""
     # Use keywords for the table/column name
     fake_table = Table('select', MetaData(bind=engine),
                        Column('current_timestamp', String))
     query = str(fake_table.select(fake_table.c.current_timestamp == 'a'))
     self.assertIn('"select"', query)
     self.assertIn('"current_timestamp"', query)
     self.assertNotIn('`select`', query)
     self.assertNotIn('`current_timestamp`', query)
Example #26
 def query(self):
     query = self.config.get('query')
     if not query:
         table_name = self.config.get('table')
         table = Table(table_name, self.meta, autoload=True)
         query = table.select()
     else:
         query = sql_text(query)
     log.info("Query: %s", query)
     return query
Example #27
    def test_insert_values(self, engine, connection):
        table = Table('insert_test', MetaData(bind=engine),
                      Column('a', sqlalchemy.types.Integer),
                      schema='pyhive_test_database')
        table.drop(checkfirst=True)
        table.create()
        connection.execute(table.insert([{'a': 1}, {'a': 2}]))

        result = table.select().execute().fetchall()
        expected = [(1,), (2,)]
        self.assertEqual(result, expected)
Example #28
    def test_reflect_select(self, engine, connection):
        """reflecttable should be able to fill in a table from the name"""
        one_row_complex = Table('one_row_complex',
                                MetaData(bind=engine),
                                autoload=True)
        # Presto ignores the union column
        self.assertEqual(len(one_row_complex.c), 15 - 1)
        self.assertIsInstance(one_row_complex.c.string, Column)
        rows = one_row_complex.select().execute().fetchall()
        self.assertEqual(len(rows), 1)
        self.assertEqual(
            list(rows[0]),
            [
                True,
                127,
                32767,
                2147483647,
                9223372036854775807,
                0.5,
                0.25,
                'a string',
                '1970-01-01 00:00:00.000',
                b'123',
                [1, 2],
                {
                    "1": 2,
                    "3": 4
                },  # Presto converts all keys to strings so that they're valid JSON
                [1, 2],  # struct is returned as a list of elements
                # '{0:1}',
                '0.1',
            ])

        try:
            from sqlalchemy.types import BigInteger
        except ImportError:
            from sqlalchemy.databases.mysql import MSBigInteger as BigInteger

        # TODO some of these types could be filled in better
        self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
        self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.bigint.type, BigInteger)
        self.assertIsInstance(one_row_complex.c.float.type, types.Float)
        self.assertIsInstance(one_row_complex.c.double.type, types.Float)
        self.assertIsInstance(one_row_complex.c.string.type, String)
        self.assertIsInstance(one_row_complex.c.timestamp.type,
                              types.TIMESTAMP)
        self.assertIsInstance(one_row_complex.c.binary.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.array.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.map.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.struct.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.decimal.type, types.NullType)
Example #29
    def test_migrate_data(self):
        meta = MetaData(bind=self.engine)

        # create TableA
        table_a = Table('TableA',
                        meta,
                        Column('id', Integer, primary_key=True),
                        Column('first', String(8), nullable=False),
                        Column('second', Integer))
        table_a.create()

        # update it with sample data
        values = [
            {'id': 1, 'first': 'a'},
            {'id': 2, 'first': 'b'},
            {'id': 3, 'first': 'c'}
        ]

        for value in values:
            self.engine.execute(table_a.insert(values=value))

        # create TableB similar to TableA, except column 'second'
        table_b = Table('TableB',
                        meta,
                        Column('id', Integer, primary_key=True),
                        Column('first', String(8), nullable=False))
        table_b.create()

        # migrate data
        migrate_utils.migrate_data(self.engine,
                                   table_a,
                                   table_b,
                                   ['second'])

        # validate table_a is dropped
        self.assertTrue(self.engine.dialect.has_table(
            self.engine.connect(),
            'TableA'),
            'Data migration failed to drop source table')

        # validate table_b is updated with data from table_a
        table_b_rows = list(table_b.select().execute())
        self.assertEqual(3,
                         len(table_b_rows),
                         "Data migration is failed")
        table_b_values = []
        for row in table_b_rows:
            table_b_values.append({'id': row.id,
                                   'first': row.first})

        self.assertEqual(values,
                         table_b_values,
                         "Data migration failed with invalid data copy")
Example #30
    def copy_table(self, table_to_copy,
                   database_in, database_out,
                   new_table_name=None,
                   use_chunking=True,
                   memory_in_gigabytes=2):
        
        in_metadata = database_in.metadata
        out_metadata = database_out.metadata
    
        in_table = Table(table_to_copy, in_metadata, autoload=True)

        out_table = in_table.tometadata(out_metadata)
        if new_table_name is not None:
            out_table.rename(new_table_name)
        out_metadata.create_all()
            
        if use_chunking:
            memory_in_bytes = memory_in_gigabytes * 1024**3  # 1 GiB = 1024**3 bytes

            row_size = self._row_byte_size(in_table)
            chunk_size = int(.1 * memory_in_bytes / row_size)
            
            num_rows = in_table.count().execute().fetchone()[0]
            num_inserted_rows = 0
    
            while num_inserted_rows < num_rows:
                qry = in_table.select().offset(num_inserted_rows).limit(chunk_size)
                    
                result = qry.execute()
                data = [dict((col.key, x[col.name]) for col in in_table.c)
                        for x in result]
            
                out_table.insert().execute(*data)
                num_inserted_rows += len(data)

        else:
            qry = in_table.select()
            data = [dict((col.key, x[col.name]) for col in in_table.c)
                    for x in qry.execute()]
            out_table.insert().execute(*data)
Example #31
    def beforOutLog(self, logLevel, farmNo, componentNo, componentName, instanceNo, instanceName, code, additions):


        farmT = Table("FARM", METADATA, autoload=True)
        farm = self.selectOne(farmT.select(farmT.c.FARM_NO==farmNo))
        farmName = None
        if farm is not None:
            farmName = farm["FARM_NAME"]

        userT = Table("USER", METADATA, autoload=True)
        user = self.selectOne(userT.select(userT.c.USER_NO==self.userNo))
        userName = None
        if user is not None:
            userName = user["USERNAME"]

        instanceType = None
        platformNo = None
        if instanceNo is not None:
            instanceT = Table("INSTANCE", METADATA, autoload=True)
            instance = self.selectOne(instanceT.select(instanceT.c.INSTANCE_NO==instanceNo))
            if instance is not None:
                platformNo = instance["PLATFORM_NO"]

            isSelect = True
            if isSelect:
                awsinstanceT = Table("AWS_INSTANCE", METADATA, autoload=True)
                awsinstance = self.selectOne(awsinstanceT.select(awsinstanceT.c.INSTANCE_NO==instanceNo))
                if awsinstance is not None:
                    instanceType = awsinstance["INSTANCE_TYPE"]
                    isSelect = False

            if isSelect:
                csinstanceT = Table("CLOUDSTACK_INSTANCE", METADATA, autoload=True)
                csinstance = self.selectOne(csinstanceT.select(csinstanceT.c.INSTANCE_NO==instanceNo))
                if csinstance is not None:
                    instanceType = csinstance["INSTANCE_TYPE"]
                    isSelect = False

            if isSelect:
                vminstanceT = Table("VMWARE_INSTANCE", METADATA, autoload=True)
                vminstance = self.selectOne(vminstanceT.select(vminstanceT.c.INSTANCE_NO==instanceNo))
                if vminstance is not None:
                    instanceType = vminstance["INSTANCE_TYPE"]
                    isSelect = False

            if isSelect:
                nifinstanceT = Table("NIFTY_INSTANCE", METADATA, autoload=True)
                nifinstance = self.selectOne(nifinstanceT.select(nifinstanceT.c.INSTANCE_NO==instanceNo))
                if nifinstance is not None:
                    instanceType = nifinstance["INSTANCE_TYPE"]
                    isSelect = False


        self.outLog(logLevel, self.userNo, userName, farmNo, farmName, componentNo, componentName, instanceNo, instanceName, code, instanceType,  platformNo, additions)
Example #32
    def test_reflect_select(self, engine, connection):
        """reflecttable should be able to fill in a table from the name"""
        one_row_complex = Table("one_row_complex", MetaData(bind=engine), autoload=True)
        # Presto ignores the union column
        self.assertEqual(len(one_row_complex.c), 15 - 1)
        self.assertIsInstance(one_row_complex.c.string, Column)
        rows = one_row_complex.select().execute().fetchall()
        self.assertEqual(len(rows), 1)
        self.assertEqual(
            list(rows[0]),
            [
                True,
                127,
                32767,
                2147483647,
                9223372036854775807,
                0.5,
                0.25,
                "a string",
                "1970-01-01 00:00:00.000",
                b"123",
                [1, 2],
                {"1": 2, "3": 4},  # Presto converts all keys to strings so that they're valid JSON
                [1, 2],  # struct is returned as a list of elements
                # '{0:1}',
                "0.1",
            ],
        )

        try:
            from sqlalchemy.types import BigInteger
        except ImportError:
            from sqlalchemy.databases.mysql import MSBigInteger as BigInteger

        # TODO some of these types could be filled in better
        self.assertIsInstance(one_row_complex.c.boolean.type, types.Boolean)
        self.assertIsInstance(one_row_complex.c.tinyint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.smallint.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.int.type, types.Integer)
        self.assertIsInstance(one_row_complex.c.bigint.type, BigInteger)
        self.assertIsInstance(one_row_complex.c.float.type, types.Float)
        self.assertIsInstance(one_row_complex.c.double.type, types.Float)
        self.assertIsInstance(one_row_complex.c.string.type, String)
        self.assertIsInstance(one_row_complex.c.timestamp.type, types.TIMESTAMP)
        self.assertIsInstance(one_row_complex.c.binary.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.array.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.map.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.struct.type, types.NullType)
        self.assertIsInstance(one_row_complex.c.decimal.type, types.NullType)
Example #33
class TableHandler(object):

    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """

    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database.
        """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c)
                     for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table
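
`_upsert` above runs an UPDATE and then, if nothing matched, a separate INSERT; a concurrent writer can insert the same key between the two statements, so it can raise or duplicate under load. A hedged PostgreSQL-only alternative that does the whole upsert in one atomic statement, assuming a unique constraint exists on `unique_columns` (`_upsert_pg` is a hypothetical name):

from sqlalchemy.dialects.postgresql import insert

def _upsert_pg(self, bind, data, unique_columns):
    stmt = insert(self.table).values(**data)
    stmt = stmt.on_conflict_do_update(
        index_elements=unique_columns,
        set_={k: v for k, v in data.items() if k not in unique_columns},
    ).returning(self.table.c.id)
    # Returns the id of the inserted or updated row
    return bind.execute(stmt).scalar()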
Example #34
    def test_migrate_data(self):
        meta = MetaData(bind=self.engine)

        # create TableA
        table_a = Table('TableA', meta, Column('id', Integer,
                                               primary_key=True),
                        Column('first', String(8), nullable=False),
                        Column('second', Integer))
        table_a.create()

        # update it with sample data
        values = [{
            'id': 1,
            'first': 'a'
        }, {
            'id': 2,
            'first': 'b'
        }, {
            'id': 3,
            'first': 'c'
        }]

        for value in values:
            self.engine.execute(table_a.insert(values=value))

        # create TableB similar to TableA, except column 'second'
        table_b = Table('TableB', meta, Column('id', Integer,
                                               primary_key=True),
                        Column('first', String(8), nullable=False))
        table_b.create()

        # migrate data
        migrate_utils.migrate_data(self.engine, table_a, table_b, ['second'])

        # validate table_a is dropped
        self.assertTrue(
            self.engine.dialect.has_table(self.engine.connect(), 'TableA'),
            'Data migration failed to drop source table')

        # validate table_b is updated with data from table_a
        table_b_rows = list(table_b.select().execute())
        self.assertEqual(3, len(table_b_rows), "Data migration is failed")
        table_b_values = []
        for row in table_b_rows:
            table_b_values.append({'id': row.id, 'first': row.first})

        self.assertEqual(values, table_b_values,
                         "Data migration failed with invalid data copy")
Example #35
def test_engine_with_dataset(engine_using_test_dataset):
    rows = engine_using_test_dataset.execute(
        'SELECT * FROM sample_one_row').fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS

    table_one_row = Table('sample_one_row',
                          MetaData(bind=engine_using_test_dataset),
                          autoload=True)
    rows = table_one_row.select().execute().fetchall()
    assert list(rows[0]) == ONE_ROW_CONTENTS_EXPANDED

    # Table name shouldn't include dataset
    with pytest.raises(Exception):
        table_one_row = Table('test_pybigquery.sample_one_row',
                              MetaData(bind=engine_using_test_dataset),
                              autoload=True)
Example #36
class TableHandler(object):
    """ Used by automatically generated objects such as datasets
    and dimensions to generate, write and clear the table under
    its management. """
    def _init_table(self, meta, namespace, name, id_type=Integer):
        """ Create the given table if it does not exist, otherwise
        reflect the current table schema from the database.
        """
        name = namespace + '__' + name
        self.table = Table(name, meta)
        if id_type is not None:
            col = Column('id', id_type, primary_key=True)
            self.table.append_column(col)

    def _generate_table(self):
        """ Create the given table if it does not exist. """
        # TODO: make this support some kind of migration?
        if not db.engine.has_table(self.table.name):
            self.table.create(db.engine)

    def _upsert(self, bind, data, unique_columns):
        """ Upsert a set of values into the table. This will
        query for the set of unique columns and either update an
        existing row or create a new one. In both cases, the ID
        of the changed row will be returned. """
        key = and_(*[self.table.c[c] == data.get(c) for c in unique_columns])
        q = self.table.update(key, data)
        if bind.execute(q).rowcount == 0:
            q = self.table.insert(data)
            rs = bind.execute(q)
            return rs.inserted_primary_key[0]
        else:
            q = self.table.select(key)
            row = bind.execute(q).fetchone()
            return row['id']

    def _flush(self, bind):
        """ Delete all rows in the table. """
        q = self.table.delete()
        bind.execute(q)

    def _drop(self, bind):
        """ Drop the table and the local reference to it. """
        if db.engine.has_table(self.table.name):
            self.table.drop()
        del self.table
Example #37
    def unload(self, schema: str, table: str, unload_plan: RecordsUnloadPlan,
               directory: RecordsDirectory) -> None:
        if not isinstance(unload_plan.records_format, DelimitedRecordsFormat):
            raise NotImplementedError(
                "This only supports delimited mode for now")

        unhandled_hints = set(unload_plan.records_format.hints.keys())
        processing_instructions = unload_plan.processing_instructions
        date_output_style, date_order_style, postgres_options =\
            postgres_copy_to_options(unhandled_hints,
                                     unload_plan.records_format,
                                     processing_instructions.fail_if_cant_handle_hint)
        if date_order_style is None:
            # U-S-A!  U-S-A!
            date_order_style = 'MDY'
        complain_on_unhandled_hints(
            processing_instructions.fail_if_dont_understand, unhandled_hints,
            unload_plan.records_format.hints)

        table_obj = Table(table,
                          self.meta,
                          schema=schema,
                          autoload=True,
                          autoload_with=self.db)

        with self.db.engine.begin() as conn:
            # https://www.postgresql.org/docs/8.3/sql-set.html
            #
            # The effects of SET LOCAL last only till the end of the
            # current transaction, whether committed or not. A special
            # case is SET followed by SET LOCAL within a single
            # transaction: the SET LOCAL value will be seen until the end
            # of the transaction, but afterwards (if the transaction is
            # committed) the SET value will take effect.
            date_style = f"{date_output_style}, {date_order_style}"
            sql = f"SET LOCAL DateStyle = {quote_value(conn, date_style)}"
            logger.info(sql)
            conn.execute(sql)

            filename = unload_plan.records_format.generate_filename('data')
            loc = directory.loc.file_in_this_directory(filename)
            with loc.open(mode='wb') as fileobj:
                copy_to(table_obj.select(), fileobj, conn, **postgres_options)

        logger.info('Copy complete')
        directory.save_preliminary_manifest()
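
A minimal illustration of the SET LOCAL scoping described in the comment above, assuming a PostgreSQL `engine`: the value holds until the enclosing transaction ends, then reverts to the session default.

from sqlalchemy import text

with engine.begin() as conn:
    conn.execute(text("SET LOCAL DateStyle = 'ISO, MDY'"))
    conn.execute(text("SHOW DateStyle")).scalar()  # 'ISO, MDY'

with engine.connect() as conn:
    conn.execute(text("SHOW DateStyle")).scalar()  # back to the session default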
Example #38
def get_data(table="europe_load_dt", schema="agg_avg_1hour_pdata_caps_eu2020", metadata=None):

    if metadata is None:
        engine, inspector, metadata = connect2database(localhost=True)

    data = Table(table, metadata, schema=schema, autoload=True)
    s = data.select()
    rs = s.execute()

    data = list(empty(rs.rowcount))
    for i in arange(rs.rowcount):
        data[i] = list(rs.fetchone())

    return_data = transposed(data)
    # data = rs.fetchall()

    return return_data
Example #39
def check_ckan_login(user, password):
    """ Connect to a specified CKAN database via SQLAlchemy and 
    try to find the user that is authenticating. 
    """
    db_uri = app.config.get('CKAN_DB_URI')
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    engine = create_engine(db_uri, poolclass=NullPool)
    meta = MetaData()
    meta.bind = engine
    table = Table('user', meta, autoload=True)
    results = engine.execute(table.select(table.c.name==user))
    # TODO: check for multiple matches, never trust ckan.
    record = results.first()
    if record is not None:
        return check_hashed_password(password, record['password'])
    return False
Example #40
 def test_lots_of_types(self, engine, connection):
     # Presto doesn't have raw CREATE TABLE support, so we only test Hive
     # take type list from sqlalchemy.types
     types = [
         'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
         'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
         'BOOLEAN', 'SMALLINT', 'DATE', 'TIME',
         'String', 'Integer', 'SmallInteger',
         'Numeric', 'Float', 'DateTime', 'Date', 'Time', 'Binary',
         'Boolean', 'Unicode', 'UnicodeText',
     ]
     cols = []
     for i, t in enumerate(types):
         cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
     cols.append(Column('hive_date', HiveDate))
     cols.append(Column('hive_decimal', HiveDecimal))
     cols.append(Column('hive_timestamp', HiveTimestamp))
     table = Table('test_table', MetaData(bind=engine), *cols, schema='pyhive_test_database')
     table.drop(checkfirst=True)
     table.create()
     connection.execute('SET mapred.job.tracker=local')
     connection.execute('USE pyhive_test_database')
     big_number = 10 ** 10 - 1
     connection.execute("""
     INSERT OVERWRITE TABLE test_table
     SELECT
         1, "a", "a", "a", "a", "a", 0.1,
         0.1, 0.1, 0, 0, "a", "a",
         false, 1, 0, 0,
         "a", 1, 1,
         0.1, 0.1, 0, 0, 0, "a",
         false, "a", "a",
         0, %d, 123 + 2000
     FROM default.one_row
     """, big_number)
     row = connection.execute(table.select()).fetchone()
     self.assertEqual(row.hive_date, datetime.date(1970, 1, 1))
     self.assertEqual(row.hive_decimal, decimal.Decimal(big_number))
     self.assertEqual(row.hive_timestamp, datetime.datetime(1970, 1, 1, 0, 0, 2, 123))
     table.drop()
Example #41
 def test_lots_of_types(self, engine, connection):
     # Presto doesn't have raw CREATE TABLE support, so we only test Hive
     # take type list from sqlalchemy.types
     types = [
         'INT', 'CHAR', 'VARCHAR', 'NCHAR', 'TEXT', 'Text', 'FLOAT',
         'NUMERIC', 'DECIMAL', 'TIMESTAMP', 'DATETIME', 'CLOB', 'BLOB',
         'BOOLEAN', 'SMALLINT', 'DATE', 'TIME',
         'String', 'Integer', 'SmallInteger',
         'Numeric', 'Float', 'DateTime', 'Date', 'Time', 'Binary',
         'Boolean', 'Unicode', 'UnicodeText',
     ]
     cols = []
     for i, t in enumerate(types):
         cols.append(Column(str(i), getattr(sqlalchemy.types, t)))
     cols.append(Column('hive_date', HiveDate))
     cols.append(Column('hive_decimal', HiveDecimal))
     cols.append(Column('hive_timestamp', HiveTimestamp))
     table = Table('test_table', MetaData(bind=engine), *cols, schema='pyhive_test_database')
     table.drop(checkfirst=True)
     table.create()
     connection.execute('SET mapred.job.tracker=local')
     connection.execute('USE pyhive_test_database')
     big_number = 10 ** 10 - 1
     connection.execute("""
     INSERT OVERWRITE TABLE test_table
     SELECT
         1, "a", "a", "a", "a", "a", 0.1,
         0.1, 0.1, 0, 0, "a", "a",
         false, 1, 0, 0,
         "a", 1, 1,
         0.1, 0.1, 0, 0, 0, "a",
         false, "a", "a",
         0, %d, 123 + 2000
     FROM default.one_row
     """, big_number)
     row = connection.execute(table.select()).fetchone()
     self.assertEqual(row.hive_date, datetime.date(1970, 1, 1))
     self.assertEqual(row.hive_decimal, decimal.Decimal(big_number))
     self.assertEqual(row.hive_timestamp, datetime.datetime(1970, 1, 1, 0, 0, 2, 123))
     table.drop()
Example #42
def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    tables = [define_task_info_table(meta)]
    create_tables(tables)

    tasks_table = Table('tasks', meta, autoload=True)
    task_info_table = Table('task_info', meta, autoload=True)

    tasks = tasks_table.select().execute().fetchall()
    for task in tasks:
        values = {
            'task_id': task.id,
            'input': task.input,
            'result': task.result,
            'message': task.message,
        }
        task_info_table.insert(values=values).execute()

    for col_name in TASKS_MIGRATE_COLUMNS:
        tasks_table.columns[col_name].drop()
Example #43
def check_ckan_login(request):
    """ Connect to a specified CKAN database via SQLAlchemy and 
    try to find the user that is authenticating. 
    """
    db_uri = app.config.get('CKAN_DB_URI')
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    if 'Authorization' in request.headers:
        apikey = request.headers.get('Authorization')
        engine = create_engine(db_uri, poolclass=NullPool)
        meta = MetaData()
        meta.bind = engine
        table = Table('user', meta, autoload=True)
        results = engine.execute(table.select(table.c.apikey==apikey))
        # TODO: check for multiple matches, never trust ckan.
        record = results.first()
        if record is not None:
            return record['name']
        raise WebstoreException('Invalid apikey!', None,
                                state='error', code=401)
    return None
Example #44
def upgrade(migrate_engine):
    meta = MetaData()
    meta.bind = migrate_engine

    tables = [define_task_info_table(meta)]
    create_tables(tables)

    tasks_table = Table('tasks', meta, autoload=True)
    task_info_table = Table('task_info', meta, autoload=True)

    tasks = tasks_table.select().execute().fetchall()
    for task in tasks:
        values = {
            'task_id': task.id,
            'input': task.input,
            'result': task.result,
            'message': task.message,
        }
        task_info_table.insert(values=values).execute()

    for col_name in TASKS_MIGRATE_COLUMNS:
        tasks_table.columns[col_name].drop()
Example #45
def check_ckan_login(request):
    """ Connect to a specified CKAN database via SQLAlchemy and 
    try to find the user that is authenticating. 
    """
    db_uri = app.config.get("CKAN_DB_URI")
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    if "Authorization" in request.headers:
        authorization = request.headers.get("Authorization")
        authorization = authorization.split(" ", 1)[-1]
        user, password = authorization.decode("base64").split(":", 1)
        engine = create_engine(db_uri, poolclass=NullPool)
        meta = MetaData()
        meta.bind = engine
        table = Table("user", meta, autoload=True)
        results = engine.execute(table.select(table.c.name == user))
        # TODO: check for multiple matches, never trust ckan.
        record = results.first()
        if record is not None and check_hashed_password(password, record["password"]):
            return user
        raise WebstoreException("Invalid username or password!", None, state="error", code=401)
    return None
Example #46
def check_ckan_login(request):
    """ Connect to a specified CKAN database via SQLAlchemy and 
    try to find the user that is authenticating. 
    """
    db_uri = app.config.get('CKAN_DB_URI')
    if db_uri is None:
        log.warn("No CKAN_DB_URI given, cannot authenticate!")
        return False
    if 'Authorization' in request.headers:
        apikey = request.headers.get('Authorization')
        engine = create_engine(db_uri, poolclass=NullPool)
        meta = MetaData()
        meta.bind = engine
        table = Table('user', meta, autoload=True)
        results = engine.execute(table.select(table.c.apikey == apikey))
        # TODO: check for multiple matches, never trust ckan.
        record = results.first()
        if record is not None:
            return record['name']
        raise WebstoreException('Invalid apikey!',
                                None,
                                state='error',
                                code=401)
    return None
Example #47
 def test_one_row_complex_null(self, engine, connection):
     one_row_complex_null = Table('one_row_complex_null', MetaData(bind=engine), autoload=True)
     rows = one_row_complex_null.select().execute().fetchall()
     self.assertEqual(len(rows), 1)
     self.assertEqual(list(rows[0]), [None] * len(rows[0]))
Example #48
class BaseTestCase(unittest.TestCase):

    def setUp(self):
        engine = create_engine('postgresql://postgres@localhost/pypet')
        self.metadata = MetaData(bind=engine)

        self.store_table = Table('store', self.metadata,
                Column('store_id', types.Integer, primary_key=True),
                Column('store_name', types.String),
                Column('country_id', types.Integer,
                    ForeignKey('country.country_id')))

        self.country_table = Table('country', self.metadata,
                Column('country_id', types.Integer, primary_key=True),
                Column('country_name', types.String),
                Column('region_id', types.Integer,
                    ForeignKey('region.region_id')))

        self.region_table = Table('region', self.metadata,
                Column('region_id', types.Integer, primary_key=True),
                Column('region_name', types.String))

        self.product_table = Table('product', self.metadata,
                Column('product_id', types.Integer, primary_key=True),
                Column('product_name', types.String),
                Column('product_category_id', types.Integer,
                   ForeignKey('product_category.product_category_id')))

        self.product_category_table = Table('product_category', self.metadata,
                Column('product_category_id', types.Integer, primary_key=True),
                Column('product_category_name', types.String))

        self.facts_table = Table('facts_table', self.metadata,
                Column('store_id', types.Integer,
                    ForeignKey('store.store_id')),
                Column('date', types.Date),
                Column('product_id', types.Integer,
                    ForeignKey('product.product_id')),
                Column('price', types.Float),
                Column('qty', types.Integer))

        agg_name = ('agg_time_month_product_product_store_store'
                    '_Unit Price_Quantity')
        self.agg_by_month_table = Table(agg_name,
                self.metadata,
                Column('store_store', types.Integer,
                    ForeignKey('store.store_id')),
                Column('time_month', types.Date),
                Column('product_product', types.Integer,
                    ForeignKey('product.product_id')),
                Column('Unit Price', types.Float),
                Column('Quantity', types.Integer),
                Column('fact_count', types.Integer))
        agg_name = ('agg_time_year_store_country_product_product'
                    '_Unit Price_Quantity')

        self.agg_by_year_country_table = Table(agg_name,
                self.metadata,
                Column('store_country', types.Integer,
                    ForeignKey('country.country_id')),
                Column('time_year', types.Date),
                Column('product_product', types.Integer,
                    ForeignKey('product.product_id')),
                Column('Unit Price', types.Float),
                Column('Quantity', types.Integer),
                Column('fact_count', types.Integer))

        self.metadata.create_all()

        self.store_dim = Dimension('store', [
            Hierarchy('default', [
                Level('region', self.region_table.c.region_id,
                    self.region_table.c.region_name),
                Level('country', self.country_table.c.country_id,
                    self.country_table.c.country_name),
                Level('store', self.store_table.c.store_id,
                    self.store_table.c.store_name)])])

        self.product_dim = Dimension('product', [
            Hierarchy('default', [
                Level('category',
                    self.product_category_table.c.product_category_id,
                    self.product_category_table.c
                    .product_category_name),
                Level('product', self.product_table.c.product_id,
                    self.product_table.c.product_name)])])

        self.time_dim = TimeDimension('time', self.facts_table.c.date,
                ['year', 'month', 'day'])

        unit_price = Measure('Unit Price', self.facts_table.c.price,
                aggregates.avg)
        quantity = Measure('Quantity', self.facts_table.c.qty, aggregates.sum)
        price = ((unit_price.aggregate_with(None) *
                quantity.aggregate_with(None))
                .aggregate_with(aggregates.sum).label('Price'))

        self.cube = Cube(self.metadata, self.facts_table, [self.store_dim,
            self.product_dim, self.time_dim], [unit_price, quantity, price],
            fact_count_column=self.facts_table.c.qty)

        self.region_table.insert({'region_id': 1, 'region_name':
            'Europe'}).execute()

        self.country_table.insert({'region_id': 1, 'country_name':
            'France', 'country_id': 1}).execute()

        self.country_table.insert({'region_id': 1, 'country_name':
            'Germany', 'country_id': 2}).execute()

        self.region_table.insert({'region_id': 2, 'region_name':
            'America'}).execute()

        self.country_table.insert({'region_id': 2, 'country_name':
            'USA', 'country_id': 3}).execute()

        self.country_table.insert({'region_id': 2, 'country_name':
            'Canada', 'country_id': 4}).execute()

        self.store_table.insert({
            'store_id': 1,
            'store_name': 'ACME.fr',
            'country_id': 1}).execute()

        self.store_table.insert({
            'store_id': 2,
            'store_name': 'ACME.de',
            'country_id': 2}).execute()

        self.store_table.insert({
            'store_id': 3,
            'store_name': 'Food Mart.fr',
            'country_id': 1}).execute()

        self.store_table.insert({
            'store_id': 4,
            'store_name': 'Food Mart.de',
            'country_id': 2}).execute()

        self.store_table.insert({
            'store_id': 5,
            'store_name': 'ACME.us',
            'country_id': 3}).execute()

        self.store_table.insert({
            'store_id': 6,
            'store_name': 'Food Mart.us',
            'country_id': 3}).execute()

        self.store_table.insert({
            'store_id': 7,
            'store_name': 'ACME.ca',
            'country_id': 4}).execute()

        self.store_table.insert({
            'store_id': 8,
            'store_name': 'Food Mart.ca',
            'country_id': 4}).execute()

        self.product_category_table.insert({
            'product_category_id': 1,
            'product_category_name': 'Vegetables'}).execute()

        self.product_category_table.insert({
            'product_category_id': 2,
            'product_category_name': 'Shoes'}).execute()

        self.product_table.insert({
            'product_id': 1,
            'product_category_id': 1,
            'product_name': 'Carrots'}).execute()
        self.product_table.insert({
            'product_id': 2,
            'product_category_id': 1,
            'product_name': 'Bananas'}).execute()
        self.product_table.insert({
            'product_id': 3,
            'product_category_id': 2,
            'product_name': 'Red shoes'}).execute()
        self.product_table.insert({
            'product_id': 4,
            'product_category_id': 2,
            'product_name': 'Green shoes'}).execute()
        self.product_table.insert({
            'product_id': 5,
            'product_category_id': 2,
            'product_name': 'Blue shoes'}).execute()

        years = cycle([2009, 2010, 2011])
        months = cycle([1, 5, 8, 9, 11])
        days = cycle([3, 12, 21, 29])
        prices = cycle([100, 500, 1000])
        quantities = cycle([1, 5, 1, 2, 3, 20, 8])
        # izip is itertools.izip (Python 2); on Python 3 use the built-in zip
        values = (date(*value) for value in izip(years, months, days))
        for value in self.product_table.select().with_only_columns([
            self.product_table.c.product_id,
            self.store_table.c.store_id]).execute():
            self.facts_table.insert({
                'product_id': value.product_id,
                'store_id': value.store_id,
                'date': next(values),
                'qty': next(quantities),
                'price': next(prices)}).execute()
        results = (self.facts_table.select().with_only_columns([
                (func.sum(self.facts_table.c.price *
                    self.facts_table.c.qty) /
                    func.sum(self.facts_table.c.qty))
                    .label('Unit Price'),
                func.sum(self.facts_table.c.qty).label('Quantity'),
                func.sum(self.facts_table.c.qty).label('fact_count'),
                self.facts_table.c.product_id.label('product_product'),
                self.facts_table.c.store_id.label('store_store'),
                func.date_trunc('month',
                    self.facts_table.c.date).label('time_month')])
            .group_by(func.date_trunc('month', self.facts_table.c.date),
                self.facts_table.c.product_id,
                self.facts_table.c.store_id)
            .execute())
        for res in results:
            self.agg_by_month_table.insert().execute(dict(res))
        second_agg = (self.facts_table.select().with_only_columns([
            (func.sum(self.facts_table.c.price *
                    self.facts_table.c.qty) /
                    func.sum(self.facts_table.c.qty))
                    .label('Unit Price'),
            func.sum(self.facts_table.c.qty).label('Quantity'),
            func.sum(self.facts_table.c.qty).label('fact_count'),
            self.facts_table.c.product_id.label('product_product'),
            self.store_table.c.country_id.label('store_country'),
            func.date_trunc('year',
                self.facts_table.c.date).label('time_year')])
            .where(self.facts_table.c.store_id == self.store_table.c.store_id)
            .group_by(self.facts_table.c.product_id.label('product_product'),
            self.store_table.c.country_id.label('store_country'),
            func.date_trunc('year',
                self.facts_table.c.date).label('time_year'))
            .execute())
        for res in second_agg:
            self.agg_by_year_country_table.insert().execute(dict(res))

    def tearDown(self):
        self.metadata.drop_all()
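
A minimal standalone sketch of the pre-aggregation pattern used in setUp above, assuming a PostgreSQL engine (date_trunc is PostgreSQL-specific) and a facts table shaped like the one defined there; the DSN is hypothetical:

from sqlalchemy import MetaData, Table, create_engine, func, select

engine = create_engine('postgresql:///example')
facts = Table('facts_table', MetaData(bind=engine), autoload=True)

# Roll the fact rows up to one row per month, mirroring the agg table columns
monthly = (select([
        func.date_trunc('month', facts.c.date).label('time_month'),
        (func.sum(facts.c.price * facts.c.qty) /
            func.sum(facts.c.qty)).label('Unit Price'),
        func.sum(facts.c.qty).label('Quantity')])
    .group_by(func.date_trunc('month', facts.c.date)))

for row in engine.execute(monthly):
    print(row)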
Пример #49
0
    def beforOutLog(self, logLevel, farmNo, componentNo, componentName,
                    instanceNo, instanceName, code, additions):

        farmT = Table("FARM", METADATA, autoload=True)
        farm = self.selectOne(farmT.select(farmT.c.FARM_NO == farmNo))
        farmName = None
        if farm is not None:
            farmName = farm["FARM_NAME"]

        userT = Table("USER", METADATA, autoload=True)
        user = self.selectOne(userT.select(userT.c.USER_NO == self.userNo))
        userName = None
        if user is not None:
            userName = user["USERNAME"]

        instanceType = None
        platformNo = None
        if instanceNo is not None:
            instanceT = Table("INSTANCE", METADATA, autoload=True)
            instance = self.selectOne(
                instanceT.select(instanceT.c.INSTANCE_NO == instanceNo))
            if instance is not None:
                platformNo = instance["PLATFORM_NO"]

            isSelect = True
            if isSelect:
                awsinstanceT = Table("AWS_INSTANCE", METADATA, autoload=True)
                awsinstance = self.selectOne(
                    awsinstanceT.select(
                        awsinstanceT.c.INSTANCE_NO == instanceNo))
                if awsinstance is not None:
                    instanceType = awsinstance["INSTANCE_TYPE"]
                    isSelect = False

            if isSelect:
                csinstanceT = Table("CLOUDSTACK_INSTANCE",
                                    METADATA,
                                    autoload=True)
                csinstance = self.selectOne(
                    csinstanceT.select(
                        csinstanceT.c.INSTANCE_NO == instanceNo))
                if csinstance is not None:
                    instanceType = csinstance["INSTANCE_TYPE"]
                    isSelect = False

            if isSelect:
                vminstanceT = Table("VMWARE_INSTANCE", METADATA, autoload=True)
                vminstance = self.selectOne(
                    vminstanceT.select(
                        vminstanceT.c.INSTANCE_NO == instanceNo))
                if vminstance is not None:
                    instanceType = vminstance["INSTANCE_TYPE"]
                    isSelect = False

            if isSelect:
                nifinstanceT = Table("NIFTY_INSTANCE", METADATA, autoload=True)
                nifinstance = self.selectOne(
                    nifinstanceT.select(
                        nifinstanceT.c.INSTANCE_NO == instanceNo))
                if nifinstance is not None:
                    instanceType = nifinstance["INSTANCE_TYPE"]
                    isSelect = False

        self.outLog(logLevel, self.userNo, userName, farmNo, farmName,
                    componentNo, componentName, instanceNo, instanceName, code,
                    instanceType, platformNo, additions)
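
The four isSelect blocks above implement a first-match fallback across the per-platform instance tables. A hedged refactoring sketch of the same lookup chain, reusing selectOne and METADATA from this snippet (the helper name itself is hypothetical):

    def _findInstanceType(self, instanceNo):
        # Probe each platform table in order and stop at the first match
        for tableName in ("AWS_INSTANCE", "CLOUDSTACK_INSTANCE",
                          "VMWARE_INSTANCE", "NIFTY_INSTANCE"):
            t = Table(tableName, METADATA, autoload=True)
            row = self.selectOne(t.select(t.c.INSTANCE_NO == instanceNo))
            if row is not None:
                return row["INSTANCE_TYPE"]
        return None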
Пример #50
0
class AlchemySqlStore(Model):
    """
    datastore using SQLAlchemy meta-SQL Python package
   
    create table vesper_stmts (
      subject UNIQUE
      predicate UNIQUE
      object UNIQUE
      objecttype UNIQUE
      context UNIQUE
    )
    """

    def __init__(self, source=None, defaultStatements=None, autocommit=False, **kw):
        if source is None:
            # this seems like a reasonable default thing to do
            source = "sqlite://"
            log.debug("SQLite in-memory database being opened")

        # We take source to be a SQLAlchemy-style dbapi spec:
        # dialect+driver://username:password@host:port/database
        # connection is made JIT on first connect()
        log.debug("sqla engine being created with:", source)

        self.engine = create_engine(source)
        self.md = sqlalchemy.schema.MetaData()
        # utterly insufficient datatypes. just for first pass
        # technically the keep_existing bool is redundant as create_all() default is "check first"
        self.vesper_stmts = Table(
            "vesper_stmts",
            self.md,
            Column("subject", String(255)),  # primary_key = True),
            Column("predicate", String(255)),  # primary_key = True),
            Column("object", String(255)),  # primary_key = True),
            Column("objecttype", String(8)),
            Column("context", String(8)),
            UniqueConstraint("subject", "predicate", "object", "objecttype", "context"),
            keep_existing=True,
        )
        Index("idx_vs", self.vesper_stmts.c.subject, self.vesper_stmts.c.predicate, self.vesper_stmts.c.object)
        self.md.create_all(self.engine)

        # Set up our state machine and grab a connection from the sqlalchemy pool
        self.conn = self.engine.connect()
        self.trans = None
        self.autocommit = autocommit

    def _checkConnection(self):
        if self.conn is None:
            self.conn = self.engine.connect()
        if self.autocommit is False:
            if not self.conn.in_transaction():
                self.trans = self.conn.begin()
        self.conn.execution_options(autocommit=self.autocommit)

    def getStatements(
        self, subject=None, predicate=None, object=None, objecttype=None, context=None, asQuad=True, hints=None
    ):
        """ 
        Return all the statements in the model that match the given arguments.
        Any combination of subject and predicate can be None, and any None slot is
        treated as a wildcard that matches any value in the model.
        """
        fs = subject is not None
        fp = predicate is not None
        fo = object is not None
        fot = objecttype is not None
        fc = context is not None
        hints = hints or {}
        limit = hints.get("limit")
        offset = hints.get("offset")

        log.debug("s p o ot c quad lim offset: ", fs, fp, fo, fot, fc, asQuad, limit, offset)

        if fo:
            if isinstance(object, ResourceUri):
                object = object.uri
                fot = True
                objecttype = OBJECT_TYPE_RESOURCE
            elif not fot:
                objecttype = OBJECT_TYPE_LITERAL

        if not asQuad and not fc:
            query = select(
                [
                    self.vesper_stmts.c.subject,
                    self.vesper_stmts.c.predicate,
                    self.vesper_stmts.c.object,
                    self.vesper_stmts.c.objecttype,
                    func.min(self.vesper_stmts.c.context).label("context"),
                ]
            )
        else:  # asQuad is True
            query = self.vesper_stmts.select()
        if fs:
            query = query.where(self.vesper_stmts.c.subject == subject)
        if fp:
            query = query.where(self.vesper_stmts.c.predicate == predicate)
        if fo:
            query = query.where(self.vesper_stmts.c.object == object)
        if fot:
            query = query.where(self.vesper_stmts.c.objecttype == objecttype)
        if fc:
            query = query.where(self.vesper_stmts.c.context == context)
        if not asQuad and not fc:
            query = query.group_by(
                self.vesper_stmts.c.subject,
                self.vesper_stmts.c.predicate,
                self.vesper_stmts.c.object,
                self.vesper_stmts.c.objecttype,
            )
        if limit is not None:
            query = query.limit(limit)
        if offset is not None:
            query = query.offset(offset)

        stmts = []
        self._checkConnection()
        result = self.conn.execute(query)
        for r in result:
            stmts.append(Statement(r["subject"], r["predicate"], r["object"], r["objecttype"], r["context"]))

        log.debug("stmts returned: ", len(stmts), stmts)
        return stmts

    def addStatement(self, stmt):
        """add the specified statement to the model"""
        log.debug("addStatement called with ", stmt)

        self._checkConnection()
        result = self.conn.execute(
            self.vesper_stmts.insert(prefixes=["OR IGNORE"]),
            {"subject": stmt[0], "predicate": stmt[1], "object": stmt[2], "objecttype": stmt[3], "context": stmt[4]},
        )
        return result.rowcount

    def addStatements(self, stmts):
        """adds multiple statements to the model"""
        log.debug("addStatement called with ", stmts)

        self._checkConnection()
        result = self.conn.execute(
            self.vesper_stmts.insert(prefixes=["OR IGNORE"]),
            [
                {"subject": stmt[0], "predicate": stmt[1], "object": stmt[2], "objecttype": stmt[3], "context": stmt[4]}
                for stmt in stmts
            ],
        )
        return result.rowcount

    def removeStatement(self, stmt):
        """removes the statement from the model"""
        log.debug("removeStatement called with: ", stmt)

        rmv = self.vesper_stmts.delete().where(
            (self.vesper_stmts.c.subject == stmt[0])
            & (self.vesper_stmts.c.predicate == stmt[1])
            & (self.vesper_stmts.c.object == stmt[2])
            & (self.vesper_stmts.c.objecttype == stmt[3])
            & (self.vesper_stmts.c.context == stmt[4])
        )
        self._checkConnection()
        result = self.conn.execute(rmv)
        return result.rowcount

    def removeStatements(self, stmts):
        """removes multiple statements from the model"""
        log.debug("removeStatements called with: ", stmts)

        wc = [
            (self.vesper_stmts.c.subject == stmt[0])
            & (self.vesper_stmts.c.predicate == stmt[1])
            & (self.vesper_stmts.c.object == stmt[2])
            & (self.vesper_stmts.c.objecttype == stmt[3])
            & (self.vesper_stmts.c.context == stmt[4])
            for stmt in stmts
        ]
        # no protection for singleton stmt here!
        rmv = self.vesper_stmts.delete().where(or_(*wc))
        self._checkConnection()
        result = self.conn.execute(rmv)
        return result.rowcount

    def commit(self, **kw):
        if self.conn is not None:
            if self.conn.in_transaction():
                self.trans.commit()

    def rollback(self):
        if self.conn is not None:
            if self.conn.in_transaction():
                self.trans.rollback()

    def close(self):
        log.debug("closing!")
        if self.conn is not None:
            self.conn.close()
            self.conn = None
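
A hedged usage sketch for AlchemySqlStore; statements are five-item sequences in the (subject, predicate, object, objecttype, context) order that addStatement above indexes into:

store = AlchemySqlStore(source="sqlite://", autocommit=True)
store.addStatement(("s1", "p1", "o1", "L", "c1"))
for stmt in store.getStatements(subject="s1"):
    print(stmt)
store.close()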
Пример #51
0
class Table(object):
    def __init__(self, db, schema, table, columns=None):
        self.db = db
        self.schema = schema
        self.name = table
        self.engine = create_engine(db.url)
        self.metadata = MetaData(schema=schema)
        self.metadata.bind = self.engine
        # http://docs.sqlalchemy.org/en/rel_1_0/core/metadata.html
        # if provided columns (SQLAlchemy columns), create the table
        if table:
            if columns:
                self.table = SQLATable(
                    table, self.metadata, schema=self.schema, *columns
                )
                self.table.create()
            # otherwise just load from db
            else:
                self.table = SQLATable(
                    table, self.metadata, schema=self.schema, autoload=True
                )
            self.indexes = dict((i.name, i) for i in self.table.indexes)
            self._is_dropped = False
        else:
            self._is_dropped = True
            self.table = None

    @property
    def _normalized_columns(self):
        return list(map(normalize_column_name, self.columns))

    @property
    def columns(self):
        """Return list of all columns in table
        """
        return list(self.table.columns.keys())

    @property
    def sqla_columns(self):
        """Return all columns in table as sqlalchemy column types
        """
        return self.table.columns

    @property
    def column_types(self):
        """Return a dict mapping column name to type for all columns in table
        """
        column_types = {}
        for c in self.sqla_columns:
            column_types[c.name] = c.type
        return column_types

    @property
    def primary_key(self):
        """Return a list of columns making up the primary key constraint
        """
        return [c.name for c in self.table.primary_key]

    @property
    def op(self):
        ctx = MigrationContext.configure(self.engine.connect())
        return Operations(ctx)

    def _valid_table_name(self, table_name):
        """Check if the table name is obviously invalid.
        """
        if table_name is None or not len(table_name.strip()):
            raise ValueError("Invalid table name: %r" % table_name)
        return table_name.strip()

    def _update_table(self, table_name):
        self.metadata = MetaData(schema=self.schema)
        self.metadata.bind = self.engine
        return SQLATable(table_name, self.metadata, schema=self.schema)

    def add_primary_key(self, column="id"):
        """Add primary key constraint to specified column
        """
        if not self.primary_key:
            sql = """ALTER TABLE {s}.{t}
                     ADD PRIMARY KEY ({c})
                  """.format(
                s=self.schema, t=self.name, c=column
            )
            self.db.execute(sql)

    def drop(self):
        """Drop the table from the database
        """
        if self._is_dropped is False:
            self.table.drop(self.engine)
        self._is_dropped = True

    def _check_dropped(self):
        if self._is_dropped:
            raise DatasetException(
                "the table has been dropped. this object should not be used again."
            )

    def _args_to_clause(self, args):
        clauses = []
        for k, v in args.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.table.c[k].in_(v))
            else:
                clauses.append(self.table.c[k] == v)
        return and_(*clauses)

    def create_column(self, name, type):
        """
        Explicitly create a new column ``name`` of a specified type.
        ``type`` must be a `SQLAlchemy column type <http://docs.sqlalchemy.org/en/rel_0_8/core/types.html>`_.
        ::

            table.create_column('created_at', sqlalchemy.DateTime)
        """
        self._check_dropped()
        if normalize_column_name(name) not in self._normalized_columns:
            self.op.add_column(self.table.name, Column(name, type), self.table.schema)
            self.table = self._update_table(self.table.name)

    def drop_column(self, name):
        """
        Drop the column ``name``
        ::

            table.drop_column('created_at')
        """
        self._check_dropped()
        if name in list(self.table.columns.keys()):
            self.op.drop_column(self.table.name, name, schema=self.schema)
            self.table = self._update_table(self.table.name)

    def create_index(self, columns, name=None, index_type="btree"):
        """
        Create an index to speed up queries on a table.
        If no ``name`` is given a random name is created.
        ::
            table.create_index(['name', 'country'])
        """
        self._check_dropped()
        if not name:
            sig = "||".join(columns + [index_type])
            # This is a work-around for a bug in <=0.6.1 which would create
            # indexes based on hash() rather than a proper hash.
            key = abs(hash(sig))
            name = "ix_%s_%s" % (self.table.name, key)
            if name in self.indexes:
                return self.indexes[name]
            key = sha1(sig.encode("utf-8")).hexdigest()[:16]
            name = "ix_%s_%s" % (self.table.name, key)
        if name in self.indexes:
            return self.indexes[name]
        # self.db._acquire()
        columns = [self.table.c[col] for col in columns]
        idx = Index(name, *columns, postgresql_using=index_type)
        idx.create(self.engine)
        # finally:
        #    self.db._release()
        self.indexes[name] = idx
        return idx

    def create_index_geom(self, column="geom"):
        """Shortcut to create index on geometry
        """
        self.create_index([column], index_type="gist")

    def distinct(self, *columns, **_filter):
        """
        Returns all rows of a table, but removes rows with duplicate values in ``columns``.
        Internally this creates a `DISTINCT statement <http://www.w3schools.com/sql/sql_distinct.asp>`_.
        ::

            # returns only one row per year, ignoring the rest
            table.distinct('year')
            # works with multiple columns, too
            table.distinct('year', 'country')
            # you can also combine this with a filter
            table.distinct('year', country='China')
        """
        self._check_dropped()
        qargs = []
        try:
            columns = [self.table.c[c] for c in columns]
            for col, val in _filter.items():
                qargs.append(self.table.c[col] == val)
        except KeyError:
            return []

        q = expression.select(
            columns,
            distinct=True,
            whereclause=and_(*qargs),
            order_by=[c.asc() for c in columns],
        )
        # if just looking at one column, return a simple list
        if len(columns) == 1:
            return itertools.chain.from_iterable(self.engine.execute(q))
        # otherwise return specified row_type
        else:
            return ResultIter(self.engine.execute(q), row_type=self.db.row_type)

    def insert(self, row):
        """
        Add a row (type: dict) by inserting it into the table.
        Columns must exist.
        ::
            data = dict(title='I am a banana!')
            table.insert(data)
        Returns the inserted row's primary key.
        """
        self._check_dropped()
        res = self.engine.execute(self.table.insert(row))
        if len(res.inserted_primary_key) > 0:
            return res.inserted_primary_key[0]

    def insert_many(self, rows, chunk_size=1000):
        """
        Add many rows at a time, which is significantly faster than adding
        them one by one. By default the rows are processed in chunks of
        1000 per commit, unless you specify a different ``chunk_size``.
        See :py:meth:`insert() <dataset.Table.insert>` for details on
        the other parameters.
        ::
            rows = [dict(name='Dolly')] * 10000
            table.insert_many(rows)
        """

        def _process_chunk(chunk):
            self.table.insert().execute(chunk)

        self._check_dropped()

        chunk = []
        for i, row in enumerate(rows, start=1):
            chunk.append(row)
            if i % chunk_size == 0:
                _process_chunk(chunk)
                chunk = []
        if chunk:
            _process_chunk(chunk)

    def rename(self, name):
        """Rename the table
        """
        sql = """ALTER TABLE {s}.{t} RENAME TO {name}
              """.format(
            s=self.schema, t=self.name, name=name
        )
        self.engine.execute(sql)
        self.table = SQLATable(name, self.metadata, schema=self.schema, autoload=True)

    def find_one(self, **kwargs):
        """
        Works just like :py:meth:`find() <dataset.Table.find>` but returns one result, or None.
        ::
            row = table.find_one(country='United States')
        """
        kwargs["_limit"] = 1
        iterator = self.find(**kwargs)
        try:
            return next(iterator)
        except StopIteration:
            return None

    def _args_to_order_by(self, order_by):
        if order_by[0] == "-":
            return self.table.c[order_by[1:]].desc()
        else:
            return self.table.c[order_by].asc()

    def find(
        self,
        _limit=None,
        _offset=0,
        _step=5000,
        order_by="id",
        return_count=False,
        **_filter
    ):
        """
        Performs a simple search on the table. Simply pass keyword arguments as ``filter``.
        ::
            results = table.find(country='France')
            results = table.find(country='France', year=1980)
        Using ``_limit``::
            # just return the first 10 rows
            results = table.find(country='France', _limit=10)
        You can sort the results by single or multiple columns. Append a minus sign
        to the column name for descending order::
            # sort results by a column 'year'
            results = table.find(country='France', order_by='year')
            # return all rows sorted by multiple columns (by year in descending order)
            results = table.find(order_by=['country', '-year'])
        By default :py:meth:`find() <dataset.Table.find>` will break the
        query into chunks of ``_step`` rows to prevent huge tables
        from being loaded into memory at once.
        For more complex queries, please use :py:meth:`db.query()`
        instead."""
        self._check_dropped()
        if not isinstance(order_by, (list, tuple)):
            order_by = [order_by]
        order_by = [
            o
            for o in order_by
            if (o[1:] if o.startswith("-") else o) in self.table.columns
        ]
        order_by = [self._args_to_order_by(o) for o in order_by]

        args = self._args_to_clause(_filter)

        # query total number of rows first
        count_query = alias(
            self.table.select(whereclause=args, limit=_limit, offset=_offset),
            name="count_query_alias",
        ).count()
        rp = self.engine.execute(count_query)
        total_row_count = rp.fetchone()[0]
        if return_count:
            return total_row_count

        if _limit is None:
            _limit = total_row_count

        if _step is None or _step is False or _step == 0:
            _step = total_row_count

        if total_row_count > _step and not order_by:
            _step = total_row_count
            log.warning(
                "query cannot be broken into smaller sections because it is unordered"
            )

        queries = []

        for i in count():
            qoffset = _offset + (_step * i)
            qlimit = min(_limit - (_step * i), _step)
            if qlimit <= 0:
                break
            queries.append(
                self.table.select(
                    whereclause=args, limit=qlimit, offset=qoffset, order_by=order_by
                )
            )
        return ResultIter(
            (self.engine.execute(q) for q in queries), row_type=self.db.row_type
        )

    def count(self, **_filter):
        """
        Return the count of results for the given filter set
        (same filter options as with ``find()``).
        """
        return self.find(return_count=True, **_filter)

    def __getitem__(self, item):
        """
        This is an alias for distinct which allows the table to be queried
        using square-bracket syntax.
        ::
            # Same as distinct:
            print list(table['year'])
        """
        if not isinstance(item, tuple):
            item = (item,)
        return self.distinct(*item)

    def all(self):
        """
        Returns all rows of the table as simple dictionaries. This is simply a shortcut
        to *find()* called with no arguments.
        ::
            rows = table.all()"""
        return self.find()

    def __iter__(self):
        """
        Allows for iterating over all rows in the table without explicitly
        calling :py:meth:`all() <dataset.Table.all>`.
        ::
            for row in table:
                print(row)
        """
        return self.all()

    def __repr__(self):
        return "<Table(%s)>" % self.table.name
Пример #52
0
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    columns = []

    create = True

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self, name, connection, columns, label=None):

        super(SQLTable, self).__init__()

        self.sa_table = None
        self.sa_metadata = None

        self.name = name
        self.connection = connection

        self.label = label if label else name

        self.columns = columns or []
        for col in columns:
            col.sqltable = self

    def _get_sa_type(self, column):

        if (column.type == "Integer"):
            return Integer
        elif (column.type == "String"):
            #if (column.length is None): column.length = 128
            return Unicode(length=128)
        elif (column.type == "Float"):
            return Float
        elif (column.type == "Boolean"):
            return Boolean
        elif (column.type == "AutoIncrement"):
            return Integer
        elif (column.type == "Date"):
            return Date
        elif (column.type == "Time"):
            return Time
        elif (column.type == "DateTime"):
            return DateTime
        elif (column.type == "Binary"):
            return Binary
        else:
            raise Exception("Invalid data type (%s): %s" %
                            (column, column.type))

    def finalize(self, ctx):

        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info(
                    "SQLTable Totals  ins/upd/sel: %d/%d/%d " %
                    (SQLTable._inserts, SQLTable._updates, SQLTable._selects))

        if (self._inserts + self._selects > 0):
            logger.info(
                "SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                (self.name, self._inserts, self._updates, self._selects))
        if (self._unicode_errors > 0):
            logger.warning(
                "SQLTable %s found %d warnings assigning non-unicode fields to unicode columns"
                % (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)

        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):

        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))

        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        self._selects = 0
        self._inserts = 0
        self._updates = 0
        self._unicode_errors = 0

        # Drop?

        columns_ex = []
        for column in self.columns:

            logger.debug("Adding column to %s: %s" % (self, column))

            column.sqltable = self

            # Check for duplicate names
            if (column.name in columns_ex):
                raise ETLConfigurationException(
                    "Duplicate column name '%s' in %s" % (column.name, self))

            columns_ex.append(column.name)

            # Configure column
            if isinstance(column, SQLColumnFK):
                if column.fk_sqlcolumn.sqltable.sa_table is None:
                    logger.warning(
                        "Column %s foreign key %s table (%s) has not been defined in backend (ignoring).",
                        column, column.fk_sqlcolumn,
                        column.fk_sqlcolumn.sqltable)
                    continue

                self.sa_table.append_column(
                    Column(column.name,
                           self._get_sa_type(column),
                           ForeignKey(
                               column.fk_sqlcolumn.sqltable.sa_table.columns[
                                   column.fk_sqlcolumn.name]),
                           primary_key=column.pk,
                           nullable=column.nullable,
                           autoincrement=(True if column.type
                                          == "AutoIncrement" else False)))
            else:
                self.sa_table.append_column(
                    Column(column.name,
                           self._get_sa_type(column),
                           primary_key=column.pk,
                           nullable=column.nullable,
                           autoincrement=(True if column.type
                                          == "AutoIncrement" else False)))

        # Check schema:

        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # TODO:? Extend?  (unsafe, allow read-only connections and make them default?)
        # TODO:? Delete columns (unsafe, allow read-only connections and make them default?)

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """

        #if (self._pk == False):
        if True:
            pk_cols = []
            for col in self.columns:
                if col.pk:
                    pk_cols.append(col)

            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" %
                                (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None

        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)

        return and_(*clauses)

    def _rowtodict(self, row):

        d = {}
        for column in self.columns:
            #print column
            d[column.name] = getattr(row, column.name)

        return d

    def _find(self, ctx, attribs):

        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxys from SqlAlchemy
            yield self._rowtodict(r)

    def lookup(self, ctx, attribs, find_function=None):

        logger.debug("Lookup on '%s' attribs: %s" % (self, attribs))

        if (len(attribs.keys()) == 0):
            raise Exception(
                "Cannot lookup on table '%s' with no criteria (empty attribute set)"
                % self.name)

        find_function = find_function or self._find
        rows = find_function(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception(
                "Found more than one row when searching for just one in table %s: %s"
                % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" %
                     (self.name, attribs, row))
        return row

    def upsert(self, ctx, data, keys=[]):
        """
        Upsert checks if the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """

        # TODO: Check for AutoIncrement in keys, shall not be used

        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception(
                        "Could not find attribute '%s' in data when storing row data: %s"
                        % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk.name] = data[pk.name]

        # Do lookup
        if len(qfilter) > 0:

            row = self.lookup(ctx, qfilter)

            if (row):
                # Check row is identical
                for c in self.columns:
                    if c.type != "AutoIncrement":
                        v1 = row[c.name]
                        v2 = data[c.name]
                        if c.type == "Date":
                            v1 = row[c.name].strftime('%Y-%m-%d')
                            v2 = data[c.name].strftime('%Y-%m-%d')
                        if (isinstance(v1, str) or isinstance(v2, str)):
                            if (not isinstance(v1, str)): v1 = str(v1)
                            if (not isinstance(v2, str)): v2 = str(v2)
                        if (v1 != v2):
                            if (c.name not in self._lookup_changed_fields):
                                logger.warning(
                                    "%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)"
                                    % (self, c.name, v1, v2))
                                #self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id

    def _prepare_row(self, ctx, data):

        row = {}

        for column in self.columns:
            if column.type != "AutoIncrement":
                try:
                    row[column.name] = data[column.name]
                except KeyError as e:
                    raise Exception(
                        "Missing attribute for column %s in table '%s' while inserting row: %s"
                        % (e, self.name, data))

                # Checks
                if (column.type == "String") and (not isinstance(
                        row[column.name], str)):
                    self._unicode_errors = self._unicode_errors + 1
                    if (ctx.debug):
                        logger.warning(
                            "Unicode column %r received non-unicode string: %r "
                            % (column.name, row[column.name]))

        return row

    def insert(self, ctx, data):

        row = self._prepare_row(ctx, data)

        logger.debug("Inserting in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.insert(row))

        pk = self.pk(ctx)
        if pk:
            row[pk.name] = res.inserted_primary_key[0]

        self._inserts = self._inserts + 1
        SQLTable._inserts = SQLTable._inserts + 1

        return row

    def update(self, ctx, data, keys=[]):

        row = self._prepare_row(ctx, data)

        # Automatically calculate lookup if necessary
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception(
                        "Could not find attribute '%s' in data when storing row data: %s"
                        % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk.name] = data[pk.name]

        logger.debug("Updating in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(
            self.sa_table.update(self._attribsToClause(qfilter), row))

        self._updates = self._updates + 1
        SQLTable._updates = SQLTable._updates + 1

        if pk is not None:
            return row
        else:
            return None
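
A hedged sketch of driving the upsert() flow above from an ETL pipeline; ctx, the connection component and the column objects are assumed to come from the surrounding framework and are not defined here:

table = SQLTable(name="person", connection=db_connection,
                 columns=[id_column, name_column])
table.initialize(ctx)
table.upsert(ctx, {"id": 1, "name": "Alice"}, keys=["id"])   # lookup misses: insert
table.upsert(ctx, {"id": 1, "name": "Alicia"}, keys=["id"])  # lookup hits: update
table.finalize(ctx)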
Пример #53
0
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    name = None
    connection = None
    columns = []

    create = True

    sa_table = None
    sa_metadata = None

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self):
        super(SQLTable, self).__init__()
        self.columns = []

    def _get_sa_type(self, column):


        if (column["type"] == "Integer"):
            return Integer
        elif (column["type"] == "String"):
            if (not "length" in column):
                column["length"] = 128
            return Unicode(length = column["length"])
        elif (column["type"] == "Float"):
            return Float
        elif (column["type"] == "Boolean"):
            return Boolean
        elif (column["type"] == "AutoIncrement"):
            return Integer
        elif (column["type"] == "Date"):
            return Date
        elif (column["type"] == "Time"):
            return Time
        elif (column["type"] == "DateTime"):
            return DateTime
        else:
            raise Exception("Invalid data type: %s" % column["type"])

    def finalize(self, ctx):

        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info("SQLTable Totals  ins/upd/sel: %d/%d/%d " %
                            (SQLTable._inserts, SQLTable._updates, SQLTable._selects))

        if (self._inserts + self._selects > 0):
            logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                            (self.name, self._inserts, self._updates, self._selects))
        if (self._unicode_errors > 0):
            logger.warn("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" %
                        (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)

        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):

        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))


        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        # Drop?

        columns_ex = []
        for column in self.columns:

            logger.debug("Adding column to %s: %s" % (self, column))

            # Check for duplicate names
            if (column["name"] in columns_ex):
                raise Exception("Duplicate column name '%s' in %s" % (column["name"], self))
            columns_ex.append(column["name"])

            # Configure column
            column["pk"] = False if (not "pk" in column) else parsebool(column["pk"])
            if (not "type" in column): column["type"] = "String"
            #if (not "value" in column): column["value"] = None
            self.sa_table.append_column( Column(column["name"],
                                                self._get_sa_type(column),
                                                primary_key = column["pk"],
                                                autoincrement = (True if column["type"] == "AutoIncrement" else False) ))

        # Check schema

        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # Extend?

        # Delete columns?

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """

        if (self._pk is False):
            pk_cols = []
            for col in self.columns:
                if ("pk" in col):
                    if parsebool(col["pk"]):
                        pk_cols.append(col)

            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None

        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)

        return and_(*clauses)

    def _rowtodict(self, row):

        d = {}
        for column in self.columns:
            #print column
            d[column["name"]] = getattr(row, column["name"])

        return d

    def _find(self, ctx, attribs):

        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxys from SqlAlchemy
            yield self._rowtodict(r)


    def lookup(self, ctx, attribs):

        logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs))

        if (len(attribs.keys()) == 0):
            raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name)

        rows = self._find(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row))
        return row

    def upsert(self, ctx, data, keys = []):
        """
        Upsert checks if the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """

        # TODO: Check for AutoIncrement in keys, shall not be used

        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            pk = self.pk(ctx)
            qfilter[pk["name"]] = data[pk["name"]]

        # Do lookup
        if len(qfilter) > 0:

            row = self.lookup(ctx, qfilter)

            if (row):
                # Check row is identical
                for c in self.columns:
                    if c["type"] != "AutoIncrement":
                        v1 = row[c['name']]
                        v2 = data[c['name']]
                        if c["type"] == "Date":
                            v1 = row[c['name']].strftime('%Y-%m-%d')
                            v2 = data[c['name']].strftime('%Y-%m-%d')
                        if (isinstance(v1, basestring) or isinstance(v2, basestring)):
                            if (not isinstance(v1, basestring)): v1 = str(v1)
                            if (not isinstance(v2, basestring)): v2 = str(v2)
                        if (v1 != v2):
                            if (c["name"] not in self._lookup_changed_fields):
                                logger.warn("%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c["name"], v1, v2))
                                #self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id

    def _prepare_row(self, ctx, data):

        row = {}

        for column in self.columns:
            if (column["type"] != "AutoIncrement"):
                try:
                    row[column["name"]] = data[column["name"]]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data))

                # Checks
                if ((column["type"] == "String") and (not isinstance(row[column["name"]], unicode))):
                    self._unicode_errors = self._unicode_errors + 1
                    if (ctx.debug):
                        logger.warn("Unicode column %r received non-unicode string: %r " % (column["name"], row[column["name"]]))

        return row
def test_reflect_select_shared_table(engine):
    one_row = Table('bigquery-public-data.samples.natality',
                    MetaData(bind=engine),
                    autoload=True)
    row = one_row.select().limit(1).execute().first()
    assert len(row) >= 1
    metadata = MetaData()
    # create the table and register it on the metadata object
    # the engine uses the schema types to build the concrete table structure
    testonly_tableobj = Table("testonly", metadata,
                              Column("id", Integer, primary_key=True),
                              Column("name", String(20)),
                              Column('bytes', VARBINARY(20)))

    # color = Table("color", metadata,
    #               Column("id", Integer, primary_key=True),
    #               Column("name", String(20)))
    metadata.create_all(engine)  # create the table schema

    conn = engine.connect()
    try:
        # reflect the table structure from the database via metadata, then insert data
        metadata = MetaData(engine)
        testonly_tableobj_autoload = Table('testonly', metadata, autoload=True)
        conn.execute(testonly_tableobj_autoload.insert(), {
            'id': 2,
            "name": "koka",
            'bytes': b'1234'
        })
        logger.info('insert data successfully')
        sql_str = testonly_tableobj_autoload.select()
        table_data = conn.execute(sql_str)
        for content in table_data.fetchall():
            logger.info('%s', content)
    finally:
        conn.close()
def test_querying_wildcard_tables(engine):
    table = Table('bigquery-public-data.noaa_gsod.gsod*',
                  MetaData(bind=engine),
                  autoload=True)
    rows = table.select().limit(1).execute().first()
    assert len(rows) > 0
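
The gsod* wildcard above scans every matching table. A hedged companion sketch restricting the scan with BigQuery's _TABLE_SUFFIX pseudo-column via raw SQL on the same engine (the suffix value is illustrative):

rows = engine.execute(
    "SELECT COUNT(*) FROM `bigquery-public-data.noaa_gsod.gsod*` "
    "WHERE _TABLE_SUFFIX = '1929'"
).fetchall()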
Пример #57
0
class SQLTable(Component):

    _selects = 0
    _inserts = 0
    _updates = 0
    _finalized = False

    STORE_MODE_LOOKUP = "lookup"
    STORE_MODE_INSERT = "insert"
    STORE_MODE_UPSERT = "upsert"

    _pk = False

    columns = []

    create = True

    _unicode_errors = 0
    _lookup_changed_fields = None

    def __init__(self, name, connection, columns, label=None):

        super(SQLTable, self).__init__()

        self.sa_table = None
        self.sa_metadata = None

        self.name = name
        self.connection = connection

        self.label = label if label else name

        self.columns = columns or []
        for col in columns:
            col.sqltable = self

    def _get_sa_type(self, column):

        if (column.type == "Integer"):
            return Integer
        elif (column.type == "String"):
            #if (column.length is None): column.length = 128
            return Unicode(length=128)
        elif (column.type == "Float"):
            return Float
        elif (column.type == "Boolean"):
            return Boolean
        elif (column.type == "AutoIncrement"):
            return Integer
        elif (column.type == "Date"):
            return Date
        elif (column.type == "Time"):
            return Time
        elif (column.type == "DateTime"):
            return DateTime
        elif (column.type == "Binary"):
            return Binary
        else:
            raise Exception("Invalid data type (%s): %s" % (column, column.type))

    def finalize(self, ctx):

        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info("SQLTable Totals  ins/upd/sel: %d/%d/%d " %
                            (SQLTable._inserts, SQLTable._updates, SQLTable._selects))

        if (self._inserts + self._selects > 0):
            logger.info("SQLTable %-18s ins/upd/sel: %6d/%6d/%-6d " %
                            (self.name, self._inserts, self._updates, self._selects))
        if (self._unicode_errors > 0):
            logger.warning("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" %
                           (self.name, self._unicode_errors))

        ctx.comp.finalize(self.connection)

        super(SQLTable, self).finalize(ctx)

    def initialize(self, ctx):

        super(SQLTable, self).initialize(ctx)

        if self._lookup_changed_fields is None:
            self._lookup_changed_fields = []

        ctx.comp.initialize(self.connection)

        logger.debug("Loading table %s on %s" % (self.name, self))

        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        self._selects = 0
        self._inserts = 0
        self._updates = 0
        self._unicode_errors = 0

        # Drop?

        columns_ex = []
        for column in self.columns:

            logger.debug("Adding column to %s: %s" % (self, column))

            column.sqltable = self

            # Check for duplicate names
            if (column.name in columns_ex):
                raise ETLConfigurationException("Duplicate column name '%s' in %s" % (column.name, self))

            columns_ex.append(column.name)

            # Configure column
            if isinstance(column, SQLColumnFK):
                if column.fk_sqlcolumn.sqltable.sa_table is None:
                    logger.warning("Column %s foreign key %s table (%s) has not been defined in backend (ignoring).", column, column.fk_sqlcolumn, column.fk_sqlcolumn.sqltable)
                    continue

                self.sa_table.append_column(Column(column.name,
                                                   self._get_sa_type(column),
                                                   ForeignKey(column.fk_sqlcolumn.sqltable.sa_table.columns[column.fk_sqlcolumn.name]),
                                                   primary_key=column.pk,
                                                   nullable=column.nullable,
                                                   autoincrement=(True if column.type == "AutoIncrement" else False)))
            else:
                self.sa_table.append_column(Column(column.name,
                                                   self._get_sa_type(column),
                                                   primary_key=column.pk,
                                                   nullable=column.nullable,
                                                   autoincrement=(True if column.type == "AutoIncrement" else False)))

        # Check schema:

        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name)
            self.sa_table.create(self.connection.connection())

        # TODO: Extend the schema of an existing table? (unsafe; consider making read-only connections the default)
        # TODO: Delete removed columns? (unsafe; consider making read-only connections the default)

    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """

        pk_cols = []
        for col in self.columns:
            if col.pk:
                pk_cols.append(col)

        if (len(pk_cols) > 1):
            raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols))
        elif (len(pk_cols) == 1):
            self._pk = pk_cols[0]
        else:
            self._pk = None

        return self._pk

    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)

        return and_(*clauses)
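
    # Sketch of the predicate this helper builds (example values invented):
    # {"code": "ES", "year": [2020, 2021]} becomes
    # and_(table.c.code == "ES", table.c.year.in_([2020, 2021])), i.e. the
    # SQL condition "code = 'ES' AND year IN (2020, 2021)".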

    def _rowtodict(self, row):

        d = {}
        for column in self.columns:
            d[column.name] = getattr(row, column.name)

        return d

    def _find(self, ctx, attribs):

        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1

        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxys from SqlAlchemy
            yield self._rowtodict(r)


    def lookup(self, ctx, attribs, find_function=None):

        logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs))

        if (len(attribs.keys()) == 0):
            raise Exception("Cannot lookup on table '%s' with no criteria (empty attribute set)" % self.name)

        find_function = find_function or self._find
        rows = find_function(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]
        else:
            row = None

        logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row))
        return row
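
    # Hypothetical usage (attribute names invented): lookup returns a plain
    # dict for the single matching row, None when nothing matches, and raises
    # if the criteria match more than one row.
    #
    #   row = table.lookup(ctx, {"code": "ES"})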

    def upsert(self, ctx, data, keys=[]):
        """
        Upsert checks if the row exists and has changed. It does a lookup
        followed by an update or insert as appropriate.
        """

        # TODO: Check that AutoIncrement columns are not used as lookup keys

        # Build the lookup filter from the given keys, or fall back to the primary key
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            pk = self.pk(ctx)
            if pk is None:
                raise Exception("Cannot upsert on table '%s' with no keys and no primary key" % self.name)
            qfilter[pk.name] = data[pk.name]

        # Do lookup
        if len(qfilter) > 0:

            row = self.lookup(ctx, qfilter)

            if (row):
                # Check row is identical
                for c in self.columns:
                    if c.type != "AutoIncrement":
                        v1 = row[c.name]
                        v2 = data[c.name]
                        if c.type == "Date":
                            v1 = row[c.name].strftime('%Y-%m-%d')
                            v2 = data[c.name].strftime('%Y-%m-%d')
                        if (isinstance(v1, str) or isinstance(v2, str)):
                            if (not isinstance(v1, str)): v1 = str(v1)
                            if (not isinstance(v2, str)): v2 = str(v2)
                        if (v1 != v2):
                            if (c.name not in self._lookup_changed_fields):
                                logger.warn("%s updating an entity that exists with different attributes, overwriting (field=%s, existing_value=%s, tried_value=%s)" % (self, c.name, v1, v2))
                                #self._lookup_changed_fields.append(c["name"])

                # Update the row
                row = self.update(ctx, data, keys)
                return row

        row_with_id = self.insert(ctx, data)
        return row_with_id
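
    # Minimal usage sketch for upsert (table and field names are assumptions,
    # not from the original project): upserting by a natural key lets a load
    # be re-run safely, updating the existing row instead of duplicating it.
    #
    #   country_table.upsert(ctx, {"iso": "ES", "name": "Spain"}, keys=["iso"])
    #
    # When no keys are given, the primary key from pk(ctx) drives the lookup.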

    def _prepare_row(self, ctx, data):

        row = {}

        for column in self.columns:
            if column.type != "AutoIncrement":
                try:
                    row[column.name] = data[column.name]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data))

                # Checks
                if (column.type == "String") and (not isinstance(row[column.name], str)):
                    self._unicode_errors = self._unicode_errors + 1
                    if (ctx.debug):
                        logger.warn("Unicode column %r received non-unicode string: %r " % (column.name, row[column.name]))

        return row
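
    # Note: AutoIncrement columns are deliberately skipped above, since their
    # values are generated by the database on insert.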

    def insert(self, ctx, data):

        row = self._prepare_row(ctx, data)

        logger.debug("Inserting in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.insert(row))

        pk = self.pk(ctx)
        if pk:
            row[pk.name] = res.inserted_primary_key[0]

        self._inserts = self._inserts + 1
        SQLTable._inserts = SQLTable._inserts + 1

        return row
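
    # Note: inserted_primary_key is a standard SQLAlchemy result attribute for
    # single-row inserts; it is used above so callers get the generated
    # AutoIncrement value back in the returned row dict.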

    def update(self, ctx, data, keys=[]):

        row = self._prepare_row(ctx, data)

        # Build the lookup filter from the given keys, or fall back to the
        # primary key; pk is initialized here so the return logic below does
        # not hit a NameError when explicit keys are used.
        pk = None
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        else:
            pk = self.pk(ctx)
            if pk is None:
                raise Exception("Cannot update on table '%s' with no keys and no primary key" % self.name)
            qfilter[pk.name] = data[pk.name]

        logger.debug("Updating in table '%s' row: %s" % (self.name, row))
        res = self.connection.connection().execute(self.sa_table.update(self._attribsToClause(qfilter), row))

        self._updates = self._updates + 1
        SQLTable._updates = SQLTable._updates + 1

        if pk is not None:
            return row
        else:
            return None
Example #58
class SQLTable(Component):
    
    _selects = 0
    _inserts = 0
    _finalized = False
    
    def __init__(self):

        super(SQLTable, self).__init__()

        self._pk = False

        self.name = None
        self.connection = None
        self.columns = [ ]
        
        self.create = True
        
        self.sa_table = None
        self.sa_metadata = None
        
        self._selects = 0
        self._inserts = 0
        self._unicode_errors = 0
    
    def _get_sa_type(self, column):
        
        if (column["type"] == "Integer"):
            return Integer
        elif (column["type"] == "String"):
            if (not "length" in column): column["length"] = 128
            return Unicode(length = column["length"])
        elif (column["type"] == "Float"):
            return Float    
        elif (column["type"] == "Boolean"):
            return Boolean
        elif (column["type"] == "AutoIncrement"):
            return Integer
        else:
            raise Exception("Invalid data type: %s" % column["type"])
    
    def finalize(self, ctx):
        
        if (not SQLTable._finalized):
            SQLTable._finalized = True
            if (SQLTable._inserts + SQLTable._selects > 0):
                logger.info("SQLTable Totals  inserts/selects: %d/%d " % 
                            (SQLTable._inserts, SQLTable._selects))
        
        if (self._inserts + self._selects > 0):
            logger.info("SQLTable %-18s inserts/selects: %6d/%-6d " % 
                            (self.name, self._inserts, self._selects))
        if (self._unicode_errors > 0):
            logger.warn("SQLTable %s found %d warnings assigning non-unicode fields to unicode columns" % 
                        (self.name, self._unicode_errors))
        
        ctx.comp.finalize(self.connection)     
        
        super(SQLTable, self).finalize(ctx)
    
    def initialize(self, ctx):
        
        super(SQLTable, self).initialize(ctx)
        
        ctx.comp.initialize(self.connection) 
        
        logger.debug("Loading table %s on %s" % (self.name, self))
        
        self.sa_metadata = MetaData()
        self.sa_table = Table(self.name, self.sa_metadata)

        # Drop?

        columns_ex = []
        for column in self.columns:
            
            # Check for duplicate names
            if (column["name"] in columns_ex):
                raise Exception("Duplicate column name %s in %s" % (column["name"], self))
            columns_ex.append(column["name"])
                
            # Configure column            
            column["pk"] = False if (not "pk" in column) else parsebool(column["pk"])
            if (not "type" in column): column["type"] = "String"
            #if (not "value" in column): column["value"] = None
            logger.debug("Adding column %s" % column)
            self.sa_table.append_column( Column(column["name"], 
                                                self._get_sa_type(column), 
                                                primary_key = column["pk"], 
                                                autoincrement = (True if column["type"] == "AutoIncrement" else False) ))
        
        # Check schema
        
        # Create if doesn't exist
        if (not self.connection.engine().has_table(self.name)):
            logger.info("Creating table %s" % self.name) 
            self.sa_table.create(self.connection.connection())
            
        # Extend?
        
        # Delete columns?
            
    def pk(self, ctx):
        """
        Returns the primary key column definition, or None if none defined.
        """
        
        if (self._pk == False):
            pk_cols = []
            for col in self.columns:
                if ("pk" in col):
                    if parsebool(col["pk"]):
                        pk_cols.append(col)
                        
            if (len(pk_cols) > 1):
                raise Exception("Table %s has multiple primary keys: %s" % (self.name, pk_cols))
            elif (len(pk_cols) == 1):
                self._pk = pk_cols[0]
            else:
                self._pk = None
                
        return self._pk
            
    def _attribsToClause(self, attribs):
        clauses = []
        for k, v in attribs.items():
            if isinstance(v, (list, tuple)):
                clauses.append(self.sa_table.c[k].in_(v))
            else:
                clauses.append(self.sa_table.c[k] == v)
        
        return and_(*clauses)            
            
    def _rowtodict(self, row):
        
        d = {}
        for column in self.columns:
            d[column["name"]] = getattr(row, column["name"])
    
        return d
            
    def _find(self, ctx, attribs):
        
        self._selects = self._selects + 1
        SQLTable._selects = SQLTable._selects + 1
        
        query = self.sa_table.select(self._attribsToClause(attribs))
        rows = self.connection.connection().execute(query)

        for r in rows:
            # Ensure we return dicts, not RowProxys from SqlAlchemy
            yield self._rowtodict(r)
             
        
    def lookup(self, ctx, attribs):
        
        logger.debug ("Lookup on '%s' attribs: %s" % (self, attribs))
        
        if (len(attribs.keys()) == 0):
            raise Exception("Cannot lookup on table with no criteria (empty attribute set)")
        
        rows = self._find(ctx, attribs)
        rows = list(rows)
        if (len(rows) > 1):
            raise Exception("Found more than one row when searching for just one in table %s: %s" % (self.name, attribs))
        elif (len(rows) == 1):
            row = rows[0]   
        else:
            row = None
        
        logger.debug("Lookup result on %s: %s = %s" % (self.name, attribs, row))
        return row
    
    def upsert(self, ctx, data, keys = []):
        
        # TODO: Check for AutoIncrement in keys, shall not be used
        
        # If keys
        qfilter = {}
        if (len(keys) > 0):
            for key in keys:
                try:
                    qfilter[key] = data[key]
                except KeyError as e:
                    raise Exception("Could not find attribute '%s' in data when storing row data: %s" % (key, data))
        
            row = self.lookup(ctx, qfilter)
            if (row): return row
        
        row_with_id = self.insert(ctx, data)
                        
        return row_with_id
        
    def _prepare_row(self, ctx, data):
        
        row = {}
        
        for column in self.columns:
            if (column["type"] != "AutoIncrement"):
                try:
                    row[column["name"]] = data[column["name"]]
                except KeyError as e:
                    raise Exception("Missing attribute for column %s in table '%s' while inserting row: %s" % (e, self.name, data))
                
                # Checks
                if ((column["type"] == "String") and (not isinstance(row[column["name"]], unicode))):
                    self._unicode_errors = self._unicode_errors + 1 
                    if (ctx.debug):
                        logger.warn("Unicode column %r received non-unicode string: %r " % (column["name"], row[column["name"]]))
                
        return row
Example #59
    def test_column_proxy(self):
        t = Table('t', MetaData(),
                  Column('foo', self._add_override_factory()))
        proxied = t.select().c.foo
        self._assert_add_override(proxied)