Example #1
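The execute method of a Cassandra-to-GCS transfer operator (apparently from an older Apache Airflow provider release): it runs the configured CQL query through CassandraHook, writes the result set to local temporary files, optionally writes a BigQuery schema JSON file, flushes and uploads everything to GCS, then closes the temp files and shuts down the Cassandra cluster connection.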
    def execute(self, context: Dict[str, str]):
        hook = CassandraHook(cassandra_conn_id=self.cassandra_conn_id)

        query_extra = {}
        if self.query_timeout is not NOT_SET:
            query_extra['timeout'] = self.query_timeout

        cursor = hook.get_conn().execute(self.cql, **query_extra)

        files_to_upload = self._write_local_data_files(cursor)

        # If a schema is set, create a BQ schema JSON file.
        if self.schema_filename:
            files_to_upload.update(self._write_local_schema_file(cursor))

        # Flush all files before uploading
        for file_handle in files_to_upload.values():
            file_handle.flush()

        self._upload_to_gcs(files_to_upload)

        # Close all temp file handles.
        for file_handle in files_to_upload.values():
            file_handle.close()

        # Close all sessions and connections associated with this Cassandra cluster
        hook.shutdown_cluster()
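For context, below is a minimal sketch of how such a transfer could be wired into a DAG. It assumes the method above belongs to CassandraToGCSOperator from the apache-airflow-providers-google package; the bucket, file paths, connection IDs and DAG settings are placeholder values, not part of the original example.

    # Minimal usage sketch, assuming the execute() above belongs to
    # airflow.providers.google.cloud.transfers.cassandra_to_gcs.CassandraToGCSOperator.
    # Bucket name, file paths and connection IDs are hypothetical placeholders.
    from datetime import datetime

    from airflow import DAG
    from airflow.providers.google.cloud.transfers.cassandra_to_gcs import CassandraToGCSOperator

    with DAG(
        dag_id="cassandra_to_gcs_example",
        start_date=datetime(2024, 1, 1),
        schedule=None,
        catchup=False,
    ) as dag:
        CassandraToGCSOperator(
            task_id="export_table",
            cql="SELECT * FROM test_keyspace.t",
            bucket="my-example-bucket",
            filename="cassandra/export/data_{}.json",      # {} is filled with the chunk number
            schema_filename="cassandra/export/schema.json",
            cassandra_conn_id="cassandra_default",
            gcp_conn_id="google_cloud_default",
        )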
Example #2
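A unit-test setUp fixture for CassandraHook: it registers two Airflow connections (one with two contact points and a TokenAwarePolicy load-balancing extra, one pointing at a default keyspace), then uses the stock cassandra_default connection to drop and recreate the keyspace s (CQL's SCHEMA keyword is an alias for KEYSPACE) so each test starts from a clean state.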
    def setUp(self):
        db.merge_conn(
            Connection(conn_id='cassandra_test',
                       conn_type='cassandra',
                       host='host-1,host-2',
                       port='9042',
                       schema='test_keyspace',
                       extra='{"load_balancing_policy":"TokenAwarePolicy"}'))
        db.merge_conn(
            Connection(conn_id='cassandra_default_with_schema',
                       conn_type='cassandra',
                       host='cassandra',
                       port='9042',
                       schema='s'))

        hook = CassandraHook("cassandra_default")
        session = hook.get_conn()
        cqls = [
            "DROP SCHEMA IF EXISTS s",
            """
                CREATE SCHEMA s WITH REPLICATION =
                    { 'class' : 'SimpleStrategy', 'replication_factor' : 1 }
            """,
        ]
        for cql in cqls:
            session.execute(cql)

        session.shutdown()
        hook.shutdown_cluster()
Example #3
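A test for CassandraHook.table_exists when the keyspace comes from the session: the cassandra_default_with_schema connection (registered in the setUp above with schema='s') lets the table be created and looked up by its unqualified name.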
    def test_table_exists_with_keyspace_from_session(self):
        hook = CassandraHook("cassandra_default_with_schema")
        session = hook.get_conn()
        cqls = [
            "DROP TABLE IF EXISTS t",
            "CREATE TABLE t (pk1 text PRIMARY KEY)",
        ]
        for cql in cqls:
            session.execute(cql)

        self.assertTrue(hook.table_exists("t"))
        self.assertFalse(hook.table_exists("u"))

        session.shutdown()
        hook.shutdown_cluster()
Example #4
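The companion test for table_exists when the keyspace is written into the CQL identifier itself (s.t) instead of being taken from the session, so the plain cassandra_default connection is sufficient.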
    def test_table_exists_with_keyspace_from_cql(self):
        hook = CassandraHook("cassandra_default")
        session = hook.get_conn()
        cqls = [
            "DROP TABLE IF EXISTS s.t",
            "CREATE TABLE s.t (pk1 text PRIMARY KEY)",
        ]
        for cql in cqls:
            session.execute(cql)

        assert hook.table_exists("s.t")
        assert not hook.table_exists("s.u")

        session.shutdown()
        hook.shutdown_cluster()
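Outside the tests, table_exists is what the Cassandra table sensor polls; a minimal sketch follows, assuming CassandraTableSensor from the apache-airflow-providers-apache-cassandra package and using placeholder task and table names.

    # Minimal sketch, assuming CassandraTableSensor from the
    # apache-airflow-providers-apache-cassandra package; names are placeholders.
    from airflow.providers.apache.cassandra.sensors.table import CassandraTableSensor

    wait_for_table = CassandraTableSensor(
        task_id="wait_for_table",
        table="s.t",                             # keyspace.table, as in the test above
        cassandra_conn_id="cassandra_default",
    )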
Example #5
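A test for CassandraHook.record_exists: it creates a table with a composite primary key, inserts a single row, and checks that the hook finds the matching key combination but not a non-matching one.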
    def test_record_exists_with_keyspace_from_session(self):
        hook = CassandraHook("cassandra_default_with_schema")
        session = hook.get_conn()
        cqls = [
            "DROP TABLE IF EXISTS t",
            "CREATE TABLE t (pk1 text, pk2 text, c text, PRIMARY KEY (pk1, pk2))",
            "INSERT INTO t (pk1, pk2, c) VALUES ('foo', 'bar', 'baz')",
        ]
        for cql in cqls:
            session.execute(cql)

        self.assertTrue(hook.record_exists("t", {"pk1": "foo", "pk2": "bar"}))
        self.assertFalse(hook.record_exists("t", {"pk1": "foo", "pk2": "baz"}))

        session.shutdown()
        hook.shutdown_cluster()
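record_exists is likewise what the Cassandra record sensor polls; a minimal sketch, again assuming the provider's CassandraRecordSensor and placeholder names.

    # Minimal sketch, assuming CassandraRecordSensor from the
    # apache-airflow-providers-apache-cassandra package; names are placeholders.
    from airflow.providers.apache.cassandra.sensors.record import CassandraRecordSensor

    wait_for_row = CassandraRecordSensor(
        task_id="wait_for_row",
        table="s.t",
        keys={"pk1": "foo", "pk2": "bar"},       # primary-key values to match
        cassandra_conn_id="cassandra_default",
    )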
Example #6
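A newer revision of the execute method from Example #1: it uploads the BigQuery schema file first, then streams the data files to GCS one chunk at a time, flushing and closing each local file right after its upload instead of keeping every file open until the end.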
    def execute(self, context: 'Context'):
        hook = CassandraHook(cassandra_conn_id=self.cassandra_conn_id)

        query_extra = {}
        if self.query_timeout is not NOT_SET:
            query_extra['timeout'] = self.query_timeout

        cursor = hook.get_conn().execute(self.cql, **query_extra)

        # If a schema is set, create a BQ schema JSON file.
        if self.schema_filename:
            self.log.info('Writing local schema file')
            schema_file = self._write_local_schema_file(cursor)

            # Flush file before uploading
            schema_file['file_handle'].flush()

            self.log.info('Uploading schema file to GCS.')
            self._upload_to_gcs(schema_file)
            schema_file['file_handle'].close()

        counter = 0
        self.log.info('Writing local data files')
        for file_to_upload in self._write_local_data_files(cursor):
            # Flush file before uploading
            file_to_upload['file_handle'].flush()

            self.log.info('Uploading chunk file #%d to GCS.', counter)
            self._upload_to_gcs(file_to_upload)

            self.log.info('Removing local file')
            file_to_upload['file_handle'].close()
            counter += 1

        # Close all sessions and connections associated with this Cassandra cluster
        hook.shutdown_cluster()