    def setUp(self):
        args = {'owner': 'airflow', 'start_date': DEFAULT_DATE}
        dag = DAG('test_dag_id', default_args=args)
        self.dag = dag
        self.sql = 'SELECT 1'
        self.hook = AwsDynamoDBHook(aws_conn_id='aws_default',
                                    region_name='us-east-1')
Example #2
    def execute(self, context):
        table = AwsDynamoDBHook().get_conn().Table(self.dynamodb_table_name)
        scan_kwargs = copy(self.dynamodb_scan_kwargs) if self.dynamodb_scan_kwargs else {}
        err = None
        f = NamedTemporaryFile()
        try:
            f = self._scan_dynamodb_and_upload_to_s3(f, scan_kwargs, table)
        except Exception as e:
            err = e
            raise e
        finally:
            if err is None:
                _upload_file_to_s3(f, self.s3_bucket_name, self.s3_key_prefix)
            f.close()
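The helper `_scan_dynamodb_and_upload_to_s3` is not shown in this example. Its core is presumably the standard boto3 Scan pagination loop sketched below; the function name `scan_table_to_file` and the `serialize` callback are illustrative stand-ins, not Airflow API.

def scan_table_to_file(table, scan_kwargs, temp_file, serialize):
    """Paginate a DynamoDB Scan and append each serialized item to temp_file."""
    while True:
        response = table.scan(**scan_kwargs)
        for item in response['Items']:
            # serialize() is assumed to return bytes suitable for the temp file
            temp_file.write(serialize(item))
        if 'LastEvaluatedKey' not in response:
            return temp_file  # no more pages to scan
        # resume the scan where the previous page stopped
        scan_kwargs['ExclusiveStartKey'] = response['LastEvaluatedKey']

The operator above then uploads the resulting temp file to S3 in the `finally` block via `_upload_file_to_s3`, provided no error was raised during the scan.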
Example #3
    def test_insert_batch_items_dynamodb_table(self):

        hook = AwsDynamoDBHook(aws_conn_id='aws_default',
                               table_name='test_airflow',
                               table_keys=['id'],
                               region_name='us-east-1')

        # in production this table is expected to exist already;
        # here the test creates it itself
        table = hook.get_conn().create_table(
            TableName='test_airflow',
            KeySchema=[
                {'AttributeName': 'id', 'KeyType': 'HASH'},
            ],
            AttributeDefinitions=[
                {'AttributeName': 'id', 'AttributeType': 'S'},
            ],
            ProvisionedThroughput={
                'ReadCapacityUnits': 10,
                'WriteCapacityUnits': 10
            })

        table = hook.get_conn().Table('test_airflow')

        items = [{
            'id': str(uuid.uuid4()),
            'name': 'airflow'
        } for _ in range(10)]

        hook.write_batch_data(items)

        table.meta.client.get_waiter('table_exists').wait(
            TableName='test_airflow')
        self.assertEqual(table.item_count, 10)
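The test above talks to whatever `aws_default` points at, so in practice it presumably runs against a mocked DynamoDB rather than a real account. Below is a minimal sketch of that setup with moto; the class name, the fake credentials, and the `mock_dynamodb2` decorator (moto releases before 5.0) are assumptions, not taken from the example.

import os
import unittest

import boto3
from moto import mock_dynamodb2  # moto >= 5.0 exposes mock_aws instead

# moto intercepts the HTTP calls, but botocore still wants credentials to sign with
os.environ.setdefault('AWS_ACCESS_KEY_ID', 'testing')
os.environ.setdefault('AWS_SECRET_ACCESS_KEY', 'testing')


class TestDynamoDBMocking(unittest.TestCase):

    @mock_dynamodb2  # every boto3 call in the test hits moto's in-memory DynamoDB
    def test_create_table_is_mocked(self):
        client = boto3.client('dynamodb', region_name='us-east-1')
        client.create_table(
            TableName='test_airflow',
            KeySchema=[{'AttributeName': 'id', 'KeyType': 'HASH'}],
            AttributeDefinitions=[{'AttributeName': 'id', 'AttributeType': 'S'}],
            ProvisionedThroughput={'ReadCapacityUnits': 10, 'WriteCapacityUnits': 10},
        )
        self.assertIn('test_airflow', client.list_tables()['TableNames'])

With a decorator like that in place, the hook and table calls in the example above never leave the test process.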
Example #4
    def execute(self, context):
        hive = HiveServer2Hook(hiveserver2_conn_id=self.hiveserver2_conn_id)

        self.log.info('Extracting data from Hive')
        self.log.info(self.sql)

        data = hive.get_pandas_df(self.sql, schema=self.schema)
        dynamodb = AwsDynamoDBHook(aws_conn_id=self.aws_conn_id,
                                   table_name=self.table_name,
                                   table_keys=self.table_keys,
                                   region_name=self.region_name)

        self.log.info('Inserting rows into dynamodb')

        if self.pre_process is None:
            dynamodb.write_batch_data(
                json.loads(data.to_json(orient='records')))
        else:
            dynamodb.write_batch_data(
                self.pre_process(data=data,
                                 args=self.pre_process_args,
                                 kwargs=self.pre_process_kwargs))

        self.log.info('Done.')
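The `pre_process` callable is handed the DataFrame plus the pass-through `args`/`kwargs` and must return items that `write_batch_data` can put into the table. A possible shape for it, assuming the usual need to turn pandas floats into Decimal (boto3 rejects plain float values for DynamoDB attributes); the function name is illustrative:

import json
from decimal import Decimal


def rows_to_dynamodb_items(data, args=None, kwargs=None):
    """Convert a pandas DataFrame into a list of DynamoDB-friendly dicts.

    The signature mirrors how the operator calls
    pre_process(data=..., args=..., kwargs=...).
    """
    # Re-parse the JSON-encoded rows so every float comes back as Decimal,
    # which is what boto3/DynamoDB accepts for numeric attributes.
    return json.loads(data.to_json(orient='records'), parse_float=Decimal)

Passing `pre_process=rows_to_dynamodb_items` when the operator is constructed would then route every extracted row through this conversion before the batch write.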
Example #5
    def test_get_conn_returns_a_boto3_connection(self):
        hook = AwsDynamoDBHook(aws_conn_id='aws_default')
        self.assertIsNotNone(hook.get_conn())
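What the assertion exercises: as the earlier examples show, `get_conn()` hands back a boto3 DynamoDB resource, so the usual resource-level calls are available on it. A small, hedged sketch (table name and key value are illustrative, and real or mocked AWS credentials are assumed):

conn = AwsDynamoDBHook(aws_conn_id='aws_default', region_name='us-east-1').get_conn()

table = conn.Table('test_airflow')                 # lazy handle, no API call yet
response = table.get_item(Key={'id': 'some-id'})   # plain boto3 resource call
item = response.get('Item')                        # None if the key does not exist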