def _get_scheduled_jobs(self, dynamodb_connection):  # noqa
    """
    WARNING -- this method requires cleanup; the user must remember to
    delete the table once complete.  For example:

    >>> NEW_JOB = {'log_version': 'ad_click', 'log_schema_version': '1'}
    >>> def cool_test_fn(dynamodb_connection):
    >>>     tsj = TestScheduledJobs()
    >>>     table, scheduled_jobs = tsj._get_scheduled_jobs(dynamodb_connection)
    >>>     assert scheduled_jobs.put(**NEW_JOB)
    >>>     yield scheduled_jobs
    >>>     assert table.delete()  # THIS IS THE KEY CLEANUP!!

    """
    avro_schema = get_avro_schema('mycroft/avro/scheduled_jobs.json')
    index_load_status = GlobalAllIndex(
        ScheduledJobs.INDEX_LOAD_STATUS,
        parts=[HashKey('load_status')])
    index_et_status = GlobalAllIndex(
        ScheduledJobs.INDEX_ET_STATUS,
        parts=[HashKey('et_status')])
    index_log_name_and_log_schema_version = GlobalAllIndex(
        ScheduledJobs.INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION,
        parts=[HashKey('log_name'), RangeKey('log_schema_version')])
    table = Table.create(
        'ScheduledJobs',
        schema=NAME_TO_SCHEMA['scheduled_jobs'],
        connection=dynamodb_connection,
        global_indexes=[index_et_status, index_load_status,
                        index_log_name_and_log_schema_version])
    return table, ScheduledJobs(persistence_object=table,
                                avro_schema_object=avro_schema)
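# A minimal sketch (not part of the original fixtures) of how the docstring's
# cleanup warning can be honored automatically: wrap _get_scheduled_jobs in a
# pytest yield fixture so the table is deleted even when the test body raises.
# The fixture name `scheduled_jobs_fixture` and the use of pytest here are
# assumptions for illustration; `dynamodb_connection` is the connection fixture
# the methods in this module already expect.  Older pytest versions may need
# `@pytest.yield_fixture` instead of `@pytest.fixture`.
import pytest


@pytest.fixture
def scheduled_jobs_fixture(dynamodb_connection):
    table, scheduled_jobs = \
        TestScheduledJobs()._get_scheduled_jobs(dynamodb_connection)
    try:
        yield scheduled_jobs
    finally:
        assert table.delete()  # the cleanup the docstring above warns about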
def redshift_clusters(self, dynamodb_connection):
    avro_schema = get_avro_schema('mycroft/avro/redshift_cluster.json')
    with make_temp_redshift_clusters_table(dynamodb_connection,
                                           'RedshiftClusters') as table:
        redshift_clusters = RedshiftClusters(
            persistence_object=table,
            avro_schema_object=avro_schema
        )
        for redshift_cluster in SAMPLE_CLUSTER_ITEMS:
            assert redshift_clusters.put(**redshift_cluster)
        yield redshift_clusters
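# The helper make_temp_redshift_clusters_table is not shown in this section.
# A plausible sketch, assuming it mirrors the create/seed/delete pattern used
# by the etl_records fixtures below, is a context manager that builds the
# table and guarantees deletion on exit.  This is illustrative only, not the
# actual mycroft implementation.
from contextlib import contextmanager


@contextmanager
def make_temp_redshift_clusters_table(dynamodb_connection, table_name):
    table = Table.create(
        table_name,
        schema=NAME_TO_SCHEMA['redshift_clusters'],
        connection=dynamodb_connection)
    try:
        yield table
    finally:
        table.delete()  # cleanup runs even if the caller's body raises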
def etl_records(dynamodb_connection):
    avro_schema = get_avro_schema('mycroft/avro/etl_record.json')
    index_job_id = GlobalAllIndex(
        ETLRecords.INDEX_JOB_ID_AND_DATA_DATE,
        parts=[HashKey('job_id'), RangeKey('data_date')])
    table = Table.create(
        'ETLRecords',
        schema=NAME_TO_SCHEMA['etl_records'],
        connection=dynamodb_connection,
        global_indexes=[index_job_id])
    etl_records = ETLRecords(persistence_object=table,
                             avro_schema_object=avro_schema)
    for etl_record in SAMPLE_RECORD_JOBS:
        assert etl_records.put(**etl_record)
    yield etl_records
    assert table.delete()
def etl_records(self, dynamodb_connection):
    avro_schema = get_avro_schema('mycroft/avro/etl_record.json')
    index_job_id = GlobalAllIndex(
        ETLRecords.INDEX_JOB_ID_AND_DATA_DATE,
        parts=[HashKey('job_id'), RangeKey('data_date')])
    table = Table.create(
        'ETLRecords',
        schema=NAME_TO_SCHEMA['etl_records'],
        connection=dynamodb_connection,
        global_indexes=[index_job_id])
    etl_records = ETLRecords(persistence_object=table,
                             avro_schema_object=avro_schema)
    for job in SAMPLE_RECORD_JOBS:
        assert etl_records.put(**job)
    yield etl_records
    assert table.delete()
def _get_redshift_clusters(self, dynamodb_connection):  # noqa
    """
    WARNING -- this method requires cleanup; the user must remember to
    delete the table once complete.  For example:

    >>> NEW_JOB = {'redshift_id': 'rs_1', 'host': 'host', 'port': 5439}
    >>> def cool_test_fn(dynamodb_connection):
    >>>     trc = TestRedshiftClusters()
    >>>     table, redshift_clusters = trc._get_redshift_clusters(dynamodb_connection)
    >>>     assert redshift_clusters.put(**NEW_JOB)
    >>>     yield redshift_clusters
    >>>     assert table.delete()  # THIS IS THE KEY CLEANUP!!

    """
    avro_schema = get_avro_schema('mycroft/avro/redshift_cluster.json')
    table = Table.create(
        'RedshiftClusters',
        schema=NAME_TO_SCHEMA['redshift_clusters'],
        connection=dynamodb_connection)
    return table, RedshiftClusters(persistence_object=table,
                                   avro_schema_object=avro_schema)