示例#1
0
    def _get_scheduled_jobs(self, dynamodb_connection):  # noqa
        """
        Create the 'ScheduledJobs' table and wrap it in a ScheduledJobs object.

        WARNING -- this method requires cleanup; the caller must remember to
        delete the table once complete.  For example::

            NEW_JOB = {'log_version': 'ad_click', 'log_schema_version': '1'}

            def cool_test_fn(dynamodb_connection):
                tsj = TestScheduledJobs()
                table, scheduled_jobs = tsj._get_scheduled_jobs(dynamodb_connection)
                assert scheduled_jobs.put(**NEW_JOB)
                yield scheduled_jobs
                assert table.delete()  # THIS IS THE KEY CLEANUP!!

        :param dynamodb_connection: connection handed to ``Table.create``.
        :returns: a ``(table, scheduled_jobs)`` tuple, where ``table`` is the
            newly created table and ``scheduled_jobs`` is the ScheduledJobs
            object using that table as its persistence object.
        """
        avro_schema = get_avro_schema('mycroft/avro/scheduled_jobs.json')
        # Each global index is built exactly once (the load_status index used
        # to be constructed twice, with the first instance thrown away).
        index_et_status = GlobalAllIndex(
            ScheduledJobs.INDEX_ET_STATUS,
            parts=[HashKey('et_status')])
        index_load_status = GlobalAllIndex(
            ScheduledJobs.INDEX_LOAD_STATUS,
            parts=[HashKey('load_status')])
        index_log_name_and_log_schema_version = GlobalAllIndex(
            ScheduledJobs.INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION,
            parts=[HashKey('log_name'), RangeKey('log_schema_version')])
        table = Table.create(
            'ScheduledJobs',
            schema=NAME_TO_SCHEMA['scheduled_jobs'],
            connection=dynamodb_connection,
            global_indexes=[index_et_status, index_load_status,
                            index_log_name_and_log_schema_version])
        return table, ScheduledJobs(persistence_object=table, avro_schema_object=avro_schema)
示例#2
0
 def redshift_clusters(self, dynamodb_connection):
     """Yield a RedshiftClusters object pre-loaded with SAMPLE_CLUSTER_ITEMS.

     The backing 'RedshiftClusters' table comes from
     ``make_temp_redshift_clusters_table`` and is only used while that
     context manager is active; the yield happens inside the ``with`` block.
     """
     schema = get_avro_schema('mycroft/avro/redshift_cluster.json')
     with make_temp_redshift_clusters_table(dynamodb_connection, 'RedshiftClusters') as temp_table:
         clusters = RedshiftClusters(persistence_object=temp_table, avro_schema_object=schema)
         # Seed the table; every put must report success before we hand it out.
         for sample_item in SAMPLE_CLUSTER_ITEMS:
             assert clusters.put(**sample_item)
         yield clusters
示例#3
0
def etl_records(dynamodb_connection):
    """Yield an ETLRecords object backed by a freshly created 'ETLRecords' table.

    The table is created with a global index on ``job_id`` / ``data_date``,
    populated with every entry of SAMPLE_RECORD_JOBS, and deleted once the
    generator finishes.  Presumably used as a pytest yield fixture -- the
    decorator is not visible here.

    :param dynamodb_connection: connection handed to ``Table.create``.
    """
    avro_schema = get_avro_schema('mycroft/avro/etl_record.json')
    index_job_id = GlobalAllIndex(
        ETLRecords.INDEX_JOB_ID_AND_DATA_DATE,
        parts=[HashKey('job_id'), RangeKey('data_date')])
    table = Table.create('ETLRecords',
                         schema=NAME_TO_SCHEMA['etl_records'],
                         connection=dynamodb_connection,
                         global_indexes=[index_job_id])
    # From here on the table exists, so it must be removed even if seeding
    # fails or an exception is thrown into the generator at the yield.
    try:
        etl_records = ETLRecords(persistence_object=table,
                                 avro_schema_object=avro_schema)
        for etl_record in SAMPLE_RECORD_JOBS:
            assert etl_records.put(**etl_record)
        yield etl_records
    finally:
        assert table.delete()
示例#4
0
 def etl_records(self, dynamodb_connection):
     """Yield an ETLRecords object backed by a freshly created 'ETLRecords' table.

     The table is created with a global index on ``job_id`` / ``data_date``,
     populated with every entry of SAMPLE_RECORD_JOBS, and deleted once the
     generator finishes.  Presumably used as a pytest yield fixture -- the
     decorator is not visible here.

     :param dynamodb_connection: connection handed to ``Table.create``.
     """
     avro_schema = get_avro_schema('mycroft/avro/etl_record.json')
     index_job_id = GlobalAllIndex(
         ETLRecords.INDEX_JOB_ID_AND_DATA_DATE,
         parts=[HashKey('job_id'), RangeKey('data_date')])
     table = Table.create(
         'ETLRecords',
         schema=NAME_TO_SCHEMA['etl_records'],
         connection=dynamodb_connection,
         global_indexes=[index_job_id])
     # From here on the table exists, so it must be removed even if seeding
     # fails or an exception is thrown into the generator at the yield.
     try:
         etl_records = ETLRecords(persistence_object=table, avro_schema_object=avro_schema)
         for job in SAMPLE_RECORD_JOBS:
             assert etl_records.put(**job)
         yield etl_records
     finally:
         assert table.delete()
示例#5
0
    def _get_redshift_clusters(self, dynamodb_connection):  # noqa
        """
        Build a 'RedshiftClusters' table together with its RedshiftClusters wrapper.

        WARNING -- nothing in this method removes the table again; the caller
        owns the cleanup and must delete the table when done.  Typical usage::

            NEW_JOB = {'redshift_id': 'rs_1', 'host': 'host', 'port': 5439}

            def cool_test_fn(dynamodb_connection):
                trc = TestRedshiftClusters()
                table, redshift_clusters = trc._get_redshift_clusters(dynamodb_connection)
                assert redshift_clusters.put(**NEW_JOB)
                yield redshift_clusters
                assert table.delete()  # THIS IS THE KEY CLEANUP!!

        :param dynamodb_connection: connection handed to ``Table.create``.
        :returns: a ``(table, redshift_clusters)`` tuple.
        """
        # Load the schema first so a schema failure never leaves a table behind.
        schema = get_avro_schema('mycroft/avro/redshift_cluster.json')
        clusters_table = Table.create(
            'RedshiftClusters',
            schema=NAME_TO_SCHEMA['redshift_clusters'],
            connection=dynamodb_connection,
        )
        wrapper = RedshiftClusters(
            persistence_object=clusters_table,
            avro_schema_object=schema,
        )
        return clusters_table, wrapper