Пример #1
0
    def _get_scheduled_jobs(self, dynamodb_connection):  # noqa
        """
        Create the 'ScheduledJobs' table and wrap it in a ScheduledJobs object.

        WARNING -- this method requires cleanup; the user must remember to
        delete the table once complete.  For example:

        >>> NEW_JOB = {'log_version': 'ad_click', 'log_schema_version': '1'}
        >>> def cool_test_fn(dynamodb_connection):
        >>>     tsj = TestScheduledJobs()
        >>>     table, scheduled_jobs = tsj._get_scheduled_jobs(dynamodb_connection)
        >>>     assert scheduled_jobs.put(**NEW_JOB)
        >>>     yield scheduled_jobs
        >>>     assert table.delete()  # THIS IS THE KEY CLEANUP!!

        :param dynamodb_connection: connection passed to ``Table.create``
        :returns: tuple ``(table, scheduled_jobs)`` -- the created table and
            the ScheduledJobs object built on top of it
        """
        avro_schema = get_avro_schema('mycroft/avro/scheduled_jobs.json')
        # Each global index is defined exactly once (the load_status index
        # used to be constructed twice with identical arguments).
        index_load_status = GlobalAllIndex(
            ScheduledJobs.INDEX_LOAD_STATUS,
            parts=[HashKey('load_status')])
        index_et_status = GlobalAllIndex(
            ScheduledJobs.INDEX_ET_STATUS,
            parts=[HashKey('et_status')])
        index_log_name_and_log_schema_version = GlobalAllIndex(
            ScheduledJobs.INDEX_LOG_NAME_AND_LOG_SCHEMA_VERSION,
            parts=[HashKey('log_name'), RangeKey('log_schema_version')])
        table = Table.create(
            'ScheduledJobs',
            schema=NAME_TO_SCHEMA['scheduled_jobs'],
            connection=dynamodb_connection,
            global_indexes=[index_et_status, index_load_status,
                            index_log_name_and_log_schema_version])
        return table, ScheduledJobs(persistence_object=table, avro_schema_object=avro_schema)
Пример #2
0
def createGamesTable(db):
    """Create the "Games" table, falling back to a handle on the existing one.

    The table is keyed on ``GameId`` and carries two global secondary
    indexes (host/opponent id plus ``StatusDate``).

    :param db: connection passed to ``Table.create`` / ``Table``
    :returns: the table object, or ``None`` when neither creating nor
        attaching to the table succeeded
    """
    hostStatusDate = GlobalAllIndex("hostStatusDate",
                                    parts=[HashKey("HostId"),
                                           RangeKey("StatusDate")])
    opponentStatusDate = GlobalAllIndex("opponentStatusDate",
                                        parts=[HashKey("OpponentId"),
                                               RangeKey("StatusDate")])

    # global secondary indexes
    GSI = [hostStatusDate, opponentStatusDate]

    gamesTable = None
    try:
        gamesTable = Table.create("Games",
                                  schema=[HashKey("GameId")],
                                  throughput={
                                      'read': 1,
                                      'write': 1
                                  },
                                  global_indexes=GSI,
                                  connection=db)
    except JSONResponseError:
        # Creation was rejected (presumably the table already exists --
        # TODO confirm); attach to the existing table instead.
        try:
            gamesTable = Table("Games", connection=db)
        except Exception:
            print("Games Table doesn't exist.")

    # Hand the table back so the caller can actually use it.
    return gamesTable
Пример #3
0
 def create(self):
     """Create the table named ``self.table_name`` with its secondary indexes.

     The primary key is (tweet_user_id, tweet_id).  One local index orders a
     user's tweets by ``Tweet.ts_ms``; three global indexes key tweets by
     site, league and team, each with ``tweet_id`` as range key.

     Indexes are passed as lists (not sets) so their order is deterministic.
     """
     local_indexes = [
         AllIndex(self.index_timestamp,
                  parts=[
                      HashKey(Tweet.tweet_user_id),
                      RangeKey(Tweet.ts_ms)
                  ])
     ]
     global_indexes = [
         GlobalAllIndex(index_name,
                        parts=[
                            HashKey(entity_key),
                            RangeKey(Tweet.tweet_id)
                        ],
                        throughput=standard_throughput)
         for index_name, entity_key in (
             (self.index_site, keys.entity_site),
             (self.index_league, keys.entity_league),
             (self.index_team, keys.entity_team),
         )
     ]
     Table.create(self.table_name,
                  schema=[
                      HashKey(Tweet.tweet_user_id),
                      RangeKey(Tweet.tweet_id),
                  ],
                  throughput=standard_throughput,
                  indexes=local_indexes,
                  global_indexes=global_indexes)
Пример #4
0
 def create(self):
     """Create the table named ``self.table_name`` with its secondary indexes.

     The primary key is (entity_profile, ts_add).  Two local indexes re-sort
     a profile's rows by ``ts_delta`` and ``ts_cut``; four global indexes key
     rows by team, league, twitter and site, each ranged on ``ts_add``.
     All range keys are numeric.

     Indexes are passed as lists (not sets) so their order is deterministic.
     """
     local_indexes = [
         AllIndex(index_name,
                  parts=[
                      HashKey(keys.entity_profile),
                      RangeKey(time_key, data_type=NUMBER)
                  ])
         for index_name, time_key in (
             (self.index_delta, time_keys.ts_delta),
             (self.index_cut, time_keys.ts_cut),
         )
     ]
     global_indexes = [
         GlobalAllIndex(index_name,
                        parts=[
                            HashKey(entity_key),
                            RangeKey(time_keys.ts_add, data_type=NUMBER)
                        ],
                        throughput=standard_throughput)
         for index_name, entity_key in (
             (self.index_team, keys.entity_team),
             (self.index_league, keys.entity_league),
             (self.index_twitter, keys.entity_twitter),
             (self.index_site, keys.entity_site),
         )
     ]
     Table.create(self.table_name,
                  schema=[
                      HashKey(keys.entity_profile),
                      RangeKey(time_keys.ts_add, data_type=NUMBER)
                  ],
                  throughput=standard_throughput,
                  indexes=local_indexes,
                  global_indexes=global_indexes)
     print('creating entity history table')
Пример #5
0
class RacingCrewCollection(DBTable):
    """DBTable bound to the 'racing_crew' table, indexed by race and event."""

    table_name = 'racing_crew'
    item_class = Race
    global_indexes = [
        GlobalAllIndex('race-index', parts=[HashKey('race')]),
        GlobalAllIndex('event-index', parts=[HashKey('event')]),
    ]

    def __init__(self, dbconn):
        """Initialise the base table with this collection's fixed settings.

        :param dbconn: connection object forwarded to ``DBTable.__init__``
        """
        owner = RacingCrewCollection
        name = owner.table_name
        schema = DBTable.simple_schema
        indexes = owner.global_indexes
        super(owner, self).__init__(name, schema, indexes, dbconn)
Пример #6
0
def clear_db():
    """Drop and recreate the organization, event and regatta tables.

    Deletes the three module-level tables, waits 60 seconds, recreates each
    with a ``_id`` hash key and one single-attribute global index, rebinds
    the module-level names, then waits another 60 seconds.
    """
    global org_table
    global event_table
    global regatta_table

    def _recreate(config_key, index_name, index_attr):
        # One table: name from config, 1/1 throughput, '_id' hash key and a
        # single global index on ``index_attr``.
        return Table.create(
            application.config[config_key],
            throughput={'read': 1, 'write': 1},
            connection=conn.conn,
            schema=[HashKey('_id')],
            global_indexes=[
                GlobalAllIndex(index_name, parts=[HashKey(index_attr)])
            ])

    # model.OrganizationCollection(conn).delete()
    org_table.delete()
    # model.EventCollection(conn).delete()
    event_table.delete()
    # model.RegattaCollection(conn).delete()
    regatta_table.delete()

    app.logger.info("Sleeping for 60 seconds to allow tables to delete")
    time.sleep(60)

    app.logger.debug("Creating organization table")
    org_table = _recreate('ORG_TABLE', 'name-index', 'name')

    app.logger.debug("Creating events table")
    event_table = _recreate('EVENT_TABLE', 'event-index', 'event')

    app.logger.debug("Creating regatta table")
    regatta_table = _recreate('REGATTA_TABLE', 'name-index', 'name')

    app.logger.info("Sleeping for 60 seconds to allow tables to create")
    time.sleep(60)
 def tableCreateKwargs(self):
     """Return the keyword arguments used to create this table.

     The result has ``schema`` (instance_id / date_hour), ``throughput`` and
     ``global_indexes`` (one date/hour index).  All capacities are read from
     the "dynamodb" section of ``taurus.engine.config``.
     """
     primaryKey = [HashKey("instance_id"), RangeKey("date_hour")]

     tableThroughput = {
         "read": taurus.engine.config.getint(
             "dynamodb", "instance_data_hourly_throughput_read"),
         "write": taurus.engine.config.getint(
             "dynamodb", "instance_data_hourly_throughput_write"),
     }

     indexKey = [HashKey("date"), RangeKey("hour")]
     # The index uses the same config options as the table itself.
     indexThroughput = {
         "read": taurus.engine.config.getint(
             "dynamodb", "instance_data_hourly_throughput_read"),
         "write": taurus.engine.config.getint(
             "dynamodb", "instance_data_hourly_throughput_write"),
     }
     dateHourIndex = GlobalAllIndex(
         "taurus.instance_data_hourly-date_hour_index",
         parts=indexKey,
         throughput=indexThroughput)

     return {
         "schema": primaryKey,
         "throughput": tableThroughput,
         "global_indexes": [dateHourIndex],
     }
Пример #8
0
def createDynamoObject():
    """Create (or attach to) the 'data' table and try to seed one dummy item.

    The table is created in 'us-west-2' with hash key ``id`` and a global
    index on ``name``.  If creation raises ``JSONResponseError`` the table is
    assumed to exist already and a plain handle is used instead.

    :returns: the ``Table`` object for 'data'
    """
    # One connection is shared by the create attempt and the fallback.
    connection = boto.dynamodb2.connect_to_region('us-west-2')
    try:
        users = Table.create(
            'data',
            schema=[HashKey('id')],
            global_indexes=[
                GlobalAllIndex('EverythingIndex', parts=[HashKey('name')])
            ],
            connection=connection)
    except boto.exception.JSONResponseError:
        users = Table('data', connection=connection)
        print("1) Table 'data' already created.")
    # On first run this won't insert data because of the delay to create the
    # table on the AWS server side.
    try:
        users.put_item(
            data={
                'id': '3',
                'type': 'person',
                'name': 'dummy',
                'activities': ['activity one'],
            })
    except Exception:
        # Seeding stays best-effort, but KeyboardInterrupt/SystemExit are no
        # longer swallowed as they were by the bare ``except:``.
        print("2) Dummy Data already added.")
    return users
Пример #9
0
def _extract_index(index_data, global_index=False):
    '''
    Instantiates and returns an AllIndex object given a valid index
    configuration
    '''
    parsed_data = {}
    keys = []

    for key, value in six.iteritems(index_data):
        for item in value:
            for field, data in six.iteritems(item):
                if field == 'hash_key':
                    parsed_data['hash_key'] = data
                elif field == 'hash_key_data_type':
                    parsed_data['hash_key_data_type'] = data
                elif field == 'range_key':
                    parsed_data['range_key'] = data
                elif field == 'range_key_data_type':
                    parsed_data['range_key_data_type'] = data
                elif field == 'name':
                    parsed_data['name'] = data
                elif field == 'read_capacity_units':
                    parsed_data['read_capacity_units'] = data
                elif field == 'write_capacity_units':
                    parsed_data['write_capacity_units'] = data

    if parsed_data['hash_key']:
        keys.append(
            HashKey(
                parsed_data['hash_key'],
                data_type=parsed_data['hash_key_data_type']
            )
        )
    if parsed_data['range_key']:
        keys.append(
            RangeKey(
                parsed_data['range_key'],
                data_type=parsed_data['range_key_data_type']
            )
        )
    if (
            global_index and
            parsed_data['read_capacity_units'] and
            parsed_data['write_capacity_units']):
        parsed_data['throughput'] = {
            'read':     parsed_data['read_capacity_units'],
            'write':    parsed_data['write_capacity_units']
        }
    if parsed_data['name'] and len(keys) > 0:
        if global_index:
            return GlobalAllIndex(
                parsed_data['name'],
                parts=keys,
                throughput=parsed_data['throughput']
            )
        else:
            return AllIndex(
                parsed_data['name'],
                parts=keys
            )
Пример #10
0
def createDynamoObject(name):
    """Create (or attach to) table ``name`` and try to seed one dummy item.

    The table is created in ``AWS_REGION`` with hash key ``id``, throughput
    taken from ``db_read_cap`` / ``db_write_cap`` and a global index on
    ``name``.  If creation fails, a plain handle to the table is used.

    :param name: table name
    :returns: the ``Table`` object for ``name``
    """
    # The same region is used for creation and for the fallback handle; the
    # fallback used to connect to a hard-coded 'us-west-2'.
    connection = boto.dynamodb2.connect_to_region(AWS_REGION)
    try:
        users = Table.create(name, schema=[HashKey('id')],
                             throughput={'read': db_read_cap,
                                         'write': db_write_cap},
                             global_indexes=[
                                 GlobalAllIndex('EverythingIndex',
                                                parts=[HashKey('name')])
                             ],
                             connection=connection)
    except Exception:
        users = Table(name, connection=connection)
        print("1) Table 'data' already created for table: " + name)

    # On first run this won't insert data because of the delay to create the
    # table on the AWS server side.
    try:
        users.put_item(data={
            'id': '3',
            'type': 'person',
            'name': 'dummy',
            'activities': ['activity one'],
            })
    except Exception:
        # Best effort; ``Exception`` keeps Ctrl-C / SystemExit working.
        print('2) Dummy Data already added for tabe: ' + name)
    return users
Пример #11
0
    def createTable(isLocal, localPort):
        """Used to create the session table for DynamoDB.

        Records ``localPort`` on ``SessionTable.LOCAL_PORT`` and creates the
        table with a numeric ``expiration`` global index (5 read / 5 write).

        :param isLocal: when true, create against
            ``SessionTable.getLocalConnection()`` and ignore the error that
            says the table already exists
        :param localPort: port stored on ``SessionTable.LOCAL_PORT``
        :raises exceptions.JSONResponseError: in local mode, for any error
            other than the "preexisting table" one
        """
        SessionTable.LOCAL_PORT = localPort
        secondaryIndex = [
            GlobalAllIndex('expiration-index',
                           parts=[HashKey('expiration', data_type=NUMBER)],
                           throughput={
                               'read': 5,
                               'write': 5
                           })
        ]
        if not isLocal:
            Table.create(SessionTable.TABLE_NAME,
                         schema=[HashKey(SessionTable.KEY_NAME)],
                         global_indexes=secondaryIndex)
            return

        try:
            Table.create(SessionTable.TABLE_NAME,
                         schema=[HashKey(SessionTable.KEY_NAME)],
                         global_indexes=secondaryIndex,
                         connection=SessionTable.getLocalConnection())
        except exceptions.JSONResponseError as jre:
            # The message may be missing/None, so guard before lower().
            message = getattr(jre, 'message', None) or ''
            tableExists = jre.status == 400 and "preexisting" in message.lower()
            if not tableExists:
                # Only "table already exists" is expected here; anything else
                # is a real failure and must not be hidden.
                raise
Пример #12
0
def get_indexes(all_indexes):
    """Turn index description dicts into boto index objects.

    Each description needs ``name`` and ``type``; key names/types and
    capacities are optional (capacities default to 1).  ``includes`` is
    required for the two include types.  Descriptions with an unrecognised
    ``type`` are ignored.

    Returns a ``(local_indexes, global_indexes)`` pair of lists.
    """
    local_indexes = []
    global_indexes = []

    for spec in all_indexes:
        index_name = spec['name']
        key_parts = get_schema_param(
            spec.get('hash_key_name'), spec.get('hash_key_type'),
            spec.get('range_key_name'), spec.get('range_key_type'))
        capacity = {
            'read': spec.get('read_capacity', 1),
            'write': spec.get('write_capacity', 1)
        }
        kind = spec['type']

        # Local secondary indexes.
        if kind == 'all':
            local_indexes.append(AllIndex(index_name, parts=key_parts))
        elif kind == 'include':
            local_indexes.append(
                IncludeIndex(index_name, parts=key_parts,
                             includes=spec['includes']))
        elif kind == 'keys_only':
            local_indexes.append(KeysOnlyIndex(index_name, parts=key_parts))

        # Global secondary indexes (these carry their own throughput).
        elif kind == 'global_all':
            global_indexes.append(
                GlobalAllIndex(index_name, parts=key_parts,
                               throughput=capacity))
        elif kind == 'global_include':
            global_indexes.append(
                GlobalIncludeIndex(index_name, parts=key_parts,
                                   throughput=capacity,
                                   includes=spec['includes']))
        elif kind == 'global_keys_only':
            global_indexes.append(
                GlobalKeysOnlyIndex(index_name, parts=key_parts,
                                    throughput=capacity))

    return local_indexes, global_indexes
Пример #13
0
    def createFreshTable(self):
        """
        Create a fresh empty distance table.

        Any existing table is deleted first (best effort), then a new table
        is created with key (from, to) and a 'reverseIndex' global index on
        (to, from).  Returns the result of ``Table.create``.
        """

        # delete existing table if it exists
        try:
            self.__getTable().delete()
            # presumably gives the deletion time to complete -- TODO confirm
            time.sleep(10)
        except Exception:
            # do nothing. Maybe there was no existing table.
            # ``Exception`` (not a bare except) so Ctrl-C during the sleep
            # is not swallowed.
            pass

        # create new table
        tableConnectionParams = parseConnectionString(self.tableConnString)
        return Table.create(tableConnectionParams['name'],
                            schema=[HashKey('from'),
                                    RangeKey('to')],
                            throughput={
                                'read': 1,
                                'write': 2,
                            },
                            global_indexes=[
                                GlobalAllIndex(
                                    'reverseIndex',
                                    parts=[HashKey('to'),
                                           RangeKey('from')],
                                    throughput={
                                        'read': 1,
                                        'write': 2,
                                    })
                            ],
                            connection=getDbConnection(tableConnectionParams))
Пример #14
0
class StageCollection(DBTable):
    """DBTable bound to the 'stage' table, with a global index on 'event'."""

    table_name = 'stage'
    global_indexes = [GlobalAllIndex('event-index', parts=[HashKey('event')])]

    def __init__(self, dbconn):
        """Initialise the base table with this collection's fixed settings.

        :param dbconn: connection object forwarded to ``DBTable.__init__``
        """
        owner = StageCollection
        name = owner.table_name
        indexes = owner.global_indexes
        super(owner, self).__init__(name, DBTable.simple_schema, indexes,
                                    dbconn)
Пример #15
0
class EventCollection(DBTable):
    """DBTable bound to the 'events' table."""

    table_name = 'events'
    global_indexes = [GlobalAllIndex('event-index', parts=[HashKey('event')])]

    def __init__(self, dbconn):
        """Initialise the base table and record the schema on the instance.

        :param dbconn: connection object forwarded to ``DBTable.__init__``
        """
        owner = EventCollection
        simple_schema = DBTable.simple_schema
        # NOTE(review): ``None`` is passed for the global indexes even though
        # ``global_indexes`` is defined above -- confirm this is intended.
        super(owner, self).__init__(owner.table_name, simple_schema, None,
                                    dbconn)
        self.schema = DBTable.simple_schema
Пример #16
0
class OrganizationCollection(DBTable):
    """DBTable bound to the 'organizations' table, indexed by 'name'."""

    table_name = 'organizations'
    global_indexes = [GlobalAllIndex('NameIndex', parts=[HashKey('name')])]

    def __init__(self, dbconn):
        """Initialise the base table with this collection's fixed settings.

        :param dbconn: connection object forwarded to ``DBTable.__init__``
        """
        owner = OrganizationCollection
        name = owner.table_name
        indexes = owner.global_indexes
        super(owner, self).__init__(name, DBTable.simple_schema, indexes,
                                    dbconn)
Пример #17
0
def test_query_with_global_indexes():
    """Querying on status 'active' returns nothing when only 'inactive' exists.

    Creates 'messages' with two global indexes ranged on a numeric
    'created_at', saves two versions of one inactive item, then queries.
    """

    def created_at_index(index_name, hash_attr, read_units):
        # Global index on (hash_attr, created_at) with 1 write unit.
        return GlobalAllIndex(
            index_name,
            parts=[HashKey(hash_attr), RangeKey('created_at', data_type='N')],
            throughput={'read': read_units, 'write': 1})

    primary_key = [
        HashKey('subject'),
        RangeKey('version'),
    ]
    secondary = [
        created_at_index('topic-created_at-index', 'topic', 6),
        created_at_index('status-created_at-index', 'status', 2),
    ]
    table = Table.create('messages', schema=primary_key,
                         global_indexes=secondary)

    item = Item(table, {
        'subject': 'Check this out!',
        'version': '1',
        'created_at': 0,
        'status': 'inactive'
    })
    item.save(overwrite=True)

    # Store a second version of the same message.
    item['version'] = '2'
    item.save(overwrite=True)

    matches = list(table.query(status__eq='active'))
    matches.should.have.length_of(0)
Пример #18
0
class RegattaCollection(DBTable):
    """DBTable bound to the 'regatta' table, indexed by 'name'."""

    table_name = 'regatta'
    #item_class = Regatta
    global_indexes = [GlobalAllIndex('name-index', parts=[HashKey('name')])]

    def __init__(self, dbconn):
        """Initialise the base table with this collection's fixed settings.

        :param dbconn: connection object forwarded to ``DBTable.__init__``
        """
        owner = RegattaCollection
        name = owner.table_name
        indexes = owner.global_indexes
        super(owner, self).__init__(name, DBTable.simple_schema, indexes,
                                    dbconn)
Пример #19
0
def get_table():
  """Return a Table handle for ``table_name`` with its key and index layout.

  Primary key is (CreatedAt, Count); 'CountsIndex' is a global index on
  (EventType, Timestamp), both strings.
  """
  primary_key = [
    HashKey('CreatedAt'),
    RangeKey('Count'),
  ]
  counts_index = GlobalAllIndex('CountsIndex', parts=[
    HashKey('EventType', data_type=STRING),
    RangeKey('Timestamp', data_type=STRING),
  ])
  return Table(table_name, schema=primary_key, global_indexes=[counts_index])
Пример #20
0
class AthleteCollection(DBTable):
    """DBTable bound to the 'athletes' table, indexed by 'organization'."""

    table_name = 'athletes'
    global_indexes = [
        GlobalAllIndex('OrgIndex', parts=[HashKey('organization')]),
    ]

    def __init__(self, dbconn):
        """Initialise the base table with this collection's fixed settings.

        :param dbconn: connection object forwarded to ``DBTable.__init__``
        """
        owner = AthleteCollection
        name = owner.table_name
        indexes = owner.global_indexes
        super(owner, self).__init__(name, DBTable.simple_schema, indexes,
                                    dbconn)
Пример #21
0
 def __getMappingsTable(self):
   """Return the doc/cluster mapping table.

   Keyed on (clusterId, docId), with a global index on (docId, clusterId)
   for the reverse lookup.
   """
   primaryKey = [
     HashKey('clusterId'),
     RangeKey('docId')
   ]
   reverseIndex = GlobalAllIndex('docId-clusterId-index', parts=[
     HashKey('docId'),
     RangeKey('clusterId')
   ])
   return getDbTableWithSchemaAndGlobalIndexes(
     self.docClusterMappingTable, primaryKey, [reverseIndex])
Пример #22
0
class Audit(DBTable):
    """DBTable bound to the 'audit' table that also logs each message."""

    table_name = 'audit'
    schema = [HashKey('timeStamp'), RangeKey('user')]
    global_indexes = [GlobalAllIndex('race-index', parts=[HashKey('user')])]

    def __init__(self, dbconn):
        """Set up the 'audit' logger, then initialise the base table.

        :param dbconn: connection object forwarded to ``DBTable.__init__``
        """
        self.logger = logging.getLogger('audit')

        owner = Audit
        super(owner, self).__init__(owner.table_name, owner.schema,
                                    owner.global_indexes, dbconn)

    def info(self, message):
        """Log ``message`` at INFO level and insert it with the current time."""
        self.logger.info(message)
        # NOTE(review): the row uses key 'timestamp' while the schema's hash
        # key is 'timeStamp' and no 'user' is supplied -- confirm what
        # ``insert`` expects.
        record = {'message': message, 'timestamp': time.time()}
        self.insert(data=record)
Пример #23
0
def sendtodynamo_cnn(cnnjson):
    ''' Send json to DynamoDB
  Assumes that article timestamps have been deduped to avoid collisions

  Connects to 'us-west-2', creates the 'CNN' table if describing it fails,
  then uploads every article in ``cnnjson`` (an iterable of dicts that each
  provide 'timestamp' and 'show').  Always returns None.
  '''

    conn = connect_to_region('us-west-2',
                             aws_access_key_id=AWS_ACCESS_KEY_ID,
                             aws_secret_access_key=AWS_SECRET_ACCESS_KEY)

    hashkey = "CNN"  # primary key to be used for DynamoDB table

    try:
        table = Table('CNN', connection=conn)
        table.describe()
    except boto.exception.JSONResponseError:
        print("Creating table")
        table = Table.create(
            'CNN',
            schema=[HashKey('source'),
                    RangeKey('tstamp', data_type=NUMBER)],
            throughput={
                'read': 25,
                'write': 25
            },
            # A GlobalAllIndex is a global secondary index, so it belongs in
            # ``global_indexes`` (``indexes`` is for local ones).
            global_indexes=[
                GlobalAllIndex('showidx',
                               parts=[HashKey('show')],
                               throughput={
                                   'read': 10,
                                   'write': 5
                               })
            ],
            # Create the table through the same credentialed connection that
            # was used for the lookup above.
            connection=conn)

    for iteration, article in enumerate(cnnjson, 1):
        # Iterate through list of articles and upload to table
        rowdata = {
            'source': hashkey,
            'tstamp': float(article['timestamp']),
            'cnnShow': article['show']
        }
        # Carry over every original article field as well.
        rowdata.update(article)
        table.put_item(data=rowdata)
        if iteration % 100 == 0:
            # str() is required: concatenating the int raised TypeError.
            print("Uploaded " + str(iteration) + " articles")

    return None
Пример #24
0
def etl_records(dynamodb_connection):
    """Yield an ETLRecords object backed by a freshly created table.

    Creates the 'ETLRecords' table with a (job_id, data_date) global index,
    stores every entry of ``SAMPLE_RECORD_JOBS`` and yields the ETLRecords
    wrapper.  The table is deleted afterwards, including when seeding fails
    or the generator is closed early.

    :param dynamodb_connection: connection passed to ``Table.create``
    """
    avro_schema = get_avro_schema('mycroft/avro/etl_record.json')
    index_job_id = GlobalAllIndex(
        ETLRecords.INDEX_JOB_ID_AND_DATA_DATE,
        parts=[HashKey('job_id'), RangeKey('data_date')])
    table = Table.create('ETLRecords',
                         schema=NAME_TO_SCHEMA['etl_records'],
                         connection=dynamodb_connection,
                         global_indexes=[index_job_id])
    try:
        # Named ``records`` so the local does not shadow this function.
        records = ETLRecords(persistence_object=table,
                             avro_schema_object=avro_schema)
        for etl_record in SAMPLE_RECORD_JOBS:
            assert records.put(**etl_record)
        yield records
    finally:
        # Always clean up so a failed run does not leave the table behind.
        assert table.delete()
Пример #25
0
  def __getTable(self):
    """
    Get the clusters table.

    Keyed on clusterId, with a global index on (isCurrent, clusterId).
    """

    primaryKey = [HashKey('clusterId')]
    currentIndex = GlobalAllIndex('isCurrent-clusterId-index', parts=[
      HashKey('isCurrent'),
      RangeKey('clusterId')
    ])
    return getDbTableWithSchemaAndGlobalIndexes(
      self.tableConnString, primaryKey, [currentIndex])
def test_create_with_global_indexes():
    """A table created with a global index reports it from describe_table."""
    conn = boto.dynamodb2.layer1.DynamoDBConnection()

    topic_index = GlobalAllIndex(
        'topic-created_at-index',
        parts=[
            HashKey('topic'),
            RangeKey('created_at', data_type='N')
        ],
        throughput={
            'read': 6,
            'write': 1
        })
    Table.create('messages',
                 schema=[
                     HashKey('subject'),
                     RangeKey('version'),
                 ],
                 global_indexes=[topic_index])

    # Expected description of the single global secondary index.
    expected_indexes = [
        {
            "IndexName": "topic-created_at-index",
            "KeySchema": [
                {"AttributeName": "topic", "KeyType": "HASH"},
                {"AttributeName": "created_at", "KeyType": "RANGE"},
            ],
            "Projection": {"ProjectionType": "ALL"},
            "ProvisionedThroughput": {
                "ReadCapacityUnits": 6,
                "WriteCapacityUnits": 1,
            },
        }
    ]

    table_description = conn.describe_table("messages")
    actual_indexes = table_description['Table']["GlobalSecondaryIndexes"]
    actual_indexes.should.equal(expected_indexes)
def createTable():
    """Create and return the 'consumer_complaint' table in 'us-west-2'.

    Hash key is 'Complaint_ID' (5 read / 15 write); 'EverythingIndex' is a
    global index on 'State' (1 read / 1 write).
    """
    primary_key = [
        HashKey('Complaint_ID'),  # defaults to STRING data_type
    ]
    table_throughput = {
        'read': 5,
        'write': 15,
    }
    state_index = GlobalAllIndex(
        'EverythingIndex',
        parts=[
            HashKey('State'),
        ],
        throughput={
            'read': 1,
            'write': 1,
        })
    # If you need to specify custom parameters, such as credentials or region,
    # pass them through the connection:
    region_connection = boto.dynamodb2.connect_to_region('us-west-2')

    consumer_complaint = Table.create(
        'consumer_complaint',
        schema=primary_key,
        throughput=table_throughput,
        global_indexes=[state_index],
        connection=region_connection)
    return consumer_complaint
Пример #28
0
 def __init__(self):
     """Initialise the base class for the 'store' table.

     Hash key is 'store_id'; 'StoreCompanyIndex' is a global index on
     (company_id, store_id).  Table and index both use 3 read / 3 write, and
     records are represented by ``Store``.
     """
     capacity = {
         'read': 3,
         'write': 3
     }
     company_index = GlobalAllIndex(
         'StoreCompanyIndex',
         parts=[
             HashKey('company_id'),
             RangeKey('store_id', data_type=STRING)
         ],
         throughput={
             'read': 3,
             'write': 3
         })
     # NOTE(review): super(self.__class__, self) recurses forever if this
     # class is ever subclassed; name the class explicitly once confirmed.
     super(self.__class__, self).__init__(
         'store',
         schema=[HashKey('store_id')],
         global_indexes=[company_index],
         throughput=capacity,
         record_class=Store)
Пример #29
0
 def _mock_create_table(
         self,
         name,
         hash_key=MOCK_TABLE_HASH_KEY,
         range_key=MOCK_TABLE_RANGE_KEY,
         read_throughput=MOCK_TABLE_READ_THROUGHPUT,
         write_throughput=MOCK_TABLE_WRITE_THROUGHPUT,
         global_index_name=MOCK_TABLE_GLOBAL_INDEX_NAME,
         global_index_attr_name=MOCK_TABLE_GLOBAL_INDEX_ATTR_NAME):
     """Create table ``name`` with one global index; defaults are MOCK_TABLE_*.

     :param name: table name
     :param hash_key: hash key attribute of the table
     :param range_key: range key attribute of the table
     :param read_throughput: table read capacity
     :param write_throughput: table write capacity
     :param global_index_name: name of the single global index
     :param global_index_attr_name: hash key attribute of that index
     """
     primary_key = [HashKey(hash_key), RangeKey(range_key)]
     capacity = {
         'read': read_throughput,
         'write': write_throughput
     }
     global_index = GlobalAllIndex(global_index_name,
                                   parts=[HashKey(global_index_attr_name)])
     Table.create(name,
                  schema=primary_key,
                  throughput=capacity,
                  global_indexes=[global_index])
 def tableCreateKwargs(self):
     """Return the keyword arguments used to create this table.

     The result has ``schema`` (metric_name_tweet_uid / agg_ts),
     ``throughput`` (fixed read of 1, configured write) and
     ``global_indexes`` (one metric_name / agg_ts index whose capacities
     come from the "dynamodb" section of ``taurus.engine.config``).
     """
     primaryKey = [HashKey("metric_name_tweet_uid"), RangeKey("agg_ts")]

     tableThroughput = {
         "read": 1,
         "write": taurus.engine.config.getint(
             "dynamodb", "metric_tweets_throughput_write"),
     }

     indexKey = [HashKey("metric_name"), RangeKey("agg_ts")]
     indexThroughput = {
         "read": taurus.engine.config.getint(
             "dynamodb", "metric_tweets_throughput_read"),
         "write": taurus.engine.config.getint(
             "dynamodb", "metric_tweets_throughput_write"),
     }
     metricNameIndex = GlobalAllIndex(
         "taurus.metric_data-metric_name_index",
         parts=indexKey,
         throughput=indexThroughput)

     return {
         "schema": primaryKey,
         "throughput": tableThroughput,
         "global_indexes": [metricNameIndex],
     }