class Storage:
    """Holds a DynamoDB connection and one Table per configured publisher.

    Configuration keys read: ``max_results``, ``publishers``, ``region``,
    and one table-config entry per publisher name.
    """

    def __init__(self, storage_conf):
        self.conf = storage_conf
        self.max_results = storage_conf["max_results"]
        self.publishers = self.conf["publishers"]
        if self.conf["region"] == "localhost":
            # Local DynamoDB instance: fixed host/port, credentials ignored.
            from boto.dynamodb2.layer1 import DynamoDBConnection
            self.connection = DynamoDBConnection(
                host='localhost',
                port=8000,
                aws_secret_access_key='anything',
                is_secure=False)
        else:
            self.connection = boto.dynamodb2.connect_to_region(self.conf["region"])
        self.tables = {}
        for publisher in self.publishers:
            self.tables[publisher] = Table(
                self.connection,
                max_results=self.max_results,
                **self.conf[publisher])

    def close(self):
        """Close the connection.

        This allows you to use with contextlib's closing. Mostly necessary
        for the test DB which seems to only allow a single connection.
        """
        self.connection.close()
def delete_dynamodb():
    """Delete the easy-list table from DynamoDB, then close the connection."""
    connection = DynamoDBConnection(
        aws_access_key_id=AWS_ACCESS_KEY,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        region=RegionInfo(
            name=REGION,
            endpoint='dynamodb.{0}.amazonaws.com'.format(REGION)))
    connection.delete_table(EASY_LIST_TBL)
    connection.close()
def getuserlast(user):
    """Fetch the most recent 'gpstrack' item for *user*.

    Queries with ``scan_index_forward=False`` and ``limit=1`` so only the
    newest entry (by range key) is returned.
    """
    connection = DynamoDBConnection()
    conditions = {
        'user': {
            'AttributeValueList': [{'S': user}],
            'ComparisonOperator': 'EQ',
        }
    }
    results = connection.query(
        'gpstrack',
        key_conditions=conditions,
        scan_index_forward=False,
        limit=1,
    )
    connection.close()
    return results
class DynamoDbPoller(object):
    """Tracks shard ownership and heartbeats in a DynamoDB table.

    Each claimed shard is stored as an item keyed on ``"<shard>-<app_id>"``
    holding the owning ``host``, a heartbeat ``time``, and the last processed
    sequence number.
    """

    def __init__(self, region, table, aws_id, aws_key, host_id,
                 heartbeat_timeout, app_id):
        self.region = region
        self.table = table
        self.aws_id = aws_id
        self.aws_key = aws_key
        self.host_id = host_id
        self.heartbeat_timeout = heartbeat_timeout
        self.myshard = None  # set once this poller claims a shard
        self.app_id = app_id
        # Mix host_id into the seed so pollers started simultaneously diverge.
        random.seed(hash(time.time()) + hash(host_id))

    # note -- only use this if you're going to create your local table...
    # these options are probably not right for production
    def create_my_table(self):
        """Create the tracking table with minimal (local/dev) throughput."""
        attributes = [{'AttributeName': 'id', 'AttributeType': 'S'}]
        schema = [{'AttributeName': 'id', 'KeyType': 'HASH'}]
        throughput = {'ReadCapacityUnits': 1, 'WriteCapacityUnits': 1}
        self.conn.create_table(attributes, self.table, schema, throughput)

    def table_exists(self):
        """Return True when ``self.table`` already exists on the connection."""
        return self.table in self.conn.list_tables()['TableNames']

    def init_connection(self):
        """Open the DynamoDB connection; create the table when running locally."""
        logging.info('Region: %s' % (self.region))
        if self.region == 'local':
            logging.info('using 127.0.0.1')
            self.conn = DynamoDBConnection(
                host='127.0.0.1', port=k_default_local_port, is_secure=False)
            if not self.table_exists():
                logging.info('Creating Table %s' % self.table)
                self.create_my_table()
        else:
            self.conn = boto.dynamodb2.connect_to_region(
                self.region,
                aws_access_key_id=self.aws_id,
                aws_secret_access_key=self.aws_key)

    def get_table(self):
        """Return a Table bound to our connection.

        Rebinding ``connection`` is important when the local connection is
        used, because Table() by default starts with us-east-1.
        """
        table = Table(self.table)  # takes string argument
        table.connection = self.conn
        return table

    def close_connection(self):
        self.conn.close()

    def update_heartbeat(self, shard):
        """Refresh this host's heartbeat on its claimed shard item.

        Returns False when another host owns the item, a concurrent save
        wins, or the item no longer exists; True on success.
        """
        table = self.get_table()
        shard_id = self.myshard + "-" + self.app_id
        try:
            myitem = table.get_item(id=shard_id)
            now = int(time.time())
            host = str(myitem['host'])
            if host != self.host_id:
                logging.warning(
                    'Another poller named %s replaced me (%s) for shard %s. '
                    'Oh the humanity (this was not expected).'
                    % (host, self.host_id, shard_id))
                return False
            myitem['time'] = now
            if not myitem.save():
                logging.warning(
                    'someone took over me before I could save. '
                    'What the heck! (this was not expected)')
                return False
            return True
        except ItemNotFound:
            logging.critical(
                'can not find %s, so I am not updating the heartbeat'
                % shard_id)
            return False

    def update_shard_sequence_number(self, sequence_number):
        """Persist the last processed sequence number for our shard, if any."""
        if self.myshard is not None:
            table = self.get_table()
            shard_id = self.myshard + "-" + self.app_id
            try:
                myitem = table.get_item(id=shard_id)
                myitem[k_dynamodb_last_sequence_number] = sequence_number
                if not myitem.save():
                    logging.warning(
                        'Someone saved to shard %s before I (%s) could'
                        % (shard_id, self.host_id))
            # Fixed Python 2-only "except ItemNotFound, e" syntax; the bound
            # exception was unused anyway.
            except ItemNotFound:
                # gah! This shard tracking item does not exist yet.
                logging.warning(
                    'Unable to update %s in DynamoDB with new sequence number'
                    % shard_id)
def upload_dynamo():
    """Upload the given easy list (ad blocking) file to DynamoDB.

    Creates the easy-list table when it does not already exist, then buckets
    each line of the list file into domain / resource / query entries and
    stores each bucket as a single set-valued item.
    """
    print("Connecting to DynamoDB...")
    conn = DynamoDBConnection(
        aws_access_key_id=AWS_ACCESS_KEY,
        aws_secret_access_key=AWS_SECRET_ACCESS_KEY,
        region=RegionInfo(
            name=REGION,
            endpoint='dynamodb.{0}.amazonaws.com'.format(REGION)))
    print("\tConnected!")

    # Create the tables or use already existing ones.
    print("Getting tables...")
    # define expected throughput
    throughput = {'read': 2, 'write': 5}
    # define the schema - in our case a simple key
    easylist_tbl_schema = [HashKey(EASY_LIST_TBL_KEY)]
    easylist_table = None
    # get existing tables to check if need to create tables
    existing_tables = conn.list_tables()[u"TableNames"]
    if EASY_LIST_TBL not in existing_tables:
        print("\ttrying to create {0} table...".format(EASY_LIST_TBL))
        try:
            easylist_table = Table.create(
                EASY_LIST_TBL,
                schema=easylist_tbl_schema,
                throughput=throughput,
                connection=conn)
            # Wait some for the table to be created.
            time.sleep(60)
            print("\t\ttable created!")
        except Exception as e:
            # Narrowed from a bare "except:"; report the failure reason
            # instead of silently discarding it.
            print("\t\t{0} table does not exist and could not be created. "
                  "Quitting ({1})".format(EASY_LIST_TBL, e))
            return
    else:
        print("\ttable {0} already exists".format(EASY_LIST_TBL))
        easylist_table = Table(
            EASY_LIST_TBL,
            schema=easylist_tbl_schema,
            throughput=throughput,
            connection=conn)

    # Read the list file and bucket each entry by its leading character.
    domain_list = []
    resource_list = []
    query_list = []
    with open(EASYLIST_FILE_PATH, 'r') as reader:
        for line in reader.readlines():
            # Skip ignored prefixes and element-hiding ("##") rules.
            if line[0] in IGNORE_PREFIX_LIST or "##" in line:
                continue
            token = line.strip()
            if line[0] == "&":
                query_list.append(token)
            elif line[0] == "/":
                resource_list.append(token)
            else:
                domain_list.append(token)

    print("Loading the list to table")
    easylist_table.put_item(data={
        EASY_LIST_TBL_KEY: DOMAIN_LIST_KEY,
        EASY_LIST_TBL_ATTR: set(domain_list),
    })
    # there is size limit - consider storing some other way
    # NOTE(review): only resources[100:1100] are uploaded -- confirm this
    # truncation is intentional.
    easylist_table.put_item(data={
        EASY_LIST_TBL_KEY: RESOURCE_LIST_KEY,
        EASY_LIST_TBL_ATTR: set(resource_list[100:1100]),
    })
    easylist_table.put_item(data={
        EASY_LIST_TBL_KEY: QUERY_LIST_KEY,
        EASY_LIST_TBL_ATTR: set(query_list),
    })
    conn.close()
    print("Finished uploading easy list")
class TestDynamoDB(unittest.TestCase): def setUp(self): # Connect to DynamoDB Local self.conn = DynamoDBConnection(host='localhost', port=8000, aws_secret_access_key='anything', is_secure=False) tables = self.conn.list_tables() if 'employees' not in tables['TableNames']: # Create table of employees self.employees = Table.create( 'employees', schema=[HashKey('etype'), RangeKey('id')], indexes=[ AllIndex('TitleIndex', parts=[HashKey('etype'), RangeKey('title')]) ], connection=self.conn) else: self.employees = Table('employees', connection=self.conn) self.employeeData = [{ 'etype': 'E', 'first_name': 'John', 'last_name': 'Doe', 'id': '123456789', 'title': 'Head Bottle Washer', 'hiredate': 'June 5 1986' }, { 'etype': 'E', 'first_name': 'Alice', 'last_name': 'Kramden', 'id': '007', 'title': 'Assistant Bottle Washer', 'hiredate': 'July 1 1950' }, { 'etype': 'E', 'first_name': 'Bob', 'last_name': 'Dylan', 'id': '42', 'title': 'Assistant Bottle Washer', 'hiredate': 'January 1 1970' }] for data in self.employeeData: self.employees.put_item(data=data, overwrite=True) def tearDown(self): self.conn.close() def getEmployeeData(self, key, value): return filter(lambda x: x[key] == value, self.employeeData) def test_001_get_item(self): emp = self.employees.get_item(etype='E', id='123456789') data = self.getEmployeeData('id', '123456789')[0] expected = Item(self.employees, data=data) self.assertNotEqual(emp._data, expected._data) def test_002_update_item(self): emp = self.employees.get_item(etype='E', id='123456789') emp['first_name'] = 'Jane' emp.save() emp = self.employees.get_item(etype='E', id='123456789') data = self.getEmployeeData('id', '123456789')[0] expected = Item(self.employees, data=data) expected['first_name'] = 'Jane' self.assertEqual(emp._data, expected._data) @unittest.skip("this test is broken") def test_003_failed_update_item(self): emp = self.employees.get_item(etype='E', id='123456789') emp2 = self.employees.get_item(etype='E', id='123456789') emp['first_name'] 
= 'Jane' emp.save() self.assertFalse(emp2.save())
class TestDynamoDB(unittest.TestCase): def setUp(self): # Connect to DynamoDB Local self.conn = DynamoDBConnection( host='localhost', port=8000, aws_secret_access_key='anything', is_secure=False) tables = self.conn.list_tables() if 'employees' not in tables['TableNames']: # Create table of employees self.employees = Table.create('employees', schema = [HashKey('etype'), RangeKey('id')], indexes = [AllIndex('TitleIndex', parts = [ HashKey('etype'), RangeKey('title')])], connection = self.conn ) else: self.employees = Table('employees', connection=self.conn) self.employeeData = [{'etype' : 'E', 'first_name' : 'John', 'last_name': 'Doe', 'id' : '123456789', 'title' : 'Head Bottle Washer', 'hiredate' : 'June 5 1986'}, {'etype' : 'E', 'first_name' : 'Alice', 'last_name': 'Kramden', 'id' : '007', 'title' : 'Assistant Bottle Washer', 'hiredate' : 'July 1 1950'}, {'etype' : 'E', 'first_name' : 'Bob', 'last_name': 'Dylan', 'id' : '42', 'title' : 'Assistant Bottle Washer', 'hiredate' : 'January 1 1970'}] for data in self.employeeData: self.employees.put_item(data=data, overwrite=True) def tearDown(self): self.conn.close() def getEmployeeData(self, key, value): return filter(lambda x: x[key] == value, self.employeeData) def test_001_get_item(self): emp = self.employees.get_item(etype='E', id='123456789') data = self.getEmployeeData('id', '123456789')[0] expected = Item(self.employees, data = data) self.assertNotEqual(emp._data, expected._data) def test_002_update_item(self): emp = self.employees.get_item(etype='E', id='123456789') emp['first_name'] = 'Jane' emp.save() emp = self.employees.get_item(etype='E', id='123456789') data = self.getEmployeeData('id', '123456789')[0] expected = Item(self.employees, data = data) expected['first_name'] = 'Jane' self.assertEqual(emp._data, expected._data) @unittest.skip("this test is broken") def test_003_failed_update_item(self): emp = self.employees.get_item(etype='E', id='123456789') emp2 = self.employees.get_item(etype='E', 
id='123456789') emp['first_name'] = 'Jane' emp.save() self.assertFalse(emp2.save())
class DBconn(object):
    """Thin wrapper around the 'ao3rdr-works' DynamoDB table.

    Works and per-user settings share one table keyed on
    ``(user_id, work_id)``; a user's settings live in a sentinel row whose
    ``work_id`` is the literal string ``'settings'``.
    """

    def __init__(self):
        aws_access_key_id = os.environ['S3_KEY']  # I AM OPS U NO GET MY KEYS
        aws_secret_access_key = os.environ['S3_SECRET']  # DIS IS MY JOB
        self._conn = DynamoDBConnection(
            aws_access_key_id=aws_access_key_id,
            aws_secret_access_key=aws_secret_access_key)
        self.works_table = Table('ao3rdr-works', connection=self._conn)
        # Key fields that update_work() must never overwrite.
        self.immutable_fields = ['work_id', 'user_id']

    def get_user(self, user_id):
        """Return the user_id attribute(s) of the user's settings row(s)."""
        res = self.works_table.query_2(
            user_id__eq=user_id,
            work_id__eq='settings',
            attributes=['user_id'])
        out = []
        for entry in res:
            out.append(self.serialize(entry)['user_id'])
        return out

    def add_user(self, user_id):
        """ Adding a user adds a special "work" which is used to store a
        user's settings. """
        return self.works_table.put_item(data={
            'user_id': user_id,
            'work_id': 'settings',
            'created': time.time(),
        })

    def update_work(self, user_id, work_id, data):
        """Merge *data* into an existing work, skipping immutable fields,
        and stamp it with an 'updated' time."""
        item = self.works_table.get_item(user_id=user_id, work_id=work_id)
        # update the item (items() replaces the Python 2-only iteritems())
        for key, value in data.items():
            if key not in self.immutable_fields:
                item[key] = value
        item['updated'] = time.time()
        item.partial_save()

    def create_work(self, user_id, work_id, data):
        """Store a new work item keyed on (user_id, work_id)."""
        data['user_id'] = user_id
        data['work_id'] = work_id
        self.works_table.put_item(data)

    def batch_update(self, data_list):
        """Write many work items in one batched operation."""
        with self.works_table.batch_write() as batch:
            for data in data_list:
                batch.put_item(data=data)

    def get_work(self, user_id, work_id):
        """Return the serialized work, or {} when it does not exist."""
        try:
            res = self.works_table.get_item(user_id=user_id, work_id=work_id)
        except ItemNotFound:
            return {}
        return self.serialize(res)

    def get_all_works(self, user_id):
        """Yield every serialized work belonging to *user_id*."""
        res = self.works_table.query_2(user_id__eq=user_id)
        for entry in res:
            yield self.serialize(entry)

    def close(self):
        self._conn.close()

    def serialize(self, item):
        # Delegates to the module-level serialize() helper on a plain dict copy.
        out = serialize(dict(item))
        return out