def get_address(pool):
    """Retrieve an EIP for the given pool from DynamoDB"""
    # Connect to DynamoDB
    conn = boto.dynamodb2.connect_to_region(options.region)
    ddb = Table(options.table_name, connection=conn)
    # Get available EIPs from pool
    eips = ddb.query(pool__eq=pool, consistent=True)
    if not eips:
        raise FatalError(u"No EIPs found in pool %s" % pool)
    address = None
    for eip in eips:
        if not eip.get('stack_id', False):
            eip['stack_id'] = stack_id
            eip['logical_id'] = logical_id
            if eip.save():
                address = eip['address']
                break
    if not address:
        raise FatalError(u"All EIPs in pool %s are in use" % pool)
    return address
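# A minimal sketch of the table layout get_address() appears to assume: a
# 'pool' hash key plus an 'address' range key, with stack_id/logical_id set
# once an EIP is claimed. The table name and schema here are illustrative
# assumptions, not taken from the original code.
from boto.dynamodb2.fields import HashKey, RangeKey
from boto.dynamodb2.table import Table

eip_table = Table.create('eip_pool',  # hypothetical table name
                         schema=[HashKey('pool'), RangeKey('address')],
                         connection=conn)  # conn as in get_address()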
def get_config(cluster_tag=None, template_name=None):
    """
    Loads a list of configurations started with the given <cluster_tag>;
    the most recent configuration is first.
    """
    cfg = StarClusterConfig().load()
    if not cluster_tag and not template_name:
        log.warning("Attempt to clone without a template_name or cluster_tag")
        return []
    s3_bucket = cfg.aws['aws_meta_bucket']
    table_name = cfg.aws['aws_config_table']
    if table_name is None:
        log.warning("AWS_CONFIG_TABLE is not defined."
                    " This cluster will not be cloneable.")
        return False
    if s3_bucket is None:
        log.warning("AWS_META_BUCKET is not defined."
                    " This cluster will not be cloneable.")
        return False
    conn = boto.dynamodb2.connect_to_region('us-east-1')
    cluster_info_table = Table(table_name, connection=conn)
    # print cluster_info_table.describe()
    if cluster_tag:
        clusters = cluster_info_table.query(cluster_tag__eq=cluster_tag)
    else:
        clusters = cluster_info_table.scan(template_name__eq=template_name)
    if clusters:
        return [c for c in clusters]
    else:
        return []
def getUsersWithTenantBlackList(environment, blacklist):
    """
    Gets users for all tenants except blacklisted tenants
    :param environment: environment to use
    :param blacklist: [ tid1, tid2, tid3, ... ] all str
    :return: [ {uid:str, email:str, last_login_time:str, tid:str}, ... ]
    """
    # hash=id (tenant ID) | contentMarketing | enabled | licenses | name
    tInfoTable = Table(environment + "_idp_tenant")
    # hash=tenant_id | range=id | email | enabled | password | user_profile | last_login_time | login_token
    tUserTable = Table(environment + "_idp_user")
    tenantResults = tInfoTable.scan()
    users = []
    for res in tenantResults:
        if str(res['id']) in blacklist:
            continue
        for userItem in tUserTable.query(tenant_id__eq=res['id']):
            user = {}
            user['tid'] = res['id']
            user['uid'] = userItem['id']
            user['email'] = userItem['email']
            user['last_login_time'] = userItem['last_login_time']
            users.append(user)
    return users
def getTenantIDs(environment, blacklist):
    """
    Returns a list of Tenant IDs for the current environment
    :param environment: Current environment
    :param blacklist: list of tenant IDs (str) to skip
    :return: [ [tid, [email1, ...], name], ... ]
    """
    # hash=id (tenant ID) | contentMarketing | enabled | licenses | name
    tInfoTable = Table(environment + "_idp_tenant")
    # hash=tenant_id | range=id | email | enabled | password | user_profile | last_login_time | login_token
    tUserTable = Table(environment + "_idp_user")
    tenantResults = tInfoTable.scan()
    tenants = []
    for res in tenantResults:
        if str(res['id']) in blacklist:
            continue
        tenant = [res["id"], [], res["name"]]
        for email in tUserTable.query(tenant_id__eq=tenant[0]):
            tenant[1].append(email["email"])
        tenants.append(tenant)
    return tenants
def cost(self):
    """ Returns the current hourly and cumulative cost of this node """
    if self.is_spot():
        current = utils.current_spot(self.placement, self.instance_type)
        compute_time = math.ceil(
            utils.get_elapsed_seconds(self.launch_time) / 3600.0)
        mysum = 0.0
        if self.spot_data_table:
            t = Table(self.spot_data_table)
            for r in t.query(instanceid__eq=self.id):
                mysum += float(r['charge'])
                compute_time -= 1
            return (current, mysum + compute_time * current)
        else:
            return (current, current * compute_time)
    elif self.is_up():
        price = utils.on_demand_price(self.region.name, self.instance_type)
        compute_time = math.ceil(
            utils.get_elapsed_seconds(self.launch_time) / 3600.0)
        return (price, compute_time * price)
    else:
        return (0, 0)
def test_query_with_undeclared_table():
    table = Table('undeclared')
    results = table.query(
        forum_name__eq='Amazon DynamoDB',
        subject__beginswith='DynamoDB',
        limit=1
    )
    iterate_results.when.called_with(results).should.throw(JSONResponseError)
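# The test above relies on an iterate_results helper defined elsewhere in the
# suite; a plausible minimal sketch (hypothetical reconstruction) simply
# exhausts the lazy ResultSet so the deferred DynamoDB request actually fires.
def iterate_results(res):
    for _ in res:
        pass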
def getTenantSocialInfo(environment, tid):
    """
    :returns: {'fb_T':int, 'fb_F':int, 'twit_T':int, 'twit_F':int,
               'li_T':int, 'li_F':int, 'wpStats':{}}
    """
    # tenant_id (hash) | ticket (range) | status | network_type
    tPostedItems = Table(environment + "_social-posted-item")
    postsByTenant = tPostedItems.query(tenant_id__eq=tid)
    ret = {'fb_T': 0, 'fb_F': 0, 'twit_T': 0, 'twit_F': 0,
           'li_T': 0, 'li_F': 0, 'wpStats': {}}
    for post in postsByTenant:
        key = ""
        if "FACEBOOK" == post['network_type']:
            key = 'fb_'
        elif "TWITTER" == post['network_type']:
            key = 'twit_'
        elif "LINKEDIN" == post['network_type']:
            key = 'li_'
        if "SUCCESS" == post['status']:
            key += "T"
        elif "FAILED" == post['status']:
            key += "F"
        if key in ret:
            ret[key] += 1
    # tenant_id (hash) | id | content_item | last_modified | type
    tBlogs = Table(environment + "_content_items")
    for blog in tBlogs.query(tenant_id__eq=tid):
        if blog['type'] == "blogPost":
            contentInfo = blog['content_item']
            jObj = json.loads(contentInfo)
            if jObj['state'] not in ret['wpStats']:
                ret['wpStats'][jObj['state']] = 0
            ret['wpStats'][jObj['state']] += 1
    return ret
def test_select_item(self):
    self.storage_mocker.StubOutWithMock(storage, 'select_item')
    blob_data1 = bytes(bytearray([1, 2, 3, 4, 5]))
    blob_data2 = bytes(bytearray([5, 4, 3, 2, 1]))
    hash_key = "4.5621201231232132132132132132132142354E126"
    range_key = "range"
    storage.select_item(
        IgnoreArg(), IgnoreArg(), IgnoreArg(),
        select_type=IgnoreArg(), index_name=IgnoreArg(), limit=IgnoreArg(),
        exclusive_start_key=IgnoreArg(), consistent=IgnoreArg(),
        order_type=IgnoreArg(),
    ).AndReturn(
        models.SelectResult(items=[{
            "hash_key": models.AttributeValue(
                models.ATTRIBUTE_TYPE_NUMBER, decimal.Decimal(hash_key)),
            "range_key": models.AttributeValue(
                models.ATTRIBUTE_TYPE_STRING, range_key),
            "value_blob": models.AttributeValue(
                models.ATTRIBUTE_TYPE_BLOB, blob_data1),
            "value_blob_set": models.AttributeValue(
                models.ATTRIBUTE_TYPE_BLOB_SET, set([blob_data1, blob_data2]))
        }])
    )
    self.storage_mocker.ReplayAll()
    table = Table('test_table', connection=self.DYNAMODB_CON)
    items = list(table.query(consistent=False, hash_key__eq=1))
    expected_item = {
        "hash_key": decimal.Decimal(hash_key),
        "range_key": range_key,
        "value_blob": types.Binary(blob_data1),
        "value_blob_set": set([types.Binary(blob_data1),
                               types.Binary(blob_data2)]),
    }
    self.assertEqual(len(items), 1)
    self.assertDictEqual(expected_item, dict(items[0].items()))
    self.storage_mocker.VerifyAll()
def populateCacheCities():
    print("Inside populateCacheCities")
    masterHash = {}
    # Switch to topics table
    topics = Table("Topics", connection=conn)
    sys.stdout.flush()
    for city in cityList:
        # New nested hash for this city.
        code = city['code']
        masterHash[code] = {}
        print("Querying all categories for: " + code + '\n')
        for category in CATEGORY_LIST:
            print("Querying for category: " + category + '\n')
            sys.stdout.flush()
            topicsData = topics.query(index='Categories',
                                      Category__eq=category,
                                      Date__beginswith=code)
            print("Received data back.")
            sys.stdout.flush()
            for dataPoint in topicsData:
                entryYear = dataPoint['Date'][3:][:-4]
                entryMonth = dataPoint['Date'][7:][:-2]
                # entryDay = dataPoint['Date'][9:]
                # Populate the topic hash.
                codes = ["TOP-" + code,
                         "TOP-" + code + entryYear,
                         "TOP-" + code + entryYear + entryMonth]
                for curCode in codes:
                    if curCode not in masterHash:
                        masterHash[curCode] = {}
                    if dataPoint['Name'] not in masterHash[curCode]:
                        masterHash[curCode][dataPoint['Name']] = dataPoint['Score']
                    else:
                        masterHash[curCode][dataPoint['Name']] += dataPoint['Score']
                # Populate the category hash.
                codes = ["CAT-" + code,
                         "CAT-" + code + entryYear,
                         "CAT-" + code + entryYear + entryMonth]
                for curCode in codes:
                    if curCode not in masterHash:
                        masterHash[curCode] = {}
                    if category not in masterHash[curCode]:
                        masterHash[curCode][category] = dataPoint['Score']
                    else:
                        masterHash[curCode][category] += dataPoint['Score']
        print("Loading data for: " + city['code'] + " into redis")
        loadDataRedis(masterHash)
        masterHash = {}
class Db:
    def __init__(self, conn_details):
        self.table = Table(conn_details['table'])

    def insert(self, student_id, student):
        student_item = student_to_item(self.table, student)
        return student_item.save()

    def get_by_id(self, my_student_id):
        student_item = self.table.get_item(student_id=int(my_student_id))
        if student_item is not None:
            return item_to_student(student_item)
        else:
            print 'DB lookup miss was performed on DB with student_id ' + str(my_student_id)
            return None

    # TODO - support general column names
    def get_by_attribute(self, attribute, value):
        """Currently only supports the attributes 'faculty' and 'city'."""
        if attribute == 'faculty':
            items = self._get_items_by_faculty(value)
        elif attribute == 'city':
            items = self._get_items_by_city(value)
        else:
            raise ValueError("unsupported attribute: " + attribute)
        students = []
        for item in items:
            students.append(item_to_student(item))
        return students

    def _get_items_by_faculty(self, faculty):
        return self.table.query(faculty__eq=faculty, index='faculty-index')

    def _get_items_by_city(self, city):
        return self.table.query(city__eq=city, index='city-index')

    def remove(self, student_id):
        # fetch the raw Item (not the converted student) so delete() hits DynamoDB
        student_item = self.table.get_item(student_id=int(student_id))
        return student_item.delete()

    def _clear(self):
        all_students_items = self.table.scan()
        for student_item in all_students_items:
            student_item.delete()
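# Hypothetical usage of the Db class above; the 'students' table name, the
# faculty-index GSI, and the student dict shape are illustrative assumptions,
# and student_to_item/item_to_student are the module's own converters.
db = Db({'table': 'students'})
db.insert(42, {'student_id': 42, 'faculty': 'engineering', 'city': 'haifa'})
for student in db.get_by_attribute('faculty', 'engineering'):
    print student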
def deprecated__get_config(self):
    net_table = Table('run_gpudirac_hd')
    r_spec = net_table.query(run_id__eq=self._run_id)
    config = None
    last_time = datetime.datetime(1975, 2, 18, 0, 0, 0)
    for s in r_spec:
        time_stamp = s['timestamp']
        curr = datetime.datetime.strptime(time_stamp, '%Y.%m.%d-%H:%M:%S')
        if curr > last_time:
            last_time = curr  # track the newest record seen so far
            config = json.loads(base64.b64decode(s['config']))
    return config
def main():
    # NOTE: never hard-code real AWS credentials; these are placeholders.
    conn = boto.dynamodb2.connect_to_region(
        'us-west-2',
        aws_access_key_id='YOUR_ACCESS_KEY_ID',
        aws_secret_access_key='YOUR_SECRET_ACCESS_KEY'
    )
    Test_Table = Table('Test_Table', connection=conn)
    result = Test_Table.query(Timestamp__eq='timestamp1',
                              index='Timestamp-index')
    for res in result:
        print(res['Timestamp'])
def get_pathways(self):
    """ Returns list of pathways """
    if len(self.pathways) == 0:
        table = Table(self.table_name)
        pw_ids = table.query(src_id__eq=self.source_id,
                             attributes=('pw_id', 'gene_ids'))
        for pw in pw_ids:
            self.gene_map[pw['pw_id']] = pw['gene_ids'][6:].split('~:~')
        # simple load balancing
        t = [(len(g), p) for p, g in self.gene_map.iteritems()]
        t.sort()
        self.pathways = [pw for _, pw in t]
    return self.pathways
class NetworkInfo:
    """
    Stores network information
    """
    def __init__(self, table_name="net_info_table",
                 source_id="c2.cp.biocarta.v4.0.symbols.gmt"):
        self.table = Table(table_name)
        self.source_id = source_id
        self.gene_map = {}
        # "clean" refers to the genes being filtered to match the genes
        # available in the expression file
        self.gene_clean = {}
        self.pathways = []

    def getGenes(self, pathway_id, cache=True):
        if pathway_id not in self.gene_map:
            table = self.table
            source_id = self.source_id
            logging.info("Getting network info [%s.%s.%s]" %
                         (table.table_name, source_id, pathway_id))
            nit_item = table.get_item(src_id=source_id, pw_id=pathway_id)
            self.gene_map[pathway_id] = nit_item['gene_ids'][6:].split('~:~')
            self.gene_clean[pathway_id] = False
        return self.gene_map[pathway_id]

    def getPathways(self):
        if len(self.pathways) == 0:
            pw_ids = self.table.query(src_id__eq=self.source_id,
                                      attributes=('pw_id', 'gene_ids'))
            # simple load balancing
            t = [(len(pw['gene_ids'].split('~:~')), pw['pw_id'])
                 for pw in pw_ids]
            t.sort()
            self.pathways = [pw for l, pw in t]
        return self.pathways

    def isClean(self, pathway_id):
        return self.gene_clean[pathway_id]

    def updateGenes(self, pathway_id, genes):
        """
        Updates gene list for pathway_id to genes(list)
        """
        self.gene_map[pathway_id] = genes
        self.gene_clean[pathway_id] = True

    def clearCache(self):
        self.gene_map = {}
        self.gene_clean = {}
def test_query():
    table = create_table()
    item_data = {
        'forum_name': 'the-key',
        'Body': 'http://url_to_lolcat.gif',
        'SentBy': 'User A',
        'ReceivedTime': '12/9/2011 11:36:03 PM',
    }
    item = Item(table, item_data)
    item.save(overwrite=True)
    table.count().should.equal(1)

    table = Table("messages")
    results = table.query(forum_name__eq='the-key')
    sum(1 for _ in results).should.equal(1)
class Collection(CollectionEngine):
    def __init__(self, name, table_name, region,
                 host=None, is_secure=None, port=None):
        kwargs = {}
        if host is not None:
            kwargs['host'] = host
        if is_secure is not None:
            kwargs['is_secure'] = is_secure
        if port is not None:
            kwargs['port'] = port
        self.__table = Table(
            table_name=table_name,
            connection=boto.dynamodb2.connect_to_region(region, **kwargs),
        )

    @property
    def table(self):
        return self.__table

    def create_raw_item(self, index, data_dict, context):
        if context is None:
            self.__table.put_item(data_dict)
        else:
            context.put_item(self.__table, data_dict)
        return BotoItem(self.__table, data_dict, True)

    def retrieve_raw_item(self, key_dict):
        try:
            return self.__table.get_item(**key_dict)
        except ItemNotFound:
            raise KeyError(key_dict)

    def query_raw_items(self, index, parent_key_value, **kwargs):
        if parent_key_value is not None:
            kwargs['{}__eq'.format(parent_key_value[0])] = parent_key_value[1]
        return self.__table.query(index=index, **kwargs)

    def bulk_get_raw_items(self, **kwargs):
        return self.__table.batch_get(**kwargs)

    @classmethod
    def get_context(cls, *args, **kwargs):
        return Context()
def getstatusDetail(self, reportid, day, rlimit=""):
    """
    Purpose: Get report status for the specific date from the
    TBL_AWS_REPORT_DTL table.
    :param self: class object itself
    :param reportid: table hash key condition value.
    :param day: table range key condition value.
    :param rlimit: row limit for the recordset output.
    """
    # If no limit is provided, default the recordset limit to 1
    if not rlimit:
        rlimit = 1
    # Create a table object for the DynamoDB table
    tab = Table('TBL_AWS_REPORT_DTL', connection=self.conn)
    # Return the result set for the query (reverse must be a real boolean;
    # the original string 'False' would have been truthy)
    return tab.query(REPORT_ID__eq=reportid,
                     Date_Modified__beginswith=day,
                     reverse=False,
                     limit=rlimit)
def getTenantInfo(environment, tid):
    """
    Returns a tenant info block
    :param environment: Current environment to use
    :return: { name:str, tid:str, userCount:int,
               users:[ {uid:str, email:str, last_login_time:str}, ... ] }
             or {} on error
    """
    # hash=id (tenant ID) | contentMarketing | enabled | licenses | name
    tInfoTable = Table(environment + "_idp_tenant")
    # hash=tenant_id | range=id | email | enabled | password | user_profile | last_login_time | login_token
    tUserTable = Table(environment + "_idp_user")
    tenant = {}
    try:
        tInfo = tInfoTable.get_item(id=tid)
        tenant = {'tid': tid}
        if 'name' in tInfo.keys():
            tenant['name'] = tInfo['name']
        userInfo = tUserTable.query(tenant_id__eq=tid)
        tenant['userCount'] = 0
        tenant['users'] = []
        for user in userInfo:
            tenant['userCount'] += 1
            userObj = {'uid': user['id'], 'email': user['email']}
            if 'last_login_time' in user.keys():
                userObj['last_login_time'] = user['last_login_time']
            tenant['users'].append(userObj)
        return tenant
    except:
        return {}
class AwsDataFactory(DataFactory):
    def __init__(self, config):
        '''
        Constructor.

        @param config Configuration settings. Expected definition:
                      Section: database
                      Key: data_table
                      Type: string
                      Desc: Name of the Data model table
        @paramType ConfigParser
        @returns n/a
        '''
        self.global_table = Table(config.get('database', 'global_data_table'))
        self.set_table = Table(config.get('database', 'set_data_table'))

    def create_data(self, content, datum_id, location, set_id, timestamp, type):
        ''' {@inheritDocs} '''
        assert content is not None
        assert datum_id is not None
        assert -180 <= location[0] and location[0] < 180, location[0]
        assert -90 <= location[1] and location[1] < 90, location[1]
        assert set_id is not None
        assert timestamp is not None
        assert type is not None

        # Normalize the values
        lat_norm = int(location[1] * 10000000)
        lon_norm = int(location[0] * 10000000)
        timestamp_norm = strftime('%Y-%m-%d %H:%M:%S', timestamp)

        # Create the database record
        data = {
            'content': content,
            'datum_id': datum_id,
            'lat': lat_norm,
            'lat_copy': lat_norm,
            'lon': lon_norm,
            'lon_copy': lon_norm,
            'set_id': set_id,
            'timestamp': timestamp_norm,
            'timestamp_copy': timestamp_norm,
            'type': type
        }

        result = False
        if set_id == 'global':
            # If this is a global data point
            result = self.global_table.put_item(data=data)
        else:
            # If this is a set data point
            result = self.set_table.put_item(data=data)

        # If we failed to create the database record
        if result is False:
            raise CreateError("Failed to create the Data(" + str(data) + ")!")

    def copy_data(self, set_id, datas):
        ''' {@inheritDocs} '''
        assert set_id is not None
        with self.set_table.batch_write() as batch:
            for data in datas:
                batch.put_item(data={
                    'content': data.get_content(),
                    'datum_id': data.get_datum_id(),
                    'lat': data.record['lat'],
                    'lat_copy': data.record['lat_copy'],
                    'lon': data.record['lon'],
                    'lon_copy': data.record['lon_copy'],
                    'set_id': set_id,
                    'timestamp': data.record['timestamp'],
                    'timestamp_copy': data.record['timestamp_copy'],
                    'type': data.record['type']
                })

    def filter_global_data(self, min_timestamp=None, max_timestamp=None,
                           min_lat=None, max_lat=None,
                           min_lon=None, max_lon=None,
                           segment_id=0, num_segments=1, type=None):
        ''' {@inheritDocs} '''
        kwargs = {}
        if min_timestamp is not None:
            kwargs['timestamp__gte'] = strftime('%Y-%m-%d %H:%M:%S', min_timestamp)
        if max_timestamp is not None:
            kwargs['timestamp_copy__lte'] = strftime('%Y-%m-%d %H:%M:%S', max_timestamp)
        if min_lat is not None:
            kwargs['lat__gte'] = int(min_lat * 10000000)
        if max_lat is not None:
            kwargs['lat_copy__lte'] = int(max_lat * 10000000)
        if min_lon is not None:
            kwargs['lon__gte'] = int(min_lon * 10000000)
        if max_lon is not None:
            kwargs['lon_copy__lte'] = int(max_lon * 10000000)
        if type is not None:
            kwargs['type__eq'] = type
        kwargs['set_id__eq'] = 'global'
        kwargs['segment'] = segment_id
        kwargs['total_segments'] = num_segments

        logger.debug("Scan Args: %s", kwargs)
        return AwsDataIterator(self.global_table.scan(**kwargs))

    def get_data_set(self, set_id):
        ''' {@inheritDocs} '''
        return AwsDataIterator(self.set_table.query(set_id__eq=set_id))
from boto.dynamodb2.fields import HashKey, RangeKey, GlobalAllIndex
from boto.dynamodb2.layer1 import DynamoDBConnection
from boto.dynamodb2.table import Table
from boto.dynamodb2.items import Item

table = Table('bootcamplog')
items = table.query(user__eq='10011')
for item in items:
    print item['latitude']
def populateCacheToday():
    print("Inside populateCacheToday")
    masterHash = {}
    # Switch to topics table
    topics = Table("Topics", connection=conn)
    sys.stdout.flush()
    for city in cityList:
        # New nested hash for this city.
        # We want to query for just TODAY from this data point.
        code = city['code'] + todayYear + todayMonth + todayDay
        masterHash[code] = {}
        print("Querying all categories for: " + code + '\n')
        for category in CATEGORY_LIST:
            print("Querying for category: " + category + '\n')
            sys.stdout.flush()
            topicsData = topics.query(index='Categories',
                                      Category__eq=category,
                                      Date__beginswith=code)
            print("Received data back.")
            sys.stdout.flush()
            for dataPoint in topicsData:
                entryYear = dataPoint['Date'][3:][:-4]
                entryMonth = dataPoint['Date'][7:][:-2]
                entryDay = dataPoint['Date'][9:]
                # Populate the topic hash.
                curCode = "TOP-" + code
                if curCode not in masterHash:
                    masterHash[curCode] = {}
                if dataPoint['Name'] not in masterHash[curCode]:
                    masterHash[curCode][dataPoint['Name']] = dataPoint['Score']
                else:
                    masterHash[curCode][dataPoint['Name']] += dataPoint['Score']
                # Populate the category hash.
                curCode = "CAT-" + code
                if curCode not in masterHash:
                    masterHash[curCode] = {}
                if category not in masterHash[curCode]:
                    masterHash[curCode][category] = dataPoint['Score']
                else:
                    masterHash[curCode][category] += dataPoint['Score']
                # Populate the ALL topic hash.
                curCode = "TOP-" + 'ALL' + entryYear + entryMonth + entryDay
                if curCode not in masterHash:
                    masterHash[curCode] = {}
                if dataPoint['Name'] not in masterHash[curCode]:
                    masterHash[curCode][dataPoint['Name']] = dataPoint['Score']
                else:
                    masterHash[curCode][dataPoint['Name']] += dataPoint['Score']
                # Populate the ALL category hash.
                curCode = "CAT-" + 'ALL' + entryYear + entryMonth + entryDay
                if curCode not in masterHash:
                    masterHash[curCode] = {}
                if category not in masterHash[curCode]:
                    masterHash[curCode][category] = dataPoint['Score']
                else:
                    masterHash[curCode][category] += dataPoint['Score']
    # Load these daily entries into redis.
    loadDataRedis(masterHash)
    # Update any existing year/month entries.
    updateRedisEntries(masterHash)
class DynamoTable(object):
    conn = None
    table = None
    table_name = 'test-table'
    hash_key = 'hash_key'
    range_key = 'range_key'
    indexes = []
    read_units = 10
    write_units = 10
    counters = {'reads': 0, 'writes': 0, 'delete': 0, 'batch_w': 0}

    def __init__(self, table_name, hash_key, range_key, indexes,
                 read_units=10, write_units=10):
        self.table_name = table_name
        self.hash_key = hash_key
        self.range_key = range_key
        self.indexes = indexes
        self.read_units = read_units
        self.write_units = write_units
        try:
            self.connect()
            self.setup()
        except:
            logger.warn('Unable to connect or handle DynamoDB Table')
            traceback.print_exc()

    def connect(self):
        # create initial database connection
        self.conn = boto.dynamodb2.connect_to_region(
            settings.AWS_DYNAMODB_REGION,
            aws_access_key_id=settings.AWS_DYNAMODB_ACCESS_KEY_ID,
            aws_secret_access_key=settings.AWS_DYNAMODB_SECRET_ACCESS_KEY
        )

    def setup(self):
        '''
        Sets up the table schema if the table does not exist yet.
        Returns the Table.
        '''
        try:
            self.table = Table.create(
                self.table_name,
                connection=self.conn,
                schema=[HashKey(self.hash_key), RangeKey(self.range_key)],
                throughput={'read': self.read_units, 'write': self.write_units})
            logger.warning('Created new DynamoDB Table')
        except:
            self.table = Table(
                self.table_name,
                connection=self.conn,
                schema=[HashKey(self.hash_key), RangeKey(self.range_key)],
                throughput={'read': self.read_units, 'write': self.write_units})
        return self.table

    def put(self, hash_key, range_key, data):
        '''
        Puts the data to the table at hash_key/range_key.
        '''
        if settings.DEBUG:
            bench_start = time()
        data[self.hash_key] = hash_key
        data[self.range_key] = range_key
        item = self.table.put_item(data=data, overwrite=True)
        if settings.DEBUG:
            if not hash_key in self.counters:
                self.counters[hash_key] = {'reads': 0, 'writes': 0}
            self.counters[hash_key]['writes'] += 1
            self.counters['writes'] += 1
            elapsed_time = time() - bench_start
            logger.info(data)
            logger.info("R%sW%s - write %0.5f seconds" % (
                self.counters[hash_key]['reads'],
                self.counters[hash_key]['writes'],
                elapsed_time))
        return item

    def get_latest(self, hash_key):
        '''
        Retrieve the last recorded data item for the hash key.
        '''
        if settings.DEBUG:
            bench_start = time()
        kwargs = {}
        kwargs[self.hash_key + '__eq'] = hash_key
        kwargs['limit'] = 1
        items = self.table.query(**kwargs)
        if items:
            data = {}
            for item in items:
                for key in item.keys():
                    if not key in (self.hash_key, self.range_key):
                        data[key] = item[key]
        else:
            return None
        if not len(data):
            return None
        if settings.DEBUG:
            if not hash_key in self.counters:
                self.counters[hash_key] = {'reads': 0, 'writes': 0}
            self.counters[hash_key]['reads'] += 1
            self.counters['reads'] += 1
            elapsed_time = time() - bench_start
            logger.info("R%sW%s - %s - read %0.5f seconds" % (
                self.counters[hash_key]['reads'],
                self.counters[hash_key]['writes'],
                hash_key, elapsed_time))
        return data

    def get_range_obj(self, hash_key):
        if settings.DEBUG:
            bench_start = time()
        kwargs = {}
        kwargs[self.hash_key + '__eq'] = hash_key
        # TODO - use batch_get
        items = self.table.query(**kwargs)
        self.counters['reads'] += 1
        data = {}
        for item in items:
            rkey_data = {}
            rkey = item[self.range_key]
            if rkey == 'index':
                data = json.loads(item['value'])
                break
            else:
                for key in item.keys():
                    if key != None and not key in (self.hash_key, self.range_key) and key != 'index':
                        if key == 'value':
                            value = item[key]
                            try:
                                rkey_data = json.loads(str(value))
                            except:
                                rkey_data = value
                data[rkey] = rkey_data
        if settings.DEBUG:
            if not hash_key in self.counters:
                self.counters[hash_key] = {'reads': 0, 'writes': 0}
            self.counters[hash_key]['reads'] += 1
            self.counters['reads'] += 1
            elapsed_time = time() - bench_start
            logger.info("R%sW%s - %s - read %0.5f seconds" % (
                self.counters[hash_key]['reads'],
                self.counters[hash_key]['writes'],
                hash_key, elapsed_time))
        return data

    def set_range_obj(self, hash_key, data, range_keys=None):
        # avoid crashing on attempt to write None data
        if data == None:
            return
        if range_keys == None:
            range_keys = data.keys()
        # TODO - add better size estimate
        datablocks = 0
        for range_key in data.keys():
            try:
                len_size = len(data[range_key])
            except:
                len_size = 1
            datablocks += len_size
        # update date: seconds since the epoch (time.time())
        update_date = time()
        if datablocks > 1000:
            # split over multiple items by data dict key
            with self.table.batch_write() as batch:
                for range_key in range_keys:
                    value = json.dumps(data[range_key])
                    batch_data = {}
                    batch_data[self.hash_key] = hash_key
                    batch_data[self.range_key] = range_key
                    batch_data['value'] = value
                    batch_data['update_date'] = update_date
                    batch.put_item(data=batch_data)
                    self.counters['batch_w'] += 1
            # delete index if exists
            self.remove_range_obj(hash_key, range_keys=['index'])
        else:
            value = json.dumps(data)
            batch_data = {}
            batch_data[self.hash_key] = hash_key
            batch_data[self.range_key] = 'index'
            batch_data['value'] = value
            batch_data['update_date'] = update_date
            self.table.put_item(data=batch_data, overwrite=True)
            self.counters['writes'] += 1
        return True

    def remove_range_obj(self, hash_key, range_keys=None):
        '''
        Deletes a ranged object, or only the specified range_keys.
        '''
        # get range object
        if range_keys == None:
            data = self.get_range_obj(hash_key)
            range_keys = data.keys()
        # remove possible index
        try:
            kwargs = {}
            kwargs[self.hash_key] = hash_key
            kwargs[self.range_key] = 'index'
            self.table.delete_item(**kwargs)
        except:
            pass
        with self.table.batch_write() as batch:
            for range_key in range_keys:
                kwargs = {}
                kwargs[self.hash_key] = hash_key
                kwargs[self.range_key] = range_key
                batch.delete_item(**kwargs)
                self.counters['delete'] += 1
        return True
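# Hypothetical usage of DynamoTable above; the table and key names are made
# up for illustration. get_latest() issues a limit=1 query on the hash key
# and strips the key attributes from the returned item.
t = DynamoTable('metrics', hash_key='device_id', range_key='ts', indexes=[])
t.put('dev-1', '2014-06-01T12:00:00', {'temp': 21.5})
latest = t.get_latest('dev-1')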
class ClientRadiusSecretsDDB(RadiusSecrets):
    """
    get secrets for a particular client

    ddb = DynamoDBConnection(
        aws_access_key_id=aws_access_key,
        aws_secret_access_key=aws_secret_key
    )
    secrets = Table.create('qradius_secrets',
                           schema=[
                               HashKey('ip_address'),
                               RangeKey('not_before', data_type=NUMBER),
                           ],
                           indexes=[
                               AllIndex('IPNotAfter', parts=[
                                   HashKey('ip_address'),
                                   RangeKey('not_after', data_type=NUMBER),
                               ])
                           ], connection=ddb)

    we will normally want secrets where
        ip_address = client ip address
        not_before < now
        not_after >= now
    likely we'll want to limit the # of secrets we look at; here i limit it to 3

    For queries (these are the least impactful on dynamo):
    res = secrets.query(ip_address__eq='10.25.95.158', not_before__lt=now,
                        limit=3, consistent=False)
    res = secrets.query(ip_address__eq='10.25.95.158', not_after__gt=now,
                        limit=3, consistent=False, index='IPNotAfter')

    if you really need everything, this is a full scan:
    res = secrets.scan(ip_address__eq='10.25.95.158', not_before__lt=now,
                       not_after__gt=now, limit=3)
    """

    def __init__(self, encryption_key=None, aws_keys=None, table_name=None):
        """
        @param encryption_key : string containing encryption key
        @param aws_keys : object containing aws keys
        @param table_name : dynamo table name
        @type encryption_key string
        @type aws_keys AWSKeys
        @type table_name string
        """
        if encryption_key is None:
            raise ValueError("encryption_key must be specified")
        if not isinstance(aws_keys, AWSKeys):
            raise ValueError("aws_keys must be specified and of type AWSKeys")
        if table_name is None:
            raise ValueError("dynamo table containing secrets must be specified")
        self._encryption_key = encryption_key
        self._encryptor = DataEncryptor(self._encryption_key)
        self._ddb_connection = DynamoDBConnection(
            aws_access_key_id=aws_keys.aws_access_key,
            aws_secret_access_key=aws_keys.aws_secret_key)
        if self._ddb_connection is None:
            raise ValueError("unable to obtain dynamo connection using %s" % aws_keys)
        self._secret_table = Table(table_name, connection=self._ddb_connection)
        logging.debug('connected to dynamo table %s' % table_name)
        if self._secret_table is None:
            raise ValueError("unable to connect to dynamo table %s" % table_name)

    def encryptSecret(self, secret):
        """
        call the included encryption module to encrypt/encode secrets
        @param secret
        @type secret string
        """
        if len(secret) > MAX_SECRET_LENGTH:
            raise ValueError("secret may not be more than %d bytes" % MAX_SECRET_LENGTH)
        encoded_secret = self._encryptor.encrypt(secret)
        return encoded_secret

    def decryptSecret(self, encoded_secret):
        """
        call the included encryption module to decrypt/decode secrets
        @param encoded_secret
        @type encoded_secret string
        """
        plain_secret = self._encryptor.decrypt(encoded_secret)
        if len(plain_secret) > MAX_SECRET_LENGTH:
            raise ValueError("decryption resulted in a plain secret longer than "
                             "the maximum length of %d bytes" % MAX_SECRET_LENGTH)
        return plain_secret

    def putSecret(self, clientIP, secret, not_before=None, not_after=None, tries=0):
        """
        store a secret for clientIP
        @param clientIP : client ip address
        @param secret : radius secret for client
        @param tries : internal parameter to constrain recursion depth for self-calls
        @type clientIP string
        @type secret string

        example usage:
        from radiussecrets import *
        rs = ClientRadiusSecrets(encryption_key='someencryptionkey',
                                 aws_keys=AWSKeys('myaccesskey', 'mysecretkey'),
                                 table_name='qradius_secrets')
        rs.putSecret('1.2.3.4', 'shhdonottellanyone')
        """
        now = time.time()
        if not_before is None:
            not_before = now
        if not_after is None:
            not_after = now + DEFAULT_KEY_LIFE
        if not isinstance(not_before, (int, float, long)) or not_before < 0:
            raise ValueError("not_before must be a number representing seconds since epoch")
        if not isinstance(not_after, (int, float, long)) or not_after < 0:
            raise ValueError("not_after must be a number representing seconds since epoch")
        if len(secret) > MAX_SECRET_LENGTH:
            raise ValueError("length of secret may not exceed %d bytes" % MAX_SECRET_LENGTH)
        result = None
        try:
            result = self._secret_table.put_item(data={
                'ip_address': clientIP,
                'not_before': not_before,
                'not_after': not_after,
                'secret': self.encryptSecret(secret)
            })
        except boto.dynamodb2.exceptions.ConditionalCheckFailedException as e:
            tries += 1
            if tries > 5:
                logging.critical('pk violation for client %s not_before %d after %d tries at incrementing'
                                 % (clientIP, not_before, tries))
                raise e
            # increment not_before to avoid pk violation
            not_before += 1
            logging.warn('pk violation for client %s not_before %d; retrying with higher not_before'
                         % (clientIP, not_before))
            result = self.putSecret(clientIP, secret, not_before=not_before,
                                    not_after=not_after, tries=tries)
        return result

    def deleteSecret(self, clientIP, not_before):
        """
        delete a secret; this should be used carefully
        @param clientIP
        @param not_before
        @type clientIP string
        @type not_before number
        """
        return self._secret_table.delete_item(ip_address=clientIP, not_before=not_before)

    def getSecret(self, clientIP):
        """
        return the secret associated with IP address
        if multiple secrets are found, selection is by:
        1) not_before < now
        2) not_after >= now
        3) the highest value of not_before, if there are still multiple secrets
        @param clientIP
        @type clientIP string representing an ip address
        """
        now = time.time()
        # i wanted to limit to 3 (limit=3) but boto kept barfing
        results = self._secret_table.query(ip_address__eq=clientIP,
                                           not_before__lt=now,
                                           consistent=False)
        client_secret = None
        client_secret_not_before = 0
        for result in results:
            if result['not_after'] >= now:
                if client_secret_not_before < result['not_before']:
                    client_secret_not_before = result['not_before']
                    client_secret = self.decryptSecret(result['secret'])
        logging.debug('retrieved secret for %s' % clientIP)
        return client_secret

    def purgeSecrets(self, clientIP):
        """
        purge stale secrets associated with IP address
        1) remove all keys for clientIP where not_after is older than
           current time - PURGE_TIME_BEFORE_NOW
        2) scan remaining keys, keep PURGE_RETAIN_NR_ACTIVE_KEYS of them
        @param clientIP
        @type clientIP string
        @returns # of purged secrets
        """
        now = time.time()
        min_purge_time = now
        nr_purged = 0
        # first get rid of expired keys
        results = self._secret_table.query(ip_address__eq=clientIP,
                                           not_after__lt=min_purge_time,
                                           consistent=False,
                                           index='IPNotAfter')
        for result in results:
            logging.info('purging secret: %s %d' % (result['ip_address'], result['not_before']))
            result.delete()
            nr_purged += 1
        # now the fun...
        result_list = []
        results = self._secret_table.query(ip_address__eq=clientIP,
                                           not_before__lt=min_purge_time,
                                           consistent=False)
        for result in results:
            result_list.append(result)
        # delete results if there are more than PURGE_RETAIN_NR_ACTIVE_KEYS results;
        # we want the oldest not_befores to be removed first
        if len(result_list) > PURGE_RETAIN_NR_ACTIVE_KEYS:
            for result in sorted(result_list,
                                 key=lambda result: result['not_before'])[:-PURGE_RETAIN_NR_ACTIVE_KEYS]:
                logging.info('purging secret: %s %d' % (result['ip_address'], result['not_before']))
                result.delete()
                nr_purged += 1
        return nr_purged
def send_ios_general_notification(message):
    conn = dynamo_connection()
    ios_device_table = Table(ios_table, connection=conn)
    items = list(ios_device_table.query(notification_type__eq='general'))
    tokens = [item['token'] for item in items]
    send_ios_notifications(message, tokens)
from boto.dynamodb2.fields import HashKey, RangeKey, GlobalAllIndex
from boto.dynamodb2.layer1 import DynamoDBConnection
from boto.dynamodb2.table import Table
from boto.dynamodb2.items import Item

table = Table('bootcamplog')
items = table.query(user__eq='10011', timestamp__gt='1403173738269')
for item in items:
    print item['latitude']
class Backend():
    def __init__(self, region, link_prefix, bucket_name, table_name):
        if link_prefix[-1] != u'/':
            link_prefix += u'/'
        self.link_prefix = link_prefix
        conn = boto.s3.connect_to_region(region)
        self.bucket = conn.get_bucket(bucket_name)
        conn = boto.dynamodb2.connect_to_region(region)
        self.table = Table(table_name, connection=conn)

    def write_to_backend(self, station, title, timestamp, description,
                         files_path, files):
        if self._get_item(station, title):
            msg = u'%s %s' % (station, title)
            raise TitleExistError(msg)
        if files_path and files:
            if files_path[-1] != u'/':
                files_path = files_path + u'/'
            files_with_path = []
            for f in files:
                k = Key(self.bucket)
                k.key = u''.join([station, u'_', title, u'_', f])
                k.set_contents_from_filename(files_path + f)
                k.set_acl(u'public-read')
                files_with_path.append(self.link_prefix + k.key)
            files_str = u' '.join(files_with_path)
        else:
            files_str = u'empty'
        self.table.put_item(data={u'station': station, u'title': title,
                                  u'timestamp': timestamp,
                                  u'description': description,
                                  u'files': files_str})
        return True

    def update_description_and_append_files(self, station, title, timestamp,
                                            description, files_path, files):
        if files_path and files:
            if files_path[-1] != u'/':
                files_path = files_path + u'/'
            files_with_path = []
            for f in files:
                k = Key(self.bucket)
                k.key = u''.join([station, u'_', title, u'_', f])
                k.set_contents_from_filename(files_path + f)
                k.set_acl(u'public-read')
                files_with_path.append(self.link_prefix + k.key)
            new_files_str = u' '.join(files_with_path)
        else:
            new_files_str = u'empty'
        item = self._get_item(station, title)
        if not item:
            return False
        old_files_str = unicode(item[u'files'])
        if old_files_str == u'empty':
            files_str = new_files_str
        elif new_files_str == u'empty':
            files_str = old_files_str
        else:
            files_str = u''.join([old_files_str, u' ', new_files_str])
        item[u'files'] = files_str
        item[u'description'] = description
        item.partial_save()
        return True

    def delete_item(self, station, title):
        item = self._get_item(station, title)
        if not item:
            return False
        files_str = unicode(item[u'files'])
        if files_str == u'empty':
            files = []
        elif not files_str:
            files = []
        else:
            files = files_str.split(u' ')
        self.table.delete_item(station=station, title=title)
        for f in files:
            k = Key(self.bucket)
            k.key = f[len(self.link_prefix):]
            k.delete()
        return True

    def delete_file(self, station, title, f):
        item = self._get_item(station, title)
        files = unicode(item[u'files']).split(u' ')
        if f in files:
            files.remove(f)
            if files:
                files_str = u' '.join(files)
            else:
                files_str = u'empty'
            item[u'files'] = files_str
            item.partial_save()
            k = Key(self.bucket)
            k.key = f[len(self.link_prefix):]
            k.delete()
            return True
        return False

    def get_page(self, station, direction, timestamp, page_size):
        need_reverse = False
        if timestamp == u'0':
            items = self.table.query(station__eq=station,
                                     index=u'timestamp-index',
                                     reverse=False, limit=page_size)
        elif direction == u'next':
            items = self.table.query(station__eq=station,
                                     timestamp__lt=timestamp,
                                     index=u'timestamp-index',
                                     reverse=False, limit=page_size)
        elif direction == u'prev':
            items = self.table.query(station__eq=station,
                                     timestamp__gt=timestamp,
                                     index=u'timestamp-index',
                                     reverse=True, limit=page_size)
            need_reverse = True
        else:
            return []
        ret = []
        for item in items:
            r = {}
            r[u'title'] = unicode(item[u'title'])
            r[u'timestamp'] = unicode(item[u'timestamp'])
            r[u'description'] = unicode(item[u'description'])
            r[u'files'] = unicode(item[u'files']).split(u' ')
            ret.append(r)
        if need_reverse:
            ret.reverse()
        return ret

    def get_item(self, station, title):
        item = self._get_item(station, title)
        if not item:
            return None
        r = {}
        r[u'title'] = unicode(item[u'title'])
        r[u'timestamp'] = unicode(item[u'timestamp'])
        r[u'description'] = unicode(item[u'description'])
        r[u'files'] = unicode(item[u'files']).split(u' ')
        return r

    def _get_item(self, station, title):
        items = self.table.query(station__eq=station, title__eq=title)
        for item in items:
            return item
        return None
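# Hypothetical paging loop over Backend.get_page above; the region, link
# prefix, bucket, and table names are made up for illustration. A timestamp
# of u'0' requests the first page; each 'next' call continues past the last
# timestamp seen on the previous page.
backend = Backend(u'us-east-1', u'http://cdn.example.com/',
                  u'my-radio-bucket', u'radio_posts')
page = backend.get_page(u'station-1', u'next', u'0', 10)
while page:
    for post in page:
        print post[u'title']
    page = backend.get_page(u'station-1', u'next', page[-1][u'timestamp'], 10)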
class S3mper:
    """
    S3mper is a metastore library used to provide a layer of consistency
    on top of S3 by using dynamodb to record what files should be in the
    S3 listing.

    See go/s3mper for more information.
    """
    def __init__(self, disabled=False, fail_on_error=False,
                 table_name="ConsistentListingMetastoreTest"):
        self.disabled = disabled
        self.disable_alerts = False
        self.fail_on_error = fail_on_error
        if self.disabled:
            logger.warning("S3mper Explicitly Disabled")
            return
        self.db = Table(table_name)

    def add(self, paths):
        """
        Adds a list of Paths to the file metastore and returns True
        on success.

        Example: s.add([path1, path2]) -> True
        """
        if self.disabled:
            return
        epoch = self.__time_now()
        paths = self.__as_paths(paths)
        with self.db.batch_write() as batch:
            for path in paths:
                batch.put_item(data={"path": path.parent().normalize(),
                                     "file": path.filename(),
                                     "epoch": epoch})

    def list(self, path, include_delete_marked=False):
        """
        Lists the given directory in the metastore. The passed-in path
        must be a directory.

        Example: s.list(path) -> []
        """
        if self.disabled:
            return
        if isinstance(path, basestring):
            path = Path(path)
        listing = self.db.query(path__eq=path.normalize(), consistent=True)
        paths = []
        for e in listing:
            if (not include_delete_marked) and "deleted" in e:
                continue
            paths.append(Path("s3n:" + e["path"] + "/" + e["file"]))
        return paths

    def checked_listing(self, s3_listing, path):
        """
        Checks the s3_listing against the metastore listing. All attempts
        are made to use the boto generator for listing if a check isn't
        necessary, but if a check must be made, the whole listing for both
        the metastore and the s3 listing needs to be pulled into memory.
        """
        if self.disabled:
            return s3_listing
        expected = set([p.url for p in self.list(path)])
        if not expected:
            return s3_listing
        # This isn't ideal since we are sucking in the whole listing
        # to perform the check, but if we check on-the-fly, processing
        # could be partially complete before inconsistency is detected
        s3_listing = list(s3_listing())
        for p in s3_listing:
            expected.discard(p if not isinstance(p, Key)
                             else "s3://%s/%s" % (p.bucket, p.name))
        if not expected:
            return s3_listing
        else:
            logger.error("Failed consistency check. Missing file count %d. "
                         "Missing paths: %s" % (len(expected), expected))
            self.__send_alert(expected)
            if self.fail_on_error:
                raise S3ConsistencyException(expected)

    def delete(self, paths, delete_marker=False):
        """
        Deletes the provided paths from the metastore. Completely removing
        files from the metastore can cause problems because the s3 listing
        may show the files even though the data may not be available,
        which will cause MR jobs to fail. The delete marker can be used to
        hide files from the listing instead.

        Example: s.delete([path1, path2]) -> True
        """
        if self.disabled:
            return
        paths = self.__as_paths(paths)
        if delete_marker:
            for path in paths:
                item = self.db.get_item(path=path.parent().normalize(),
                                        file=path.filename())
                item["deleted"] = "true"
                item.partial_save()  # persist the delete marker
        else:
            with self.db.batch_write() as batch:
                for path in paths:
                    batch.delete_item(path=path.parent().normalize(),
                                      file=path.filename())

    def __send_alert(self, paths, detail={}):
        if self.disable_alerts:
            return
        try:
            body = {
                "truncated": detail.get("truncated", False),
                "paths": paths if len(paths) <= 10 else paths[0:9],
                "recovered": detail.get("recovered", False),
                "missingFiles": len(paths),
                "stackTrace": traceback.extract_stack(),
                "timestamp": "%s" % datetime.utcnow(),
                "queryId": detail.get("", None),
                "taskId": detail.get("", None),
                "hostname": platform.node(),
                "username": getpass.getuser(),
                "queryType": "DSE Platform Lib",
                "jobId": detail.get("jobId", None),
                "attemptId": detail.get("attemptId", None),
                "email": detail.get("email", None),
                "dataovenId": detail.get("dataovenId", None),
                "logFile": detail.get("logFile", None),
                "inputFile": detail.get("inputFile", None),
                "genieId": detail.get("genieId", None),
                "epoch": self.__time_now(),
            }
            message = RawMessage()
            message.set_body(body)
            conn = sqs.connect_to_region("us-east-1")
            queue = conn.get_queue("s3mper-alert-queue")
            queue.write(message)
        except Exception as e:
            print e

    def __as_paths(self, paths):
        if isinstance(paths, basestring):
            return [Path(paths)]
        elif isinstance(paths, Path):
            return [paths]
        else:
            return paths

    def __time_now(self):
        """ Returns the current time in seconds since the epoch. """
        return int(time.time())
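# Hypothetical round trip through S3mper above; the bucket and paths are made
# up for illustration. add() records the files, list() reads them back with a
# consistent query, and delete(..., delete_marker=True) hides a file from the
# listing without removing its record.
s = S3mper(table_name="ConsistentListingMetastoreTest")
s.add(["s3n://bucket/dataset/part-00000", "s3n://bucket/dataset/part-00001"])
print s.list("s3n://bucket/dataset")
s.delete(["s3n://bucket/dataset/part-00001"], delete_marker=True)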