def mail_table(domain_name): if domain_name.startswith(mail_table_prefix): print 'mail_table:', domain_name mt = Table(domain_name) print 'mail_table count:', mt.count() return mt else: return mail_table(mail_table_prefix + domain_name)
def count_table(name):
    """Return the item count of the table called ``name``.

    The table is opened through the module-level ``client_dynamo``
    connection.  Per the original author's note, the reported count is
    delayed by about six hours, so it should not be treated as live.

    Args:
        name: Table name; must be a string.

    Returns:
        Whatever ``Table.count()`` reports for the table.

    Raises:
        SystemExit: If the lookup is interrupted with Ctrl-C.
    """
    try:
        return Table(name, connection=client_dynamo).count()
    except KeyboardInterrupt:
        # The handler used to contain a bare ``exit`` -- a name lookup that
        # is never called -- so the interrupt was swallowed and the function
        # silently returned None.  Raise SystemExit, which is what calling
        # ``exit()`` does, so Ctrl-C really stops the program.
        raise SystemExit()
def _get_table(self, table_name, create=True): """ Get a DynamoDB table reference based on a table name. If we have looked up/created this table before, use the cached reference. If not, check to make sure the table exists. If it does not exist, create it and return that reference. Note that if this function does not find the table, it will create it and this creation operation can block for some time (typically ~10s). It will only return the table reference once the table is active and ready to be stored to or read from. As a result, it probably makes sense to call this method in a lock for the specific table. Otherwise, simultaneous calls to _get_table could result in multiple table creations. Args: create (bool): If table does not exist, create it. Returns: table: A boto table reference Raises: Exception: When table does not exist and `create` is False. """ if table_name in self._table_cache: return self._table_cache[table_name] table = Table(table_name, connection=self._conn) try: num_items = table.count() self.logger.debug("Table {} found - contains {} items".format( table_name, num_items)) except JSONResponseError as jre: if create and 'ResourceNotFoundException' in str(jre): # If we get a resource not found exception, the table must not # exist, so let's create it self.logger.info( "Table {} not found - creating it".format(table_name)) table = self._create_table(table_name) self.logger.debug("Table created: {}".format(table)) else: # We got some other type of exception, raise it since that # wasn't expected raise except: self.logger.exception("Unable to determine table reference") raise # Cache this reference to the table for later use self._table_cache[table_name] = table return table
def get_mail_table(self, domain): mail_table = 'smtp' s3_mail_table = Table(mail_table) try: print mail_table, 'count:', s3_mail_table.count() except: print 'creating:', mail_table s3_mail_table = Table.create( mail_table, schema=[HashKey('derived_to'), RangeKey('derived_from')], throughput={ 'read': 3, 'write': 3 }) return s3_mail_table
except: print "keyword does not exist in this source. Try another one" exit() print "num doc with keyword" + str(numDocsWithKeyword) #now get a table with documents #"DailyBeast" tablenameWithDocs = sys.argv[3] tableWithDocs = Table(tablenameWithDocs,connection=db) #get count of number of documents #we know that this number gets updated only once every 6 hours. This would need to be modified if we are going to work with system in which articles are being added to the database #constantly lengthOfCorpus = tableWithDocs.count() #tableWithDocs.query_count(last_name__eq='Doe') #rows = tableWithDocs.scan(body__contains= 'obama') #index = 0 #for row in rows: # index +=1 #print index #exit() tdIdfCalculator = TfIdf.TfIdf(lengthOfCorpus,numDocsWithKeyword,keyword) columnWithBody = sys.argv[4] columnWithUniqueId = sys.argv[5]
except: print "keyword does not exist in this source. Try another one" exit() print "num doc with keyword" + str(numDocsWithKeyword) #now get a table with documents #"DailyBeast" tablenameWithDocs = sys.argv[3] tableWithDocs = Table(tablenameWithDocs, connection=db) #get count of number of documents #we know that this number gets updated only once every 6 hours. This would need to be modified if we are going to work with system in which articles are being added to the database #constantly lengthOfCorpus = tableWithDocs.count() #tableWithDocs.query_count(last_name__eq='Doe') #rows = tableWithDocs.scan(body__contains= 'obama') #index = 0 #for row in rows: # index +=1 #print index #exit() tdIdfCalculator = TfIdf.TfIdf(lengthOfCorpus, numDocsWithKeyword, keyword) columnWithBody = sys.argv[4] columnWithUniqueId = sys.argv[5]