def setup(self):
    # Create the data directories if they don't already exist
    # (assumes `os` is imported at module level)
    self.csv_dir = self.directory + 'data/csvs/'
    self.mp3_dir = self.directory + 'data/mp3s/'
    os.makedirs(self.csv_dir, exist_ok=True)
    os.makedirs(self.mp3_dir, exist_ok=True)

    # DynamoDB resource plus table, index and type names
    self.dynamodb = boto3.resource('dynamodb',
                                   region_name=self.region,
                                   endpoint_url=self.db_endpoint)
    self.TABLE_NAME = 'Rightcall'
    self.table = self.dynamodb.Table(self.TABLE_NAME)
    self.INDEX_NAME = 'rightcall'
    self.TYPE_NAME = '_doc'
    self.s3 = boto3.client('s3')

    # Parse host and port out of the endpoint string (e.g. 'http://localhost:9200')
    self.es_host = self.es_endpoint.split(':')[1].replace('/', '')
    self.es_port = int(self.es_endpoint.split(':')[2])
    self.es = elasticsearch_tools.Elasticsearch([{'host': self.es_host,
                                                  'port': self.es_port}])

    # Logging
    levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
    if self.LOGLEVEL not in levels:
        raise ValueError(f"Invalid log level choice {self.LOGLEVEL}")
    self.logger = logging.getLogger(self.__class__.__name__)
    self.logger.setLevel(self.LOGLEVEL)
    # Console handler at LOGLEVEL, file handler at DEBUG
    ch = logging.StreamHandler()
    ch.setLevel(self.LOGLEVEL)
    fh = logging.FileHandler('rightcall_local.log')
    fh.setLevel(logging.DEBUG)
    # Shared formatter for both handlers
    formatter = logging.Formatter(
        '%(asctime)s : %(levelname)s : %(name)s : %(message)s')
    ch.setFormatter(formatter)
    fh.setFormatter(formatter)
    # Attach both handlers to the logger
    self.logger.addHandler(ch)
    self.logger.addHandler(fh)
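
# --- Hypothetical usage sketch (not part of the original source) ---
# Assuming setup() belongs to a class (called `RightcallLocal` here purely for
# illustration) whose `directory`, `region`, `db_endpoint`, `es_endpoint` and
# `LOGLEVEL` attributes are set beforehand, it could be driven like this:
#
#     app = RightcallLocal()
#     app.directory = '/home/user/rightcall/'
#     app.region = 'eu-west-1'
#     app.db_endpoint = 'http://localhost:8000'
#     app.es_endpoint = 'http://localhost:9200'
#     app.LOGLEVEL = 'INFO'
#     app.setup()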
def Elasticsearch(event):
    """Index a Comprehend result and its DynamoDB metadata into Elasticsearch."""
    # Parse the SQS message body, which carries an S3 event notification
    try:
        body = json.loads(event['Records'][0]['body'])
    except Exception:
        logger.error("Could not parse S3 event from SQS message body")
        raise
    bucket = body['Records'][0]['s3']['bucket']['name']
    filename = body['Records'][0]['s3']['object']['key']
    logger.info(f"Event from bucket: {bucket}")

    # Only process objects from the Comprehend bucket
    if bucket == COMPREHEND:
        s3_data = get_item_from_s3(COMPREHEND, filename)
    else:
        logger.error(f"Wrong bucket. Source: {bucket} not equal to {COMPREHEND}")
        return False
    logger.info(f"Data from {bucket}: {s3_data}")
    referenceNumber = s3_data['referenceNumber']

    # Get the corresponding item from the metadata table
    db = dynamodb_tools.RightcallTable(REGION, TABLE_NAME)
    if db.get_db_item(referenceNumber, check_exists=True):
        metadata = db.get_db_item(referenceNumber, check_exists=False)
        logger.info(f"Data retrieved from {TABLE_NAME} database: {metadata}")
    else:
        logger.warning(f"Couldn't find metadata for {referenceNumber} in {TABLE_NAME}")
        # raise CustomException(f"Couldn't retrieve metadata from {TABLE_NAME} table.")
        logger.error("Aborting")
        return False

    # Build SigV4 auth from the caller's AWS credentials for the Elasticsearch domain
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key,
                       credentials.secret_key,
                       REGION,
                       'es',
                       session_token=credentials.token)
    es = elasticsearch_tools.Elasticsearch(ES_HOST, REGION, INDEX, awsauth)

    # Combine both items, sanitize them and load them into Elasticsearch
    result = es.load_call_record(metadata, s3_data)
    logger.info(f"Result: {result}")
    return result
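
# --- Hypothetical local invocation (not part of the original source) ---
# The handler expects an SQS message whose body is an S3 object-created
# notification; the bucket and object key below are made up for illustration.
#
#     sample_event = {
#         'Records': [{
#             'body': json.dumps({
#                 'Records': [{
#                     's3': {
#                         'bucket': {'name': 'comprehend.rightcall'},
#                         'object': {'key': 'example-reference-number.json'}
#                     }
#                 }]
#             })
#         }]
#     }
#     Elasticsearch(sample_event)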
def setUp(self):
    self.es = elasticsearch_tools.Elasticsearch(
        'some_host.robinisbadatpython.com',
        'silly_index',
        'onthetrain')
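
# --- Hypothetical test sketch (not part of the original source) ---
# A minimal test that only inspects locally stored state, assuming the wrapper
# keeps the index name it was constructed with on an `index` attribute:
#
#     def test_keeps_index_name(self):
#         self.assertEqual(self.es.index, 'silly_index')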
if __name__ == '__main__':
    region = 'eu-west-1'
    dynamodb_table_name = 'rightcall_metadata'
    BUCKET = 'comprehend.rightcall'
    s3 = boto3.client('s3')
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key,
                       credentials.secret_key,
                       region,
                       'es',
                       session_token=credentials.token)
    es = elasticsearch_tools.Elasticsearch(
        'search-rightcall-445kqimzhyim4r44blgwlq532y.eu-west-1.es.amazonaws.com',
        region,
        index='rightcall',
        auth=awsauth)
    rtable = dynamodb_tools.RightcallTable(region, dynamodb_table_name)
    mapping = {
        "mappings": {
            "_doc": {
                "properties": {
                    "referenceNumber": {
                        "type": "keyword"
                    },
                    "text": {
                        "type": "text"
                    },
                    "sentiment": {
                        "type": "keyword"