示例#1
0
    def setup(self):
        """Initialise local paths, service clients and logging for this object.

        Reads ``self.directory``, ``self.region``, ``self.db_endpoint``,
        ``self.es_endpoint`` and ``self.LOGLEVEL``, which must already be set.

        Raises:
            ValueError: if ``self.es_endpoint`` does not contain both a host
                and a numeric port, or if ``self.LOGLEVEL`` is not one of the
                standard level names.
        """
        # Imported locally so this method does not depend on the module's
        # import block.
        from urllib.parse import urlparse

        # Local data locations. Nothing is created on disk here.
        # NOTE(review): csv_dir ends with '/' but mp3_dir does not -- confirm
        # that callers join paths accordingly.
        self.csv_dir = self.directory + 'data/csvs/'
        self.mp3_dir = self.directory + 'data/mp3s'

        self.dynamodb = boto3.resource('dynamodb',
                                       region_name=self.region,
                                       endpoint_url=self.db_endpoint)

        # The table name is fixed; it is not discovered from the endpoint.
        self.TABLE_NAME = 'Rightcall'
        self.table = self.dynamodb.Table(self.TABLE_NAME)

        self.INDEX_NAME = 'rightcall'
        self.TYPE_NAME = '_doc'

        self.s3 = boto3.client('s3')

        # Get host and port from the endpoint string (e.g.
        # 'http://localhost:9200'). urlparse tolerates a trailing slash or
        # path, which naive splitting on ':' does not.
        parsed_endpoint = urlparse(self.es_endpoint)
        if parsed_endpoint.hostname is None or parsed_endpoint.port is None:
            raise ValueError(
                "Elasticsearch endpoint must look like "
                f"'scheme://host:port', got {self.es_endpoint!r}")
        self.es_host = parsed_endpoint.hostname
        self.es_port = parsed_endpoint.port
        self.es = elasticsearch_tools.Elasticsearch([{
            'host': self.es_host,
            'port': self.es_port
        }])

        # Logging
        levels = ['DEBUG', 'INFO', 'WARNING', 'ERROR', 'CRITICAL']
        if self.LOGLEVEL not in levels:
            raise ValueError(f"Invalid log level choice {self.LOGLEVEL}")
        self.logger = logging.getLogger(self.__class__.__name__)
        self.logger.setLevel(self.LOGLEVEL)

        # logging.getLogger returns the same logger object for a given name,
        # so attaching handlers unconditionally would duplicate every log
        # line when setup() runs more than once for this class.
        if not self.logger.handlers:
            formatter = logging.Formatter(
                '%(asctime)s : %(levelname)s : %(name)s : %(message)s')

            # Console output at the configured level.
            ch = logging.StreamHandler()
            ch.setLevel(self.LOGLEVEL)
            ch.setFormatter(formatter)

            # The log file always records everything from DEBUG upwards.
            fh = logging.FileHandler('rightcall_local.log')
            fh.setLevel(logging.DEBUG)
            fh.setFormatter(formatter)

            self.logger.addHandler(ch)
            self.logger.addHandler(fh)
def Elasticsearch(event):
    """Load one call record into Elasticsearch from a queued S3 notification.

    ``event['Records'][0]['body']`` is decoded as JSON; the decoded payload
    supplies the bucket name and object key of the item to fetch.

    Returns the result of ``load_call_record`` on success, or ``False`` when
    the notification came from a bucket other than ``COMPREHEND`` or when no
    metadata is found for the item's reference number.  Errors raised while
    decoding or indexing the event propagate to the caller.
    """
    payload = json.loads(event['Records'][0]['body'])

    s3_info = payload['Records'][0]['s3']
    bucket = s3_info['bucket']['name']
    filename = s3_info['object']['key']
    logger.info('Event from bucket: {}'.format(str(bucket)))

    # Only objects from the COMPREHEND bucket are processed.
    if bucket != COMPREHEND:
        logger.error(f"Wrong bucket. Source: {bucket} not equal to {COMPREHEND}")
        return False

    s3_data = get_item_from_s3(COMPREHEND, filename)
    logger.info(f"Data from {bucket}: {s3_data}")

    # Look up the matching metadata row; give up if there is none.
    referenceNumber = s3_data['referenceNumber']
    table = dynamodb_tools.RightcallTable(REGION, TABLE_NAME)
    if not table.get_db_item(referenceNumber, check_exists=True):
        logger.warning(f"Couldn't find metadata for {referenceNumber} in {TABLE_NAME}")
        logger.error("Aborting")
        return False

    metadata = table.get_db_item(referenceNumber, check_exists=False)
    logger.info(f"Data retrieved from {TABLE_NAME} database: {metadata}")

    # Build the signed Elasticsearch client from the session's credentials.
    creds = boto3.Session().get_credentials()
    auth = AWS4Auth(
        creds.access_key,
        creds.secret_key,
        REGION,
        'es',
        session_token=creds.token,
    )
    indexer = elasticsearch_tools.Elasticsearch(ES_HOST, REGION, INDEX, auth)

    # Hand both halves of the record to the loader and report the outcome.
    result = indexer.load_call_record(metadata, s3_data)
    logger.info(f"Result: {result}")
    return result
 def setUp(self):
     """Attach an Elasticsearch helper built from placeholder strings to ``self.es``."""
     placeholder_args = (
         'some_host.robinisbadatpython.com',
         'silly_index',
         'onthetrain',
     )
     self.es = elasticsearch_tools.Elasticsearch(*placeholder_args)

if __name__ == '__main__':
    region = 'eu-west-1'
    dynamodb_table_name = 'rightcall_metadata'
    BUCKET = 'comprehend.rightcall'
    s3 = boto3.client('s3')
    credentials = boto3.Session().get_credentials()
    awsauth = AWS4Auth(credentials.access_key,
                       credentials.secret_key,
                       region,
                       'es',
                       session_token=credentials.token)
    es = elasticsearch_tools.Elasticsearch(
        'search-rightcall-445kqimzhyim4r44blgwlq532y.eu-west-1.es.amazonaws.com',
        region,
        index='rightcall',
        auth=awsauth)
    rtable = dynamodb_tools.RightcallTable(region, dynamodb_table_name)
    mapping = {
        "mappings": {
            "_doc": {
                "properties": {
                    "referenceNumber": {
                        "type": "keyword"
                    },
                    "text": {
                        "type": "text"
                    },
                    "sentiment": {
                        "type": "keyword"