def download_file(bucket_name: str, key_prefix: str):
    """
    Download an S3 object into the local scratch folder and return its local path.

    The original comment describes the destination (``local_folder_path``) as the /tmp
    folder provided by the lambda runtime. The object's size is checked against
    ``max_file_size`` before the download is attempted.

    Args:
        bucket_name: Name of the S3 bucket that holds the object.
        key_prefix: Key of the object; its base name is used as the local file name.

    Returns:
        The local path the object was downloaded to.

    Raises:
        FileSizeTooBigException: If the object's content length exceeds ``max_file_size``.
        botocore.exceptions.ClientError: If the download fails; logged and re-raised.
            NOTE(review): the size lookup runs outside the try block, so a service error
            raised while reading ``content_length`` propagates without being logged here.
    """
    s3 = get_service_resource("s3")
    logger.debug(f"prefix is {key_prefix}")

    local_file_name = os.path.basename(key_prefix)  # get file name
    local_file_path = os.path.join(local_folder_path, local_file_name)  # generate destination file path

    bucket = s3.Bucket(bucket_name)

    # check file size before downloading anything
    size = bucket.Object(key_prefix).content_length
    if size > max_file_size:
        # NOTE(review): the message hard-codes "10 MB"; confirm it matches max_file_size.
        err_msg = f"File {key_prefix} in bucket {bucket_name} too big to process. Max file size allowed is 10 MB"
        logger.error(err_msg)
        raise FileSizeTooBigException(err_msg)

    try:
        bucket.download_file(key_prefix, local_file_path)
    except botocore.exceptions.ClientError as e:
        logger.error(
            f"When downloading file from bucket: {bucket_name} and prefix: {key_prefix} following error occured: {e}"
        )
        # Look the code up defensively so a malformed error payload cannot raise KeyError
        # inside the handler and mask the original ClientError.
        error_code = e.response.get("Error", {}).get("Code")
        if error_code:
            logger.error(f"The service returned following error code: {error_code}")
        raise  # re-raise the original exception with its traceback intact

    return local_file_path
def get_config(dynamodb=None, **scan_kwargs):
    """
    Collect every item of the DDB configuration table whose "enabled" attribute is True.

    The table name is read from the DDB_CONFIG_TABLE_NAME environment variable. When no
    ``dynamodb`` resource is supplied, one is obtained through ``service_helper``.
    Additional keyword arguments are passed to ``table.scan``; any "FilterExpression"
    among them is replaced by the enabled=True filter.
    """
    if not dynamodb:
        dynamodb = service_helper.get_service_resource("dynamodb")

    config_table = dynamodb.Table(os.environ["DDB_CONFIG_TABLE_NAME"])

    # The filter is the same for every page, so it is set once up front.
    scan_kwargs["FilterExpression"] = Attr("enabled").eq(True)

    enabled_configs = []
    while True:
        # scan the config table, keeping only the records that pass the enabled filter
        page = config_table.scan(**scan_kwargs)
        enabled_configs.extend(page["Items"])

        # no "LastEvaluatedKey" in the response means there is nothing left to paginate
        next_key = page.get("LastEvaluatedKey", None)
        if next_key is None:
            break
        if next_key:
            scan_kwargs["ExclusiveStartKey"] = next_key

    logger.debug(f"Dumping config_list: {enabled_configs}")
    return enabled_configs
def test_get_service_resource(self):
    """
    The helper returns a non-None "dynamodb" resource whose Table object reports the
    name taken from the DDB_TABLE_NAME environment variable.
    """
    ddb_resource = service_helper.get_service_resource("dynamodb")
    self.assertIsNotNone(ddb_resource)

    expected_name = os.environ["DDB_TABLE_NAME"]
    ddb_setup(expected_name)  # presumably creates the table for the test -- confirm in ddb_setup

    assert expected_name == ddb_resource.Table(expected_name).table_name
def update_query(item, **put_item_kwargs):
    """
    Store the 'query' details (data searched through APIs) in the DDB table named by
    the TARGET_DDB_TABLE environment variable.

    This information is used for tracking the last time a job was run for an account.
    Extra keyword arguments are forwarded unchanged to ``put_item``.
    """
    ddb_resource = service_helper.get_service_resource("dynamodb")

    # The item's hash key should be account#url#topic#search_query and its range key
    # should be the timestamp.
    logger.info(f"Inserting item: {item}")

    ddb_resource.Table(os.environ["TARGET_DDB_TABLE"]).put_item(
        Item=item, **put_item_kwargs
    )
def get_query_timestamp(video_id):
    """
    Look up the item keyed by VIDEO_ID == ``video_id`` in the TARGET_DDB_TABLE table.

    Returns the stored item, or None when the response carries no "Item".
    A ClientError from the lookup is logged and re-raised.
    """
    tracker_table = get_service_resource("dynamodb").Table(
        os.environ["TARGET_DDB_TABLE"]
    )

    try:
        lookup_result = tracker_table.get_item(Key={"VIDEO_ID": video_id})
    except ClientError as err:
        logger.error(f'Error in getting tracker {err.response["Error"]["Message"]}')
        raise err

    return lookup_result.get("Item", None)
def get_query_tracker(account, url, search_query, topic=None, **item_kwargs):
    """
    Return the most recent tracker record for a query from the TARGET_DDB_TABLE table.

    The lookup ID is account#url, followed by #topic and #search_query when those
    values are truthy. The query asks for a single record in descending order
    (Limit=1, ScanIndexForward=False). When nothing is found, a default dict is
    returned whose LAST_PUBLISHED_TIMESTAMP is 30 days before the current UTC time.

    ``item_kwargs`` is accepted but not used by this function.
    """
    tracker_table = service_helper.get_service_resource("dynamodb").Table(
        os.environ["TARGET_DDB_TABLE"]
    )

    # Assemble the ID from its mandatory and optional components, then join once.
    id_parts = [account, url]
    if topic:
        id_parts.append(topic)
    if search_query:
        id_parts.append(search_query)
    query = "#".join(id_parts)

    logger.info(f"Query to retrieve tracker is {query}")
    response = tracker_table.query(
        KeyConditionExpression=Key("ID").eq(query),
        Limit=1,
        ScanIndexForward=False,
    )

    if len(response["Items"]) != 0:
        # Limit=1 means at most one record comes back, so take the first entry
        return response["Items"][0]

    logger.warning("Query tracker is empty")
    thirty_days_ago = datetime.now(timezone.utc) - timedelta(days=30)
    return {"LAST_PUBLISHED_TIMESTAMP": thirty_days_ago.isoformat()}