def process_data(
        athena: boto3.resource,
        comprehend: boto3.resource,
        query_id: str,
        token: Optional[str] = None,
        data: List[Tuple[str]] = None,
        headers: Tuple[Optional[str]] = None,
) -> List[Dict[str, Union[int, str, Dict[str, str]]]]:
    """Fetch every page of an Athena query result and build records.

    Iterates over the paginated results instead of recursing once per page,
    so result sets with many pages cannot hit Python's recursion limit
    (the previous version added one stack frame per 1000-row page).

    :param athena: Athena client used to fetch query results
    :param comprehend: Comprehend client forwarded to construct_record
    :param query_id: Athena QueryExecutionId to read results for
    :param token: optional NextToken to resume pagination (kept for
        backward compatibility with the old recursive interface)
    :param data: optional list of already-accumulated records (kept for
        backward compatibility; a fresh list is used when None)
    :param headers: optional pre-parsed column headers; when None they are
        taken from the first row of the first page
    :return: one record per data row across all pages
    """
    records = [] if data is None else data
    while True:
        request = {"QueryExecutionId": query_id, "MaxResults": 1000}
        if token:
            request["NextToken"] = token
        results = athena.get_query_results(**request)
        rows = results.get("ResultSet").get("Rows")
        offset = 0
        if headers is None:
            # The first row of the first page carries the column names;
            # later pages contain data rows only.
            headers = unpack_varchar_value(rows[offset])
            offset += 1
        for row in rows[offset:]:
            records.append(construct_record(comprehend, headers, row))
        token = results.get("NextToken")
        if not token:
            return records
def test_update_player(player: dict, dynamodb_config: boto3.resource) -> None:
    """
    Test that a good event sent invokes the proper response

    :param player: Input character dictionary
    :param dynamodb_config: boto3 resource with our tables
    """

    def _player_snapshot() -> dict:
        # Fetch the row, drop its key attribute, and round-trip it through
        # JSON so DynamoDB Decimal values compare like plain numbers.
        entry = dynamodb_config.get_item(Key={"playerId": "player_hash"})
        item = entry["Item"]
        del item["playerId"]
        return json.loads(json.dumps(item, indent=4, cls=DecimalEncoder))

    # Arrange - get entries from local mock database
    before_update = _player_snapshot()
    update_map = {"hit_points": 400}

    # Act
    outcome = operator.update_player(table=dynamodb_config,
                                     player_token="player_hash",
                                     update_map=update_map)
    after_update = _player_snapshot()

    # Assert
    assert before_update != after_update
    assert after_update["player_data"]["hit_points"] == 400
    assert outcome["ResponseMetadata"]["HTTPStatusCode"] == 200
def dict_to_s3_csv(data_dict_list: List[dict],
                   bucket: str,
                   s3_key: str,
                   s3_resource: boto3.resource,
                   ordered_fieldnames: Optional[List[str]] = None,
                   delimiter: str = ',') -> None:
    """Write records in data_dict_list as csv rows to s3_key file in bucket.

    Creates bucket if it does not exist.

    :param data_dict_list: records to serialize, one dict per CSV row
    :param bucket: destination bucket name
    :param s3_key: object key for the CSV file
    :param s3_resource: boto3 S3 resource used for the upload
    :param ordered_fieldnames: optional explicit column order; the original
        annotation ``List[str] or None`` evaluated the boolean ``or`` at
        definition time and collapsed to ``List[str]`` — ``Optional`` is the
        correct spelling
    :param delimiter: CSV field separator
    """
    file_str = dict_to_csv_string(data_dict_list,
                                  ordered_fieldnames=ordered_fieldnames,
                                  delimiter=delimiter)
    # NOTE(review): Bucket.create() can raise for an already-existing bucket
    # in some regions — confirm this is only used with missing/owned buckets.
    s3_resource.Bucket(bucket).create()
    s3_resource.Object(bucket, s3_key).put(Body=file_str)
def check_state(client: boto3.resource, crawler_name: str) -> str:
    """Poll a Glue crawler until its last run has finished.

    :param client: Glue client
    :param crawler_name: name of the crawler to poll
    :return: "SUCCEEDED" when the last crawl succeeded; None when the
        crawler is READY but the last crawl reports an unrecognized status
        (the original's implicit fall-through behavior, preserved here)
    :raises Exception: when the last crawl FAILED or was CANCELLED, or when
        the Glue API call itself fails
    """
    try:
        # Poll in a loop rather than recursing: a crawl that stays busy for
        # many 20-second polls would otherwise keep growing the call stack.
        while True:
            request_state = client.get_crawler(Name=crawler_name)
            crawler = request_state.get("Crawler")
            state = crawler.get("State")
            if state == "READY":
                last_crawl = crawler.get("LastCrawl")
                last_crawl_state = last_crawl.get("Status")
                if last_crawl_state in ["FAILED", "CANCELLED"]:
                    raise Exception(f"{last_crawl.get('ErrorMessage')}")
                if last_crawl_state == "SUCCEEDED":
                    logger.info(f"Crawler {crawler_name} run success.")
                    return last_crawl_state
                # READY with an unexpected last-crawl status: keep the
                # original implicit "return None".
                return None
            # Fixed log message: the old f-string used JS-style "${state}"
            # and printed a literal "$" before the value.
            logger.info(f"Crawler {crawler_name} still in {state} state.")
            time.sleep(20)
    except Exception as err:
        # Same "$" fix as above.
        logger.error(f"Get crawler {crawler_name} state failed: {str(err)}")
        raise err
def dynamodb_config(dynamodb: boto3.resource, player: dict) -> boto3.resource:
    """
    Fixture to return a dynamodb resource initialized with a table

    :param dynamodb: Local DynamoDB fixture
    :param player: Input character; see above
    :return: boto3 resource with our tables
    """
    key_attribute = "playerId"
    # create a table keyed on the player id hash
    table = dynamodb.create_table(
        TableName="Table",
        AttributeDefinitions=[{
            "AttributeName": key_attribute,
            "AttributeType": "S"
        }],
        KeySchema=[{
            "AttributeName": key_attribute,
            "KeyType": "HASH"
        }],
        ProvisionedThroughput={
            "ReadCapacityUnits": 1,
            "WriteCapacityUnits": 1
        },
    )
    # Seed both the acting player and their target with the same data
    for player_token in ("player_hash", "target_hash"):
        table.put_item(Item={"playerId": player_token, "player_data": player})
    return table
def test_dynamodb(dynamodb: boto3.resource): """ Simple test for DynamoDB. # Create a table # Put an item # Get the item and check the content of this item """ # create a table table = dynamodb.create_table( TableName="Test", KeySchema=[{ "AttributeName": "playerId", "KeyType": "HASH" }], AttributeDefinitions=[{ "AttributeName": "playerId", "AttributeType": "S" }], ProvisionedThroughput={ "ReadCapacityUnits": 1, "WriteCapacityUnits": 1 }, ) _id = str(uuid.uuid4()) # put an item into db table.put_item(Item={"playerId": _id, "test_key": "test_value"}) # get the item item = table.get_item(Key={"playerId": _id}) # check the content of the item assert item["Item"]["test_key"] == "test_value"
def comprehend_text(
        comprehend: boto3.resource,
        freetext: str,
        language: Optional[str] = "en") -> List[Dict[str, Optional[str]]]:
    """Run Comprehend entity detection over free text.

    :param comprehend: Comprehend client
    :param freetext: text to analyze
    :param language: language code passed to Comprehend (default "en")
    :return: one {"Entity", "Category"} dict per detected entity whose
        confidence score exceeds 0.5
    """
    detected = comprehend.detect_entities(Text=freetext, LanguageCode=language)
    confident_entities = []
    for entity in detected.get("Entities"):
        # Drop low-confidence hits; keep only text and type.
        if entity.get("Score") > 0.5:
            confident_entities.append({
                "Entity": entity.get("Text"),
                "Category": entity.get("Type"),
            })
    return confident_entities
def _inventory_object_iterator(
        bucket: boto3.resource, manifest_path: str) -> Generator[str, None, None]:
    """Yield S3 object keys listed in the inventory.

    Args:
        bucket: BinaryAlert S3 bucket resource
        manifest_path: S3 object key for an inventory manifest.json

    Yields:
        Object keys listed in the inventory
    """
    manifest_blob = bucket.Object(manifest_path).get()['Body'].read()
    manifest = json.loads(manifest_blob)

    # The manifest lists gzipped CSV files; the second CSV column holds a
    # double-quoted object key.
    for record in manifest['files']:
        compressed = bucket.Object(record['key']).get()['Body'].read()
        csv_data = gzip.decompress(compressed).decode('utf-8')
        for line in csv_data.strip().split('\n'):
            yield line.split(',')[1].strip('"')
def test_change_class_db(player: dict, dynamodb_config: boto3.resource) -> None:
    """
    Test that we can change character class and the DB entry gets updated

    :param player: Input character dictionary
    :param dynamodb_config: boto3 resource with our tables
    """

    def _current_player() -> dict:
        # Read the player row and normalize Decimals via a JSON round trip.
        entry = dynamodb_config.get_item(Key={"playerId": "player_hash"})
        return json.loads(
            json.dumps(entry["Item"], indent=4, cls=DecimalEncoder))

    # Arrange - get entries from local mock database
    before_change = _current_player()
    player["action"] = "change class hacker"

    # Act
    (
        returned_player,
        returned_target,
        player_updates,
        target_updates,
        message,
    ) = actions.change_class(player=Player(**player), table=dynamodb_config)
    database_ops.update_player(table=dynamodb_config,
                               player_token="player_hash",
                               update_map=player_updates)
    after_change = _current_player()

    # Assert
    assert before_change != after_change
    assert before_change["player_data"]["character_class"] == "dreamer"
    assert after_change["player_data"]["character_class"] == "hacker"
    assert after_change["player_data"]["hit_points"] == 500
    assert after_change["player_data"]["ex"] == 0
def check_state(athena: boto3.resource, query: str, exec_id: str) -> str:
    """Poll an Athena query execution until it reaches a terminal state.

    :param athena: Athena client
    :param query: query text, used only for log and error messages
    :param exec_id: QueryExecutionId to poll
    :return: the terminal state ("SUCCEEDED")
    :raises Exception: when the execution finishes FAILED or CANCELLED
    """
    # Loop instead of recursing: a long-running query polled every 20 s
    # would otherwise add a stack frame per poll and can eventually hit
    # Python's recursion limit.
    while True:
        request_status = athena.get_query_execution(QueryExecutionId=exec_id)
        status = request_status.get("QueryExecution").get("Status")
        state = status.get("State")
        if state in ["FAILED", "CANCELLED"]:
            raise Exception(
                f"Query {query} failed with execution {exec_id}: "
                f"{status.get('StateChangeReason')}"
            )
        if state == "SUCCEEDED":
            logger.info(f"Query {query} succeeded with execution {exec_id}")
            return state
        logger.info(f"Query {query} running with execution {exec_id}")
        time.sleep(20)
def run(self, sqs_queue: boto3.resource) -> None:
    """Send messages to SQS."""
    while self.messages:
        entries = [{'Id': str(index), 'MessageBody': body}
                   for index, body in enumerate(self.messages)]
        response = sqs_queue.send_messages(Entries=entries)
        failures = response.get('Failed')
        if not failures:
            return

        # There were some failed messages, put them back and retry in a few seconds
        self.messages = [self.messages[int(failure['Id'])]
                         for failure in failures]
        time.sleep(2)
def load_data_into_table(quiz_table: boto3.resource, quiz_id: str,
                         questions: list) -> None:
    """Loads data into the designated quiz table

    Args:
        quiz_table: The name of the quiz table
        quiz_id: The id of the quiz. This will be assigned to the database quizId field.
        questions: The list of questions
    """
    with quiz_table.batch_writer() as batch:
        for question in questions:
            # Shape each question the way the quiz expects it and the slack
            # quiz dynamodb table will read it, then write it out.
            batch.put_item(
                Item=configure_question_for_upload(question, quiz_id))
    logger.info(f"Uploaded {len(questions)} questions to database")
    return
def download_objects(s3_resource: boto3.resource, bucket_name: str) -> int:
    """Mirror every object in a bucket to the local filesystem.

    Recreates the bucket's key hierarchy under BASE_DEST_DIR, skipping
    "directory marker" keys (keys with no filename component).

    :param s3_resource: boto3 S3 resource
    :param bucket_name: name of the bucket to download from
    :return: number of objects downloaded
    """
    bucket = s3_resource.Bucket(bucket_name)
    download_count = 0
    for s3_object in bucket.objects.all():
        key_path, filename = os.path.split(s3_object.key)
        # Build paths with pathlib instead of os.sep.join: the manual join
        # produced doubled separators for bucket-root keys and was not
        # portable across platforms.
        object_dir = Path(BASE_DEST_DIR) / s3_object.bucket_name / key_path
        object_dir.mkdir(parents=True, exist_ok=True)
        if not filename:
            # Directory marker object - nothing to download.
            continue
        object_file_path = object_dir / filename
        print(f"Downloading: {object_file_path}")
        bucket.download_file(s3_object.key, str(object_file_path))
        download_count += 1
    return download_count
def test_get_player(player: dict, dynamodb_config: boto3.resource) -> None:
    """
    Test that a good event sent invokes the proper response

    :param player: Input character dictionary
    :param dynamodb_config: boto3 resource with our tables
    """
    # Arrange - get entries from local mock database
    expected_result = {"player_data": player}
    raw_entry = dynamodb_config.get_item(Key={"playerId": "player_hash"})
    stored_item = raw_entry["Item"]
    del stored_item["playerId"]
    # JSON round trip normalizes DynamoDB Decimal values for comparison
    player_from_db = json.loads(
        json.dumps(stored_item, indent=4, cls=DecimalEncoder))

    # Act
    test_result = database_ops.get_player(table=dynamodb_config,
                                          player_token="player_hash")

    # Assert
    assert test_result == expected_result
    assert test_result == player_from_db
def get_file_from_s3(s3_object: dict, s3_resource: boto3.resource):
    """Download an S3 object and parse its contents as YAML.

    :param s3_object: kwargs for s3_resource.Object identifying the object
    :param s3_resource: boto3 S3 resource
    :return: the parsed YAML document
    :raises ClientError: when the download fails
    :raises YAMLError: when the downloaded file is not valid YAML
    """
    try:
        # Bind the boto3 Object to a new name instead of rebinding
        # s3_object: the old code shadowed the dict, so the error handlers
        # below called .get() on the boto3 Object (an S3 API call), not the
        # dict lookup they intended.
        remote_object = s3_resource.Object(**s3_object)
        # download_fileobj writes bytes, so the temp file must stay in the
        # default binary mode ('w+b'); the old encoding='utf-8' argument
        # made TemporaryFile raise ValueError before any download happened.
        with tempfile.TemporaryFile() as file_pointer:
            remote_object.download_fileobj(file_pointer)
            # Move pointer to the start of the file
            file_pointer.seek(0)
            return yaml.safe_load(file_pointer)
    except ClientError as error:
        # NOTE(review): these lookups use 'object_key'/'bucket_name' keys,
        # but boto3's Object() expects 'bucket_name' and 'key' — confirm the
        # schema of the s3_object dict against the callers.
        LOGGER.error(
            f"Failed to download {s3_object.get('object_key')} "
            f"from {s3_object.get('bucket_name')}, due to {error}"
        )
        raise
    except YAMLError as yaml_error:
        LOGGER.error(
            f"Failed to parse YAML file: {s3_object.get('object_key')} "
            f"from {s3_object.get('bucket_name')}, due to {yaml_error}"
        )
        raise
def write_scores_to_dynamo(dynamo: boto3.resource, scores: tuple):
    """Persist player score records to the ddga_player_scores table.

    :param dynamo: DynamoDB resource
    :param scores: score records, written one put per record via a batch writer
    """
    table = dynamo.Table('ddga_player_scores')
    with table.batch_writer() as writer:
        for score_record in scores:
            writer.put_item(Item=score_record)