Example #1
# Stdlib imports used by this snippet. S3 (a boto3 client), HSReplayDocument,
# RedshiftPublishingExporter and flush_exporter_to_firehose are module-level
# names supplied by the surrounding hsreplaynet code.
import json
import logging
from datetime import datetime
from io import BytesIO
from zlib import decompress
def load_replay_into_redshift(event, context):
    """A handler that loads a replay into Redshift"""
    logger = logging.getLogger("hsreplaynet.lambdas.load_replay_into_redshift")
    replay_bucket = event["replay_bucket"]
    replay_key = event["replay_key"]
    metadata_str = event["metadata"]

    obj = S3.get_object(Bucket=replay_bucket, Key=replay_key)
    body_data = obj["Body"].read()
    # wbits=15+32 lets zlib auto-detect gzip or raw zlib framing
    log_str = decompress(body_data, 15 + 32)
    out = BytesIO(log_str)

    try:
        replay = HSReplayDocument.from_xml_file(out)
        metadata = json.loads(metadata_str)

        global_game_id = metadata["game_id"]
        from hsreplaynet.games.models import GlobalGame

        global_game = GlobalGame.objects.get(id=global_game_id)

        packet_tree = replay.to_packet_tree()[0]
        exporter = RedshiftPublishingExporter(packet_tree).export()
        exporter.set_game_info(metadata)
        flush_exporter_to_firehose(exporter)
    except Exception:
        # Log the raw metadata so a failed load can be reproduced later
        logger.info(metadata_str)
        raise
    else:
        global_game.loaded_into_redshift = datetime.now()
        global_game.save()
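The handler expects an event carrying the replay's S3 location plus the game
metadata as a JSON string. A minimal local invocation sketch (the bucket, key
and game_id values below are purely illustrative):

import json

event = {
    "replay_bucket": "my-replay-bucket",          # illustrative bucket name
    "replay_key": "replays/some-replay.xml.gz",   # illustrative key
    "metadata": json.dumps({"game_id": 12345}),   # must contain game_id
}

# The handler never touches context, so None is fine for a local test.
load_replay_into_redshift(event, None)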
Example #2
    def get(self, request, **kwargs):
        # Prevent blacklisted clients from uploading an invalid collection.
        # The setting is a tuple of user-agent prefixes, which str.startswith
        # accepts directly. Use .get() so a missing header cannot raise.
        user_agent = request.META.get("HTTP_USER_AGENT") or ""
        is_bad_client = user_agent.startswith(
            settings.COLLECTION_UPLOAD_USER_AGENT_BLACKLIST)
        if is_bad_client:
            raise PermissionDenied(
                "Your client is outdated. Please update to the latest version."
            )

        expires_in = 180  # Seconds
        self.s3_params["ContentType"] = "application/json"

        presigned_url = S3.generate_presigned_url("put_object",
                                                  Params=self.s3_params,
                                                  HttpMethod="PUT",
                                                  ExpiresIn=expires_in)

        return Response({
            "method": "PUT",
            "url": presigned_url,
            "content_type": self.s3_params["ContentType"],
            "expires_in": expires_in,
            "account_hi": self._account.account_hi,
            "account_lo": self._account.account_lo,
            "region": self._account.region,
        })
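A client consuming this endpoint performs the PUT itself. A minimal sketch
using requests, assuming a hypothetical endpoint path and auth scheme (the
real URL and authentication come from the host project's routing):

import json
import requests

# Hypothetical path; substitute the project's actual route and credentials.
resp = requests.get(
    "https://example.com/api/v1/collection/upload_request/",
    headers={"Authorization": "Bearer <token>"},
)
upload = resp.json()

collection = {"collection": {}, "cardbacks": []}  # payload shape assumed
put_resp = requests.put(
    upload["url"],
    data=json.dumps(collection),
    # Must match the ContentType the URL was signed with, or S3 rejects it
    headers={"Content-Type": upload["content_type"]},
)
put_resp.raise_for_status()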
Example #3
    def get(self, request, **kwargs):
        # Add cache headers to the s3 request
        self.s3_params.update(
            self._translate_s3_cache_headers(self.request.META))

        try:
            obj = S3.get_object(**self.s3_params)
        except S3.exceptions.NoSuchKey:
            raise NotFound()
        except ClientError as e:
            # Relay S3's conditional-request results (If-None-Match /
            # If-Modified-Since) straight back to the client.
            status_code = e.response.get("ResponseMetadata",
                                         {}).get("HTTPStatusCode", 0)
            if status_code in (status.HTTP_304_NOT_MODIFIED,
                               status.HTTP_412_PRECONDITION_FAILED):
                return Response(status=status_code)
            else:
                raise

        collection = self._parse_collection_json(
            obj.get("Body"), obj.get("ContentEncoding", ""))

        response_headers = obj.get("ResponseMetadata",
                                   {}).get("HTTPHeaders", {})
        headers = {"cache-control": "private, no-cache"}

        if collection:
            for header in ("etag", "last-modified"):
                v = response_headers.get(header, "")
                if v:
                    headers[header] = v

        return Response(collection, headers=headers)
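The _translate_s3_cache_headers helper is not shown. A plausible sketch,
assuming it simply maps the incoming conditional-request headers onto boto3's
get_object parameters (the body below is an assumption, not the project's
actual implementation):

def _translate_s3_cache_headers(self, meta):
    """Map Django HTTP_* cache headers onto boto3 get_object params."""
    params = {}
    if "HTTP_IF_NONE_MATCH" in meta:
        params["IfNoneMatch"] = meta["HTTP_IF_NONE_MATCH"]
    if "HTTP_IF_MODIFIED_SINCE" in meta:
        # botocore parses the raw HTTP-date string into a timestamp
        params["IfModifiedSince"] = meta["HTTP_IF_MODIFIED_SINCE"]
    return params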
Example #4
    def train_neural_network(self,
                             num_examples=1000000,
                             max_dropped_cards=15,
                             stratified=False,
                             min_cards_for_determination=5,
                             batch_size=1000,
                             num_epochs=20,
                             base_layer_size=64,
                             hidden_layer_size=64,
                             num_hidden_layers=2,
                             working_dir=None,
                             upload_to_s3=False):
        from hsarchetypes.classification import train_neural_net
        from hsarchetypes.utils import plot_accuracy_graph, plot_loss_graph
        common_prefix_template = "%s_%i_%i_%s"
        values = (
            self.cluster_set.game_format.name,
            self.cluster_set.id,
            self.cluster_set.training_run_id,
            self.player_class.name,
        )
        common_prefix = common_prefix_template % values
        # working_dir must be supplied; the cluster-set trainer in the next
        # example passes its per-run training directory.
        full_model_path = os.path.join(working_dir,
                                       common_prefix + "_model.h5")
        train_x, train_Y = self._fetch_training_data(
            num_examples=num_examples,
            max_dropped_cards=max_dropped_cards,
            stratified=stratified,
            min_cards_for_determination=min_cards_for_determination)
        print("Finished generating training data")
        history = train_neural_net(train_x,
                                   train_Y,
                                   full_model_path,
                                   batch_size=batch_size,
                                   num_epochs=num_epochs,
                                   base_layer_size=base_layer_size,
                                   hidden_layer_size=hidden_layer_size,
                                   num_hidden_layers=num_hidden_layers)
        # Validation accuracy from the final training epoch
        accuracy = history.history["val_acc"][-1] * 100
        print("%s accuracy: %.2f%%\n" % (self.player_class.name, accuracy))

        loss_file_path = os.path.join(working_dir, common_prefix + "_loss.png")
        plot_loss_graph(history, self.player_class.name, loss_file_path)

        accuracy_file_path = os.path.join(working_dir,
                                          common_prefix + "_accuracy.png")
        plot_accuracy_graph(history, self.player_class.name,
                            accuracy_file_path)

        if upload_to_s3:
            # The key structure for models in the bucket is as follows:
            # /models/<game_format>/<cluster_set_id>/<run_id>/<player_class>.h5
            # Which allows for easy listing of all the run_ids for a given snapshot

            # Within each run_id folder we expect:
            # A <player_class>.h5 file for each class
            # A summary.txt
            # A <player_class>_accuracy.png
            # A <player_class>_loss.png

            if os.path.exists(full_model_path):
                with open(full_model_path, "rb") as model:
                    S3.put_object(Bucket=settings.KERAS_MODELS_BUCKET,
                                  Key=self.model_key,
                                  Body=model)

            if os.path.exists(loss_file_path):
                with open(loss_file_path, "rb") as loss_graph:
                    S3.put_object(Bucket=settings.KERAS_MODELS_BUCKET,
                                  Key=self.loss_graph_key,
                                  Body=loss_graph)

            if os.path.exists(accuracy_file_path):
                with open(accuracy_file_path, "rb") as accuracy_graph:
                    S3.put_object(Bucket=settings.KERAS_MODELS_BUCKET,
                                  Key=self.accuracy_graph_key,
                                  Body=accuracy_graph)

        return accuracy
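The model_key, loss_graph_key and accuracy_graph_key properties are not shown.
Given the key structure documented in the comment above, model_key presumably
looks something like this (the property body is an assumption):

@property
def model_key(self):
    # Assumed layout: models/<game_format>/<cluster_set_id>/<run_id>/<player_class>.h5
    return "models/%s/%i/%i/%s.h5" % (
        self.cluster_set.game_format.name,
        self.cluster_set.id,
        self.cluster_set.training_run_id,
        self.player_class.name,
    )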
Example #5
    def train_neural_network(self,
                             num_examples=1000000,
                             max_dropped_cards=15,
                             stratified=False,
                             min_cards_for_determination=5,
                             batch_size=1000,
                             num_epochs=20,
                             base_layer_size=64,
                             hidden_layer_size=64,
                             num_hidden_layers=2,
                             working_dir=None,
                             upload_to_s3=False,
                             included_classes=None):
        start_ts = time.time()
        run_id = int(start_ts)
        self.training_run_id = run_id
        self.save()

        if working_dir:
            training_dir = working_dir
        else:
            training_dir = os.path.join(settings.BUILD_DIR, "models",
                                        str(run_id))

        # Use makedirs: the default path under BUILD_DIR is nested more than
        # one level deep, and exist_ok covers the pre-existence check.
        os.makedirs(training_dir, exist_ok=True)

        summary_path = os.path.join(training_dir, "summary.txt")
        with open(summary_path, "w") as summary:
            summary.write("Game Format: %s\n" % self.game_format.name)
            summary.write("Cluster Set As Of: %s\n" % self.as_of.isoformat())
            summary.write("Training Run: %i\n\n" % run_id)

            summary.write("Num Examples: %i\n" % num_examples)
            summary.write("Max Dropped Cards: %i\n" % max_dropped_cards)
            summary.write("Stratified: %s\n" % str(stratified))
            summary.write("Min Cards For Determination: %i\n" %
                          min_cards_for_determination)
            summary.write("Batch Size: %i\n" % batch_size)
            summary.write("Num Epochs: %i\n" % num_epochs)
            summary.write("Base Layer Size: %i\n" % base_layer_size)
            summary.write("Hidden Layer Size: %i\n" % hidden_layer_size)
            summary.write("Num Hidden Layers: %i\n\n" % num_hidden_layers)

            for class_cluster in self.class_clusters:
                player_class_name = class_cluster.player_class.name

                if included_classes and player_class_name not in included_classes:
                    continue

                print("\nInitiating training for %s" %
                      class_cluster.player_class.name)
                training_start = time.time()
                accuracy = class_cluster.train_neural_network(
                    num_examples=num_examples,
                    max_dropped_cards=max_dropped_cards,
                    stratified=stratified,
                    min_cards_for_determination=min_cards_for_determination,
                    batch_size=batch_size,
                    num_epochs=num_epochs,
                    base_layer_size=base_layer_size,
                    hidden_layer_size=hidden_layer_size,
                    num_hidden_layers=num_hidden_layers,
                    working_dir=training_dir,
                    upload_to_s3=upload_to_s3)
                training_stop = time.time()
                duration = int(training_stop - training_start)
                print("Duration: %s seconds" % duration)
                print("Accuracy: %s" % round(accuracy, 4))

                summary.write("%s Duration: %i seconds\n" %
                              (player_class_name, duration))
                summary.write("%s Accuracy: %s\n\n" %
                              (player_class_name, round(accuracy, 4)))

            end_ts = time.time()
            full_duration = end_ts - start_ts
            duration_mins = int(full_duration / 60)
            duration_secs = int(full_duration % 60)
            summary.write("Full Duration: %i min(s) %i seconds\n" %
                          (duration_mins, duration_secs))

        if upload_to_s3 and os.path.exists(summary_path):
            with open(summary_path, "rb") as summary:
                S3.put_object(Bucket=settings.KERAS_MODELS_BUCKET,
                              Key=self.cluster_set_key_prefix + "summary.txt",
                              Body=summary)
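A driver script tying the two training entry points together might look like
this (ClusterSetSnapshot is a hypothetical accessor for the cluster set, and
all parameter values are illustrative):

# Hypothetical lookup; the real model and manager names live in the host project.
cluster_set = ClusterSetSnapshot.objects.latest("as_of")

cluster_set.train_neural_network(
    num_examples=500000,                   # smaller run for a quick iteration
    num_epochs=10,
    included_classes=["MAGE", "WARRIOR"],  # restrict to two classes
    upload_to_s3=False,                    # keep artifacts local
)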
Example #6
    def delete(self, request, **kwargs):
        # S3 deletes are idempotent: deleting a missing key still succeeds,
        # so no NoSuchKey handling is needed.
        S3.delete_object(**self.s3_params)

        return Response(status=status.HTTP_204_NO_CONTENT)
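Examples 2, 3 and 6 all rely on self.s3_params, which is not shown. A
plausible sketch, assuming it is built once per request in the view's
initial() hook (the bucket setting and key layout are assumptions):

def initial(self, request, *args, **kwargs):
    super().initial(request, *args, **kwargs)
    # Hypothetical bucket setting and per-account key layout
    self.s3_params = {
        "Bucket": settings.COLLECTIONS_BUCKET,
        "Key": "collections/%i/%i/collection.json" % (
            self._account.account_hi, self._account.account_lo
        ),
    }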