示例#1
0
    def test_correct_serialization(self):
        base_dir = os.path.dirname(__file__)
        dataset = sportscode.load(f"{base_dir}/files/code_xml.xml")

        del dataset.codes[2:]

        # Make sure that data in Period 2 get the timestamp corrected
        dataset.metadata.periods = [
            Period(id=1, start_timestamp=0, end_timestamp=45 * 60),
            Period(id=2, start_timestamp=45 * 60 + 10, end_timestamp=90 * 60),
        ]
        dataset.codes[1].period = dataset.metadata.periods[1]

        serializer = SportsCodeSerializer()
        output = serializer.serialize(dataset)

        expected_output = """<?xml version='1.0' encoding='utf-8'?>
<file>
  <ALL_INSTANCES>
    <instance>
      <ID>P1</ID>
      <start>3.6</start>
      <end>9.7</end>
      <code>PASS</code>
      <label>
        <group>Team</group>
        <text>Henkie</text>
      </label>
      <label>
        <group>Packing.Value</group>
        <text>1</text>
      </label>
      <label>
        <group>Receiver</group>
        <text>Klaas Nøme</text>
      </label>
    </instance>
    <instance>
      <ID>P2</ID>
      <start>2768.3</start>
      <end>2774.5</end>
      <code>PASS</code>
      <label>
        <group>Team</group>
        <text>Henkie</text>
      </label>
      <label>
        <group>Packing.Value</group>
        <text>3</text>
      </label>
      <label>
        <group>Receiver</group>
        <text>Piet</text>
      </label>
    </instance>
  </ALL_INSTANCES>
</file>
"""
        expected_output = bytes(expected_output, "utf-8")
        assert output == expected_output
示例#2
0
    def test_correct_deserialization(self):
        """
        This test uses data from the StatsBomb open data project.
        """
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = StatsBombSerializer()

        # Both fixture files stay open while the serializer reads them.
        with open(f"{files_dir}/statsbomb_lineup.json", "rb") as lineup_data:
            with open(f"{files_dir}/statsbomb_event.json", "rb") as event_data:
                dataset = serializer.deserialize(
                    inputs={
                        "lineup_data": lineup_data,
                        "event_data": event_data,
                    }
                )

        assert len(dataset.events) == 4002
        assert len(dataset.periods) == 2
        assert dataset.orientation == Orientation.ACTION_EXECUTING_TEAM

        expected_first_half = Period(
            id=1,
            start_timestamp=0.0,
            end_timestamp=2705.267,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert dataset.periods[0] == expected_first_half

        expected_second_half = Period(
            id=2,
            start_timestamp=2705.268,
            end_timestamp=5557.321,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert dataset.periods[1] == expected_second_half
示例#3
0
    def test_correct_deserialization(self):
        """Deserialize the Opta F24/F7 fixtures and check events, periods and orientation."""
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = OptaSerializer()

        with open(f"{files_dir}/opta_f24.xml", "rb") as f24_data:
            with open(f"{files_dir}/opta_f7.xml", "rb") as f7_data:
                inputs = {"f24_data": f24_data, "f7_data": f7_data}
                dataset = serializer.deserialize(inputs=inputs)

        assert len(dataset.events) == 17
        assert len(dataset.periods) == 2
        assert dataset.orientation == Orientation.ACTION_EXECUTING_TEAM

        # (id, start_timestamp, end_timestamp) expected for each period, in order.
        expected_bounds = [
            (1, 1537707733.608, 1537710501.222),
            (2, 1537711528.873, 1537714537.788),
        ]
        for index, (period_id, start, end) in enumerate(expected_bounds):
            assert dataset.periods[index] == Period(
                id=period_id,
                start_timestamp=start,
                end_timestamp=end,
                attacking_direction=AttackingDirection.NOT_SET,
            )
示例#4
0
    def test_correct_deserialization(self):
        """Deserialize the Metrica home/away CSV fixtures and verify metadata and frames."""
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = MetricaTrackingSerializer()

        with open(f"{files_dir}/metrica_home.csv", "rb") as raw_data_home:
            with open(f"{files_dir}/metrica_away.csv", "rb") as raw_data_away:
                dataset = serializer.deserialize(
                    inputs={
                        "raw_data_home": raw_data_home,
                        "raw_data_away": raw_data_away,
                    }
                )

        assert dataset.metadata.provider == Provider.METRICA
        assert dataset.dataset_type == DatasetType.TRACKING
        assert len(dataset.records) == 6
        assert len(dataset.metadata.periods) == 2
        assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY

        expected_first_period = Period(
            id=1,
            start_timestamp=0.04,
            end_timestamp=0.12,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        assert dataset.metadata.periods[0] == expected_first_period

        expected_second_period = Period(
            id=2,
            start_timestamp=5800.16,
            end_timestamp=5800.24,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        assert dataset.metadata.periods[1] == expected_second_period

        # make sure data is loaded correctly (including flip y-axis)
        home_player = dataset.metadata.teams[0].players[0]
        expected_home_point = Point(x=0.00082, y=1 - 0.48238)
        assert (
            dataset.records[0].players_coordinates[home_player]
            == expected_home_point
        )

        away_player = dataset.metadata.teams[1].players[0]
        expected_away_point = Point(x=0.90509, y=1 - 0.47462)
        assert (
            dataset.records[0].players_coordinates[away_player]
            == expected_away_point
        )

        expected_ball_point = Point(x=0.45472, y=1 - 0.38709)
        assert dataset.records[0].ball_coordinates == expected_ball_point

        def player_ids_in(frame):
            # Ids of every player that has coordinates in the given frame.
            return [
                player.player_id
                for player in frame.players_coordinates.keys()
            ]

        # make sure player data is only in the frame when the player is at the pitch
        assert "home_14" not in player_ids_in(dataset.records[0])
        assert "home_14" in player_ids_in(dataset.records[3])
示例#5
0
    def _get_tracking_dataset(self):
        """Build a small two-frame TrackingDataset with two periods and two teams."""
        home_team = Team(team_id="home", name="home", ground=Ground.HOME)
        away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
        teams = [home_team, away_team]

        first_period = Period(
            id=1,
            start_timestamp=0.0,
            end_timestamp=10.0,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        second_period = Period(
            id=2,
            start_timestamp=15.0,
            end_timestamp=25.0,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        periods = [first_period, second_period]

        metadata = Metadata(
            flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
            ),
            orientation=Orientation.HOME_TEAM,
            frame_rate=25,
            periods=periods,
            teams=teams,
            score=None,
            provider=None,
        )

        # First frame: ball only, no player coordinates.
        empty_frame = Frame(
            frame_id=1,
            timestamp=0.1,
            ball_owning_team=None,
            ball_state=None,
            period=periods[0],
            players_coordinates={},
            ball_coordinates=Point(x=100, y=-50),
        )

        # Second frame: a single home player plus the ball.
        home_player = Player(team=home_team, player_id="home_1", jersey_no=1)
        populated_frame = Frame(
            frame_id=2,
            timestamp=0.2,
            ball_owning_team=None,
            ball_state=None,
            period=periods[0],
            players_coordinates={home_player: Point(x=15, y=35)},
            ball_coordinates=Point(x=0, y=50),
        )

        return TrackingDataset(
            metadata=metadata,
            records=[empty_frame, populated_frame],
        )
示例#6
0
    def __create_iterator(self, data: Readable, sample_rate: float,
                          frame_rate: int) -> Iterator:
        """
        Iterate over the lines of `data` and yield partial frames.

        Line 0 supplies the team (fourth column), line 1 the player jersey
        numbers (every second column from the fourth up to the last two),
        line 2 is skipped, and every later line is a data row whose first two
        columns are the period id and the frame id.

        A `Period` is created whenever the period id changes; otherwise the
        current period's `end_timestamp` is moved to `frame_id / frame_rate`.
        A data row is yielded only when its zero-based row counter is a
        multiple of `1 / sample_rate`.

        Notes:
            1. the y-axis is flipped because Metrica use (y, -y) instead of (-y, y)
        """
        team = None
        jersey_numbers = []
        current_period = None
        sample_step = 1 / sample_rate
        data_row_no = 0

        for line_no, raw_line in enumerate(data):
            columns = raw_line.strip().decode("ascii").split(",")

            if line_no == 0:
                team = columns[3]
                continue
            if line_no == 1:
                jersey_numbers = columns[3:-2:2]
                continue
            if line_no == 2:
                # consider doing some validation on the columns
                continue

            period_id = int(columns[0])
            frame_id = int(columns[1])
            timestamp = frame_id / frame_rate

            if current_period is not None and current_period.id == period_id:
                # consider not update this every frame for performance reasons
                current_period.end_timestamp = timestamp
            else:
                current_period = Period(
                    id=period_id,
                    start_timestamp=timestamp,
                    end_timestamp=timestamp,
                )

            if data_row_no % sample_step == 0:
                make_frame = self.__PartialFrame

                # Player columns come in (x, y) pairs starting at column 3;
                # a "NaN" x value means the player has no position in this row.
                player_positions = {}
                for slot, player_no in enumerate(jersey_numbers):
                    x_column = 3 + slot * 2
                    if columns[x_column] == "NaN":
                        continue
                    player_positions[player_no] = Point(
                        x=float(columns[x_column]),
                        y=1 - float(columns[x_column + 1]),
                    )

                # The ball occupies the last two columns.
                if columns[-2] != "NaN":
                    ball_position = Point(
                        x=float(columns[-2]), y=1 - float(columns[-1])
                    )
                else:
                    ball_position = None

                yield make_frame(
                    team=team,
                    period=current_period,
                    frame_id=frame_id,
                    player_positions=player_positions,
                    ball_position=ball_position,
                )
            data_row_no += 1
示例#7
0
    def test_correct_deserialization(self):
        """
        This test uses data from the StatsBomb open data project.
        """
        dataset = self._load_dataset()

        assert dataset.metadata.provider == Provider.STATSBOMB
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.events) == 4022
        assert len(dataset.metadata.periods) == 2
        assert (
            dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM
        )
        assert dataset.metadata.teams[0].name == "Barcelona"
        assert dataset.metadata.teams[1].name == "Deportivo Alavés"

        # A player looked up by id who is expected to be in the starting line-up.
        starter = dataset.metadata.teams[0].get_player_by_id("5503")
        assert starter.player_id == "5503"
        assert starter.jersey_no == 10
        assert str(starter) == "Lionel Andrés Messi Cuccittini"
        assert starter.position is None  # not set
        assert starter.starting

        # A player who is expected not to be in the starting line-up.
        substitute = dataset.metadata.teams[0].get_player_by_id("3501")
        assert str(substitute) == "Philippe Coutinho Correia"
        assert not substitute.starting

        expected_first_half = Period(
            id=1,
            start_timestamp=0.0,
            end_timestamp=2705.267,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert dataset.metadata.periods[0] == expected_first_half

        expected_second_half = Period(
            id=2,
            start_timestamp=2705.268,
            end_timestamp=5557.321,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert dataset.metadata.periods[1] == expected_second_half

        # Body part qualifier expected for selected events (None = no qualifier).
        head_value = dataset.events[791].get_qualifier_value(BodyPartQualifier)
        assert head_value == BodyPart.HEAD

        right_foot_value = dataset.events[2231].get_qualifier_value(
            BodyPartQualifier
        )
        assert right_foot_value == BodyPart.RIGHT_FOOT

        missing_value = dataset.events[195].get_qualifier_value(
            BodyPartQualifier
        )
        assert missing_value is None
示例#8
0
    def test_correct_deserialization(self):
        """Deserialize the TRACAB meta/raw fixtures with `only_alive` disabled."""
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = TRACABSerializer()

        with open(f"{files_dir}/tracab_meta.xml", "rb") as meta_data:
            with open(f"{files_dir}/tracab_raw.dat", "rb") as raw_data:
                dataset = serializer.deserialize(
                    inputs={"meta_data": meta_data, "raw_data": raw_data},
                    options={"only_alive": False},
                )

        assert len(dataset.records) == 6
        assert len(dataset.periods) == 2
        assert dataset.orientation == Orientation.FIXED_HOME_AWAY

        expected_first_period = Period(
            id=1,
            start_timestamp=4.0,
            end_timestamp=4.08,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        assert dataset.periods[0] == expected_first_period

        expected_second_period = Period(
            id=2,
            start_timestamp=8.0,
            end_timestamp=8.08,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        assert dataset.periods[1] == expected_second_period

        # Positions, ball state and ball ownership in the first frame.
        expected_home_19 = Point(x=-1234.0, y=-294.0)
        assert (
            dataset.records[0].home_team_player_positions["19"]
            == expected_home_19
        )
        expected_away_19 = Point(x=8889, y=-666)
        assert (
            dataset.records[0].away_team_player_positions["19"]
            == expected_away_19
        )
        assert dataset.records[0].ball_position == Point(x=-27, y=25)
        assert dataset.records[0].ball_state == BallState.ALIVE
        assert dataset.records[0].ball_owning_team == Team.HOME

        assert dataset.records[1].ball_owning_team == Team.AWAY

        assert dataset.records[2].ball_state == BallState.DEAD

        # make sure player data is only in the frame when the player is at the pitch
        late_player = "1337"
        assert late_player not in dataset.records[0].away_team_player_positions
        assert late_player in dataset.records[3].away_team_player_positions
示例#9
0
def _load_periods(global_config_elm, frame_rate: int) -> List[Period]:
    """
    Build the periods described by the "ProviderGlobalParameters" element.

    For each of the four known halves, in order, a `Period` is created from
    the `<half>_start` and `<half>_end` parameters, each divided by
    `frame_rate`. Collection stops at the first half whose start key is absent.
    """
    params_elm = global_config_elm.find("ProviderGlobalParameters")
    provider_params = _load_provider_parameters(params_elm, value_mapper=int)

    half_names = (
        "first_half",
        "second_half",
        "first_extra_half",
        "second_extra_half",
    )

    periods = []
    for period_id, half_name in enumerate(half_names, start=1):
        start_key = f"{half_name}_start"
        if start_key not in provider_params:
            # done
            break

        start_timestamp = float(provider_params[start_key]) / frame_rate
        end_timestamp = float(provider_params[f"{half_name}_end"]) / frame_rate
        periods.append(
            Period(
                id=period_id,
                start_timestamp=start_timestamp,
                end_timestamp=end_timestamp,
            )
        )

    return periods
示例#10
0
 def _get_tracking_dataset(self):
     """Build a small two-frame TrackingDataset with two periods."""
     first_period = Period(
         id=1,
         start_timestamp=0.0,
         end_timestamp=10.0,
         attacking_direction=AttackingDirection.HOME_AWAY,
     )
     second_period = Period(
         id=2,
         start_timestamp=15.0,
         end_timestamp=25.0,
         attacking_direction=AttackingDirection.AWAY_HOME,
     )
     periods = [first_period, second_period]

     # First frame: ball only, no player positions for either team.
     empty_frame = Frame(
         frame_id=1,
         timestamp=0.1,
         ball_owning_team=None,
         ball_state=None,
         period=periods[0],
         away_team_player_positions={},
         home_team_player_positions={},
         ball_position=Point(x=100, y=-50),
     )

     # Second frame: one player per team plus the ball.
     populated_frame = Frame(
         frame_id=2,
         timestamp=0.2,
         ball_owning_team=None,
         ball_state=None,
         period=periods[0],
         away_team_player_positions={"1": Point(x=10, y=20)},
         home_team_player_positions={"1": Point(x=15, y=35)},
         ball_position=Point(x=0, y=50),
     )

     return TrackingDataset(
         flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
         pitch_dimensions=PitchDimensions(
             x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
         ),
         orientation=Orientation.HOME_TEAM,
         frame_rate=25,
         records=[empty_frame, populated_frame],
         periods=periods,
     )
示例#11
0
    def test_correct_deserialization(self):
        """Deserialize the Opta F24/F7 fixtures and verify metadata, teams and periods."""
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = OptaSerializer()

        with open(f"{files_dir}/opta_f24.xml", "rb") as f24_data:
            with open(f"{files_dir}/opta_f7.xml", "rb") as f7_data:
                dataset = serializer.deserialize(
                    inputs={"f24_data": f24_data, "f7_data": f7_data}
                )

        assert dataset.metadata.provider == Provider.OPTA
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.events) == 17
        assert len(dataset.metadata.periods) == 2

        # Ball ownership is expressed as one of the metadata team objects.
        assert dataset.events[10].ball_owning_team == dataset.metadata.teams[1]
        assert dataset.events[15].ball_owning_team == dataset.metadata.teams[0]
        assert (
            dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM
        )

        home_team = dataset.metadata.teams[0]
        assert home_team.name == "FC København"
        assert home_team.ground == Ground.HOME
        away_team = dataset.metadata.teams[1]
        assert away_team.name == "FC Nordsjælland"
        assert away_team.ground == Ground.AWAY

        first_player = dataset.metadata.teams[0].players[0]
        assert first_player.player_id == "111319"
        assert first_player.jersey_no == 21
        assert str(first_player) == "Jesse Joronen"
        assert first_player.position.position_id == "1"
        assert first_player.position.name == "Goalkeeper"

        expected_first_half = Period(
            id=1,
            start_timestamp=1537714933.608,
            end_timestamp=1537717701.222,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert dataset.metadata.periods[0] == expected_first_half

        expected_second_half = Period(
            id=2,
            start_timestamp=1537718728.873,
            end_timestamp=1537721737.788,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert dataset.metadata.periods[1] == expected_second_half
示例#12
0
    def test_correct_deserialization(self):
        """
        This test uses data from the StatsBomb open data project.
        """
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = StatsBombSerializer()

        with open(f"{files_dir}/statsbomb_lineup.json", "rb") as lineup_data:
            with open(f"{files_dir}/statsbomb_event.json", "rb") as event_data:
                dataset = serializer.deserialize(
                    inputs={
                        "lineup_data": lineup_data,
                        "event_data": event_data,
                    }
                )

        assert dataset.metadata.provider == Provider.STATSBOMB
        assert len(dataset.events) == 4002
        assert len(dataset.metadata.periods) == 2
        assert (
            dataset.metadata.orientation == Orientation.ACTION_EXECUTING_TEAM
        )
        assert dataset.metadata.teams[0].name == "Barcelona"
        assert dataset.metadata.teams[1].name == "Deportivo Alavés"

        first_player = dataset.metadata.teams[0].players[0]
        assert first_player.player_id == "3109"
        assert first_player.jersey_no == 14
        assert str(first_player) == "Malcom Filipe Silva de Oliveira"
        assert first_player.position is None  # not set

        # (id, start_timestamp, end_timestamp) expected for each period, in order.
        expected_bounds = [
            (1, 0.0, 2705.267),
            (2, 2705.268, 5557.321),
        ]
        for index, (period_id, start, end) in enumerate(expected_bounds):
            assert dataset.metadata.periods[index] == Period(
                id=period_id,
                start_timestamp=start,
                end_timestamp=end,
                attacking_direction=AttackingDirection.NOT_SET,
            )
示例#13
0
    def test_correct_deserialization(self):
        """
        Deserialize the Metrica metadata XML and events JSON fixtures and
        verify provider, dataset type, event count, teams, first player,
        periods and the set-piece qualifier of the second record.
        """

        base_dir = os.path.dirname(__file__)

        serializer = MetricaEventsJsonSerializer()

        with open(
            f"{base_dir}/files/metrica_metadata.xml", "rb"
        ) as metadata, open(
            f"{base_dir}/files/metrica_events.json", "rb"
        ) as event_data:

            dataset = serializer.deserialize(
                inputs={"metadata": metadata, "event_data": event_data}
            )

        assert dataset.metadata.provider == Provider.METRICA
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.events) == 3684
        assert len(dataset.metadata.periods) == 2
        assert dataset.metadata.orientation is None
        assert dataset.metadata.teams[0].name == "Team A"
        assert dataset.metadata.teams[1].name == "Team B"

        player = dataset.metadata.teams[0].players[0]
        assert player.player_id == "P3578"
        assert player.jersey_no == 11
        assert str(player) == "Player 11"
        assert player.position.name == "Goalkeeper"

        assert dataset.metadata.periods[0] == Period(
            id=1,
            start_timestamp=14.44,
            end_timestamp=2783.76,
            attacking_direction=AttackingDirection.NOT_SET,
        )
        assert dataset.metadata.periods[1] == Period(
            id=2,
            start_timestamp=2803.6,
            end_timestamp=5742.12,
            attacking_direction=AttackingDirection.NOT_SET,
        )

        # Make sure we are using the improved event types.
        # This comparison was previously a bare expression, so its result was
        # discarded and the check could never fail; it is now asserted.
        assert dataset.records[1].qualifiers[0].value == SetPieceType.KICK_OFF
示例#14
0
    def test_correct_deserialization(self, home_data: str, away_data: str):
        """Load Metrica tracking CSV data and verify metadata, periods and frame data."""
        dataset = metrica.load_tracking_csv(
            home_data=home_data, away_data=away_data
        )

        assert dataset.metadata.provider == Provider.METRICA
        assert dataset.dataset_type == DatasetType.TRACKING
        assert len(dataset.records) == 6
        assert len(dataset.metadata.periods) == 2
        assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY

        expected_first_period = Period(
            id=1,
            start_timestamp=0.04,
            end_timestamp=0.12,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        assert dataset.metadata.periods[0] == expected_first_period

        expected_second_period = Period(
            id=2,
            start_timestamp=5800.16,
            end_timestamp=5800.24,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        assert dataset.metadata.periods[1] == expected_second_period

        # make sure data is loaded correctly (including flip y-axis)
        home_player = dataset.metadata.teams[0].players[0]
        home_coordinates = dataset.records[0].players_data[
            home_player
        ].coordinates
        assert home_coordinates == Point(x=0.00082, y=1 - 0.48238)

        away_player = dataset.metadata.teams[1].players[0]
        away_coordinates = dataset.records[0].players_data[
            away_player
        ].coordinates
        assert away_coordinates == Point(x=0.90509, y=1 - 0.47462)

        expected_ball_point = Point(x=0.45472, y=1 - 0.38709)
        assert dataset.records[0].ball_coordinates == expected_ball_point

        def player_ids_in(frame):
            # Ids of every player that has an entry in the frame's players_data.
            return [
                player.player_id for player in frame.players_data.keys()
            ]

        # make sure player data is only in the frame when the player is at the pitch
        assert "home_14" not in player_ids_in(dataset.records[0])
        assert "home_14" in player_ids_in(dataset.records[3])
示例#15
0
    def test_correct_deserialization(self, event_data: str):
        """Load DataFactory event data in its native coordinates and verify the dataset."""
        dataset = datafactory.load(
            event_data=event_data, coordinates="datafactory"
        )

        assert dataset.metadata.provider == Provider.DATAFACTORY
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.events) == 1027
        assert len(dataset.metadata.periods) == 2

        # Ball ownership is expressed as one of the metadata team objects.
        assert dataset.events[10].ball_owning_team == dataset.metadata.teams[1]
        assert dataset.events[23].ball_owning_team == dataset.metadata.teams[0]
        assert dataset.metadata.orientation == Orientation.HOME_TEAM

        home_team = dataset.metadata.teams[0]
        assert home_team.name == "Team A"
        assert home_team.ground == Ground.HOME
        away_team = dataset.metadata.teams[1]
        assert away_team.name == "Team B"
        assert away_team.ground == Ground.AWAY

        first_player = dataset.metadata.teams[0].players[0]
        assert first_player.player_id == "38804"
        assert first_player.jersey_no == 1
        assert str(first_player) == "Daniel Bold"
        assert first_player.position is None  # not set
        assert first_player.starting

        expected_first_period = Period(
            id=1,
            start_timestamp=0,
            end_timestamp=2912,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        assert dataset.metadata.periods[0] == expected_first_period

        expected_second_period = Period(
            id=2,
            start_timestamp=2700,
            end_timestamp=5710,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        assert dataset.metadata.periods[1] == expected_second_period

        assert dataset.events[0].coordinates == Point(0.01, 0.01)

        # Check the qualifiers
        kick_off_value = dataset.events[0].qualifiers[0].value
        assert kick_off_value == SetPieceType.KICK_OFF
        throw_in_value = dataset.events[412].qualifiers[0].value
        assert throw_in_value == SetPieceType.THROW_IN
示例#16
0
    def test_correct_deserialization(self):
        """Deserialize the Metrica home/away CSV fixtures and verify periods and positions."""
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = MetricaTrackingSerializer()

        with open(f"{files_dir}/metrica_home.csv", "rb") as raw_data_home:
            with open(f"{files_dir}/metrica_away.csv", "rb") as raw_data_away:
                dataset = serializer.deserialize(
                    inputs={
                        "raw_data_home": raw_data_home,
                        "raw_data_away": raw_data_away,
                    }
                )

        assert len(dataset.records) == 6
        assert len(dataset.periods) == 2
        assert dataset.orientation == Orientation.FIXED_HOME_AWAY

        expected_first_period = Period(
            id=1,
            start_timestamp=0.04,
            end_timestamp=0.12,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        assert dataset.periods[0] == expected_first_period

        expected_second_period = Period(
            id=2,
            start_timestamp=5800.16,
            end_timestamp=5800.24,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        assert dataset.periods[1] == expected_second_period

        # make sure data is loaded correctly (including flip y-axis)
        expected_home_11 = Point(x=0.00082, y=1 - 0.48238)
        assert (
            dataset.records[0].home_team_player_positions["11"]
            == expected_home_11
        )
        expected_away_25 = Point(x=0.90509, y=1 - 0.47462)
        assert (
            dataset.records[0].away_team_player_positions["25"]
            == expected_away_25
        )
        expected_ball = Point(x=0.45472, y=1 - 0.38709)
        assert dataset.records[0].ball_position == expected_ball

        # make sure player data is only in the frame when the player is at the pitch
        late_player = "14"
        assert late_player not in dataset.records[0].home_team_player_positions
        assert late_player in dataset.records[3].home_team_player_positions
示例#17
0
    def test_correct_deserialization(self):
        """Deserialize the Metrica metadata XML and events JSON fixtures and verify metadata."""
        files_dir = f"{os.path.dirname(__file__)}/files"

        serializer = MetricaEventsJsonSerializer()

        with open(f"{files_dir}/metrica_metadata.xml", "rb") as metadata:
            with open(f"{files_dir}/metrica_events.json", "rb") as raw_data:
                dataset = serializer.deserialize(
                    inputs={"metadata": metadata, "raw_data": raw_data}
                )

        assert dataset.metadata.provider == Provider.METRICA
        assert len(dataset.events) == 3620
        assert len(dataset.metadata.periods) == 2
        assert dataset.metadata.orientation is None
        assert dataset.metadata.teams[0].name == "Team A"
        assert dataset.metadata.teams[1].name == "Team B"

        first_player = dataset.metadata.teams[0].players[0]
        assert first_player.player_id == "P3578"
        assert first_player.jersey_no == 11
        assert str(first_player) == "Player 11"
        assert first_player.position.name == "Goalkeeper"

        # (id, start_timestamp, end_timestamp) expected for each period, in order.
        expected_bounds = [
            (1, 14.44, 2783.76),
            (2, 2803.6, 5742.12),
        ]
        for index, (period_id, start, end) in enumerate(expected_bounds):
            assert dataset.metadata.periods[index] == Period(
                id=period_id,
                start_timestamp=start,
                end_timestamp=end,
                attacking_direction=AttackingDirection.NOT_SET,
            )
示例#18
0
    def test_correct_deserialization(self, event_data: str, meta_data: str):
        """Load Sportec event data in its native coordinates and verify the dataset."""
        dataset = sportec.load(
            event_data=event_data, meta_data=meta_data, coordinates="sportec"
        )

        assert dataset.metadata.provider == Provider.SPORTEC
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.metadata.periods) == 2

        # raw_event must be flattened dict
        assert isinstance(dataset.events[0].raw_event, dict)

        assert len(dataset.events) == 28
        assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY

        expected_first_period = Period(
            id=1,
            start_timestamp=1591381800.21,
            end_timestamp=1591384584.0,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        assert dataset.metadata.periods[0] == expected_first_period

        expected_second_period = Period(
            id=2,
            start_timestamp=1591385607.01,
            end_timestamp=1591388598.0,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        assert dataset.metadata.periods[1] == expected_second_period

        first_player = dataset.metadata.teams[0].players[0]
        assert first_player.player_id == "DFL-OBJ-00001D"
        assert first_player.jersey_no == 1
        assert str(first_player) == "A. Schwolow"
        assert first_player.position.position_id is None
        assert first_player.position.name == "TW"

        # Check the qualifiers
        expected_first_qualifier = [
            (25, SetPieceType.KICK_OFF),
            (16, BodyPart.RIGHT_FOOT),
            (24, BodyPart.LEFT_FOOT),
            (26, BodyPart.HEAD),
        ]
        for event_index, expected_value in expected_first_qualifier:
            assert (
                dataset.events[event_index].qualifiers[0].value
                == expected_value
            )

        assert dataset.events[0].coordinates == Point(56.41, 68.0)
示例#19
0
    def deserialize(self, inputs: SportsCodeInputs) -> CodeDataset:
        """
        Parse the XML read from `inputs.data` into a `CodeDataset`.

        Every child of `ALL_INSTANCES` becomes one `Code`. All codes share a
        single `Period` (id 1, starting at 0) whose `end_timestamp` is set to
        the end of the most recently processed instance.
        """
        root = objectify.fromstring(inputs.data.read())

        period = Period(id=1, start_timestamp=0, end_timestamp=0)
        codes = []
        for instance in root.ALL_INSTANCES.iterchildren():
            end_timestamp = float(instance.end)
            code_id = str(instance.ID)
            code_name = str(instance.code)
            start_timestamp = float(instance.start)

            # Map each label's group text to its parsed text value.
            labels = {}
            for label in instance.iterchildren("label"):
                group = str(label.find("group"))
                labels[group] = parse_value(str(label.find("text")))

            codes.append(
                Code(
                    period=period,
                    code_id=code_id,
                    code=code_name,
                    timestamp=start_timestamp,
                    end_timestamp=end_timestamp,
                    labels=labels,
                    ball_state=None,
                    ball_owning_team=None,
                )
            )
            period.end_timestamp = end_timestamp

        metadata = Metadata(
            teams=[],
            periods=[period],
            pitch_dimensions=None,
            score=Score(0, 0),
            frame_rate=0.0,
            orientation=Orientation.NOT_SET,
            flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
            provider=Provider.OTHER,
            coordinate_system=None,
        )
        return CodeDataset(metadata=metadata, records=codes)
示例#20
0
    def test_correct_deserialization(self):
        """Load the Sportec events/meta XML fixtures and verify metadata, periods and a player."""
        files_dir = f"{os.path.dirname(__file__)}/files"

        dataset = load_sportec_event_data(
            f"{files_dir}/sportec_events.xml",
            f"{files_dir}/sportec_meta.xml",
        )

        assert dataset.metadata.provider == Provider.SPORTEC
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.metadata.periods) == 2

        # raw_event must be flattened dict
        assert isinstance(dataset.events[0].raw_event, dict)

        assert len(dataset.events) == 28
        assert dataset.metadata.orientation == Orientation.FIXED_HOME_AWAY

        expected_first_period = Period(
            id=1,
            start_timestamp=1591381800.21,
            end_timestamp=1591384584.0,
            attacking_direction=AttackingDirection.HOME_AWAY,
        )
        assert dataset.metadata.periods[0] == expected_first_period

        expected_second_period = Period(
            id=2,
            start_timestamp=1591385607.01,
            end_timestamp=1591388598.0,
            attacking_direction=AttackingDirection.AWAY_HOME,
        )
        assert dataset.metadata.periods[1] == expected_second_period

        first_player = dataset.metadata.teams[0].players[0]
        assert first_player.player_id == "DFL-OBJ-00001D"
        assert first_player.jersey_no == 1
        assert str(first_player) == "A. Schwolow"
        assert first_player.position.position_id is None
        assert first_player.position.name == "TW"
示例#21
0
    def test_correct_deserialization(self, event_data: str, meta_data: str):
        """Load the Metrica event fixtures and check the parsed dataset."""
        dataset = metrica.load_event(
            event_data=event_data,
            meta_data=meta_data,
        )
        metadata = dataset.metadata

        assert metadata.provider == Provider.METRICA
        assert dataset.dataset_type == DatasetType.EVENT
        assert len(dataset.events) == 3684
        assert len(metadata.periods) == 2
        assert metadata.orientation is None
        assert metadata.teams[0].name == "Team A"
        assert metadata.teams[1].name == "Team B"

        # Eleventh player of the first team.
        eleventh_player = metadata.teams[0].players[10]
        assert eleventh_player.player_id == "Track_11"
        assert eleventh_player.jersey_no == 11
        assert str(eleventh_player) == "Track_11"
        assert eleventh_player.position.name == "Goalkeeper"

        # Both periods, in order; no attacking direction is set.
        expected_periods = [
            Period(
                id=1,
                start_timestamp=14.44,
                end_timestamp=2783.76,
                attacking_direction=AttackingDirection.NOT_SET,
            ),
            Period(
                id=2,
                start_timestamp=2803.6,
                end_timestamp=5742.12,
                attacking_direction=AttackingDirection.NOT_SET,
            ),
        ]
        for index, expected_period in enumerate(expected_periods):
            assert metadata.periods[index] == expected_period

        assert dataset.events[1].coordinates.x == 0.50125

        # Check the qualifiers
        kick_off_qualifier = dataset.records[1].qualifiers[0]
        assert kick_off_qualifier.value == SetPieceType.KICK_OFF
        header_qualifier = dataset.records[100].qualifiers[0]
        assert header_qualifier.value == BodyPart.HEAD
示例#22
0
    def __get_periods(cls, tracking):
        """
        Build a `Period` for every period id found in the tracking data.

        Parameters
        ----------
        tracking : iterable of dict
            Frames; each one is read through its "period" and "time" keys.
            Frames whose "period" is None are ignored. The frames are walked
            exactly once, so any iterable is accepted.

        Returns
        -------
        dict
            Maps each period id (as stored in the frames) to a `Period` whose
            start and end timestamps are taken from the first and the last
            frame of that period with a non-None "time", converted with
            `cls._timestamp_from_timestring`. Entries are inserted in the
            order the period ids first appear in `tracking`.

        Raises
        ------
        IndexError
            If a period occurs in `tracking` but none of its frames has a
            "time" value.
        """
        # period id -> [time of first timed frame, time of last timed frame].
        # The list stays empty while a period was seen without a timed frame.
        time_bounds = {}
        for frame in tracking:
            period_id = frame["period"]
            if period_id is None:
                continue

            bounds = time_bounds.setdefault(period_id, [])
            frame_time = frame["time"]
            if frame_time is None:
                continue

            if bounds:
                bounds[1] = frame_time
            else:
                bounds.extend((frame_time, frame_time))

        periods = {}
        for period_id, bounds in time_bounds.items():
            if not bounds:
                # Same exception type the previous implementation produced
                # for this case, but with an explanation attached.
                raise IndexError(
                    f"period {period_id!r} has no frame with a time value")

            first_time, last_time = bounds
            periods[period_id] = Period(
                id=period_id,
                start_timestamp=cls._timestamp_from_timestring(first_time),
                end_timestamp=cls._timestamp_from_timestring(last_time),
            )
        return periods
示例#23
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
        Deserialize Opta event data into a `EventDataset`.

        Parameters
        ----------
        inputs : dict
            input `f24_data` should point to a `Readable` object containing
            the 'xml' formatted event data. input `f7_data` should point
            to a `Readable` object containing the 'xml' formatted f7 data.
        options : dict
            Options for deserialization of the Opta file. Possible options are
            `event_types` (list of event types) to specify the event types that
            should be returned. Each name is upper-cased and looked up in
            `EventType`. Documented types: "shot", "pass", "carry", "take_on"
            and "generic". Generic is everything that is not parsed into a
            more specific event. Those events are barely parsed. This type of
            event can be used to do the parsing yourself.
            Every event has a 'raw_event' attribute which holds the original
            `Event` XML element.

        Returns
        -------
        dataset : EventDataset
            Only events of periods 1 and 2 that occur after the start event
            of their period are included. Event timestamps are relative to
            the start of their period.

        Raises
        ------
        Exception
            When a `TeamData` element has a `Side` other than "Home" or
            "Away", when the home or away line-up is empty, or when an event
            carries a `team_id` that matches neither team.
        KeyError
            When an event references a `player_id` that is not part of the
            line-up of its team.

        Examples
        --------
        >>> serializer = OptaSerializer()
        >>> with open("123_f24.xml", "rb") as f24_data, \
        ...      open("123_f7.xml", "rb") as f7_data:
        ...
        ...     dataset = serializer.deserialize(
        ...         inputs={
        ...             'f24_data': f24_data,
        ...             'f7_data': f7_data
        ...         },
        ...         options={
        ...             'event_types': ["pass", "take_on", "carry", "shot"]
        ...         }
        ...     )
        """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            f7_root = objectify.fromstring(inputs["f7_data"].read())
            f24_root = objectify.fromstring(inputs["f24_data"].read())

            # An empty list means "no filtering": every event is kept.
            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

        with performance_logging("parse data", logger=logger):
            matchdata_path = objectify.ObjectPath(
                "SoccerFeed.SoccerDocument.MatchData")
            team_elms = list(
                matchdata_path.find(f7_root).iterchildren("TeamData"))

            # Per team: player ref (leading "p" stripped) -> shirt number,
            # taken from the f7 line-up.
            away_player_map = {}
            home_player_map = {}
            home_team_id = None
            away_team_id = None
            for team_elm in team_elms:
                player_map = {
                    player_elm.attrib["PlayerRef"].lstrip("p"):
                    player_elm.attrib["ShirtNumber"]
                    for player_elm in team_elm.find(
                        "PlayerLineUp").iterchildren("MatchPlayer")
                }
                # Team ref with the leading "t" stripped, so it can be
                # compared with the `team_id` attribute of the f24 events.
                team_id = team_elm.attrib["TeamRef"].lstrip("t")

                if team_elm.attrib["Side"] == "Home":
                    home_player_map = player_map
                    home_team_id = team_id
                elif team_elm.attrib["Side"] == "Away":
                    away_player_map = player_map
                    away_team_id = team_id
                else:
                    raise Exception(f"Unknown side: {team_elm.attrib['Side']}")

            if not away_player_map or not home_player_map:
                raise Exception("LineUp incomplete")

            game_elm = f24_root.find("Game")
            # Only two periods are supported; their timestamps are filled in
            # while walking the events below.
            periods = [
                Period(
                    id=1,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
                Period(
                    id=2,
                    start_timestamp=None,
                    end_timestamp=None,
                ),
            ]
            events = []
            for event_elm in game_elm.iterchildren("Event"):
                event_id = event_elm.attrib["id"]
                type_id = int(event_elm.attrib["type_id"])
                timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
                period_id = int(event_elm.attrib["period_id"])
                # for/else: after a `break`, `period` stays bound to the
                # matching Period; the `else` branch only runs when no period
                # matched.
                for period in periods:
                    if period.id == period_id:
                        break
                else:
                    logger.debug(
                        f"Skipping event {event_id} because period doesn't match {period_id}"
                    )
                    continue

                if type_id == EVENT_TYPE_START_PERIOD:
                    logger.debug(
                        f"Set start of period {period.id} to {timestamp}")
                    period.start_timestamp = timestamp
                elif type_id == EVENT_TYPE_END_PERIOD:
                    logger.debug(
                        f"Set end of period {period.id} to {timestamp}")
                    period.end_timestamp = timestamp
                else:
                    if not period.start_timestamp:
                        # not started yet
                        continue

                    if event_elm.attrib["team_id"] == home_team_id:
                        team = Team.HOME
                        current_team_map = home_player_map
                    elif event_elm.attrib["team_id"] == away_team_id:
                        team = Team.AWAY
                        current_team_map = away_player_map
                    else:
                        raise Exception(
                            f"Unknown team_id {event_elm.attrib['team_id']}")

                    x = float(event_elm.attrib["x"])
                    y = float(event_elm.attrib["y"])
                    outcome = int(event_elm.attrib["outcome"])
                    # qualifier id -> value (None when the <Q> element has no
                    # `value` attribute).
                    qualifiers = {
                        int(qualifier_elm.attrib["qualifier_id"]):
                        qualifier_elm.attrib.get("value")
                        for qualifier_elm in event_elm.iterchildren("Q")
                    }
                    # Events without a `player_id` keep a jersey number of
                    # None.
                    player_jersey_no = None
                    if "player_id" in event_elm.attrib:
                        player_jersey_no = current_team_map[
                            event_elm.attrib["player_id"]]

                    generic_event_kwargs = dict(
                        # from DataRecord
                        period=period,
                        # relative to the start of the period
                        timestamp=timestamp - period.start_timestamp,
                        ball_owning_team=None,
                        ball_state=BallState.ALIVE,
                        # from Event
                        event_id=event_id,
                        team=team,
                        player_jersey_no=player_jersey_no,
                        position=Point(x=x, y=y),
                        raw_event=event_elm,
                    )

                    if type_id == EVENT_TYPE_PASS:
                        pass_event_kwargs = _parse_pass(qualifiers, outcome)
                        event = PassEvent(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                        pass_event_kwargs = _parse_offside_pass()
                        event = PassEvent(
                            **pass_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id == EVENT_TYPE_TAKE_ON:
                        take_on_event_kwargs = _parse_take_on(outcome)
                        event = TakeOnEvent(
                            **take_on_event_kwargs,
                            **generic_event_kwargs,
                        )
                    elif type_id in (
                            EVENT_TYPE_SHOT_MISS,
                            EVENT_TYPE_SHOT_POST,
                            EVENT_TYPE_SHOT_SAVED,
                            EVENT_TYPE_SHOT_GOAL,
                    ):
                        shot_event_kwargs = _parse_shot(
                            qualifiers,
                            type_id,
                            position=generic_event_kwargs["position"],
                        )
                        # Merged through `update` so that keys returned by
                        # `_parse_shot` override the generic ones instead of
                        # clashing as duplicate keyword arguments.
                        kwargs = {}
                        kwargs.update(generic_event_kwargs)
                        kwargs.update(shot_event_kwargs)
                        event = ShotEvent(**kwargs)
                    else:
                        event = GenericEvent(**generic_event_kwargs,
                                             result=None)

                    if (not wanted_event_types
                            or event.event_type in wanted_event_types):
                        events.append(event)

        # Coordinates are left as read from the file; both pitch axes are
        # declared as 0..100.
        return EventDataset(
            flags=DatasetFlag.BALL_OWNING_TEAM,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100),
                                             y_dim=Dimension(0, 100)),
            periods=periods,
            records=events,
        )
示例#24
0
    def deserialize(self, inputs: DatafactoryInputs) -> EventDataset:
        """
        Deserialize DataFactory event data into an `EventDataset`.

        Parameters
        ----------
        inputs : DatafactoryInputs
            `inputs.event_data` must be a readable object holding the JSON
            document. The keys "match", "scoreStatus", "incidences",
            "players" and "teams" are read from it.

        Returns
        -------
        dataset : EventDataset
            Events sorted by (half, minute, second, event id) and passed
            through the coordinate transformer. Events whose half has no
            "end" status update are skipped, because no period can be built
            for them. For every event whose type is in `BALL_OUT_EVENTS` a
            synthetic `BallOutEvent` is emitted right before the event
            itself. Each event, synthetic ones included, is only kept when
            `self.should_include_event` accepts it.

        Raises
        ------
        KeyError
            When one of the expected keys is missing from the document, e.g.
            the status class in "incidences" or a half that is not one of
            the known period ids.
        """
        transformer = self.get_transformer(length=2, width=2)

        with performance_logging("load data", logger=logger):
            data = json.load(inputs.event_data)
            match = data["match"]
            score_data = data["scoreStatus"]
            incidences = data["incidences"]
            players_data = data["players"]
            teams_data = data["teams"]

        with performance_logging("parse data", logger=logger):
            teams = []
            scores = []
            team_ids = (
                (Ground.HOME, str(match["homeTeamId"])),
                (Ground.AWAY, str(match["awayTeamId"])),
            )
            for ground, team_id in team_ids:
                team = Team(
                    team_id=team_id,
                    name=teams_data[team_id]["name"],
                    ground=ground,
                )
                team.players = [
                    Player(
                        player_id=player_id,
                        team=team,
                        first_name=player["name"]["first"],
                        last_name=player["name"]["last"],
                        name=player["name"]["shortName"]
                        or player["name"]["nick"],
                        jersey_no=player["squadNo"],
                        starting=not player["substitute"],
                    ) for player_id, player in players_data.items()
                    if str(player["teamId"]) == team_id
                ]
                teams.append(team)
                # A team without a score entry yields None.
                scores.append(score_data.get(team_id, {}).get("score"))
            score = Score(home=scores[0], away=scores[1])

            # setup periods
            status = incidences.pop(DF_EVENT_CLASS_STATUS)
            # start timestamps are fixed
            start_ts = {1: 0, 2: 45 * 60, 3: 90 * 60, 4: 105 * 60, 5: 120 * 60}
            # check for end status updates to setup periods
            end_event_types = {
                DF_EVENT_TYPE_STATUS_MATCH_END,
                DF_EVENT_TYPE_STATUS_FIRST_HALF_END,
                DF_EVENT_TYPE_STATUS_SECOND_HALF_END,
                DF_EVENT_TYPE_STATUS_FIRST_EXTRA_END,
                DF_EVENT_TYPE_STATUS_SECOND_EXTRA_END,
            }
            periods = {}
            for status_update in status.values():
                if status_update["type"] not in end_event_types:
                    continue
                half = status_update["t"]["half"]
                end_ts = parse_str_ts(status_update)
                # Odd halves attack home -> away, even halves the other way.
                periods[half] = Period(
                    id=half,
                    start_timestamp=start_ts[half],
                    end_timestamp=end_ts,
                    attacking_direction=AttackingDirection.HOME_AWAY if half %
                    2 == 1 else AttackingDirection.AWAY_HOME,
                )

            # exclude goals, already listed as shots too; popped with a
            # default so a document without that class does not raise
            incidences.pop(DF_EVENT_CLASS_GOALS, None)
            raw_events = [(k, e_id, e) for k in incidences
                          for e_id, e in incidences[k].items()]
            # sort events by timestamp, event_id
            raw_events.sort(key=lambda e: (
                e[2]["t"]["half"],
                e[2]["t"]["m"],
                e[2]["t"]["s"] or 0,
                e[1],
            ))

            home_team, away_team = teams
            events = []
            previous_event = next_event = None
            for i, (e_class, e_id, raw_event) in enumerate(raw_events):
                period = periods.get(raw_event["t"]["half"])
                if period is None:
                    # skip invalid event
                    continue

                timestamp = parse_str_ts(raw_event)
                # The previous event is only meaningful within the same half.
                if (previous_event is not None and
                        previous_event["t"]["half"] != raw_event["t"]["half"]):
                    previous_event = None
                next_event = (raw_events[i + 1][2]
                              if i + 1 < len(raw_events) else None)

                team, player = _get_team_and_player(raw_event, home_team,
                                                    away_team)

                event_base_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=e_id,
                    team=team,
                    player=player,
                    coordinates=(_parse_coordinates(raw_event["coord"]["1"])
                                 if "coord" in raw_event else None),
                    raw_event=raw_event,
                    result=None,
                    qualifiers=None,
                )

                if e_class in DF_EVENT_CLASS_PASSES:
                    pass_event_kwargs = _parse_pass(
                        raw_event=raw_event,
                        team=team,
                        previous_event=previous_event,
                        next_event=next_event,
                    )
                    event_base_kwargs.update(pass_event_kwargs)
                    event = PassEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_SHOTS:
                    shot_event_kwargs = _parse_shot(
                        raw_event=raw_event,
                        previous_event=previous_event,
                    )
                    event_base_kwargs.update(shot_event_kwargs)
                    event = ShotEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_STEALINGS:
                    event = RecoveryEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_FOULS:
                    # NOTE: could use qualifiers? (hand, foul, penalty?)
                    # switch possession team
                    event_base_kwargs["ball_owning_team"] = (
                        home_team if team == away_team else away_team)
                    event = FoulCommittedEvent.create(**event_base_kwargs)

                elif e_class in DF_EVENT_CLASS_CARDS:
                    card_kwargs = _parse_card(raw_event=raw_event, )
                    event_base_kwargs.update(card_kwargs)
                    event = CardEvent.create(**event_base_kwargs)

                elif e_class == DF_EVENT_CLASS_SUBSTITUTIONS:
                    substitution_event_kwargs = _parse_substitution(
                        raw_event=raw_event, team=team)
                    event_base_kwargs.update(substitution_event_kwargs)
                    event = SubstitutionEvent.create(**event_base_kwargs)

                else:
                    # otherwise, a generic event
                    event = GenericEvent.create(
                        event_name=e_class,
                        **event_base_kwargs,
                    )

                # check if the event implies ball was out of the field and add a synthetic out event
                if raw_event["type"] in BALL_OUT_EVENTS:
                    ball_out_event = BallOutEvent.create(
                        # from DataRecord
                        period=period,
                        timestamp=timestamp,
                        ball_owning_team=team,
                        ball_state=BallState.DEAD,
                        # from Event
                        event_id=e_id,
                        team=team,
                        player=player,
                        coordinates=event.coordinates,
                        raw_event=raw_event,
                        result=None,
                        qualifiers=None,
                    )
                    # Filter the synthetic event on its own merits, not on
                    # those of the event that triggered it: otherwise ball-out
                    # events slip into results filtered to other event types
                    # and are dropped when only ball-out events are wanted.
                    if self.should_include_event(ball_out_event):
                        events.append(
                            transformer.transform_event(ball_out_event))

                if self.should_include_event(event):
                    events.append(transformer.transform_event(event))

                # only consider as a previous_event a ball-in-play event
                if e_class not in (
                        DF_EVENT_CLASS_YELLOW_CARDS,
                        DF_EVENT_CLASS_RED_CARDS,
                        DF_EVENT_CLASS_SUBSTITUTIONS,
                        DF_EVENT_CLASS_PENALTY_SHOOTOUT,
                ):
                    previous_event = raw_event

        metadata = Metadata(
            teams=teams,
            periods=sorted(periods.values(), key=lambda p: p.id),
            pitch_dimensions=transformer.get_to_coordinate_system().
            pitch_dimensions,
            frame_rate=None,
            orientation=Orientation.HOME_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            score=score,
            provider=Provider.DATAFACTORY,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return EventDataset(
            metadata=metadata,
            records=events,
        )
示例#25
0
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> EventDataset:
        """
        Deserialize StatsBomb event data into a `EventDataset`.

        Parameters
        ----------
        inputs : dict
            input `event_data` should point to a `Readable` object containing
            the 'json' formatted event data. input `lineup_data` should point
            to a `Readable` object containing the 'json' formatted lineup data.
        options : dict
            Options for deserialization of the StatsBomb file. Possible options are
            `event_types` (list of event types) to specify the event types that
            should be returned. Each name is upper-cased and looked up in
            `EventType`. Documented types: "shot", "pass", "carry", "take_on"
            and "generic". Generic is everything that is not parsed into a
            more specific event. Those events are barely parsed. This type of
            event can be used to do the parsing yourself.
            Every event has a 'raw_event' attribute which contains the original
            dictionary.

        Returns
        -------
        dataset : EventDataset
            Periods are derived from the events themselves: a new `Period` is
            started whenever the "period" value of an event differs from the
            current one.

        Raises
        ------
        Exception
            When the team or the possession team of an event matches neither
            of the two line-ups.

        Examples
        --------
        >>> serializer = StatsBombSerializer()
        >>> with open("events/12312312.json", "rb") as event_data, \
        ...      open("lineups/123123123.json", "rb") as lineup_data:
        ...
        ...     dataset = serializer.deserialize(
        ...         inputs={
        ...             'event_data': event_data,
        ...             'lineup_data': lineup_data
        ...         },
        ...         options={
        ...             'event_types': ["pass", "take_on", "carry", "shot"]
        ...         }
        ...     )
        """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["event_data"])
            # The lineup document is unpacked as exactly two entries: home
            # first, away second.
            home_lineup, away_lineup = json.load(inputs["lineup_data"])
            (
                shot_fidelity_version,
                xy_fidelity_version,
            ) = _determine_xy_fidelity_versions(raw_events)
            logger.info(
                f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
            )

        with performance_logging("parse data", logger=logger):

            home_team = Team(
                team_id=str(home_lineup["team_id"]),
                name=home_lineup["team_name"],
                ground=Ground.HOME,
            )
            home_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=home_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                )
                for player in home_lineup["lineup"]
            ]

            away_team = Team(
                team_id=str(away_lineup["team_id"]),
                name=away_lineup["team_name"],
                ground=Ground.AWAY,
            )
            away_team.players = [
                Player(
                    player_id=str(player["player_id"]),
                    team=away_team,
                    name=player["player_name"],
                    jersey_no=int(player["jersey_number"]),
                )
                for player in away_lineup["lineup"]
            ]

            teams = [home_team, away_team]

            # An empty list means "no filtering": every event is kept.
            wanted_event_types = [
                EventType[event_type.upper()]
                for event_type in options.get("event_types", [])
            ]

            periods = []
            period = None
            events = []
            for raw_event in raw_events:
                # Team ids are compared with the raw lineup ids (not the
                # stringified `team_id` stored on the Team objects).
                if raw_event["team"]["id"] == home_lineup["team_id"]:
                    team = teams[0]
                elif raw_event["team"]["id"] == away_lineup["team_id"]:
                    team = teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}"
                    )

                if (
                    raw_event["possession_team"]["id"]
                    == home_lineup["team_id"]
                ):
                    possession_team = teams[0]
                elif (
                    raw_event["possession_team"]["id"]
                    == away_lineup["team_id"]
                ):
                    possession_team = teams[1]
                else:
                    raise Exception(
                        f"Unknown possession_team_id: {raw_event['possession_team']}"
                    )

                timestamp = parse_str_ts(raw_event["timestamp"])
                period_id = int(raw_event["period"])
                # The first event of a period creates the Period; every later
                # event of the same period pushes its end timestamp forward.
                # NOTE(review): a period that yields only a single event keeps
                # `end_timestamp=None`, and the start computation of the next
                # period would then add None to a number - confirm whether
                # that can occur in real data.
                if not period or period.id != period_id:
                    period = Period(
                        id=period_id,
                        start_timestamp=(
                            timestamp
                            if not period
                            # period = [start, end], add millisecond to prevent overlapping
                            else timestamp + period.end_timestamp + 0.001
                        ),
                        end_timestamp=None,
                    )
                    periods.append(period)
                else:
                    period.end_timestamp = period.start_timestamp + timestamp

                # Events without a "player" entry keep `player=None`.
                player = None
                if "player" in raw_event:
                    player = team.get_player_by_id(raw_event["player"]["id"])

                # Pick the coordinate fidelity version matching the event type.
                event_type = raw_event["type"]["id"]
                if event_type == SB_EVENT_TYPE_SHOT:
                    fidelity_version = shot_fidelity_version
                elif event_type in (
                    SB_EVENT_TYPE_CARRY,
                    SB_EVENT_TYPE_DRIBBLE,
                    SB_EVENT_TYPE_PASS,
                ):
                    fidelity_version = xy_fidelity_version
                else:
                    # TODO: Uh ohhhh.. don't know which one to pick
                    fidelity_version = xy_fidelity_version

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    timestamp=timestamp,
                    ball_owning_team=possession_team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=raw_event["id"],
                    team=team,
                    player=player,
                    # None when the event carries no "location"
                    coordinates=(
                        _parse_coordinates(
                            raw_event.get("location"), fidelity_version
                        )
                        if "location" in raw_event
                        else None
                    ),
                    raw_event=raw_event,
                )

                if event_type == SB_EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        pass_dict=raw_event["pass"],
                        team=team,
                        fidelity_version=fidelity_version,
                    )

                    event = PassEvent(
                        # TODO: Consider moving this to _parse_pass
                        receive_timestamp=timestamp + raw_event["duration"],
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif event_type == SB_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        shot_dict=raw_event["shot"]
                    )
                    event = ShotEvent(
                        **shot_event_kwargs, **generic_event_kwargs
                    )

                # For dribble and carry the definitions
                # are flipped between Statsbomb and kloppy
                elif event_type == SB_EVENT_TYPE_DRIBBLE:
                    take_on_event_kwargs = _parse_take_on(
                        take_on_dict=raw_event["dribble"]
                    )
                    event = TakeOnEvent(
                        **take_on_event_kwargs, **generic_event_kwargs
                    )
                elif event_type == SB_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(
                        carry_dict=raw_event["carry"],
                        fidelity_version=fidelity_version,
                    )
                    event = CarryEvent(
                        # TODO: Consider moving this to _parse_carry
                        end_timestamp=timestamp + raw_event["duration"],
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                else:
                    # Everything else is kept as a generic event named after
                    # the StatsBomb event type.
                    event = GenericEvent(
                        result=None,
                        event_name=raw_event["type"]["name"],
                        **generic_event_kwargs,
                    )

                if (
                    not wanted_event_types
                    or event.event_type in wanted_event_types
                ):
                    events.append(event)

        # Coordinates are declared on a 120 x 80 pitch; no score is set.
        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 120), y_dim=Dimension(0, 80)
            ),
            frame_rate=None,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            flags=DatasetFlag.BALL_OWNING_TEAM,
            score=None,
        )

        return EventDataset(metadata=metadata, records=events,)
示例#26
0
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> EventDataset:
        """
        Deserialize Wyscout event data into an `EventDataset`.

        Parameters
        ----------
        inputs : dict
            `event_data` should point to a `Readable` object containing the
            JSON formatted event data.
        options : dict
            Optional. `event_types` is a list of event type names; each name
            is upper-cased, looked up in `EventType` and handed to
            `_include_event` to decide which events are kept.

        Returns
        -------
        dataset : EventDataset
        """
        WyscoutSerializer.__validate_inputs(inputs)
        options = options or {}

        requested_types = [
            EventType[type_name.upper()]
            for type_name in options.get("event_types", [])
        ]

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs["event_data"])

        periods = []

        with performance_logging("parse data", logger=logger):
            home_team_id, away_team_id = raw_events["teams"].keys()
            home_team = _parse_team(raw_events, home_team_id, Ground.HOME)
            away_team = _parse_team(raw_events, away_team_id, Ground.AWAY)
            teams = {home_team_id: home_team, away_team_id: away_team}
            # One player lookup per team, keyed by the team id.
            players = {
                wy_id: _players_to_dict(team.players)
                for wy_id, team in teams.items()
            }

            events = []
            event_records = raw_events["events"]

            for idx, raw_event in enumerate(event_records):
                # Some parsers need to peek at the event that follows.
                following = idx + 1
                next_event = (
                    event_records[following]
                    if following < len(event_records)
                    else None
                )

                team_id = str(raw_event["teamId"])
                player_id = str(raw_event["playerId"])

                # Open a new period whenever the match period changes.
                match_period = raw_event["matchPeriod"]
                if not periods or periods[-1].id != match_period:
                    periods.append(
                        Period(
                            id=match_period,
                            start_timestamp=0,
                            end_timestamp=0,
                        )
                    )

                generic_event_args = dict(
                    event_id=raw_event["id"],
                    raw_event=raw_event,
                    coordinates=Point(
                        x=float(raw_event["positions"][0]["x"]),
                        y=float(raw_event["positions"][0]["y"]),
                    ),
                    team=teams[team_id],
                    player=(
                        None
                        if player_id == INVALID_PLAYER
                        else players[team_id][player_id]
                    ),
                    ball_owning_team=None,
                    ball_state=None,
                    period=periods[-1],
                    timestamp=raw_event["eventSec"],
                )

                event = None
                event_name = raw_event["eventName"]

                if event_name == wyscout_events.SHOT.EVENT:
                    event = ShotEvent.create(
                        **_parse_shot(raw_event, next_event),
                        **generic_event_args,
                    )
                elif event_name == wyscout_events.PASS.EVENT:
                    event = PassEvent.create(
                        **_parse_pass(raw_event, next_event),
                        **generic_event_args,
                    )
                elif event_name == wyscout_events.FOUL.EVENT:
                    event = FoulCommittedEvent.create(
                        **_parse_foul(raw_event), **generic_event_args
                    )
                    # A foul carrying a card tag is reported as the card
                    # event, which replaces the foul event built above.
                    if any(
                        _has_tag(raw_event, tag) for tag in wyscout_tags.CARD
                    ):
                        event = CardEvent.create(
                            **_parse_card(raw_event), **generic_event_args
                        )
                elif event_name == wyscout_events.INTERRUPTION.EVENT:
                    event = BallOutEvent.create(
                        **_parse_ball_out(raw_event), **generic_event_args
                    )
                elif event_name == wyscout_events.FREE_KICK.EVENT:
                    set_piece_event_args = _parse_set_piece(
                        raw_event, next_event
                    )
                    sub_event_name = raw_event["subEventName"]
                    # Sub events in neither list leave `event` as None.
                    if sub_event_name in wyscout_events.FREE_KICK.PASS_TYPES:
                        event = PassEvent.create(
                            **set_piece_event_args, **generic_event_args
                        )
                    elif sub_event_name in wyscout_events.FREE_KICK.SHOT_TYPES:
                        event = ShotEvent.create(
                            **set_piece_event_args, **generic_event_args
                        )
                elif event_name == wyscout_events.OTHERS_ON_BALL.EVENT:
                    event = RecoveryEvent.create(
                        **_parse_recovery(raw_event), **generic_event_args
                    )
                elif event_name == wyscout_events.DUEL.EVENT:
                    event = TakeOnEvent.create(
                        **_parse_takeon(raw_event), **generic_event_args
                    )
                elif event_name not in (
                    wyscout_events.SAVE.EVENT,
                    wyscout_events.OFFSIDE.EVENT,
                ):
                    # The events SAVE and OFFSIDE are already merged with
                    # PASS and SHOT events, so they are not emitted here.
                    event = GenericEvent.create(
                        result=None,
                        qualifiers=_generic_qualifiers(raw_event),
                        **generic_event_args,
                    )

                if event and _include_event(event, requested_types):
                    events.append(event)

        metadata = Metadata(
            teams=[home_team, away_team],
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(0, 100), y_dim=Dimension(0, 100)
            ),
            score=None,
            frame_rate=None,
            orientation=Orientation.BALL_OWNING_TEAM,
            flags=None,
            provider=Provider.WYSCOUT,
        )

        return EventDataset(metadata=metadata, records=events)
示例#27
0
文件: tracab.py 项目: PySport/kloppy
    def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
        """
        Deserialize TRACAB tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : TRACABInputs
            `inputs.meta_data` is read and parsed as the XML match metadata;
            `inputs.raw_data` is read line by line as ASCII encoded bytes.

        Returns
        -------
        dataset : TrackingDataset

        Raises
        ------
        IndexError
            If the metadata contains no period with a non-zero start or end
            frame (the orientation is derived from the first period).

        Notes
        -----
        Reads `self.only_alive`, `self.sample_rate` and `self.limit`. When
        `self.limit` is truthy, at most `self.limit` frames are returned.
        """
        # TODO: also used in Metrica, extract to a method
        home_team = Team(team_id="home", name="home", ground=Ground.HOME)
        away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
        teams = [home_team, away_team]

        with performance_logging("Loading metadata", logger=logger):
            match = objectify.fromstring(inputs.meta_data.read()).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                # Periods with both frame ids at 0 are skipped.
                if start_frame_id != 0 or end_frame_id != 0:
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id / frame_rate,
                            end_timestamp=end_frame_id / frame_rate,
                        )
                    )

        with performance_logging("Loading data", logger=logger):

            transformer = self.get_transformer(
                length=pitch_size_width, width=pitch_size_height
            )

            def _iter():
                """Yield `(period, line)` for every sampled line inside a period."""
                n = 0
                sample = 1.0 / self.sample_rate

                for line_ in inputs.raw_data.readlines():
                    line_ = line_.strip().decode("ascii")
                    if not line_:
                        continue

                    # The frame id is the text before the first ":".
                    frame_id = int(line_[:10].split(":", 1)[0])
                    if self.only_alive and not line_.endswith("Alive;:"):
                        continue

                    for period_ in periods:
                        if period_.contains(frame_id / frame_rate):
                            # NOTE(review): `sample` is a float, so this
                            # exact-zero modulo relies on 1 / sample_rate
                            # being integral in floating point - confirm.
                            if n % sample == 0:
                                yield period_, line_
                            n += 1

            frames = []
            # Count from 1 so that `frame_count` is the number of frames
            # collected so far; counting from 0 returned `limit + 1` frames.
            for frame_count, (period, line) in enumerate(_iter(), start=1):
                frame = self._frame_from_line(teams, period, line, frame_rate)

                frame = transformer.transform_frame(frame)

                frames.append(frame)

                # The first frame seen in a period fixes its attacking
                # direction.
                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame
                        )
                    )

                if self.limit and frame_count >= self.limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY
            if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
            else Orientation.FIXED_AWAY_HOME
        )

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
            score=None,
            frame_rate=frame_rate,
            orientation=orientation,
            provider=Provider.TRACAB,
            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return TrackingDataset(
            records=frames,
            metadata=metadata,
        )
示例#28
0
文件: tracab.py 项目: xor-lab/kloppy
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> TrackingDataset:
        """
        Deserialize TRACAB tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            input `raw_data` should point to a `Readable` object containing
            the 'csv' formatted raw data. input `metadata` should point to
            the xml metadata data.
        options : dict
            Options for deserialization of the TRACAB file. Possible options are
            `only_alive` (boolean, default True) to specify that only frames
            with alive ball state should be loaded, `sample_rate` (float
            between 0 and 1, default 1.0) to specify the amount of frames that
            should be loaded, and `limit` (int, default 0 meaning no limit) to
            specify the max number of frames that will be returned.

        Returns
        -------
        dataset : TrackingDataset

        Raises
        ------
        IndexError
            If the metadata contains no period with a non-zero start or end
            frame (the orientation is derived from the first period).

        Examples
        --------
        >>> serializer = TRACABSerializer()
        >>> with open("metadata.xml", "rb") as meta:
        ...     with open("raw.dat", "rb") as raw:
        ...         dataset = serializer.deserialize(
        ...             inputs={"metadata": meta, "raw_data": raw},
        ...             options={"only_alive": True, "sample_rate": 1 / 12},
        ...         )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get("sample_rate", 1.0))
        limit = int(options.get("limit", 0))
        only_alive = bool(options.get("only_alive", True))

        # TODO: also used in Metrica, extract to a method
        home_team = Team(team_id="home", name="home", ground=Ground.HOME)
        away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
        teams = [home_team, away_team]

        with performance_logging("Loading metadata", logger=logger):
            match = objectify.fromstring(inputs["metadata"].read()).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                # Periods with both frame ids at 0 are skipped.
                if start_frame_id != 0 or end_frame_id != 0:
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id / frame_rate,
                            end_timestamp=end_frame_id / frame_rate,
                        )
                    )

        with performance_logging("Loading data", logger=logger):

            def _iter():
                """Yield `(period, line)` for every sampled line inside a period."""
                n = 0
                sample = 1.0 / sample_rate

                for line_ in inputs["raw_data"].readlines():
                    line_ = line_.strip().decode("ascii")
                    if not line_:
                        continue

                    # The frame id is the text before the first ":".
                    frame_id = int(line_[:10].split(":", 1)[0])
                    if only_alive and not line_.endswith("Alive;:"):
                        continue

                    for period_ in periods:
                        if period_.contains(frame_id / frame_rate):
                            # NOTE(review): `sample` is a float, so this
                            # exact-zero modulo relies on 1 / sample_rate
                            # being integral in floating point - confirm.
                            if n % sample == 0:
                                yield period_, line_
                            n += 1

            frames = []
            # Count from 1 so that `frame_count` is the number of frames
            # collected so far; counting from 0 returned `limit + 1` frames,
            # contradicting the documented meaning of `limit`.
            for frame_count, (period, line) in enumerate(_iter(), start=1):
                frame = self._frame_from_line(teams, period, line, frame_rate)

                frames.append(frame)

                # The first frame seen in a period fixes its attacking
                # direction.
                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame
                        )
                    )

                if limit and frame_count >= limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY
            if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
            else Orientation.FIXED_AWAY_HOME
        )

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(
                    -1 * pitch_size_width / 2, pitch_size_width / 2
                ),
                y_dim=Dimension(
                    -1 * pitch_size_height / 2, pitch_size_height / 2
                ),
                x_per_meter=100,
                y_per_meter=100,
            ),
            score=None,
            frame_rate=frame_rate,
            orientation=orientation,
            provider=Provider.TRACAB,
            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        )

        return TrackingDataset(
            records=frames,
            metadata=metadata,
        )
示例#29
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> EventDataset:
        """
        Deserialize StatsBomb event data into an `EventDataset`.

        Parameters
        ----------
        inputs : dict
            `event_data` should point to a `Readable` object containing the
            JSON formatted event data; `lineup_data` should point to a
            `Readable` object containing the JSON formatted lineup data.
        options : dict
            Optional. `event_types` (list of event type names) selects the
            event types that are returned; an empty or missing list returns
            everything. Valid types: "shot", "pass", "carry", "take_on" and
            "generic". Generic is everything other than the first 4 and is
            barely parsed; every event carries the original dictionary in its
            `raw_event` attribute so callers can parse it themselves.

        Returns
        -------
        dataset : EventDataset

        Examples
        --------
        >>> serializer = StatsBombSerializer()
        >>> with open("events/12312312.json", "rb") as event_data:
        ...     with open("lineups/123123123.json", "rb") as lineup_data:
        ...         dataset = serializer.deserialize(
        ...             inputs={
        ...                 'event_data': event_data,
        ...                 'lineup_data': lineup_data
        ...             },
        ...             options={
        ...                 'event_types': ["pass", "take_on", "carry", "shot"]
        ...             }
        ...         )
        """
        self.__validate_inputs(inputs)
        options = options or {}

        with performance_logging("load data", logger=logger):
            raw_events = json.load(inputs['event_data'])
            home_lineup, away_lineup = json.load(inputs['lineup_data'])
            (
                shot_fidelity_version,
                xy_fidelity_version,
            ) = _determine_xy_fidelity_versions(raw_events)
            logger.info(
                f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
            )

        with performance_logging("parse data", logger=logger):

            def _jersey_numbers(lineup):
                # player id -> jersey number (as a string) for one lineup
                return {
                    entry['player_id']: str(entry['jersey_number'])
                    for entry in lineup['lineup']
                }

            home_player_map = _jersey_numbers(home_lineup)
            away_player_map = _jersey_numbers(away_lineup)

            wanted_event_types = [
                EventType[type_name.upper()]
                for type_name in options.get('event_types', [])
            ]

            periods = []
            period = None
            events = []
            for raw_event in raw_events:
                # Which side performed the event?
                acting_team_id = raw_event['team']['id']
                if acting_team_id == home_lineup['team_id']:
                    team, current_team_map = Team.HOME, home_player_map
                elif acting_team_id == away_lineup['team_id']:
                    team, current_team_map = Team.AWAY, away_player_map
                else:
                    raise Exception(
                        f"Unknown team_id {raw_event['team']['id']}")

                # Which side is in possession?
                possession_team_id = raw_event['possession_team']['id']
                if possession_team_id == home_lineup['team_id']:
                    possession_team = Team.HOME
                elif possession_team_id == away_lineup['team_id']:
                    possession_team = Team.AWAY
                else:
                    raise Exception(
                        f"Unknown possession_team_id: {raw_event['possession_team']}"
                    )

                timestamp = parse_str_ts(raw_event['timestamp'])
                period_id = int(raw_event['period'])
                if period and period.id == period_id:
                    # Same period: push its end to the latest event seen.
                    period.end_timestamp = period.start_timestamp + timestamp
                else:
                    # New period: it starts where the previous one ended
                    # (or at this event's timestamp for the first period).
                    period_start = (timestamp if not period else
                                    timestamp + period.end_timestamp)
                    period = Period(id=period_id,
                                    start_timestamp=period_start,
                                    end_timestamp=None)
                    periods.append(period)

                player_jersey_no = (
                    current_team_map[raw_event['player']['id']]
                    if 'player' in raw_event else None)

                event_type = raw_event['type']['id']
                # Shots use the shot fidelity version. Carry, dribble and
                # pass use the XY one, and so does everything else.
                # TODO: Uh ohhhh.. don't know which one to pick for the rest
                fidelity_version = (shot_fidelity_version
                                    if event_type == SB_EVENT_TYPE_SHOT else
                                    xy_fidelity_version)

                generic_event_kwargs = {
                    # from DataRecord
                    'period': period,
                    'timestamp': timestamp,
                    'ball_owning_team': possession_team,
                    'ball_state': BallState.ALIVE,
                    # from Event
                    'event_id': raw_event['id'],
                    'team': team,
                    'player_jersey_no': player_jersey_no,
                    'position': (_parse_position(raw_event.get('location'),
                                                 fidelity_version)
                                 if 'location' in raw_event else None),
                    'raw_event': raw_event,
                }

                if event_type == SB_EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        pass_dict=raw_event['pass'],
                        current_team_map=current_team_map,
                        fidelity_version=fidelity_version,
                    )
                    event = PassEvent(
                        # TODO: Consider moving this to _parse_pass
                        receive_timestamp=timestamp + raw_event['duration'],
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif event_type == SB_EVENT_TYPE_SHOT:
                    shot_event_kwargs = _parse_shot(
                        shot_dict=raw_event['shot'])
                    event = ShotEvent(**shot_event_kwargs,
                                      **generic_event_kwargs)
                elif event_type == SB_EVENT_TYPE_DRIBBLE:
                    # For dribble and carry the definitions
                    # are flipped between Statsbomb and kloppy
                    take_on_event_kwargs = _parse_take_on(
                        take_on_dict=raw_event['dribble'])
                    event = TakeOnEvent(**take_on_event_kwargs,
                                        **generic_event_kwargs)
                elif event_type == SB_EVENT_TYPE_CARRY:
                    carry_event_kwargs = _parse_carry(
                        carry_dict=raw_event['carry'],
                        fidelity_version=fidelity_version,
                    )
                    event = CarryEvent(
                        # TODO: Consider moving this to _parse_carry
                        end_timestamp=timestamp + raw_event['duration'],
                        **carry_event_kwargs,
                        **generic_event_kwargs,
                    )
                else:
                    event = GenericEvent(result=None, **generic_event_kwargs)

                # An empty filter keeps every event.
                if not wanted_event_types or event.event_type in wanted_event_types:
                    events.append(event)

        return EventDataset(
            flags=DatasetFlag.BALL_OWNING_TEAM,
            orientation=Orientation.ACTION_EXECUTING_TEAM,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 120),
                                             y_dim=Dimension(0, 80)),
            periods=periods,
            records=events,
        )
示例#30
0
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataSet:
        """
        Deserialize TRACAB tracking data into a `TrackingDataSet`.

        Parameters
        ----------
        inputs : dict
            `raw_data` should point to a `Readable` object containing the
            'csv' formatted raw data; `meta_data` should point to the xml
            metadata.
        options : dict
            Optional. `only_alive` (boolean, default True) loads only frames
            with alive ball state; `sample_rate` (float between 0 and 1,
            default 1.0) specifies the amount of frames that should be loaded.

        Returns
        -------
        data_set : TrackingDataSet

        Examples
        --------
        >>> serializer = TRACABSerializer()
        >>> with open("metadata.xml", "rb") as meta:
        ...     with open("raw.dat", "rb") as raw:
        ...         data_set = serializer.deserialize(
        ...             inputs={
        ...                 'meta_data': meta,
        ...                 'raw_data': raw
        ...             },
        ...             options={
        ...                 'only_alive': True,
        ...                 'sample_rate': 1/12
        ...             }
        ...         )
        """
        self.__validate_inputs(inputs)
        options = options or {}

        sample_rate = float(options.get('sample_rate', 1.0))
        only_alive = bool(options.get('only_alive', True))

        with performance_logging("Loading metadata"):
            match = objectify.fromstring(inputs['meta_data'].read()).match
            frame_rate = int(match.attrib['iFrameRateFps'])
            pitch_size_width = float(match.attrib['fPitchXSizeMeters'])
            pitch_size_height = float(match.attrib['fPitchYSizeMeters'])

            periods = []
            for period_node in match.iterchildren(tag='period'):
                first_frame = int(period_node.attrib['iStartFrame'])
                last_frame = int(period_node.attrib['iEndFrame'])
                # Periods with both frame ids at 0 are skipped.
                if first_frame == 0 and last_frame == 0:
                    continue
                periods.append(
                    Period(
                        id=int(period_node.attrib['iId']),
                        start_timestamp=first_frame / frame_rate,
                        end_timestamp=last_frame / frame_rate,
                    ))

        with performance_logging("Loading data"):

            def _iter():
                # Yields (period, line) for every sampled line in a period.
                matched = 0
                step = 1. / sample_rate

                for raw_line in inputs['raw_data'].readlines():
                    text = raw_line.strip().decode("ascii")
                    if not text:
                        continue

                    # The frame id is the text before the first ":".
                    frame_id = int(text[:10].partition(":")[0])
                    if only_alive and not text.endswith("Alive;:"):
                        continue

                    for candidate in periods:
                        if not candidate.contains(frame_id / frame_rate):
                            continue
                        if matched % step == 0:
                            yield candidate, text
                        matched += 1

            frames = []
            for period, line in _iter():
                frame = self._frame_from_line(period, line, frame_rate)
                frames.append(frame)

                # The first frame seen in a period fixes its attacking
                # direction.
                if period.attacking_direction_set:
                    continue
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(frame))

        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY:
            orientation = Orientation.FIXED_HOME_AWAY
        else:
            orientation = Orientation.FIXED_AWAY_HOME

        half_width = pitch_size_width / 2
        half_height = pitch_size_height / 2

        return TrackingDataSet(
            flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE,
            frame_rate=frame_rate,
            orientation=orientation,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(-1 * pitch_size_width / 2, half_width),
                y_dim=Dimension(-1 * pitch_size_height / 2, half_height),
                x_per_meter=100,
                y_per_meter=100,
            ),
            periods=periods,
            records=frames,
        )