Example #1
    def _set_skillcorner_attacking_directions(cls, frames, periods):
        """
        with only partial tracking data we cannot rely on a single frame to
        infer the attacking directions as a simple average of only some players
        x-coords might not reflect the attacking direction.
        """
        attacking_directions = []

        for frame in frames:
            if len(frame.players_data) > 0:
                attacking_directions.append(
                    attacking_direction_from_frame(frame))
            else:
                attacking_directions.append(AttackingDirection.NOT_SET)

        frame_periods = np.array([_frame.period.id for _frame in frames])

        for period in periods.keys():
            if period in frame_periods:
                count = Counter(
                    np.array(attacking_directions)[frame_periods == period])
                att_direction = count.most_common()[0][0]
                periods[period].attacking_direction = att_direction
            else:
                periods[period].attacking_direction = (
                    AttackingDirection.NOT_SET
                )
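A minimal sketch of the per-period majority vote described in the docstring above, with plain strings standing in for the `AttackingDirection` values:

from collections import Counter

# All attacking directions observed in one period's frames (strings stand in
# for AttackingDirection members; the enum works the same way with Counter).
directions_in_period = ["home-away", "home-away", "not-set", "home-away", "away-home"]

# most_common() returns (value, count) pairs sorted by count, so the first
# entry is the dominant attacking direction for the period.
majority_direction = Counter(directions_in_period).most_common()[0][0]
print(majority_direction)  # -> "home-away"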
Example #2
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataset:
        """
        Deserialize Metrica tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            Input `raw_data_home` should point to a `Readable` object containing
            the CSV-formatted raw data for the home team; input `raw_data_away`
            should point to a `Readable` object containing the CSV-formatted raw
            data for the away team.
        options : dict
            Options for deserialization of the Metrica file. Possible options are
            `sample_rate` (float between 0 and 1) to specify the fraction of
            frames that should be loaded, and `limit` to specify the maximum
            number of frames that will be returned.
        Returns
        -------
        dataset : TrackingDataset

        Raises
        ------
        ValueError
            When the home and away input files do not appear to belong to each other.

        See Also
        --------

        Examples
        --------
        >>> serializer = MetricaTrackingSerializer()
        >>> with open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_home, \
        >>>      open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_away:
        >>>
        >>>     dataset = serializer.deserialize(
        >>>         inputs={
        >>>             'raw_data_home': raw_home,
        >>>             'raw_data_away': raw_away
        >>>         },
        >>>         options={
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)
        if not options:
            options = {}

        sample_rate = float(options.get('sample_rate', 1.0))
        limit = int(options.get('limit', 0))

        # consider reading this from data
        frame_rate = 25

        with performance_logging("prepare", logger=logger):
            home_iterator = self.__create_iterator(inputs['raw_data_home'],
                                                   sample_rate, frame_rate)
            away_iterator = self.__create_iterator(inputs['raw_data_away'],
                                                   sample_rate, frame_rate)

            partial_frames = zip(home_iterator, away_iterator)

        with performance_logging("loading", logger=logger):
            frames = []
            periods = []

            partial_frame_type = self.__PartialFrame
            home_partial_frame: partial_frame_type
            away_partial_frame: partial_frame_type
            for n, (home_partial_frame,
                    away_partial_frame) in enumerate(partial_frames):
                self.__validate_partials(home_partial_frame,
                                         away_partial_frame)

                period: Period = home_partial_frame.period
                frame_id: int = home_partial_frame.frame_id

                frame = Frame(
                    frame_id=frame_id,
                    timestamp=frame_id / frame_rate - period.start_timestamp,
                    ball_position=home_partial_frame.ball_position,
                    home_team_player_positions=home_partial_frame.player_positions,
                    away_team_player_positions=away_partial_frame.player_positions,
                    period=period,
                    ball_state=None,
                    ball_owning_team=None,
                )

                frames.append(frame)

                if not periods or period.id != periods[-1].id:
                    periods.append(period)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame))

                n += 1
                if limit and n >= limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction
            == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME)

        return TrackingDataset(
            flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
            frame_rate=frame_rate,
            orientation=orientation,
            pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 1),
                                             y_dim=Dimension(0, 1)),
            periods=periods,
            records=frames)
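A small worked example of the timestamp computation used when building each `Frame` above (the period start value is illustrative):

frame_rate = 25                # hard-coded above
period_start_timestamp = 60.0  # illustrative: period starts 60 s into the stream
frame_id = 3000

# Timestamp relative to the start of the period, as in the Frame construction above.
timestamp = frame_id / frame_rate - period_start_timestamp
print(timestamp)  # -> 60.0 seconds into the period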
Example #3
    def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
        """Deserialize TRACAB tracking data into a `TrackingDataset`."""
        # TODO: also used in Metrica, extract to a method
        home_team = Team(team_id="home", name="home", ground=Ground.HOME)
        away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
        teams = [home_team, away_team]

        with performance_logging("Loading metadata", logger=logger):
            match = objectify.fromstring(inputs.meta_data.read()).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                if start_frame_id != 0 or end_frame_id != 0:
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id / frame_rate,
                            end_timestamp=end_frame_id / frame_rate,
                        )
                    )

        with performance_logging("Loading data", logger=logger):

            transformer = self.get_transformer(
                length=pitch_size_width, width=pitch_size_height
            )

            def _iter():
                n = 0
                sample = 1.0 / self.sample_rate

                for line_ in inputs.raw_data.readlines():
                    line_ = line_.strip().decode("ascii")
                    if not line_:
                        continue

                    frame_id = int(line_[:10].split(":", 1)[0])
                    if self.only_alive and not line_.endswith("Alive;:"):
                        continue

                    for period_ in periods:
                        if period_.contains(frame_id / frame_rate):
                            if n % sample == 0:
                                yield period_, line_
                            n += 1

            frames = []
            for n, (period, line) in enumerate(_iter()):
                frame = self._frame_from_line(teams, period, line, frame_rate)

                frame = transformer.transform_frame(frame)

                frames.append(frame)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame
                        )
                    )

                if self.limit and n >= self.limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY
            if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
            else Orientation.FIXED_AWAY_HOME
        )

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
            score=None,
            frame_rate=frame_rate,
            orientation=orientation,
            provider=Provider.TRACAB,
            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return TrackingDataset(
            records=frames,
            metadata=metadata,
        )
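A hedged usage sketch for this dataclass-based variant. The `TRACABInputs` field names (`meta_data`, `raw_data`) come from the attribute access above; the deserializer's constructor arguments (`sample_rate`, `limit`, `only_alive`), the import path and the file names are assumptions and may differ from the actual API.

# Hypothetical usage; import path and constructor signature are assumptions.
from kloppy.infra.serializers.tracking.tracab import (
    TRACABDeserializer,
    TRACABInputs,
)

with open("metadata.xml", "rb") as meta, open("raw.dat", "rb") as raw:
    deserializer = TRACABDeserializer(
        sample_rate=1 / 12, limit=0, only_alive=True
    )
    dataset = deserializer.deserialize(
        inputs=TRACABInputs(meta_data=meta, raw_data=raw)
    )
    print(len(dataset.records), dataset.metadata.frame_rate)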
Example #4
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataSet:
        """
        Deserialize TRACAB tracking data into a `TrackingDataSet`.

        Parameters
        ----------
        inputs : dict
            Input `raw_data` should point to a `Readable` object containing
            the raw data, and input `meta_data` should point to a `Readable`
            object containing the XML metadata.
        options : dict
            Options for deserialization of the TRACAB file. Possible options are
            `only_alive` (boolean) to specify that only frames with an alive ball
            state should be loaded, and `sample_rate` (float between 0 and 1) to
            specify the fraction of frames that should be loaded.
        Returns
        -------
        data_set : TrackingDataSet
        Raises
        ------
        -

        See Also
        --------

        Examples
        --------
        >>> serializer = TRACABSerializer()
        >>> with open("metadata.xml", "rb") as meta, \
        >>>      open("raw.dat", "rb") as raw:
        >>>     data_set = serializer.deserialize(
        >>>         inputs={
        >>>             'meta_data': meta,
        >>>             'raw_data': raw
        >>>         },
        >>>         options={
        >>>             'only_alive': True,
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get('sample_rate', 1.0))
        only_alive = bool(options.get('only_alive', True))

        with performance_logging("Loading metadata"):
            match = objectify.fromstring(inputs['meta_data'].read()).match
            frame_rate = int(match.attrib['iFrameRateFps'])
            pitch_size_width = float(match.attrib['fPitchXSizeMeters'])
            pitch_size_height = float(match.attrib['fPitchYSizeMeters'])

            periods = []
            for period in match.iterchildren(tag='period'):
                start_frame_id = int(period.attrib['iStartFrame'])
                end_frame_id = int(period.attrib['iEndFrame'])
                if start_frame_id != 0 or end_frame_id != 0:
                    periods.append(
                        Period(id=int(period.attrib['iId']),
                               start_timestamp=start_frame_id / frame_rate,
                               end_timestamp=end_frame_id / frame_rate))

        with performance_logging("Loading data"):

            def _iter():
                n = 0
                sample = 1. / sample_rate

                for line in inputs['raw_data'].readlines():
                    line = line.strip().decode("ascii")
                    if not line:
                        continue

                    frame_id = int(line[:10].split(":", 1)[0])
                    if only_alive and not line.endswith("Alive;:"):
                        continue

                    for period in periods:
                        if period.contains(frame_id / frame_rate):
                            if n % sample == 0:
                                yield period, line
                            n += 1

            frames = []
            for period, line in _iter():
                frame = self._frame_from_line(period, line, frame_rate)

                frames.append(frame)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame))

        orientation = (
            Orientation.FIXED_HOME_AWAY if periods[0].attacking_direction
            == AttackingDirection.HOME_AWAY else Orientation.FIXED_AWAY_HOME)

        return TrackingDataSet(flags=DataSetFlag.BALL_OWNING_TEAM
                               | DataSetFlag.BALL_STATE,
                               frame_rate=frame_rate,
                               orientation=orientation,
                               pitch_dimensions=PitchDimensions(
                                   x_dim=Dimension(-1 * pitch_size_width / 2,
                                                   pitch_size_width / 2),
                                   y_dim=Dimension(-1 * pitch_size_height / 2,
                                                   pitch_size_height / 2),
                                   x_per_meter=100,
                                   y_per_meter=100),
                               periods=periods,
                               records=frames)
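A small sketch of the sampling arithmetic used inside `_iter` above: with `sample_rate = 1/12`, `sample` becomes `12.0` and only every twelfth eligible line is yielded (the modulo test is done against a float).

sample_rate = 1 / 12
sample = 1.0 / sample_rate  # 12.0

# Frame counter values that pass the `n % sample == 0` test:
kept = [n for n in range(36) if n % sample == 0]
print(kept)  # -> [0, 12, 24]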
Example #5
    def deserialize(self,
                    inputs: Dict[str, Readable],
                    options: Dict = None) -> TrackingDataset:
        """
        Deserialize EPTS tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            Input `raw_data` should point to a `Readable` object containing
            the CSV-formatted raw data, and input `meta_data` should point to
            a `Readable` object containing the XML metadata.
        options : dict
            Options for deserialization of the EPTS file. Possible options are
            `sample_rate` (float between 0 and 1) to specify the fraction of
            frames that should be loaded, and `limit` to specify the maximum
            number of frames that will be returned.
        Returns
        -------
        dataset : TrackingDataset
        Raises
        ------
        -

        See Also
        --------

        Examples
        --------
        >>> serializer = EPTSSerializer()
        >>> with open("metadata.xml", "rb") as meta, \
        >>>      open("raw.dat", "rb") as raw:
        >>>     dataset = serializer.deserialize(
        >>>         inputs={
        >>>             'meta_data': meta,
        >>>             'raw_data': raw
        >>>         },
        >>>         options={
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get("sample_rate", 1.0))
        limit = int(options.get("limit", 0))

        with performance_logging("Loading metadata", logger=logger):
            meta_data = load_meta_data(inputs["meta_data"])

        periods = meta_data.periods

        with performance_logging("Loading data", logger=logger):
            # assume they are sorted
            frames = [
                self._frame_from_row(row, meta_data)
                for row in read_raw_data(
                    raw_data=inputs["raw_data"],
                    meta_data=meta_data,
                    sensor_ids=["position"],  # we don't care about other sensors
                    sample_rate=sample_rate,
                    limit=limit,
                )
            ]

        if periods:
            start_attacking_direction = periods[0].attacking_direction
        elif frames:
            start_attacking_direction = attacking_direction_from_frame(
                frames[0])
        else:
            start_attacking_direction = None

        if start_attacking_direction == AttackingDirection.NOT_SET:
            orientation = None
        elif start_attacking_direction == AttackingDirection.HOME_AWAY:
            orientation = Orientation.FIXED_HOME_AWAY
        else:
            orientation = Orientation.FIXED_AWAY_HOME

        return TrackingDataset(
            flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
            frame_rate=meta_data.frame_rate,
            orientation=orientation,
            pitch_dimensions=meta_data.pitch_dimensions,
            periods=periods,
            records=frames,
        )
Example #6
    def deserialize(
        self, inputs: Dict[str, Readable], options: Dict = None
    ) -> TrackingDataset:
        """
        Deserialize TRACAB tracking data into a `TrackingDataset`.

        Parameters
        ----------
        inputs : dict
            Input `raw_data` should point to a `Readable` object containing
            the raw data, and input `metadata` should point to a `Readable`
            object containing the XML metadata.
        options : dict
            Options for deserialization of the TRACAB file. Possible options are
            `only_alive` (boolean) to specify that only frames with an alive ball
            state should be loaded, `sample_rate` (float between 0 and 1) to
            specify the fraction of frames that should be loaded, and `limit` to
            specify the maximum number of frames that will be returned.
        Returns
        -------
        dataset : TrackingDataset
        Raises
        ------
        -

        See Also
        --------

        Examples
        --------
        >>> serializer = TRACABSerializer()
        >>> with open("metadata.xml", "rb") as meta, \
        >>>      open("raw.dat", "rb") as raw:
        >>>     dataset = serializer.deserialize(
        >>>         inputs={
        >>>             'metadata': meta,
        >>>             'raw_data': raw
        >>>         },
        >>>         options={
        >>>             'only_alive': True,
        >>>             'sample_rate': 1/12
        >>>         }
        >>>     )
        """
        self.__validate_inputs(inputs)

        if not options:
            options = {}

        sample_rate = float(options.get("sample_rate", 1.0))
        limit = int(options.get("limit", 0))
        only_alive = bool(options.get("only_alive", True))

        # TODO: also used in Metrica, extract to a method
        home_team = Team(team_id="home", name="home", ground=Ground.HOME)
        away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
        teams = [home_team, away_team]

        with performance_logging("Loading metadata", logger=logger):
            match = objectify.fromstring(inputs["metadata"].read()).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                if start_frame_id != 0 or end_frame_id != 0:
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id / frame_rate,
                            end_timestamp=end_frame_id / frame_rate,
                        )
                    )

        with performance_logging("Loading data", logger=logger):

            def _iter():
                n = 0
                sample = 1.0 / sample_rate

                for line_ in inputs["raw_data"].readlines():
                    line_ = line_.strip().decode("ascii")
                    if not line_:
                        continue

                    frame_id = int(line_[:10].split(":", 1)[0])
                    if only_alive and not line_.endswith("Alive;:"):
                        continue

                    for period_ in periods:
                        if period_.contains(frame_id / frame_rate):
                            if n % sample == 0:
                                yield period_, line_
                            n += 1

            frames = []
            for n, (period, line) in enumerate(_iter()):
                frame = self._frame_from_line(teams, period, line, frame_rate)

                frames.append(frame)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame
                        )
                    )

                if limit and n >= limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY
            if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
            else Orientation.FIXED_AWAY_HOME
        )

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=PitchDimensions(
                x_dim=Dimension(
                    -1 * pitch_size_width / 2, pitch_size_width / 2
                ),
                y_dim=Dimension(
                    -1 * pitch_size_height / 2, pitch_size_height / 2
                ),
                x_per_meter=100,
                y_per_meter=100,
            ),
            score=None,
            frame_rate=frame_rate,
            orientation=orientation,
            provider=Provider.TRACAB,
            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        )

        return TrackingDataset(
            records=frames,
            metadata=metadata,
        )
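A worked example of the pitch dimensions constructed above: with `x_per_meter=100` and `y_per_meter=100` the raw TRACAB coordinates are in centimetres, centred on the pitch centre. The pitch size below is illustrative; the real values come from the metadata file.

pitch_size_width = 105.0   # fPitchXSizeMeters (illustrative)
pitch_size_height = 68.0   # fPitchYSizeMeters (illustrative)

x_dim = (-pitch_size_width / 2, pitch_size_width / 2)    # (-52.5, 52.5) metres
y_dim = (-pitch_size_height / 2, pitch_size_height / 2)  # (-34.0, 34.0) metres

# With x_per_meter=100, a raw TRACAB x-coordinate of 1050 corresponds to
# 1050 / 100 = 10.5 metres to the right of the centre spot.
raw_x = 1050
print(raw_x / 100)  # -> 10.5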
Example #7
    def deserialize(self, inputs: SecondSpectrumInputs) -> TrackingDataset:
        """Deserialize Second Spectrum tracking data into a `TrackingDataset`."""
        metadata = None

        # Handles the metadata that contains the pitch dimensions and frame info
        with performance_logging("Loading XML metadata", logger=logger):
            # The metadata can also be in JSON format; in that case it also
            # contains the 'additional metadata'. Peek at the first byte to
            # determine which format we are dealing with.
            first_byte = inputs.meta_data.read(1)
            if first_byte == b"{":
                metadata = json.loads(first_byte + inputs.meta_data.read())

                frame_rate = int(metadata["fps"])
                pitch_size_height = float(metadata["pitchLength"])
                pitch_size_width = float(metadata["pitchWidth"])

                periods = []
                for period in metadata["periods"]:
                    start_frame_id = int(period["startFrameIdx"])
                    end_frame_id = int(period["endFrameIdx"])
                    if start_frame_id != 0 or end_frame_id != 0:
                        # Frame IDs are unix timestamps (in milliseconds)
                        periods.append(
                            Period(
                                id=int(period["number"]),
                                start_timestamp=start_frame_id,
                                end_timestamp=end_frame_id,
                            )
                        )
            else:
                match = objectify.fromstring(
                    first_byte + inputs.meta_data.read()
                ).match
                frame_rate = int(match.attrib["iFrameRateFps"])
                pitch_size_height = float(match.attrib["fPitchYSizeMeters"])
                pitch_size_width = float(match.attrib["fPitchXSizeMeters"])

                periods = []
                for period in match.iterchildren(tag="period"):
                    start_frame_id = int(period.attrib["iStartFrame"])
                    end_frame_id = int(period.attrib["iEndFrame"])
                    if start_frame_id != 0 or end_frame_id != 0:
                        # Frame IDs are unix timestamps (in milliseconds)
                        periods.append(
                            Period(
                                id=int(period.attrib["iId"]),
                                start_timestamp=start_frame_id,
                                end_timestamp=end_frame_id,
                            )
                        )

        # Default team initialisation
        home_team = Team(team_id="home", name="home", ground=Ground.HOME)
        away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
        teams = [home_team, away_team]

        if inputs.additional_meta_data or metadata:
            with performance_logging("Loading JSON metadata", logger=logger):
                try:
                    if inputs.additional_meta_data:
                        metadata = json.loads(
                            inputs.additional_meta_data.read()
                        )

                    home_team_id = metadata["homeOptaId"]
                    away_team_id = metadata["awayOptaId"]

                    # Tries to parse (short) team names from the description string
                    try:
                        home_name = (
                            metadata["description"].split("-")[0].strip()
                        )
                        away_name = (
                            metadata["description"]
                            .split("-")[1]
                            .split(":")[0]
                            .strip()
                        )
                    except Exception:
                        home_name, away_name = "home", "away"

                    teams[0].team_id = home_team_id
                    teams[0].name = home_name
                    teams[1].team_id = away_team_id
                    teams[1].name = away_name

                    for team, team_str in zip(
                        teams, ["homePlayers", "awayPlayers"]
                    ):
                        for player_data in metadata[team_str]:

                            # We use the attributes field of Player to store the extra IDs provided by the
                            # metadata. We designate the player_id to be the 'optaId' field as this is what's
                            # used as 'player_id' in the raw frame data file
                            player_attributes = {
                                k: v
                                for k, v in player_data.items()
                                if k in ["ssiId", "optaUuid"]
                            }

                            player = Player(
                                player_id=player_data["optaId"],
                                name=player_data["name"],
                                starting=player_data["position"] != "SUB",
                                position=player_data["position"],
                                team=team,
                                jersey_no=int(player_data["number"]),
                                attributes=player_attributes,
                            )
                            team.players.append(player)

                except Exception:  # TODO: catch a more specific exception
                    logger.warning(
                        "Optional JSON metadata is malformed. Continuing without it."
                    )

        # Handles the tracking frame data
        with performance_logging("Loading data", logger=logger):
            transformer = self.get_transformer(
                length=pitch_size_width, width=pitch_size_height
            )

            def _iter():
                n = 0
                sample = 1 / self.sample_rate

                for line_ in inputs.raw_data.readlines():
                    line_ = line_.strip().decode("ascii")
                    if not line_:
                        continue

                    # Each line is just json so we just parse it
                    frame_data = json.loads(line_)

                    if self.only_alive and not frame_data["live"]:
                        continue

                    if n % sample == 0:
                        yield frame_data

                    n += 1

            frames = []
            for n, frame_data in enumerate(_iter()):
                period = periods[frame_data["period"] - 1]

                frame = self._frame_from_framedata(teams, period, frame_data)
                frame = transformer.transform_frame(frame)
                frames.append(frame)

                if not period.attacking_direction_set:
                    period.set_attacking_direction(
                        attacking_direction=attacking_direction_from_frame(
                            frame
                        )
                    )

                if self.limit and n + 1 >= self.limit:
                    break

        orientation = (
            Orientation.FIXED_HOME_AWAY
            if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
            else Orientation.FIXED_AWAY_HOME
        )

        metadata = Metadata(
            teams=teams,
            periods=periods,
            pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
            score=None,
            frame_rate=frame_rate,
            orientation=orientation,
            provider=Provider.SECONDSPECTRUM,
            flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
            coordinate_system=transformer.get_to_coordinate_system(),
        )

        return TrackingDataset(
            records=frames,
            metadata=metadata,
        )
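A hedged usage sketch for the Second Spectrum deserializer above. The `SecondSpectrumInputs` field names (`meta_data`, `raw_data`, `additional_meta_data`) are taken from the attribute access in the code; the import path, constructor arguments and file names are assumptions.

# Hypothetical usage; import path, constructor signature and file names are assumptions.
from kloppy.infra.serializers.tracking.secondspectrum import (
    SecondSpectrumDeserializer,
    SecondSpectrumInputs,
)

with open("metadata.xml", "rb") as meta, open("frames.jsonl", "rb") as raw:
    deserializer = SecondSpectrumDeserializer(
        sample_rate=1 / 12, limit=0, only_alive=True
    )
    dataset = deserializer.deserialize(
        inputs=SecondSpectrumInputs(
            meta_data=meta,
            raw_data=raw,
            additional_meta_data=None,  # optional, per the check in the code above
        )
    )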