def _set_skillcorner_attacking_directions(cls, frames, periods):
    """
    With only partial tracking data we cannot rely on a single frame to
    infer the attacking directions, as a simple average of only some
    players' x-coords might not reflect the attacking direction.

    Instead, take a majority vote over the per-frame directions of every
    frame in a period. Periods without any frames get NOT_SET.
    """
    # Group the per-frame directions by period id, preserving frame order
    # (Counter tie-breaking depends on first-seen order).
    directions_by_period = {}
    for frame in frames:
        if frame.players_data:
            direction = attacking_direction_from_frame(frame)
        else:
            # No players visible in this frame: direction unknown.
            direction = AttackingDirection.NOT_SET
        directions_by_period.setdefault(frame.period.id, []).append(direction)

    # Majority vote per period.
    for period_id, period in periods.items():
        observed = directions_by_period.get(period_id)
        if observed:
            period.attacking_direction = Counter(observed).most_common(1)[0][0]
        else:
            period.attacking_direction = AttackingDirection.NOT_SET
def deserialize(self, inputs: Dict[str, Readable],
                options: Dict = None) -> TrackingDataset:
    """
    Deserialize Metrica tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data_home` should point to a `Readable` object containing
        the 'csv' formatted raw data for the home team. input `raw_data_away`
        should point to a `Readable` object containing the 'csv' formatted
        raw data for the away team.
    options : dict
        Options for deserialization of the Metrica file. Possible options are
        `sample_rate` (float between 0 and 1) to specify the amount of
        frames that should be loaded, `limit` to specify the max number of
        frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    ValueError when both input files don't seem to belong to each other

    See Also
    --------

    Examples
    --------
    >>> serializer = MetricaTrackingSerializer()
    >>> with open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_home, \
    >>>      open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away:
    >>>
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'raw_data_home': raw_home,
    >>>             'raw_data_away': raw_away
    >>>         },
    >>>         options={
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get('sample_rate', 1.0))
    limit = int(options.get('limit', 0))

    # consider reading this from data
    frame_rate = 25

    with performance_logging("prepare", logger=logger):
        home_iterator = self.__create_iterator(inputs['raw_data_home'],
                                               sample_rate, frame_rate)
        away_iterator = self.__create_iterator(inputs['raw_data_away'],
                                               sample_rate, frame_rate)

        # Consume both team files in lockstep: every step yields one
        # partial frame per team for the same moment in the match.
        partial_frames = zip(home_iterator, away_iterator)

    with performance_logging("loading", logger=logger):
        frames = []
        periods = []

        partial_frame_type = self.__PartialFrame
        home_partial_frame: partial_frame_type
        away_partial_frame: partial_frame_type
        for n, (home_partial_frame,
                away_partial_frame) in enumerate(partial_frames):
            # Raises when the two files drift apart (frame id / period
            # mismatch), i.e. the files don't belong to each other.
            self.__validate_partials(home_partial_frame, away_partial_frame)

            period: Period = home_partial_frame.period
            frame_id: int = home_partial_frame.frame_id

            frame = Frame(
                frame_id=frame_id,
                # timestamp is made relative to the start of the period
                timestamp=frame_id / frame_rate - period.start_timestamp,
                ball_position=home_partial_frame.ball_position,
                home_team_player_positions=home_partial_frame.player_positions,
                away_team_player_positions=away_partial_frame.player_positions,
                period=period,
                ball_state=None,
                ball_owning_team=None)

            frames.append(frame)

            # Record each period the first time one of its frames shows up.
            if not periods or period.id != periods[-1].id:
                periods.append(period)

            # The first frame of a period fixes its attacking direction.
            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame))

            # `n` from enumerate is zero-based; bump it first so the check
            # compares against the number of frames collected so far.
            n += 1
            if limit and n >= limit:
                break

    # Orientation is fixed for the whole match, derived from the first
    # period's attacking direction.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME)

    return TrackingDataset(
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        frame_rate=frame_rate,
        orientation=orientation,
        # Metrica coordinates are normalized: both axes run from 0 to 1.
        pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 1),
                                         y_dim=Dimension(0, 1)),
        periods=periods,
        records=frames)
def deserialize(self, inputs: TRACABInputs) -> TrackingDataset:
    """
    Deserialize TRACAB tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : TRACABInputs
        `meta_data` must be a readable containing the xml match metadata;
        `raw_data` a readable with one frame per line.

    Returns
    -------
    dataset : TrackingDataset
        Frames are sampled with `self.sample_rate`, optionally filtered to
        alive-ball frames (`self.only_alive`) and capped at `self.limit`.
    """
    # TODO: also used in Metrica, extract to a method
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    with performance_logging("Loading metadata", logger=logger):
        match = objectify.fromstring(inputs.meta_data.read()).match
        frame_rate = int(match.attrib["iFrameRateFps"])
        pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
        pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

        periods = []
        for period in match.iterchildren(tag="period"):
            start_frame_id = int(period.attrib["iStartFrame"])
            end_frame_id = int(period.attrib["iEndFrame"])
            # Periods with both frame ids at 0 were not played
            # (e.g. no extra time) and are skipped.
            if start_frame_id != 0 or end_frame_id != 0:
                periods.append(
                    Period(
                        id=int(period.attrib["iId"]),
                        start_timestamp=start_frame_id / frame_rate,
                        end_timestamp=end_frame_id / frame_rate,
                    )
                )

    with performance_logging("Loading data", logger=logger):
        transformer = self.get_transformer(
            length=pitch_size_width, width=pitch_size_height
        )

        def _iter():
            """Yield (period, line) for every raw line that passes the
            alive / sampling / period filters."""
            n = 0
            sample = 1.0 / self.sample_rate

            for line_ in inputs.raw_data.readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                # Each line starts with "<frame_id>:".
                frame_id = int(line_[:10].split(":", 1)[0])
                if self.only_alive and not line_.endswith("Alive;:"):
                    continue

                for period_ in periods:
                    if period_.contains(frame_id / frame_rate):
                        if n % sample == 0:
                            yield period_, line_
                        n += 1

        frames = []
        for n, (period, line) in enumerate(_iter()):
            frame = self._frame_from_line(teams, period, line, frame_rate)

            frame = transformer.transform_frame(frame)

            frames.append(frame)

            # First frame of a period fixes its attacking direction.
            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )

            # `n` is zero-based, so `n + 1` frames have been collected.
            # Fix: the previous `n >= self.limit` check only broke after
            # one extra frame was appended, returning limit + 1 frames.
            if self.limit and n + 1 >= self.limit:
                break

    # Orientation is fixed for the whole match, derived from the first
    # period's attacking direction.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.TRACAB,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(self, inputs: Dict[str, Readable],
                options: Dict = None) -> TrackingDataSet:
    """
    Deserialize TRACAB tracking data into a `TrackingDataSet`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'csv' formatted raw data. input `meta_data` should point to
        the xml metadata data.
    options : dict
        Options for deserialization of the TRACAB file. Possible options are
        `only_alive` (boolean) to specify that only frames with alive ball state
        should be loaded, or `sample_rate` (float between 0 and 1) to specify
        the amount of frames that should be loaded.

    Returns
    -------
    data_set : TrackingDataSet

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = TRACABSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>      open("raw.dat", "rb") as raw:
    >>>     data_set = serializer.deserialize(
    >>>         inputs={
    >>>             'meta_data': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>             'only_alive': True,
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get('sample_rate', 1.0))
    only_alive = bool(options.get('only_alive', True))

    with performance_logging("Loading metadata"):
        match = objectify.fromstring(inputs['meta_data'].read()).match
        frame_rate = int(match.attrib['iFrameRateFps'])
        pitch_size_width = float(match.attrib['fPitchXSizeMeters'])
        pitch_size_height = float(match.attrib['fPitchYSizeMeters'])

        periods = []
        for period in match.iterchildren(tag='period'):
            start_frame_id = int(period.attrib['iStartFrame'])
            end_frame_id = int(period.attrib['iEndFrame'])
            # Periods with both frame ids at 0 were not played
            # (e.g. no extra time) and are skipped.
            if start_frame_id != 0 or end_frame_id != 0:
                periods.append(
                    Period(id=int(period.attrib['iId']),
                           start_timestamp=start_frame_id / frame_rate,
                           end_timestamp=end_frame_id / frame_rate))

    with performance_logging("Loading data"):
        def _iter():
            # Yield (period, line) for every raw line that passes the
            # alive / sampling / period filters.
            n = 0
            sample = 1. / sample_rate

            for line in inputs['raw_data'].readlines():
                line = line.strip().decode("ascii")
                if not line:
                    continue

                # Each line starts with "<frame_id>:".
                frame_id = int(line[:10].split(":", 1)[0])
                if only_alive and not line.endswith("Alive;:"):
                    continue

                for period in periods:
                    if period.contains(frame_id / frame_rate):
                        if n % sample == 0:
                            yield period, line
                        n += 1

        frames = []
        for period, line in _iter():
            frame = self._frame_from_line(period, line, frame_rate)

            frames.append(frame)

            # First frame of a period fixes its attacking direction.
            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame))

    # Orientation is fixed for the whole match, derived from the first
    # period's attacking direction.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME)

    return TrackingDataSet(
        flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE,
        frame_rate=frame_rate,
        orientation=orientation,
        # TRACAB coordinates are centered on the pitch middle, in cm
        # (x_per_meter / y_per_meter = 100).
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(-1 * pitch_size_width / 2,
                            pitch_size_width / 2),
            y_dim=Dimension(-1 * pitch_size_height / 2,
                            pitch_size_height / 2),
            x_per_meter=100,
            y_per_meter=100),
        periods=periods,
        records=frames)
def deserialize(self, inputs: Dict[str, Readable],
                options: Dict = None) -> TrackingDataset:
    """
    Deserialize EPTS tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'csv' formatted raw data. input `meta_data` should point to
        the xml metadata data.
    options : dict
        Options for deserialization of the EPTS file. Possible options are
        `sample_rate` (float between 0 and 1) to specify the amount of
        frames that should be loaded, `limit` to specify the max number of
        frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset
        `orientation` is None when the attacking direction cannot be
        determined (no periods with a direction and no frames).

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = EPTSSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>      open("raw.dat", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'meta_data': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))

    with performance_logging("Loading metadata", logger=logger):
        meta_data = load_meta_data(inputs["meta_data"])
        periods = meta_data.periods

    with performance_logging("Loading data", logger=logger):
        # assume they are sorted
        frames = [
            self._frame_from_row(row, meta_data)
            for row in read_raw_data(
                raw_data=inputs["raw_data"],
                meta_data=meta_data,
                sensor_ids=["position"],  # we don't care about other sensors
                sample_rate=sample_rate,
                limit=limit,
            )
        ]

    if periods:
        start_attacking_direction = periods[0].attacking_direction
    elif frames:
        start_attacking_direction = attacking_direction_from_frame(
            frames[0])
    else:
        # Neither periods nor frames: the direction is unknowable.
        start_attacking_direction = None

    # Fix: previously a `None` start_attacking_direction slipped past the
    # `!= AttackingDirection.NOT_SET` check and arbitrarily produced
    # FIXED_AWAY_HOME; an unknown direction now yields orientation None.
    if start_attacking_direction in (None, AttackingDirection.NOT_SET):
        orientation = None
    elif start_attacking_direction == AttackingDirection.HOME_AWAY:
        orientation = Orientation.FIXED_HOME_AWAY
    else:
        orientation = Orientation.FIXED_AWAY_HOME

    return TrackingDataset(
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        frame_rate=meta_data.frame_rate,
        orientation=orientation,
        pitch_dimensions=meta_data.pitch_dimensions,
        periods=periods,
        records=frames,
    )
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> TrackingDataset:
    """
    Deserialize TRACAB tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'csv' formatted raw data. input `metadata` should point to
        the xml metadata data.
    options : dict
        Options for deserialization of the TRACAB file. Possible options are
        `only_alive` (boolean) to specify that only frames with alive ball state
        should be loaded, or `sample_rate` (float between 0 and 1) to specify
        the amount of frames that should be loaded, `limit` to specify the
        max number of frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = TRACABSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>      open("raw.dat", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'metadata': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>             'only_alive': True,
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))
    only_alive = bool(options.get("only_alive", True))

    # TODO: also used in Metrica, extract to a method
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    with performance_logging("Loading metadata", logger=logger):
        match = objectify.fromstring(inputs["metadata"].read()).match
        frame_rate = int(match.attrib["iFrameRateFps"])
        pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
        pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

        periods = []
        for period in match.iterchildren(tag="period"):
            start_frame_id = int(period.attrib["iStartFrame"])
            end_frame_id = int(period.attrib["iEndFrame"])
            # Periods with both frame ids at 0 were not played
            # (e.g. no extra time) and are skipped.
            if start_frame_id != 0 or end_frame_id != 0:
                periods.append(
                    Period(
                        id=int(period.attrib["iId"]),
                        start_timestamp=start_frame_id / frame_rate,
                        end_timestamp=end_frame_id / frame_rate,
                    )
                )

    with performance_logging("Loading data", logger=logger):

        def _iter():
            """Yield (period, line) for every raw line that passes the
            alive / sampling / period filters."""
            n = 0
            sample = 1.0 / sample_rate

            for line_ in inputs["raw_data"].readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                # Each line starts with "<frame_id>:".
                frame_id = int(line_[:10].split(":", 1)[0])
                if only_alive and not line_.endswith("Alive;:"):
                    continue

                for period_ in periods:
                    if period_.contains(frame_id / frame_rate):
                        if n % sample == 0:
                            yield period_, line_
                        n += 1

        frames = []
        for n, (period, line) in enumerate(_iter()):
            frame = self._frame_from_line(teams, period, line, frame_rate)

            frames.append(frame)

            # First frame of a period fixes its attacking direction.
            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )

            # `n` is zero-based, so `n + 1` frames have been collected.
            # Fix: the previous `n >= limit` check only broke after one
            # extra frame was appended, returning limit + 1 frames.
            if limit and n + 1 >= limit:
                break

    # Orientation is fixed for the whole match, derived from the first
    # period's attacking direction.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        # TRACAB coordinates are centered on the pitch middle, in cm
        # (x_per_meter / y_per_meter = 100).
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(
                -1 * pitch_size_width / 2, pitch_size_width / 2
            ),
            y_dim=Dimension(
                -1 * pitch_size_height / 2, pitch_size_height / 2
            ),
            x_per_meter=100,
            y_per_meter=100,
        ),
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.TRACAB,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(self, inputs: SecondSpectrumInputs) -> TrackingDataset:
    """
    Deserialize Second Spectrum tracking data into a `TrackingDataset`.

    `inputs.meta_data` may be either JSON (detected by a leading '{', in
    which case it also carries the 'additional metadata': team/player
    info) or xml. `inputs.raw_data` holds one JSON frame per line.
    Frames are sampled with `self.sample_rate`, optionally filtered to
    live frames (`self.only_alive`) and capped at `self.limit`.
    """
    metadata = None

    # Handles the XML metadata that contains the pitch dimensions and frame info
    with performance_logging("Loading XML metadata", logger=logger):
        # The meta data can also be in JSON format. In that case
        # it also contains the 'additional metadata'.
        # First do a 'peek' to determine the char
        first_byte = inputs.meta_data.read(1)
        if first_byte == b"{":
            metadata = json.loads(first_byte + inputs.meta_data.read())

            frame_rate = int(metadata["fps"])
            # NOTE(review): "pitchLength" is assigned to the *height* and
            # "pitchWidth" to the *width*, the opposite of the xml branch
            # (fPitchYSizeMeters -> height, fPitchXSizeMeters -> width).
            # Looks swapped — confirm against real Second Spectrum files.
            pitch_size_height = float(metadata["pitchLength"])
            pitch_size_width = float(metadata["pitchWidth"])

            periods = []
            for period in metadata["periods"]:
                start_frame_id = int(period["startFrameIdx"])
                end_frame_id = int(period["endFrameIdx"])
                if start_frame_id != 0 or end_frame_id != 0:
                    # Frame IDs are unix timestamps (in milliseconds)
                    periods.append(
                        Period(
                            id=int(period["number"]),
                            start_timestamp=start_frame_id,
                            end_timestamp=end_frame_id,
                        )
                    )
        else:
            match = objectify.fromstring(
                first_byte + inputs.meta_data.read()
            ).match
            frame_rate = int(match.attrib["iFrameRateFps"])
            pitch_size_height = float(match.attrib["fPitchYSizeMeters"])
            pitch_size_width = float(match.attrib["fPitchXSizeMeters"])

            periods = []
            for period in match.iterchildren(tag="period"):
                start_frame_id = int(period.attrib["iStartFrame"])
                end_frame_id = int(period.attrib["iEndFrame"])
                if start_frame_id != 0 or end_frame_id != 0:
                    # Frame IDs are unix timestamps (in milliseconds)
                    periods.append(
                        Period(
                            id=int(period.attrib["iId"]),
                            start_timestamp=start_frame_id,
                            end_timestamp=end_frame_id,
                        )
                    )

    # Default team initialisation
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    # Optional team/player metadata: either a separate JSON file or the
    # JSON `metadata` parsed above. Best effort — any failure keeps the
    # default teams.
    if inputs.additional_meta_data or metadata:
        with performance_logging("Loading JSON metadata", logger=logger):
            try:
                if inputs.additional_meta_data:
                    metadata = json.loads(
                        inputs.additional_meta_data.read()
                    )

                home_team_id = metadata["homeOptaId"]
                away_team_id = metadata["awayOptaId"]

                # Tries to parse (short) team names from the description string
                try:
                    home_name = (
                        metadata["description"].split("-")[0].strip()
                    )
                    away_name = (
                        metadata["description"]
                        .split("-")[1]
                        .split(":")[0]
                        .strip()
                    )
                except:
                    home_name, away_name = "home", "away"

                teams[0].team_id = home_team_id
                teams[0].name = home_name
                teams[1].team_id = away_team_id
                teams[1].name = away_name

                for team, team_str in zip(
                    teams, ["homePlayers", "awayPlayers"]
                ):
                    for player_data in metadata[team_str]:
                        # We use the attributes field of Player to store the extra IDs provided by the
                        # metadata. We designate the player_id to be the 'optaId' field as this is what's
                        # used as 'player_id' in the raw frame data file
                        player_attributes = {
                            k: v
                            for k, v in player_data.items()
                            if k in ["ssiId", "optaUuid"]
                        }

                        player = Player(
                            player_id=player_data["optaId"],
                            name=player_data["name"],
                            starting=player_data["position"] != "SUB",
                            position=player_data["position"],
                            team=team,
                            jersey_no=int(player_data["number"]),
                            attributes=player_attributes,
                        )
                        team.players.append(player)
            except:  # TODO: More specific exception
                # NOTE(review): uses the root `logging` module instead of
                # the module-level `logger` used everywhere else here.
                logging.warning(
                    "Optional JSON Metadata is malformed. Continuing without"
                )

    # Handles the tracking frame data
    with performance_logging("Loading data", logger=logger):
        transformer = self.get_transformer(
            length=pitch_size_width, width=pitch_size_height
        )

        def _iter():
            # Yield the parsed frame dict for every raw line that passes
            # the live / sampling filters.
            n = 0
            sample = 1 / self.sample_rate

            for line_ in inputs.raw_data.readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                # Each line is just json so we just parse it
                frame_data = json.loads(line_)

                if self.only_alive and not frame_data["live"]:
                    continue

                if n % sample == 0:
                    yield frame_data
                n += 1

        frames = []
        for n, frame_data in enumerate(_iter()):
            # Frame's "period" is 1-based.
            period = periods[frame_data["period"] - 1]

            frame = self._frame_from_framedata(teams, period, frame_data)
            frame = transformer.transform_frame(frame)
            frames.append(frame)

            # First frame of a period fixes its attacking direction.
            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )

            # `n` is zero-based, so `n + 1` frames have been collected.
            if self.limit and n + 1 >= self.limit:
                break

    # Orientation is fixed for the whole match, derived from the first
    # period's attacking direction.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=transformer.get_to_coordinate_system().pitch_dimensions,
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SECONDSPECTRUM,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
        coordinate_system=transformer.get_to_coordinate_system(),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )