def test_pitchdimensions_properties(self):
    """Check `length`/`width` both without and with a per-meter scale."""
    # No scale supplied: both derived properties must be None.
    unscaled = PitchDimensions(
        x_dim=Dimension(-100, 100),
        y_dim=Dimension(-50, 50),
    )
    assert unscaled.length is None
    assert unscaled.width is None

    # Scale supplied through the `*_per_meter` arguments.
    scaled = PitchDimensions(
        x_dim=Dimension(-100, 100),
        y_dim=Dimension(-50, 50),
        x_per_meter=20 / 12,
        y_per_meter=10 / 8,
    )
    assert scaled.length == 120
    assert scaled.width == 80
def test_pitchdimensions_properties(self):
    """Check `length`/`width` both when omitted and when given explicitly."""
    # Neither `length` nor `width` supplied: both must be None.
    unscaled = PitchDimensions(
        x_dim=Dimension(-100, 100),
        y_dim=Dimension(-50, 50),
    )
    assert unscaled.length is None
    assert unscaled.width is None

    # Explicit size: the properties must echo the constructor arguments.
    scaled = PitchDimensions(
        x_dim=Dimension(-100, 100),
        y_dim=Dimension(-50, 50),
        length=120,
        width=80,
    )
    assert scaled.length == 120
    assert scaled.width == 80
def _get_tracking_dataset(self):
    """Build a two-period, two-frame `TrackingDataset` fixture."""
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    first_period = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=10.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    second_period = Period(
        id=2,
        start_timestamp=15.0,
        end_timestamp=25.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    periods = [first_period, second_period]

    metadata = Metadata(
        # every flag except ball-owning-team and ball-state
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
        ),
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        periods=periods,
        teams=teams,
        score=None,
        provider=None,
    )

    # Frame 1 carries only a ball position, frame 2 also one home player.
    frame_without_players = Frame(
        frame_id=1,
        timestamp=0.1,
        ball_owning_team=None,
        ball_state=None,
        period=first_period,
        players_coordinates={},
        ball_coordinates=Point(x=100, y=-50),
    )
    home_player = Player(team=home_team, player_id="home_1", jersey_no=1)
    frame_with_player = Frame(
        frame_id=2,
        timestamp=0.2,
        ball_owning_team=None,
        ball_state=None,
        period=first_period,
        players_coordinates={home_player: Point(x=15, y=35)},
        ball_coordinates=Point(x=0, y=50),
    )

    return TrackingDataset(
        metadata=metadata,
        records=[frame_without_players, frame_with_player],
    )
def _get_tracking_dataset(self):
    """Build a two-period, two-frame `TrackingDataset` fixture."""
    first_period = Period(
        id=1,
        start_timestamp=0.0,
        end_timestamp=10.0,
        attacking_direction=AttackingDirection.HOME_AWAY,
    )
    second_period = Period(
        id=2,
        start_timestamp=15.0,
        end_timestamp=25.0,
        attacking_direction=AttackingDirection.AWAY_HOME,
    )
    periods = [first_period, second_period]

    return TrackingDataset(
        # every flag except ball-owning-team and ball-state
        flags=~(DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE),
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(-50, 50)
        ),
        orientation=Orientation.HOME_TEAM,
        frame_rate=25,
        records=[
            # Frame 1: ball only, no player positions.
            Frame(
                frame_id=1,
                timestamp=0.1,
                ball_owning_team=None,
                ball_state=None,
                period=first_period,
                away_team_player_positions={},
                home_team_player_positions={},
                ball_position=Point(x=100, y=-50),
            ),
            # Frame 2: one player per team, keyed by the string "1".
            Frame(
                frame_id=2,
                timestamp=0.2,
                ball_owning_team=None,
                ball_state=None,
                period=first_period,
                away_team_player_positions={"1": Point(x=10, y=20)},
                home_team_player_positions={"1": Point(x=15, y=35)},
                ball_position=Point(x=0, y=50),
            ),
        ],
        periods=periods,
    )
def test_transform(self):
    """Transform the fixture to AWAY_TEAM orientation on a unit pitch."""
    source = self._get_tracking_dataset()

    # orientation change AND dimension scale
    result = source.transform(
        to_orientation="AWAY_TEAM",
        to_pitch_dimensions=[[0, 1], [0, 1]],
    )

    assert result.frames[0].ball_coordinates == Point3D(x=0, y=1, z=0)
    assert result.frames[1].ball_coordinates == Point3D(x=1, y=0, z=1)

    assert result.metadata.orientation == Orientation.AWAY_TEAM
    assert result.metadata.coordinate_system is None

    unit_pitch = PitchDimensions(
        x_dim=Dimension(min=0, max=1),
        y_dim=Dimension(min=0, max=1),
    )
    assert result.metadata.pitch_dimensions == unit_pitch
def _load_pitch_dimensions(
    meta_data_elm, sensors: List[Sensor]
) -> Union[None, PitchDimensions]:
    """
    Derive the pitch dimensions from the session's `FieldSize` element.

    A `PitchDimensions` on the unit square is returned only when a sensor
    with id 'position' reports its first channel in 'normalized' units AND
    the first session contains a `FieldSize` element. In every other case
    `None` is returned.
    """
    normalized = any(
        sensor.sensor_id == 'position'
        and sensor.channels[0].unit == 'normalized'
        for sensor in sensors
    )

    session_path = objectify.ObjectPath("Metadata.Sessions.Session[0]")
    field_size_elm = session_path.find(meta_data_elm).find('FieldSize')

    if field_size_elm is None or not normalized:
        return None

    # Coordinates span 0..1, so one meter is 1/Width (resp. 1/Height) units.
    return PitchDimensions(
        x_dim=Dimension(0, 1),
        y_dim=Dimension(0, 1),
        x_per_meter=1 / int(field_size_elm.find('Width')),
        y_per_meter=1 / int(field_size_elm.find('Height')),
    )
def _load_pitch_dimensions(
    metadata_elm, sensors: List[Sensor]
) -> Union[None, PitchDimensions]:
    """
    Derive the pitch dimensions from the session's `FieldSize` element.

    A `PitchDimensions` on the unit square is returned only when a sensor
    with id "position" reports its first channel in "normalized" units AND
    the first session contains a `FieldSize` element. In every other case
    `None` is returned.
    """
    normalized = any(
        sensor.sensor_id == "position"
        and sensor.channels[0].unit == "normalized"
        for sensor in sensors
    )

    session_path = objectify.ObjectPath("Metadata.Sessions.Session[0]")
    field_size_elm = session_path.find(metadata_elm).find("FieldSize")

    if field_size_elm is None or not normalized:
        return None

    # FieldSize Width feeds `length` and Height feeds `width`.
    return PitchDimensions(
        x_dim=Dimension(0, 1),
        y_dim=Dimension(0, 1),
        length=int(field_size_elm.find("Width")),
        width=int(field_size_elm.find("Height")),
    )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
    """
    Deserialize Metrica tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data_home` should point to a `Readable` object containing
        the 'csv' formatted raw data for the home team. input `raw_data_away`
        should point to a `Readable` object containing the 'csv' formatted
        raw data for the away team.
    options : dict
        Options for deserialization of the Metrica file. Possible options are
        `sample_rate` (float between 0 and 1) to specify the amount of
        frames that should be loaded, `limit` to specify the max number of
        frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    ValueError when both input files don't seem to belong to each other

    See Also
    --------

    Examples
    --------
    >>> serializer = MetricaTrackingSerializer()
    >>> with open("Sample_Game_1_RawTrackingData_Home_Team.csv", "rb") as raw_home, \
    >>>         open("Sample_Game_1_RawTrackingData_Away_Team.csv", "rb") as raw_away:
    >>>
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'raw_data_home': raw_home,
    >>>             'raw_data_away': raw_away
    >>>         },
    >>>         options={
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)
    options = options or {}

    sample_rate = float(options.get('sample_rate', 1.0))
    limit = int(options.get('limit', 0))

    # consider reading this from data
    frame_rate = 25

    with performance_logging("prepare", logger=logger):
        # One iterator per team file, home first; zip walks them in lockstep.
        team_iterators = [
            self.__create_iterator(inputs[input_key], sample_rate, frame_rate)
            for input_key in ('raw_data_home', 'raw_data_away')
        ]
        partial_frames = zip(*team_iterators)

    with performance_logging("loading", logger=logger):
        frames = []
        periods = []

        partial_frame_type = self.__PartialFrame
        home_part: partial_frame_type
        away_part: partial_frame_type
        for loaded, (home_part, away_part) in enumerate(partial_frames, start=1):
            self.__validate_partials(home_part, away_part)

            # Period and frame id are taken from the home half of the pair.
            period: Period = home_part.period
            frame_id: int = home_part.frame_id

            frame = Frame(
                frame_id=frame_id,
                timestamp=frame_id / frame_rate - period.start_timestamp,
                ball_position=home_part.ball_position,
                home_team_player_positions=home_part.player_positions,
                away_team_player_positions=away_part.player_positions,
                period=period,
                ball_state=None,
                ball_owning_team=None,
            )
            frames.append(frame)

            entered_new_period = not periods or period.id != periods[-1].id
            if entered_new_period:
                periods.append(period)

            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(frame)
                )

            # `loaded` is 1-based, so this stops after exactly `limit` frames.
            if limit and loaded >= limit:
                break

    # The dataset orientation follows the first period's attacking direction.
    if periods[0].attacking_direction == AttackingDirection.HOME_AWAY:
        orientation = Orientation.FIXED_HOME_AWAY
    else:
        orientation = Orientation.FIXED_AWAY_HOME

    return TrackingDataset(
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        frame_rate=frame_rate,
        orientation=orientation,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 1), y_dim=Dimension(0, 1)
        ),
        periods=periods,
        records=frames,
    )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
    """
    Deserialize StatsBomb event data into a `EventDataset`.

    Parameters
    ----------
    inputs : dict
        input `event_data` should point to a `Readable` object containing
        the 'json' formatted event data. input `lineup_data` should point
        to a `Readable` object containing the 'json' formatted lineup data.
    options : dict
        Options for deserialization of the StatsBomb file. Possible options are
        `event_types` (list of event types) to specify the event types that
        should be returned. Valid types: "shot", "pass", "carry", "take_on" and
        "generic". Generic is everything other than the first 4. Those events
        are barely parsed. This type of event can be used to do the parsing
        yourself.
        Every event has a 'raw_event' attribute which contains the original
        dictionary.

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    Exception
        When the team id or the possession team id of an event matches
        neither of the two lineups.

    See Also
    --------

    Examples
    --------
    >>> serializer = StatsBombSerializer()
    >>> with open("events/12312312.json", "rb") as event_data, \
    >>>         open("lineups/123123123.json", "rb") as lineup_data:
    >>>
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'event_data': event_data,
    >>>             'lineup_data': lineup_data
    >>>         },
    >>>         options={
    >>>             'event_types': ["pass", "take_on", "carry", "shot"]
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)
    if not options:
        options = {}

    with performance_logging("load data", logger=logger):
        raw_events = json.load(inputs['event_data'])
        # The lineup file is unpacked as exactly two entries: home, then away.
        home_lineup, away_lineup = json.load(inputs['lineup_data'])
        shot_fidelity_version, xy_fidelity_version = _determine_xy_fidelity_versions(
            raw_events)
        logger.info(
            f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
        )

    with performance_logging("parse data", logger=logger):
        # player_id -> jersey number (as string), one map per team
        home_player_map = {
            player['player_id']: str(player['jersey_number'])
            for player in home_lineup['lineup']
        }
        away_player_map = {
            player['player_id']: str(player['jersey_number'])
            for player in away_lineup['lineup']
        }

        # An empty list means "keep every event type" (see the filter below).
        wanted_event_types = [
            EventType[event_type.upper()]
            for event_type in options.get('event_types', [])
        ]

        periods = []
        period = None
        events = []
        for raw_event in raw_events:
            # Team that executed the event.
            if raw_event['team']['id'] == home_lineup['team_id']:
                team = Team.HOME
                current_team_map = home_player_map
            elif raw_event['team']['id'] == away_lineup['team_id']:
                team = Team.AWAY
                current_team_map = away_player_map
            else:
                raise Exception(
                    f"Unknown team_id {raw_event['team']['id']}")

            # Team in possession, which may differ from the executing team.
            if raw_event['possession_team']['id'] == home_lineup[
                    'team_id']:
                possession_team = Team.HOME
            elif raw_event['possession_team']['id'] == away_lineup[
                    'team_id']:
                possession_team = Team.AWAY
            else:
                raise Exception(
                    f"Unknown possession_team_id: {raw_event['possession_team']}"
                )

            timestamp = parse_str_ts(raw_event['timestamp'])
            period_id = int(raw_event['period'])
            if not period or period.id != period_id:
                # First event of a new period. The first period starts at
                # this event's timestamp; later periods start at the previous
                # period's end plus this event's timestamp.
                period = Period(id=period_id,
                                start_timestamp=timestamp if not period else
                                timestamp + period.end_timestamp,
                                end_timestamp=None)
                periods.append(period)
            else:
                # Same period: push its end forward to the current event.
                period.end_timestamp = period.start_timestamp + timestamp

            player_jersey_no = None
            if 'player' in raw_event:
                player_jersey_no = current_team_map[raw_event['player']
                                                    ['id']]

            event_type = raw_event['type']['id']
            if event_type == SB_EVENT_TYPE_SHOT:
                fidelity_version = shot_fidelity_version
            elif event_type in (SB_EVENT_TYPE_CARRY,
                                SB_EVENT_TYPE_DRIBBLE, SB_EVENT_TYPE_PASS):
                fidelity_version = xy_fidelity_version
            else:
                # TODO: Uh ohhhh.. don't know which one to pick
                fidelity_version = xy_fidelity_version

            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp,
                ball_owning_team=possession_team,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=raw_event['id'],
                team=team,
                player_jersey_no=player_jersey_no,
                position=(_parse_position(raw_event.get('location'),
                                          fidelity_version)
                          if 'location' in raw_event else None),
                raw_event=raw_event)

            if event_type == SB_EVENT_TYPE_PASS:
                pass_event_kwargs = _parse_pass(
                    pass_dict=raw_event['pass'],
                    current_team_map=current_team_map,
                    fidelity_version=fidelity_version)

                event = PassEvent(
                    # TODO: Consider moving this to _parse_pass
                    receive_timestamp=timestamp + raw_event['duration'],
                    **pass_event_kwargs,
                    **generic_event_kwargs)
            elif event_type == SB_EVENT_TYPE_SHOT:
                shot_event_kwargs = _parse_shot(
                    shot_dict=raw_event['shot'])
                event = ShotEvent(**shot_event_kwargs,
                                  **generic_event_kwargs)

            # For dribble and carry the definitions
            # are flipped between Statsbomb and kloppy
            elif event_type == SB_EVENT_TYPE_DRIBBLE:
                take_on_event_kwargs = _parse_take_on(
                    take_on_dict=raw_event['dribble'])
                event = TakeOnEvent(**take_on_event_kwargs,
                                    **generic_event_kwargs)
            elif event_type == SB_EVENT_TYPE_CARRY:
                carry_event_kwargs = _parse_carry(
                    carry_dict=raw_event['carry'],
                    fidelity_version=fidelity_version)
                event = CarryEvent(
                    # TODO: Consider moving this to _parse_carry
                    end_timestamp=timestamp + raw_event['duration'],
                    **carry_event_kwargs,
                    **generic_event_kwargs)
            else:
                # Everything else is kept as a barely-parsed generic event.
                event = GenericEvent(result=None, **generic_event_kwargs)

            if not wanted_event_types or event.event_type in wanted_event_types:
                events.append(event)

    return EventDataset(flags=DatasetFlag.BALL_OWNING_TEAM,
                        orientation=Orientation.ACTION_EXECUTING_TEAM,
                        pitch_dimensions=PitchDimensions(
                            x_dim=Dimension(0, 120), y_dim=Dimension(0, 80)),
                        periods=periods,
                        records=events)
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataSet:
    """
    Deserialize TRACAB tracking data into a `TrackingDataSet`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'csv' formatted raw data. input `meta_data` should point to
        the xml metadata data.
    options : dict
        Options for deserialization of the TRACAB file. Possible options are
        `only_alive` (boolean) to specify that only frames with alive ball state
        should be loaded, or `sample_rate` (float between 0 and 1) to specify
        the amount of frames that should be loaded.

    Returns
    -------
    data_set : TrackingDataSet

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = TRACABSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>         open("raw.dat", "rb") as raw:
    >>>     data_set = serializer.deserialize(
    >>>         inputs={
    >>>             'meta_data': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>             'only_alive': True,
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)
    options = options or {}

    sample_rate = float(options.get('sample_rate', 1.0))
    only_alive = bool(options.get('only_alive', True))

    with performance_logging("Loading metadata"):
        match = objectify.fromstring(inputs['meta_data'].read()).match
        frame_rate = int(match.attrib['iFrameRateFps'])
        pitch_size_width = float(match.attrib['fPitchXSizeMeters'])
        pitch_size_height = float(match.attrib['fPitchYSizeMeters'])

        periods = []
        for period_elm in match.iterchildren(tag='period'):
            start_frame_id = int(period_elm.attrib['iStartFrame'])
            end_frame_id = int(period_elm.attrib['iEndFrame'])
            # A period whose start and end frame are both 0 is left out.
            if start_frame_id == 0 and end_frame_id == 0:
                continue
            periods.append(
                Period(
                    id=int(period_elm.attrib['iId']),
                    start_timestamp=start_frame_id / frame_rate,
                    end_timestamp=end_frame_id / frame_rate,
                )
            )

    with performance_logging("Loading data"):

        def _sampled_lines():
            """Yield `(period, line)` for every sampled line within a period."""
            seen = 0
            step = 1. / sample_rate

            for raw_line in inputs['raw_data'].readlines():
                text = raw_line.strip().decode("ascii")
                if not text:
                    continue

                # The frame id is the part of the line before the first ':'.
                frame_id = int(text[:10].split(":", 1)[0])
                if only_alive and not text.endswith("Alive;:"):
                    continue

                for candidate in periods:
                    if not candidate.contains(frame_id / frame_rate):
                        continue
                    if seen % step == 0:
                        yield candidate, text
                    seen += 1

        frames = []
        for period, line in _sampled_lines():
            frame = self._frame_from_line(period, line, frame_rate)
            frames.append(frame)

            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(frame)
                )

    # The dataset orientation follows the first period's attacking direction.
    if periods[0].attacking_direction == AttackingDirection.HOME_AWAY:
        orientation = Orientation.FIXED_HOME_AWAY
    else:
        orientation = Orientation.FIXED_AWAY_HOME

    # Pitch is centred on the origin.
    half_width = pitch_size_width / 2
    half_height = pitch_size_height / 2
    return TrackingDataSet(
        flags=DataSetFlag.BALL_OWNING_TEAM | DataSetFlag.BALL_STATE,
        frame_rate=frame_rate,
        orientation=orientation,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(-1 * half_width, half_width),
            y_dim=Dimension(-1 * half_height, half_height),
            x_per_meter=100,
            y_per_meter=100,
        ),
        periods=periods,
        records=frames,
    )
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> TrackingDataset:
    """
    Deserialize TRACAB tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'csv' formatted raw data. input `metadata` should point to
        the xml metadata data.
    options : dict
        Options for deserialization of the TRACAB file. Possible options are
        `only_alive` (boolean) to specify that only frames with alive ball state
        should be loaded, or `sample_rate` (float between 0 and 1) to specify
        the amount of frames that should be loaded, `limit` to specify the max
        number of frames that will be returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = TRACABSerializer()
    >>> with open("metadata.xml", "rb") as meta, \
    >>>         open("raw.dat", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'metadata': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>             'only_alive': True,
    >>>             'sample_rate': 1/12
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))
    only_alive = bool(options.get("only_alive", True))

    # TODO: also used in Metrica, extract to a method
    home_team = Team(team_id="home", name="home", ground=Ground.HOME)
    away_team = Team(team_id="away", name="away", ground=Ground.AWAY)
    teams = [home_team, away_team]

    with performance_logging("Loading metadata", logger=logger):
        match = objectify.fromstring(inputs["metadata"].read()).match
        frame_rate = int(match.attrib["iFrameRateFps"])
        pitch_size_width = float(match.attrib["fPitchXSizeMeters"])
        pitch_size_height = float(match.attrib["fPitchYSizeMeters"])

        periods = []
        for period in match.iterchildren(tag="period"):
            start_frame_id = int(period.attrib["iStartFrame"])
            end_frame_id = int(period.attrib["iEndFrame"])
            # A period whose start and end frame are both 0 is left out.
            if start_frame_id != 0 or end_frame_id != 0:
                periods.append(
                    Period(
                        id=int(period.attrib["iId"]),
                        start_timestamp=start_frame_id / frame_rate,
                        end_timestamp=end_frame_id / frame_rate,
                    )
                )

    with performance_logging("Loading data", logger=logger):

        def _iter():
            """Yield `(period, line)` for every sampled line within a period."""
            n = 0
            sample = 1.0 / sample_rate

            for line_ in inputs["raw_data"].readlines():
                line_ = line_.strip().decode("ascii")
                if not line_:
                    continue

                # The frame id is the part of the line before the first ':'.
                frame_id = int(line_[:10].split(":", 1)[0])
                if only_alive and not line_.endswith("Alive;:"):
                    continue

                for period_ in periods:
                    if period_.contains(frame_id / frame_rate):
                        if n % sample == 0:
                            yield period_, line_
                        n += 1

        frames = []
        # Count from 1: `frame_count` is then the number of frames collected
        # so far, so the loop stops after exactly `limit` frames. Counting
        # from 0 returned `limit + 1` frames.
        for frame_count, (period, line) in enumerate(_iter(), start=1):
            frame = self._frame_from_line(teams, period, line, frame_rate)

            frames.append(frame)

            if not period.attacking_direction_set:
                period.set_attacking_direction(
                    attacking_direction=attacking_direction_from_frame(
                        frame
                    )
                )

            if limit and frame_count >= limit:
                break

    # The dataset orientation follows the first period's attacking direction.
    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME
    )

    metadata = Metadata(
        teams=teams,
        periods=periods,
        # Pitch is centred on the origin.
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(
                -1 * pitch_size_width / 2, pitch_size_width / 2
            ),
            y_dim=Dimension(
                -1 * pitch_size_height / 2, pitch_size_height / 2
            ),
            x_per_meter=100,
            y_per_meter=100,
        ),
        score=None,
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.TRACAB,
        flags=DatasetFlag.BALL_OWNING_TEAM | DatasetFlag.BALL_STATE,
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> EventDataset:
    """
    Deserialize Wyscout event data into a `EventDataset`.

    Parameters
    ----------
    inputs : dict
        input `event_data` should point to a `Readable` object containing
        the 'json' formatted event data. The document is read for its
        "teams" and "events" entries.
    options : dict
        Options for deserialization of the Wyscout file. Possible options are
        `event_types` (list of event type names) to specify the event types
        that should be returned. When omitted or empty no type filter is
        requested.

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    KeyError
        When an `event_types` entry is not the name of an `EventType` member.

    Examples
    --------
    >>> serializer = WyscoutSerializer()
    >>> with open("events.json", "rb") as event_data:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'event_data': event_data
    >>>         },
    >>>         options={
    >>>             'event_types': ["pass", "shot"]
    >>>         }
    >>>     )
    """
    WyscoutSerializer.__validate_inputs(inputs)
    if not options:
        options = {}

    wanted_event_types = [
        EventType[event_type.upper()]
        for event_type in options.get("event_types", [])
    ]

    with performance_logging("load data", logger=logger):
        raw_events = json.load(inputs["event_data"])

    periods = []

    with performance_logging("parse data", logger=logger):
        # NOTE(review): home/away are taken from the iteration order of the
        # "teams" mapping; confirm the feed guarantees home comes first.
        home_team_id, away_team_id = raw_events["teams"].keys()
        home_team = _parse_team(raw_events, home_team_id, Ground.HOME)
        away_team = _parse_team(raw_events, away_team_id, Ground.AWAY)
        teams = {home_team_id: home_team, away_team_id: away_team}
        players = {
            wy_id: _players_to_dict(team.players)
            for wy_id, team in teams.items()
        }

        events = []

        for idx, raw_event in enumerate(raw_events["events"]):
            # The parsers for shots, passes and set pieces also receive the
            # following raw event (None for the last one).
            next_event = None
            if (idx + 1) < len(raw_events["events"]):
                next_event = raw_events["events"][idx + 1]

            team_id = str(raw_event["teamId"])
            player_id = str(raw_event["playerId"])

            # Open a new period whenever `matchPeriod` changes.
            if not periods or periods[-1].id != raw_event["matchPeriod"]:
                periods.append(
                    Period(
                        id=raw_event["matchPeriod"],
                        start_timestamp=0,
                        end_timestamp=0,
                    )
                )

            generic_event_args = {
                "event_id": raw_event["id"],
                "raw_event": raw_event,
                "coordinates": Point(
                    x=float(raw_event["positions"][0]["x"]),
                    y=float(raw_event["positions"][0]["y"]),
                ),
                "team": teams[team_id],
                "player": players[team_id][player_id]
                if player_id != INVALID_PLAYER
                else None,
                "ball_owning_team": None,
                "ball_state": None,
                "period": periods[-1],
                "timestamp": raw_event["eventSec"],
            }

            # Stays None for raw events that produce no output event.
            event = None
            if raw_event["eventName"] == wyscout_events.SHOT.EVENT:
                shot_event_args = _parse_shot(raw_event, next_event)
                event = ShotEvent.create(
                    **shot_event_args, **generic_event_args
                )
            elif raw_event["eventName"] == wyscout_events.PASS.EVENT:
                pass_event_args = _parse_pass(raw_event, next_event)
                event = PassEvent.create(
                    **pass_event_args, **generic_event_args
                )
            elif raw_event["eventName"] == wyscout_events.FOUL.EVENT:
                foul_event_args = _parse_foul(raw_event)
                event = FoulCommittedEvent.create(
                    **foul_event_args, **generic_event_args
                )
                # A foul carrying a card tag is emitted as the card event;
                # it replaces the foul event created just above.
                if any(
                    _has_tag(raw_event, tag) for tag in wyscout_tags.CARD
                ):
                    card_event_args = _parse_card(raw_event)
                    event = CardEvent.create(
                        **card_event_args, **generic_event_args
                    )
            elif (
                raw_event["eventName"] == wyscout_events.INTERRUPTION.EVENT
            ):
                ball_out_event_args = _parse_ball_out(raw_event)
                event = BallOutEvent.create(
                    **ball_out_event_args, **generic_event_args
                )
            elif raw_event["eventName"] == wyscout_events.FREE_KICK.EVENT:
                set_piece_event_args = _parse_set_piece(
                    raw_event, next_event
                )
                # Set pieces become a pass or a shot by sub event name; any
                # other sub event name leaves `event` as None.
                if (
                    raw_event["subEventName"]
                    in wyscout_events.FREE_KICK.PASS_TYPES
                ):
                    event = PassEvent.create(
                        **set_piece_event_args, **generic_event_args
                    )
                elif (
                    raw_event["subEventName"]
                    in wyscout_events.FREE_KICK.SHOT_TYPES
                ):
                    event = ShotEvent.create(
                        **set_piece_event_args, **generic_event_args
                    )
            elif (
                raw_event["eventName"]
                == wyscout_events.OTHERS_ON_BALL.EVENT
            ):
                recovery_event_args = _parse_recovery(raw_event)
                event = RecoveryEvent.create(
                    **recovery_event_args, **generic_event_args
                )
            elif raw_event["eventName"] == wyscout_events.DUEL.EVENT:
                takeon_event_args = _parse_takeon(raw_event)
                event = TakeOnEvent.create(
                    **takeon_event_args, **generic_event_args
                )
            elif raw_event["eventName"] not in [
                wyscout_events.SAVE.EVENT,
                wyscout_events.OFFSIDE.EVENT,
            ]:
                # The events SAVE and OFFSIDE are already merged with PASS and SHOT events
                qualifiers = _generic_qualifiers(raw_event)
                event = GenericEvent.create(
                    result=None,
                    qualifiers=qualifiers,
                    **generic_event_args
                )

            if event and _include_event(event, wanted_event_types):
                events.append(event)

    metadata = Metadata(
        teams=[home_team, away_team],
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 100), y_dim=Dimension(0, 100)
        ),
        score=None,
        frame_rate=None,
        orientation=Orientation.BALL_OWNING_TEAM,
        flags=None,
        provider=Provider.WYSCOUT,
    )

    return EventDataset(metadata=metadata, records=events)
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
    """
    Deserialize Opta event data into a `EventDataset`.

    Parameters
    ----------
    inputs : dict
        input `f24_data` should point to a `Readable` object containing
        the 'xml' formatted event data. input `f7_data` should point
        to a `Readable` object containing the 'xml' formatted f7 data.
    options : dict
        Options for deserialization of the Opta file. Possible options are
        `event_types` (list of event types) to specify the event types that
        should be returned. Valid types: "shot", "pass", "carry", "take_on" and
        "generic". Generic is everything other than the first 4. Those events
        are barely parsed. This type of event can be used to do the parsing
        yourself.
        Every event has a 'raw_event' attribute which contains the original
        dictionary.

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    Exception
        When a `TeamData` element has a `Side` other than "Home"/"Away",
        when either team has no players ("LineUp incomplete"), or when an
        event's `team_id` matches neither team.

    See Also
    --------

    Examples
    --------
    >>> serializer = OptaSerializer()
    >>> with open("123_f24.xml", "rb") as f24_data, \
    >>>         open("123_f7.xml", "rb") as f7_data:
    >>>
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'f24_data': f24_data,
    >>>             'f7_data': f7_data
    >>>         },
    >>>         options={
    >>>             'event_types': ["pass", "take_on", "carry", "shot"]
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)
    if not options:
        options = {}

    with performance_logging("load data", logger=logger):
        f7_root = objectify.fromstring(inputs["f7_data"].read())
        f24_root = objectify.fromstring(inputs["f24_data"].read())

        # An empty list means "keep every event type" (see the filter below).
        wanted_event_types = [
            EventType[event_type.upper()]
            for event_type in options.get("event_types", [])
        ]

    with performance_logging("parse data", logger=logger):
        matchdata_path = objectify.ObjectPath(
            "SoccerFeed.SoccerDocument.MatchData")
        team_elms = list(
            matchdata_path.find(f7_root).iterchildren("TeamData"))

        home_score = None
        away_score = None
        for team_elm in team_elms:
            # Scores are stored exactly as read from the attribute.
            if team_elm.attrib["Side"] == "Home":
                home_score = team_elm.attrib["Score"]
                home_team = _team_from_xml_elm(team_elm, f7_root)
            elif team_elm.attrib["Side"] == "Away":
                away_score = team_elm.attrib["Score"]
                away_team = _team_from_xml_elm(team_elm, f7_root)
            else:
                raise Exception(f"Unknown side: {team_elm.attrib['Side']}")
        score = Score(home=home_score, away=away_score)
        # NOTE(review): `home_team`/`away_team` are only bound inside the
        # loop above; a feed missing either side fails here with a NameError.
        teams = [home_team, away_team]

        if len(home_team.players) == 0 or len(away_team.players) == 0:
            raise Exception("LineUp incomplete")

        game_elm = f24_root.find("Game")

        # Only periods 1 and 2 are tracked; their timestamps are filled in
        # from the start/end period events below.
        periods = [
            Period(
                id=1,
                start_timestamp=None,
                end_timestamp=None,
            ),
            Period(
                id=2,
                start_timestamp=None,
                end_timestamp=None,
            ),
        ]
        # Carried across events: only updated by BALL_OWNING_EVENTS.
        possession_team = None
        events = []
        for event_elm in game_elm.iterchildren("Event"):
            event_id = event_elm.attrib["id"]
            type_id = int(event_elm.attrib["type_id"])
            timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
            period_id = int(event_elm.attrib["period_id"])
            # for/else: the else branch runs only when no tracked period
            # matched; on a match `period` stays bound to that period.
            for period in periods:
                if period.id == period_id:
                    break
            else:
                logger.debug(
                    f"Skipping event {event_id} because period doesn't match {period_id}"
                )
                continue

            if type_id == EVENT_TYPE_START_PERIOD:
                logger.debug(
                    f"Set start of period {period.id} to {timestamp}")
                period.start_timestamp = timestamp
            elif type_id == EVENT_TYPE_END_PERIOD:
                logger.debug(
                    f"Set end of period {period.id} to {timestamp}")
                period.end_timestamp = timestamp
            else:
                if not period.start_timestamp:
                    # not started yet
                    continue

                if event_elm.attrib["team_id"] == home_team.team_id:
                    team = teams[0]
                elif event_elm.attrib["team_id"] == away_team.team_id:
                    team = teams[1]
                else:
                    raise Exception(
                        f"Unknown team_id {event_elm.attrib['team_id']}")

                x = float(event_elm.attrib["x"])
                y = float(event_elm.attrib["y"])
                outcome = int(event_elm.attrib["outcome"])
                # qualifier_id (int) -> value attribute (None when absent)
                raw_qualifiers = {
                    int(qualifier_elm.attrib["qualifier_id"]):
                    qualifier_elm.attrib.get("value")
                    for qualifier_elm in event_elm.iterchildren("Q")
                }
                player = None
                if "player_id" in event_elm.attrib:
                    player = team.get_player_by_id(
                        event_elm.attrib["player_id"])

                if type_id in BALL_OWNING_EVENTS:
                    possession_team = team

                generic_event_kwargs = dict(
                    # from DataRecord
                    period=period,
                    # event time relative to the start of its period
                    timestamp=timestamp - period.start_timestamp,
                    ball_owning_team=possession_team,
                    ball_state=BallState.ALIVE,
                    # from Event
                    event_id=event_id,
                    team=team,
                    player=player,
                    coordinates=Point(x=x, y=y),
                    raw_event=event_elm,
                )

                if type_id == EVENT_TYPE_PASS:
                    pass_event_kwargs = _parse_pass(
                        raw_qualifiers, outcome)
                    event = PassEvent.create(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                    pass_event_kwargs = _parse_offside_pass(raw_qualifiers)
                    event = PassEvent.create(
                        **pass_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif type_id == EVENT_TYPE_TAKE_ON:
                    take_on_event_kwargs = _parse_take_on(outcome)
                    event = TakeOnEvent.create(
                        qualifiers=None,
                        **take_on_event_kwargs,
                        **generic_event_kwargs,
                    )
                elif type_id in (
                        EVENT_TYPE_SHOT_MISS,
                        EVENT_TYPE_SHOT_POST,
                        EVENT_TYPE_SHOT_SAVED,
                        EVENT_TYPE_SHOT_GOAL,
                ):
                    shot_event_kwargs = _parse_shot(
                        raw_qualifiers,
                        type_id,
                        coordinates=generic_event_kwargs["coordinates"],
                    )
                    # Merge through update() so that keys returned by
                    # _parse_shot override the generic ones instead of
                    # raising on a duplicate keyword.
                    kwargs = {}
                    kwargs.update(generic_event_kwargs)
                    kwargs.update(shot_event_kwargs)
                    event = ShotEvent.create(**kwargs)
                elif type_id == EVENT_TYPE_RECOVERY:
                    event = RecoveryEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                elif type_id == EVENT_TYPE_FOUL_COMMITTED:
                    event = FoulCommittedEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                elif type_id in BALL_OUT_EVENTS:
                    # Ball-out events are the only ones marked DEAD.
                    generic_event_kwargs["ball_state"] = BallState.DEAD
                    event = BallOutEvent.create(
                        result=None,
                        qualifiers=None,
                        **generic_event_kwargs,
                    )

                else:
                    event = GenericEvent.create(
                        **generic_event_kwargs,
                        result=None,
                        qualifiers=None,
                        event_name=_get_event_type_name(type_id),
                    )

                if (not wanted_event_types
                        or event.event_type in wanted_event_types):
                    events.append(event)

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100),
                                         y_dim=Dimension(0, 100)),
        score=score,
        frame_rate=None,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        provider=Provider.OPTA,
    )

    return EventDataset(
        metadata=metadata,
        records=events,
    )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> TrackingDataset:
    """
    Deserialize SkillCorner tracking data into a `TrackingDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'json' formatted raw data. input `metadata` should point to
        the json metadata data.
    options : dict
        Options for deserialization of the SkillCorner file. Possible options are:
        `include_empty_frames` (boolean): default = False to specify whether
        frames without any players_coordinates or the ball_coordinates should
        be loaded,
        `sample_rate` (float between 0 and 1) to specify the amount of frames
        that should be loaded
        and `limit` (int) to specify the max number of frames that will be
        returned.

    Returns
    -------
    dataset : TrackingDataset

    Raises
    ------
    -

    See Also
    --------

    Examples
    --------
    >>> serializer = SkillCornerSerializer()
    >>> with open("match_data.json", "rb") as meta, \
    >>>         open("structured_data.json", "rb") as raw:
    >>>     dataset = serializer.deserialize(
    >>>         inputs={
    >>>             'metadata': meta,
    >>>             'raw_data': raw
    >>>         },
    >>>         options={
    >>>         }
    >>>     )
    """
    self.__validate_inputs(inputs)

    # NOTE: `metadata` holds the parsed JSON document here; the same name is
    # rebound to a `Metadata` object at the end of this method.
    metadata = self.__load_json(inputs["metadata"])
    raw_data = self.__load_json(inputs["raw_data"])

    if not options:
        options = {}

    sample_rate = float(options.get("sample_rate", 1.0))
    limit = int(options.get("limit", 0))
    include_empty_frames = bool(options.get("include_empty_frames", False))

    with performance_logging("Loading metadata", logger=logger):
        periods = self.__get_periods(raw_data)

        # Lookup tables, mostly keyed by `trackable_object`.
        teamdict = {
            metadata["home_team"].get("id"): "home_team",
            metadata["away_team"].get("id"): "away_team",
        }

        player_id_to_team_dict = {
            player["trackable_object"]: player["team_id"]
            for player in metadata["players"]
        }

        player_dict = {
            player["trackable_object"]: player
            for player in metadata["players"]
        }

        referee_dict = {
            ref["trackable_object"]: "referee"
            for ref in metadata["referees"]
        }
        ball_id = metadata["ball"]["trackable_object"]

        # there are different pitch_sizes in SkillCorner
        pitch_size_width = metadata["pitch_width"]
        pitch_size_length = metadata["pitch_length"]

        home_team_id = metadata["home_team"]["id"]
        away_team_id = metadata["away_team"]["id"]

        players = {"HOME": {}, "AWAY": {}}

        home_team = Team(
            team_id=home_team_id,
            name=metadata["home_team"]["name"],
            ground=Ground.HOME,
        )
        self.home_team = home_team

        away_team = Team(
            team_id=away_team_id,
            name=metadata["away_team"]["name"],
            ground=Ground.AWAY,
        )
        self.away_team = away_team

        teams = [home_team, away_team]

        for player_id in player_dict.keys():
            player = player_dict.get(player_id)
            team_id = player["team_id"]

            # NOTE(review): when `team_id` matches neither team,
            # `team_string`/`team` keep their value from the previous
            # iteration (or are unbound on the first one).
            if team_id == home_team_id:
                team_string = "HOME"
                team = home_team
            elif team_id == away_team_id:
                team_string = "AWAY"
                team = away_team

            players[team_string][player_id] = Player(
                player_id=f"{team.ground}_{player['number']}",
                team=team,
                jersey_no=player["number"],
                name=f"{player['first_name']} {player['last_name']}",
                first_name=player["first_name"],
                last_name=player["last_name"],
                # a player counts as starting when start_time is "00:00:00"
                starting=player["start_time"] == "00:00:00",
                position=Position(
                    position_id=player["player_role"].get("id"),
                    name=player["player_role"].get("name"),
                    coordinates=None,
                ),
                attributes={},
            )

        home_team.players = list(players["HOME"].values())
        away_team.players = list(players["AWAY"].values())

        # Passed on to _get_frame_data for every frame below.
        anon_players = {"HOME": {}, "AWAY": {}}

    with performance_logging("Loading data", logger=logger):

        def _iter():
            # Yields sampled raw frames; frames whose "period" is None are
            # skipped and do not advance the sampling counter.
            n = 0
            sample = 1.0 / sample_rate

            for frame in raw_data:
                frame_period = frame["period"]

                if frame_period is not None:
                    if n % sample == 0:
                        yield frame
                    n += 1

        frames = []

        n_frames = 0
        for _frame in _iter():
            # include frame if there is any tracking data, players or ball.
            # or if include_empty_frames == True
            if include_empty_frames or len(_frame["data"]) > 0:
                frame = self._get_frame_data(
                    teams,
                    teamdict,
                    players,
                    player_id_to_team_dict,
                    periods,
                    player_dict,
                    anon_players,
                    ball_id,
                    referee_dict,
                    _frame,
                )

                frames.append(frame)
                n_frames += 1

                # Only frames actually kept count towards `limit`.
                if limit and n_frames >= limit:
                    break

    self._set_skillcorner_attacking_directions(frames, periods)

    frame_rate = 10

    # NOTE(review): `periods[1]` is presumably a lookup by period id rather
    # than a list index - confirm against __get_periods.
    orientation = (Orientation.HOME_TEAM if periods[1].attacking_direction
                   == AttackingDirection.HOME_AWAY else
                   Orientation.AWAY_TEAM)

    # The right-hand side still reads scores from the JSON `metadata` dict;
    # the name is rebound to the `Metadata` object only after construction.
    metadata = Metadata(
        teams=teams,
        periods=periods,
        # Pitch is centred on the origin.
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(-(pitch_size_length / 2),
                            (pitch_size_length / 2)),
            y_dim=Dimension(-(pitch_size_width / 2),
                            (pitch_size_width / 2)),
            x_per_meter=1,
            y_per_meter=1,
        ),
        score=Score(
            home=metadata["home_team_score"],
            away=metadata["away_team_score"],
        ),
        frame_rate=frame_rate,
        orientation=orientation,
        provider=Provider.SKILLCORNER,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
    )

    return TrackingDataset(
        records=frames,
        metadata=metadata,
    )
def deserialize(
    self, inputs: Dict[str, Readable], options: Dict = None
) -> EventDataset:
    """
    Build an `EventDataset` from StatsBomb event and lineup JSON.

    Parameters
    ----------
    inputs : dict
        `event_data` must be a `Readable` holding the 'json' formatted
        event data. `lineup_data` must be a `Readable` holding the 'json'
        formatted lineup data; it has to decode to exactly two entries,
        the home lineup first and the away lineup second.
    options : dict
        Optional settings. `event_types` is a list of event type names
        that are upper-cased and looked up in `EventType`; when given,
        only events of those types are returned. Valid types: "shot",
        "pass", "carry", "take_on" and "generic". Generic covers
        everything other than the first four; those events are barely
        parsed, but every event carries the original dictionary in its
        'raw_event' attribute so callers can parse it themselves.

    Returns
    -------
    dataset : EventDataset

    Raises
    ------
    Exception
        When the team id or possession team id of an event matches
        neither of the two lineups.

    Examples
    --------
    >>> serializer = StatsBombSerializer()
    >>> with open("events/12312312.json", "rb") as event_data:
    >>>     with open("lineups/123123123.json", "rb") as lineup_data:
    >>>         dataset = serializer.deserialize(
    >>>             inputs={
    >>>                 'event_data': event_data,
    >>>                 'lineup_data': lineup_data
    >>>             },
    >>>             options={
    >>>                 'event_types': ["pass", "take_on", "carry", "shot"]
    >>>             }
    >>>         )
    """
    self.__validate_inputs(inputs)
    options = options or {}

    def _team_from_lineup(lineup, ground):
        # Create the team first so every player can point back at it.
        team = Team(
            team_id=str(lineup["team_id"]),
            name=lineup["team_name"],
            ground=ground,
        )
        team.players = [
            Player(
                player_id=str(entry["player_id"]),
                team=team,
                name=entry["player_name"],
                jersey_no=int(entry["jersey_number"]),
            )
            for entry in lineup["lineup"]
        ]
        return team

    def _team_for_id(team_id):
        # Home is checked before away; None signals "no lineup matches".
        if team_id == home_lineup["team_id"]:
            return home_team
        if team_id == away_lineup["team_id"]:
            return away_team
        return None

    with performance_logging("load data", logger=logger):
        raw_events = json.load(inputs["event_data"])
        home_lineup, away_lineup = json.load(inputs["lineup_data"])
        fidelity_versions = _determine_xy_fidelity_versions(raw_events)
        shot_fidelity_version, xy_fidelity_version = fidelity_versions
        logger.info(
            f"Determined Fidelity versions: shot v{shot_fidelity_version} / XY v{xy_fidelity_version}"
        )

    with performance_logging("parse data", logger=logger):
        home_team = _team_from_lineup(home_lineup, Ground.HOME)
        away_team = _team_from_lineup(away_lineup, Ground.AWAY)
        teams = [home_team, away_team]

        wanted_event_types = [
            EventType[type_name.upper()]
            for type_name in options.get("event_types", [])
        ]

        periods = []
        period = None
        events = []
        for raw_event in raw_events:
            team = _team_for_id(raw_event["team"]["id"])
            if team is None:
                raise Exception(
                    f"Unknown team_id {raw_event['team']['id']}"
                )

            possession_team = _team_for_id(raw_event["possession_team"]["id"])
            if possession_team is None:
                raise Exception(
                    f"Unknown possession_team_id: {raw_event['possession_team']}"
                )

            timestamp = parse_str_ts(raw_event["timestamp"])
            period_id = int(raw_event["period"])
            if period and period.id == period_id:
                # Still in the same period: stretch its end to this event.
                period.end_timestamp = period.start_timestamp + timestamp
            else:
                if period:
                    # period = [start, end], add millisecond to prevent
                    # overlapping
                    period_start = timestamp + period.end_timestamp + 0.001
                else:
                    period_start = timestamp
                period = Period(
                    id=period_id,
                    start_timestamp=period_start,
                    end_timestamp=None,
                )
                periods.append(period)

            player = (
                team.get_player_by_id(raw_event["player"]["id"])
                if "player" in raw_event
                else None
            )

            sb_type_id = raw_event["type"]["id"]
            # TODO: Uh ohhhh.. don't know which one to pick for event
            # types other than shot/carry/dribble/pass; like those three
            # positional types they currently get the XY fidelity version.
            fidelity_version = (
                shot_fidelity_version
                if sb_type_id == SB_EVENT_TYPE_SHOT
                else xy_fidelity_version
            )

            generic_event_kwargs = {
                # from DataRecord
                "period": period,
                "timestamp": timestamp,
                "ball_owning_team": possession_team,
                "ball_state": BallState.ALIVE,
                # from Event
                "event_id": raw_event["id"],
                "team": team,
                "player": player,
                "coordinates": (
                    _parse_coordinates(
                        raw_event.get("location"), fidelity_version
                    )
                    if "location" in raw_event
                    else None
                ),
                "raw_event": raw_event,
            }

            if sb_type_id == SB_EVENT_TYPE_PASS:
                pass_event_kwargs = _parse_pass(
                    pass_dict=raw_event["pass"],
                    team=team,
                    fidelity_version=fidelity_version,
                )
                event = PassEvent(
                    # TODO: Consider moving this to _parse_pass
                    receive_timestamp=timestamp + raw_event["duration"],
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
            elif sb_type_id == SB_EVENT_TYPE_SHOT:
                shot_event_kwargs = _parse_shot(shot_dict=raw_event["shot"])
                event = ShotEvent(
                    **shot_event_kwargs, **generic_event_kwargs
                )
            # For dribble and carry the definitions
            # are flipped between Statsbomb and kloppy
            elif sb_type_id == SB_EVENT_TYPE_DRIBBLE:
                take_on_event_kwargs = _parse_take_on(
                    take_on_dict=raw_event["dribble"]
                )
                event = TakeOnEvent(
                    **take_on_event_kwargs, **generic_event_kwargs
                )
            elif sb_type_id == SB_EVENT_TYPE_CARRY:
                carry_event_kwargs = _parse_carry(
                    carry_dict=raw_event["carry"],
                    fidelity_version=fidelity_version,
                )
                event = CarryEvent(
                    # TODO: Consider moving this to _parse_carry
                    end_timestamp=timestamp + raw_event["duration"],
                    **carry_event_kwargs,
                    **generic_event_kwargs,
                )
            else:
                event = GenericEvent(
                    result=None,
                    event_name=raw_event["type"]["name"],
                    **generic_event_kwargs,
                )

            # An empty wanted list means "keep everything".
            if (
                not wanted_event_types
                or event.event_type in wanted_event_types
            ):
                events.append(event)

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(
            x_dim=Dimension(0, 120), y_dim=Dimension(0, 80)
        ),
        frame_rate=None,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        flags=DatasetFlag.BALL_OWNING_TEAM,
        score=None,
    )

    return EventDataset(metadata=metadata, records=events)
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataSet:
    """
    Deserialize CSV event data into an `EventDataSet`.

    Parameters
    ----------
    inputs : dict
        `raw_data` must be an iterable of utf-8 encoded lines forming a
        CSV file with a header row. The columns read are 'Type',
        'Subtype', 'Start Time [s]', 'End Time [s]', 'Period', 'Team',
        'From', 'To', 'Start X', 'Start Y', 'End X' and 'End Y'.
    options : dict
        Not used by this implementation.

    Returns
    -------
    dataset : EventDataSet
        One event per CSV row, in file order. Every event carries the
        game state (ball state and ball owning team) that holds *after*
        that event has been applied.

    Raises
    ------
    ValueError
        When the 'Team' column is neither 'Home' nor 'Away'.
    NotImplementedError
        When a row maps to an event type that has no event class here.
    IndexError
        When the input contains no rows (no period exists to derive
        the orientation from).
    """
    self.__validate_inputs(inputs)

    periods = []
    period = None
    events = []

    game_state = self.__GameState(ball_state=BallState.DEAD,
                                  ball_owning_team=None)

    reader = csv.DictReader(
        line.decode('utf-8') for line in inputs['raw_data'])
    for event_id, record in enumerate(reader):
        event_type = event_type_map[record['Type']]
        subtypes = record['Subtype'].split('-')

        start_timestamp = float(record['Start Time [s]'])
        end_timestamp = float(record['End Time [s]'])

        # A new period starts whenever the period id changes; otherwise
        # the current period is stretched to cover this event.
        period_id = int(record['Period'])
        if not period or period.id != period_id:
            period = Period(id=period_id,
                            start_timestamp=start_timestamp,
                            end_timestamp=end_timestamp)
            periods.append(period)
        else:
            period.end_timestamp = end_timestamp

        if record['Team'] == 'Home':
            team = Team.HOME
        elif record['Team'] == 'Away':
            team = Team.AWAY
        else:
            # The CSV column is 'Team'; looking up 'team' here raised a
            # KeyError that masked the intended ValueError.
            raise ValueError(f'Unknown team: {record["Team"]}')

        event_kwargs = dict(
            # From DataRecord:
            timestamp=start_timestamp,
            ball_owning_team=None,  # todo
            ball_state=None,  # todo
            period=period,

            # From Event:
            event_id=event_id,
            team=team,
            end_timestamp=end_timestamp,
            # Drops the first six characters of the 'From' value
            # (presumably a "Player" prefix -- confirm against the data).
            player_jersey_no=record['From'][6:],
            # The y axis is flipped (1 - y); 'NaN' means no position.
            position=Point(x=float(record['Start X']),
                           y=1 - float(record['Start Y']))
            if record['Start X'] != 'NaN' else None,
        )

        secondary_position = None
        if record['End X'] != 'NaN':
            secondary_position = Point(x=float(record['End X']),
                                       y=1 - float(record['End Y']))

        secondary_jersey_no = None
        if record['To']:
            secondary_jersey_no = record['To'][6:]

        # NOTE(review): apart from `shot_result` the values unpacked from
        # build_subtypes are not used yet. The calls and the unpacking are
        # kept because they may validate the subtypes -- confirm.
        if event_type == EventType.SET_PIECE:
            set_piece, fk_attempt, retaken = \
                build_subtypes(subtypes, [SetPiece, FKAttempt, Retaken])

            event = SetPieceEvent(**event_kwargs)
        elif event_type == EventType.RECOVERY:
            interference1, interference2 = \
                build_subtypes(subtypes, [Interference1, Interference2])

            event = RecoveryEvent(**event_kwargs)
        elif event_type == EventType.PASS:
            body_part, attempt, deflection, offside = \
                build_subtypes(subtypes,
                               [BodyPart, Attempt, Deflection, Offside])

            event = PassEvent(
                receiver_position=secondary_position,
                receiver_player_jersey_no=secondary_jersey_no,
                **event_kwargs)
        elif event_type == EventType.BALL_LOST:
            body_part, attempt, interference1, intervention, deflection, offside = \
                build_subtypes(subtypes, [
                    BodyPart, Attempt, Interference1,
                    Intervention, Deflection, Offside
                ])

            event = BallLossEvent(**event_kwargs)
        elif event_type == EventType.BALL_OUT:
            body_part, attempt, intervention, deflection, offside, own_goal = \
                build_subtypes(subtypes, [
                    BodyPart, Attempt, Intervention,
                    Deflection, Offside, OwnGoal
                ])

            event = BallOutEvent(**event_kwargs)
        elif event_type == EventType.SHOT:
            body_part, deflection, shot_direction, shot_result, offside = \
                build_subtypes(subtypes, [
                    BodyPart, Deflection, ShotDirection,
                    ShotResult, Offside
                ])

            event = ShotEvent(shot_result=shot_result, **event_kwargs)
        elif event_type == EventType.FAULT_RECEIVED:
            event = FaultReceivedEvent(**event_kwargs)
        elif event_type == EventType.CHALLENGE:
            challenge, fault, challenge_result = \
                build_subtypes(subtypes,
                               [Challenge, Fault, ChallengeResult])

            event = ChallengeEvent(**event_kwargs)
        elif event_type == EventType.CARD:
            card, = build_subtypes(subtypes, [Card])

            event = CardEvent(**event_kwargs)
        else:
            raise NotImplementedError(
                f"EventType {event_type} not implemented")

        # We want to attach the game_state after the event to the event
        game_state = self.__reduce_game_state(event=event,
                                              game_state=game_state)

        event.ball_state = game_state.ball_state
        event.ball_owning_team = game_state.ball_owning_team

        events.append(event)

    orientation = (
        Orientation.FIXED_HOME_AWAY
        if periods[0].attacking_direction == AttackingDirection.HOME_AWAY
        else Orientation.FIXED_AWAY_HOME)

    return EventDataSet(
        flags=DataSetFlag.BALL_STATE | DataSetFlag.BALL_OWNING_TEAM,
        orientation=orientation,
        pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 1),
                                         y_dim=Dimension(0, 1)),
        periods=periods,
        records=events)
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
    """
    Build an `EventDataset` from Opta F24 event data and F7 match data.

    Parameters
    ----------
    inputs : dict
        `f24_data` must be a `Readable` holding the 'xml' formatted
        event data. `f7_data` must be a `Readable` holding the 'xml'
        formatted f7 data, which supplies the line-ups (player reference
        to shirt number) and the home/away team references.
    options : dict
        Optional settings. `event_types` is a list of event type names
        that are upper-cased and looked up in `EventType`; when given,
        only events of those types are returned. Valid types: "shot",
        "pass", "carry", "take_on" and "generic". Generic covers
        everything other than the first four; those events are barely
        parsed, but every event carries the original element in its
        'raw_event' attribute so callers can parse it themselves.

    Returns
    -------
    dataset : EventDataset
        Event timestamps are relative to the start of their period.
        Events in periods other than 1 and 2, and events seen before
        their period's start event, are skipped.

    Raises
    ------
    Exception
        When a team has an unknown 'Side', when either line-up is empty,
        or when an event references a team id that is neither the home
        nor the away team.

    Examples
    --------
    >>> serializer = OptaSerializer()
    >>> with open("123_f24.xml", "rb") as f24_data:
    >>>     with open("123_f7.xml", "rb") as f7_data:
    >>>         dataset = serializer.deserialize(
    >>>             inputs={
    >>>                 'f24_data': f24_data,
    >>>                 'f7_data': f7_data
    >>>             },
    >>>             options={
    >>>                 'event_types': ["pass", "take_on", "carry", "shot"]
    >>>             }
    >>>         )
    """
    self.__validate_inputs(inputs)
    options = options or {}

    with performance_logging("load data", logger=logger):
        f7_root = objectify.fromstring(inputs["f7_data"].read())
        f24_root = objectify.fromstring(inputs["f24_data"].read())

        wanted_event_types = [
            EventType[type_name.upper()]
            for type_name in options.get("event_types", [])
        ]

    with performance_logging("parse data", logger=logger):
        matchdata_path = objectify.ObjectPath(
            "SoccerFeed.SoccerDocument.MatchData")
        team_elms = list(
            matchdata_path.find(f7_root).iterchildren("TeamData"))

        away_player_map = {}
        home_player_map = {}
        home_team_id = None
        away_team_id = None
        for team_elm in team_elms:
            # Player reference (leading "p" characters stripped) -> shirt
            # number, for every player in this team's line-up.
            lineup_elm = team_elm.find("PlayerLineUp")
            player_map = {
                match_player.attrib["PlayerRef"].lstrip("p"):
                match_player.attrib["ShirtNumber"]
                for match_player in lineup_elm.iterchildren("MatchPlayer")
            }
            team_id = team_elm.attrib["TeamRef"].lstrip("t")

            side = team_elm.attrib["Side"]
            if side == "Home":
                home_player_map = player_map
                home_team_id = team_id
            elif side == "Away":
                away_player_map = player_map
                away_team_id = team_id
            else:
                raise Exception(f"Unknown side: {team_elm.attrib['Side']}")

        if not (away_player_map and home_player_map):
            raise Exception("LineUp incomplete")

        game_elm = f24_root.find("Game")
        periods = [
            Period(
                id=1,
                start_timestamp=None,
                end_timestamp=None,
            ),
            Period(
                id=2,
                start_timestamp=None,
                end_timestamp=None,
            ),
        ]
        events = []
        for event_elm in game_elm.iterchildren("Event"):
            event_id = event_elm.attrib["id"]
            type_id = int(event_elm.attrib["type_id"])
            timestamp = _parse_f24_datetime(event_elm.attrib["timestamp"])
            period_id = int(event_elm.attrib["period_id"])

            period = next(
                (known for known in periods if known.id == period_id),
                None,
            )
            if period is None:
                logger.debug(
                    f"Skipping event {event_id} because period doesn't match {period_id}"
                )
                continue

            # Period boundary markers only update the period itself.
            if type_id == EVENT_TYPE_START_PERIOD:
                logger.debug(
                    f"Set start of period {period.id} to {timestamp}")
                period.start_timestamp = timestamp
                continue
            if type_id == EVENT_TYPE_END_PERIOD:
                logger.debug(
                    f"Set end of period {period.id} to {timestamp}")
                period.end_timestamp = timestamp
                continue

            if not period.start_timestamp:
                # not started yet
                continue

            if event_elm.attrib["team_id"] == home_team_id:
                team = Team.HOME
                current_team_map = home_player_map
            elif event_elm.attrib["team_id"] == away_team_id:
                team = Team.AWAY
                current_team_map = away_player_map
            else:
                raise Exception(
                    f"Unknown team_id {event_elm.attrib['team_id']}")

            x = float(event_elm.attrib["x"])
            y = float(event_elm.attrib["y"])
            outcome = int(event_elm.attrib["outcome"])
            qualifiers = {
                int(q_elm.attrib["qualifier_id"]): q_elm.attrib.get("value")
                for q_elm in event_elm.iterchildren("Q")
            }

            jersey_no = None
            if "player_id" in event_elm.attrib:
                jersey_no = current_team_map[event_elm.attrib["player_id"]]

            generic_event_kwargs = {
                # from DataRecord
                "period": period,
                "timestamp": timestamp - period.start_timestamp,
                "ball_owning_team": None,
                "ball_state": BallState.ALIVE,
                # from Event
                "event_id": event_id,
                "team": team,
                "player_jersey_no": jersey_no,
                "position": Point(x=x, y=y),
                "raw_event": event_elm,
            }

            if type_id == EVENT_TYPE_PASS:
                pass_event_kwargs = _parse_pass(qualifiers, outcome)
                event = PassEvent(
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_OFFSIDE_PASS:
                pass_event_kwargs = _parse_offside_pass()
                event = PassEvent(
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
            elif type_id == EVENT_TYPE_TAKE_ON:
                take_on_event_kwargs = _parse_take_on(outcome)
                event = TakeOnEvent(
                    **take_on_event_kwargs,
                    **generic_event_kwargs,
                )
            elif type_id in (
                EVENT_TYPE_SHOT_MISS,
                EVENT_TYPE_SHOT_POST,
                EVENT_TYPE_SHOT_SAVED,
                EVENT_TYPE_SHOT_GOAL,
            ):
                shot_event_kwargs = _parse_shot(
                    qualifiers,
                    type_id,
                    position=generic_event_kwargs["position"],
                )
                # Shot-specific values take precedence over generic ones.
                kwargs = dict(generic_event_kwargs)
                kwargs.update(shot_event_kwargs)
                event = ShotEvent(**kwargs)
            else:
                event = GenericEvent(**generic_event_kwargs, result=None)

            # An empty wanted list means "keep everything".
            if (not wanted_event_types
                    or event.event_type in wanted_event_types):
                events.append(event)

    return EventDataset(
        flags=DatasetFlag.BALL_OWNING_TEAM,
        orientation=Orientation.ACTION_EXECUTING_TEAM,
        pitch_dimensions=PitchDimensions(x_dim=Dimension(0, 100),
                                         y_dim=Dimension(0, 100)),
        periods=periods,
        records=events,
    )
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
    """
    Deserialize Sportec XML match and event data into an `EventDataset`.

    Parameters
    ----------
    inputs : dict
        `match_data` and `event_data` must each provide a `read()` method
        returning XML. The match data supplies the pitch size
        ('PitchX' / 'PitchY'), the teams (roles "home" and "guest") and
        the result; the event data supplies the `Event` elements.
    options : dict
        Optional settings. `event_types` is a list of event type names
        that are upper-cased and looked up in `EventType`; the list is
        handed to `_include_event` to filter the parsed events.

    Returns
    -------
    dataset : EventDataset
        Event timestamps are relative to the start of their period. A
        synthetic ball-out event is inserted before every pass that is a
        throw-in, goal kick or corner kick (except when it is the very
        first event).

    Raises
    ------
    Exception
        When a team has a role other than "home" or "guest", or when
        either team has no players.
    ValueError
        When an event names a player but no team.
    """
    self.__validate_inputs(inputs)
    if not options:
        options = {}

    with performance_logging("load data", logger=logger):
        match_root = objectify.fromstring(inputs["match_data"].read())
        event_root = objectify.fromstring(inputs["event_data"].read())

        wanted_event_types = [
            EventType[event_type.upper()]
            for event_type in options.get("event_types", [])
        ]

    with performance_logging("parse data", logger=logger):
        # Pitch size as stated in the match file; used as the upper bound
        # of both pitch dimensions in the metadata below.
        x_max = float(
            match_root.MatchInformation.Environment.attrib["PitchX"])
        y_max = float(
            match_root.MatchInformation.Environment.attrib["PitchY"])

        team_path = objectify.ObjectPath(
            "PutDataRequest.MatchInformation.Teams")
        team_elms = list(team_path.find(match_root).iterchildren("Team"))

        # NOTE(review): home_team / away_team are only bound when a team
        # with the matching role exists; a file missing one of them would
        # fail below with a NameError -- confirm this cannot occur.
        for team_elm in team_elms:
            if team_elm.attrib["Role"] == "home":
                home_team = _team_from_xml_elm(team_elm)
            elif team_elm.attrib["Role"] == "guest":
                away_team = _team_from_xml_elm(team_elm)
            else:
                raise Exception(f"Unknown side: {team_elm.attrib['Role']}")

        # The result attribute is split on ":" into home and away score.
        (
            home_score,
            away_score,
        ) = match_root.MatchInformation.General.attrib["Result"].split(":")
        score = Score(home=int(home_score), away=int(away_score))
        teams = [home_team, away_team]

        if len(home_team.players) == 0 or len(away_team.players) == 0:
            raise Exception("LineUp incomplete")

        periods = []
        period_id = 0
        events = []

        for event_elm in event_root.iterchildren("Event"):
            event_chain = _event_chain_from_xml_elm(event_elm)
            timestamp = _parse_datetime(event_chain["Event"]["EventTime"])
            # A kickoff that carries a "GameSection" opens a new period.
            if (SPORTEC_EVENT_NAME_KICKOFF in event_chain and
                    "GameSection" in event_chain[SPORTEC_EVENT_NAME_KICKOFF]):
                period_id += 1
                period = Period(
                    id=period_id,
                    start_timestamp=timestamp,
                    end_timestamp=None,
                )
                if period_id == 1:
                    team_left = event_chain[SPORTEC_EVENT_NAME_KICKOFF][
                        "TeamLeft"]
                    if team_left == home_team.team_id:
                        # goal of home team is on the left side.
                        # this means they attack from left to right
                        orientation = Orientation.FIXED_HOME_AWAY
                        period.set_attacking_direction(
                            AttackingDirection.HOME_AWAY)
                    else:
                        orientation = Orientation.FIXED_AWAY_HOME
                        period.set_attacking_direction(
                            AttackingDirection.AWAY_HOME)
                else:
                    # Later periods attack in the opposite direction of
                    # the period before them.
                    last_period = periods[-1]
                    period.set_attacking_direction(
                        AttackingDirection.AWAY_HOME if last_period.
                        attacking_direction == AttackingDirection.
                        HOME_AWAY else AttackingDirection.HOME_AWAY)

                periods.append(period)
            elif SPORTEC_EVENT_NAME_FINAL_WHISTLE in event_chain:
                # The final whistle closes the current period and does not
                # produce an event of its own.
                period.end_timestamp = timestamp
                continue

            # NOTE(review): `period` (and `orientation`, used in the
            # metadata) are only bound once a kickoff with a GameSection
            # has been processed; input whose first event is anything else
            # would fail with a NameError -- confirm this cannot occur.
            team = None
            player = None
            flatten_attributes = dict()
            # reverse because top levels are more important
            for event_attributes in reversed(event_chain.values()):
                flatten_attributes.update(event_attributes)

            if "Team" in flatten_attributes:
                # Any team id other than the home team's is treated as
                # the away team.
                team = (home_team if flatten_attributes["Team"]
                        == home_team.team_id else away_team)
            if "Player" in flatten_attributes:
                if not team:
                    raise ValueError("Player set while team is not set")
                player = team.get_player_by_id(
                    flatten_attributes["Player"])

            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=timestamp - period.start_timestamp,
                ball_owning_team=None,
                ball_state=BallState.ALIVE,
                # from Event
                event_id=event_chain["Event"]["EventId"],
                coordinates=_parse_coordinates(event_chain["Event"]),
                raw_event=flatten_attributes,
                team=team,
                player=player,
            )

            # Removes the last entry of the chain and uses it to pick the
            # event class; from here on event_chain no longer contains it.
            event_name, event_attributes = event_chain.popitem()
            if event_name in SPORTEC_SHOT_EVENT_NAMES:
                shot_event_kwargs = _parse_shot(event_name=event_name,
                                                event_chain=event_chain)
                event = ShotEvent.create(
                    **shot_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_name in SPORTEC_PASS_EVENT_NAMES:
                pass_event_kwargs = _parse_pass(event_chain=event_chain,
                                                team=team)
                event = PassEvent.create(
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                    receive_timestamp=None,
                    receiver_coordinates=None,
                )
            elif event_name == SPORTEC_EVENT_NAME_BALL_CLAIMING:
                event = RecoveryEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_SUBSTITUTION:
                substitution_event_kwargs = _parse_substitution(
                    event_attributes=event_attributes, team=team)
                # The player returned by _parse_substitution replaces the
                # generic player; it is removed from the substitution
                # kwargs so "player" is not passed twice.
                generic_event_kwargs["player"] = substitution_event_kwargs[
                    "player"]
                del substitution_event_kwargs["player"]
                event = SubstitutionEvent.create(
                    result=None,
                    qualifiers=None,
                    **substitution_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_CAUTION:
                card_kwargs = _parse_caution(event_attributes)
                event = CardEvent.create(
                    result=None,
                    qualifiers=None,
                    **card_kwargs,
                    **generic_event_kwargs,
                )
            elif event_name == SPORTEC_EVENT_NAME_FOUL:
                foul_kwargs = _parse_foul(event_attributes, teams=teams)
                # Foul values overwrite the generic ones with the same key.
                generic_event_kwargs.update(foul_kwargs)
                event = FoulCommittedEvent.create(
                    result=None,
                    qualifiers=None,
                    **generic_event_kwargs,
                )
            else:
                event = GenericEvent.create(
                    result=None,
                    qualifiers=None,
                    event_name=event_name,
                    **generic_event_kwargs,
                )

            if events:
                previous_event = events[-1]
                # A completed pass gets its receiver coordinates from the
                # source position of the event that follows it.
                if (previous_event.event_type == EventType.PASS and
                        previous_event.result == PassResult.COMPLETE):
                    if "X-Source-Position" in event_chain["Event"]:
                        previous_event.receiver_coordinates = Point(
                            x=float(
                                event_chain["Event"]["X-Source-Position"]),
                            y=float(
                                event_chain["Event"]["Y-Source-Position"]),
                        )

                if (event.event_type == EventType.PASS and
                        event.get_qualifier_value(SetPieceQualifier) in (
                            SetPieceType.THROW_IN,
                            SetPieceType.GOAL_KICK,
                            SetPieceType.CORNER_KICK,
                        )):
                    # 1. update previous pass
                    if events[-1].event_type == EventType.PASS:
                        events[-1].result = PassResult.OUT

                    # 2. add synthetic out event
                    # The timestamp comes from the second entry of the
                    # chain as it stands after the popitem() above.
                    decision_timestamp = _parse_datetime(event_chain[list(
                        event_chain.keys())[1]]["DecisionTimestamp"])
                    out_event = BallOutEvent.create(
                        period=period,
                        timestamp=decision_timestamp - period.start_timestamp,
                        ball_owning_team=None,
                        ball_state=BallState.DEAD,
                        # from Event
                        event_id=event_chain["Event"]["EventId"] + "-ball-out",
                        team=events[-1].team,
                        player=events[-1].player,
                        coordinates=None,
                        raw_event={},
                        result=None,
                        qualifiers=None,
                    )
                    events.append(out_event)

            events.append(event)

        # Filtering runs only after all events are parsed, so the
        # previous-event updates above also see events that are
        # dropped here.
        events = list(
            filter(
                lambda _event: _include_event(_event, wanted_event_types),
                events,
            ))

    metadata = Metadata(
        teams=teams,
        periods=periods,
        pitch_dimensions=PitchDimensions(x_dim=Dimension(0, x_max),
                                         y_dim=Dimension(0, y_max)),
        score=score,
        frame_rate=None,
        orientation=orientation,
        flags=~(DatasetFlag.BALL_STATE | DatasetFlag.BALL_OWNING_TEAM),
        provider=Provider.SPORTEC,
    )

    return EventDataset(
        metadata=metadata,
        records=events,
    )