def test_read(self):
    """Parse the EPTS raw data fixture and check that at least one record comes out.

    The metadata fixture is loaded first because `read_raw_data` needs it
    as its second argument.
    """
    base_dir = os.path.dirname(__file__)
    meta_path = f"{base_dir}/files/epts_meta.xml"
    raw_path = f"{base_dir}/files/epts_raw.txt"

    with open(meta_path, "rb") as meta_file:
        metadata = load_metadata(meta_file)

    with open(raw_path, "rb") as raw_file:
        records = read_raw_data(raw_file, metadata)

        # The result is consumed inside the timing block, while the raw
        # file is still open.
        with performance_logging("load"):
            loaded = list(records)
            assert loaded
def test_skip_sensors(self):
    """Read only the "heartbeat" sensor and check the resulting columns.

    "player_1_max_heartbeat" must be present in the data frame and
    "player_1_x" must be absent.
    """
    base_dir = os.path.dirname(__file__)
    meta_path = f"{base_dir}/files/epts_meta.xml"
    raw_path = f"{base_dir}/files/epts_raw.txt"

    # Both fixtures are opened before any parsing starts.
    with open(meta_path, "rb") as metadata_fp:
        with open(raw_path, "rb") as raw_data:
            metadata = load_metadata(metadata_fp)
            heartbeat_records = read_raw_data(
                raw_data,
                metadata,
                sensor_ids=["heartbeat"],
            )
            frame = DataFrame.from_records(heartbeat_records)

    column_names = frame.columns
    assert "player_1_max_heartbeat" in column_names
    assert "player_1_x" not in column_names
def test_regex(self):
    """Build the regex for the first data format specification and match a sample line."""
    base_dir = os.path.dirname(__file__)
    with open(f"{base_dir}/files/epts_meta.xml", "rb") as metadata_fp:
        metadata = load_metadata(metadata_fp)

    first_specification = metadata.data_format_specifications[0]
    pattern = re.compile(
        build_regex(
            first_specification,
            metadata.player_channels,
            metadata.sensors,
        )
    )

    # NOTE: use broken example of FIFA
    sample_line = "1779143:,-2.013,-500,100,9.63,9.80,4,5,177,182;-461,-615,-120,99,900,9.10,4,5,170,179;-2638,3478,120,110,1.15,5.20,3,4,170,175;:-2656,367,100:"
    match = pattern.search(sample_line)
    assert match is not None
def deserialize(self, inputs: Dict[str, Readable], options: Dict = None) -> EventDataset:
    """
    Deserialize Metrica Sports event data json format into a `EventDataset`.

    Parameters
    ----------
    inputs : dict
        input `raw_data` should point to a `Readable` object containing
        the 'json' formatted event data. input `metadata` should point
        to a `Readable` object containing the `xml` metadata file.
    options : dict
        Options for deserialization of the Metrica Sports event json file.
        Possible options are `event_types` (list of event types) to specify
        the event types that should be returned. Valid types: "shot", "pass",
        "carry", "take_on" and "generic". Generic is everything other than
        the first 4. Those events are barely parsed. This type of event can
        be used to do the parsing yourself.
        Every event has a 'raw_event' attribute which contains the original
        dictionary.
        When `options` is empty or `event_types` is not given, events of
        every type are returned.

    Returns
    -------
    dataset : EventDataset
        Dataset holding the loaded metadata and the selected events, in
        the order they appear in the `data` list of the json document.

    Raises
    ------
    Exception
        When an event's team id matches neither of the first two teams
        in the metadata.
    IndexError
        When an event's period id matches none of the periods in the
        metadata.

    Examples
    --------
    >>> serializer = MetricaEventsJsonSerializer()
    >>> with open("events.json", "rb") as raw_data:
    ...     with open("metadata.xml", "rb") as metadata:
    ...         dataset = serializer.deserialize(
    ...             inputs={
    ...                 'raw_data': raw_data,
    ...                 'metadata': metadata
    ...             },
    ...             options={
    ...                 'event_types': ["pass", "take_on", "carry", "shot"]
    ...             }
    ...         )
    """
    self.__validate_inputs(inputs)
    if not options:
        options = {}

    with performance_logging("load data", logger=logger):
        raw_events = json.load(inputs["raw_data"])
        metadata = load_metadata(inputs["metadata"], provider=Provider.METRICA)

    with performance_logging("parse data", logger=logger):
        # An empty list means "no filtering": every event is kept.
        wanted_event_types = [
            EventType[event_type.upper()]
            for event_type in options.get("event_types", [])
        ]

        events = []
        for raw_event in raw_events["data"]:
            if raw_event["team"]["id"] == metadata.teams[0].team_id:
                team = metadata.teams[0]
            elif raw_event["team"]["id"] == metadata.teams[1].team_id:
                team = metadata.teams[1]
            else:
                raise Exception(
                    f"Unknown team_id {raw_event['team']['id']}")

            player = team.get_player_by_id(raw_event["from"]["id"])
            event_type = raw_event["type"]["id"]
            subtypes = _parse_subtypes(raw_event)

            # Take the first period whose id matches, without building a
            # temporary list for every event.
            period_id = raw_event["period"]
            period = next(
                (
                    candidate
                    for candidate in metadata.periods
                    if candidate.id == period_id
                ),
                None,
            )
            if period is None:
                # IndexError is kept as the exception type so callers that
                # already catch it keep working; only the message is new.
                raise IndexError(f"Unknown period id {period_id}")

            # Keyword arguments shared by every event class built below.
            generic_event_kwargs = dict(
                # from DataRecord
                period=period,
                timestamp=raw_event["start"]["time"],
                ball_owning_team=_parse_ball_owning_team(event_type, team),
                ball_state=BallState.ALIVE,
                # from Event
                event_id=None,
                team=team,
                player=player,
                coordinates=(_parse_coordinates(raw_event["start"])),
                raw_event=raw_event,
            )

            # The order of these checks matters: the first matching branch
            # decides the event class. Take-ons are recognised through the
            # subtypes, not through the event type id.
            if event_type in MS_PASS_TYPES:
                pass_event_kwargs = _parse_pass(
                    event=raw_event,
                    subtypes=subtypes,
                    team=team,
                )
                event = PassEvent(
                    **pass_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_type == MS_EVENT_TYPE_SHOT:
                shot_event_kwargs = _parse_shot(
                    event=raw_event, subtypes=subtypes)
                event = ShotEvent(
                    **shot_event_kwargs,
                    **generic_event_kwargs,
                )
            elif subtypes and MS_EVENT_TYPE_DRIBBLE in subtypes:
                take_on_event_kwargs = _parse_take_on(subtypes=subtypes)
                event = TakeOnEvent(
                    **take_on_event_kwargs,
                    **generic_event_kwargs,
                )
            elif event_type == MS_EVENT_TYPE_CARRY:
                carry_event_kwargs = _parse_carry(event=raw_event)
                event = CarryEvent(
                    **carry_event_kwargs,
                    **generic_event_kwargs,
                )
            else:
                # Everything else is kept as a generic event carrying the
                # provider's own event name.
                event = GenericEvent(
                    result=None,
                    event_name=raw_event["type"]["name"],
                    **generic_event_kwargs,
                )

            if (not wanted_event_types
                    or event.event_type in wanted_event_types):
                events.append(event)

    return EventDataset(
        metadata=metadata,
        records=events,
    )