Пример #1
0
def parse_header(header):
    """Build the initial game state and feature set from a header's info lines.

    Only lines that start with ``info,`` (after stripping whitespace) are
    considered.  Each one is split with ``csv_split`` into three fields, of
    which the second is used as the key and the third as the value.

    * ``visteam`` / ``hometeam`` values are stored on the returned
      ``GameState`` instance under the same name.
    * Any key whose ``game_<key>`` form appears in ``FeatureSet.__slots__``
      is stored on the returned ``FeatureSet`` instance under that name.

    Args:
        header: The header text, with lines separated by ``"\\n"``.

    Returns:
        A ``(game_state, base_featureset)`` tuple holding the populated
        instances.

    Raises:
        Exception: Whatever ``csv_split`` or the three-way unpacking raises
            for a malformed info line; the line is logged first and the
            exception is re-raised unchanged.
    """
    game_state = GameState()
    base_featureset = FeatureSet()

    for line in header.split("\n"):
        line = line.strip()
        if not line.startswith("info,"):
            continue

        try:
            _, key, value = csv_split(line)
        except Exception:
            logging.error("Choked on line: %s", line)
            raise

        # Store on the instances being returned, not on the classes:
        # writing to the class would share the value with every other
        # instance and leave the returned objects themselves untouched.
        if key in ["visteam", "hometeam"]:
            setattr(game_state, key, value)

        fs_key = "game_%s" % key
        if fs_key in FeatureSet.__slots__:
            setattr(base_featureset, fs_key, value)

    return game_state, base_featureset
Пример #2
0
    def from_featureset_json(cls, json_str):
        """Create an instance of ``cls`` from one JSON-encoded feature record.

        The JSON text is decoded, every value that has an entry in
        ``FeatureSet.get_parse_map()`` is converted with that parser (falling
        back to ``UNK`` when the parser raises ``ValueError``), and the
        result is copied onto a fresh ``cls()`` object.  Numeric fields are
        passed through ``cls.bucketized``; a field whose inputs are ``UNK``
        is set to ``UNK`` instead of being computed.

        Args:
            json_str: JSON text for a single record.

        Returns:
            The populated ``cls`` instance.
        """
        record = json.loads(json_str)
        parsers = FeatureSet.get_parse_map()

        # all keys are strings in the .features format, so convert the
        # values that have a registered parser
        for field in record:
            raw = record[field]
            if raw != UNK and field in parsers:
                try:
                    record[field] = parsers[field](raw)
                except ValueError:
                    record[field] = UNK

        obj = cls()

        def known(target, sources=None):
            """Return True when every source field is known; otherwise mark
            ``target`` on the object as UNK and return False."""
            if sources is None:
                sources = [target]
            for name in sources:
                if not (record[name] != UNK):
                    setattr(obj, target, UNK)
                    return False
            return True

        def bucket_field(name, **how):
            """Store the bucketized value of ``name`` on the object when the
            field is known."""
            if known(name):
                setattr(obj, name, cls.bucketized(record[name], **how))

        def years(delta):
            """Convert a timedelta to years."""
            return delta.days / 365

        # --- game-level fields ---
        obj.game_daynight = record["game_daynight"]

        date_known = known("game_date")
        if date_known:
            obj.game_month = cls.bucketized(record["game_date"].month, granularity=2)
            obj.game_year = cls.bucketized(record["game_date"].year, granularity=5)

        bucket_field("game_number", buckets=[0, 1])
        bucket_field("game_temp", granularity=5)

        obj.game_site = record["game_site"]
        obj.game_winddir = cls.get_winddir(record)

        # --- at-bat fields ---
        bucket_field("ab_inning", buckets=[1, 4, 7, 10])

        for name in ("ab_numballs", "ab_numstrikes"):
            setattr(obj, name, record[name])

        obj.ab_lrmatchup = cls.get_lrmatchup(record)

        # --- batter-only fields ---
        for name in ("batter_bats", "batter_fieldpos", "batter_visorhome"):
            setattr(obj, name, record[name])

        bucket_field("batter_batpos", buckets=[1, 3, 5, 8, 10, 11, 12])

        obj.label = record["label"]

        # --- fields shared by batter and pitcher ---
        for role in ["batter_", "pitcher_"]:
            birth_fields = [role + part for part in ("birthYear", "birthMonth", "birthDay")]

            if known(role + "age", birth_fields + ["game_date"]):
                birthday = datetime.datetime(*[record[name] for name in birth_fields])
                age = years(record["game_date"] - birthday)
                setattr(obj, role + "age", cls.bucketized(age, granularity=3))

            bucket_field(role + "weight", granularity=10)
            bucket_field(role + "height", granularity=3)

            for suffix in ("team", "throws", "birthCountry"):
                setattr(obj, role + suffix, record[role + suffix])

            if known(role + "experience", [role + "debut", "game_date"]):
                tenure = years(record["game_date"] - record[role + "debut"])
                setattr(obj, role + "experience", cls.bucketized(tenure, granularity=3))

        return obj
Пример #3
0
 def copy(self):
     """Return a new ``FeatureSet`` holding the same attribute values.

     The copy is shallow: attribute values are shared by reference, not
     duplicated.  Attribute names are taken from ``__slots__`` and, when
     the instance has one, from ``__dict__``.

     Returns:
         A freshly constructed ``FeatureSet`` whose attributes match
         those of ``self``.
     """
     new_obj = FeatureSet()
     missing = object()  # sentinel: distinguishes "unset" from any real value

     slot_names = getattr(self, "__slots__", ())
     if isinstance(slot_names, str):
         # A bare string in __slots__ names a single attribute.
         slot_names = (slot_names,)
     names = list(slot_names)
     names.extend(getattr(self, "__dict__", ()))

     for name in names:
         value = getattr(self, name, missing)
         if value is missing:
             # Declared but never assigned on this object.
             continue
         # Skip attributes the new object already resolves to the same
         # value (e.g. a value stored on the class); only state that
         # actually differs is written to the copy.
         if getattr(new_obj, name, missing) is not value:
             setattr(new_obj, name, value)

     return new_obj