@classmethod
def read_xml(cls, url, markup, game, players):
    """
    read xml object
    :param url: contents url
    :param markup: markup provider
    :param game: MLBAM Game object
    :param players: MLBAM Players object
    :return: pitchpx.game.inning.Inning object
    """
    innings = Inning(game, players)
    base_url = "".join([url, cls.DIRECTORY])
    # hit location data
    hit_location = cls._read_hit_chart_data(
        MlbamUtil.find_xml('/'.join([base_url, cls.FILENAME_INNING_HIT]), markup)
    )
    # at-bat & pitch data
    for inning in MlbamUtil.find_xml_all(base_url, markup, cls.TAG, cls.FILENAME_PATTERN):
        soup = MlbamUtil.find_xml("/".join([base_url, inning.get_text().strip()]), markup)
        inning_number = int(soup.inning['num'])
        for inning_type in cls.INNINGS.keys():
            inning_soup = soup.inning.find(inning_type)
            if inning_soup is None:
                break
            innings._inning_events(inning_soup, inning_number, cls.INNINGS[inning_type], hit_location)
    return innings
@classmethod
def read_xml(cls, url, markup, game, players):
    """
    read xml object
    :param url: contents url
    :param markup: markup provider
    :param game: MLBAM Game object
    :param players: MLBAM Players object
    :return: pitchpx.game.inning.Inning object
    """
    innings = Inning(game, players)
    base_url = "".join([url, cls.DIRECTORY])
    # hit location data
    hit_location = cls._read_hit_chart_data(
        MlbamUtil.find_xml('/'.join([base_url, cls.FILENAME_INNING_HIT]), markup))
    # at-bat, pitch & action data
    for inning in MlbamUtil.find_xml_all(base_url, markup, cls.TAG, cls.FILENAME_PATTERN):
        soup = MlbamUtil.find_xml("/".join([base_url, inning.get_text().strip()]), markup)
        inning_number = int(soup.inning['num'])
        for inning_type in cls.INNINGS.keys():
            inning_soup = soup.inning.find(inning_type)
            if inning_soup is None:
                break
            innings._inning_events(inning_soup, inning_number, cls.INNINGS[inning_type], hit_location)
            innings._inning_actions(inning_soup, inning_number, cls.INNINGS[inning_type])
    return innings
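# --- Illustrative sketch, not library code: the access patterns above
# (soup.inning['num'], soup.inning.find(...), get_text()) suggest that
# MlbamUtil.find_xml returns a BeautifulSoup tree parsed with the given
# markup provider. The fragment below is hand-written to show that pattern;
# real MLBAM inning files carry full at-bat and pitch detail. ---
from bs4 import BeautifulSoup

fragment = '<inning num="1"><top><atbat num="1"></atbat></top><bottom></bottom></inning>'
soup = BeautifulSoup(fragment, 'lxml')
print(int(soup.inning['num']))     # inning number parsed to an int
print(soup.inning.find('top'))     # the <top> half-inning element
print(soup.inning.find('middle'))  # None -> the inner loop above breaks on a missing half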
@classmethod
def read_xml(cls, url, markup, game):
    """
    read xml object
    :param url: contents url
    :param markup: markup provider
    :param game: MLBAM Game object
    :return: pitchpx.game.players.Players object
    """
    return Players._read_objects(MlbamUtil.find_xml("/".join([url, cls.FILENAME]), markup), game)
def test_find_xml_200(self):
    """
    Get xml content (status:200, head:default)
    """
    req = MlbamUtil.find_xml(
        'http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_lanmlb_sdnmlb_1/game.xml',
        'lxml',
    )
    self.assertIsNotNone(req)
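# --- Illustrative only: these tests hit gd2.mlb.com live. One way to keep them
# from failing offline is to gate them behind an environment variable; the
# RUN_LIVE_MLBAM_TESTS name is invented for this sketch. ---
import os
import unittest

live_only = unittest.skipUnless(os.getenv('RUN_LIVE_MLBAM_TESTS'),
                                'set RUN_LIVE_MLBAM_TESTS=1 to run live MLBAM tests')
# Usage: decorate the test methods in this listing, e.g. put `@live_only`
# above `def test_find_xml_200(self):`.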
@classmethod
def read_xml(cls, url, markup, game):
    """
    read xml object
    :param url: contents url
    :param markup: markup provider
    :param game: MLBAM Game object
    :return: pitchpx.game.players.Players object
    """
    return Players._read_objects(
        MlbamUtil.find_xml("".join([url, cls.FILENAME]), markup), game)
@classmethod
def read_xml(cls, url, features, timestamp, game_number):
    """
    read xml object
    :param url: contents url
    :param features: markup provider
    :param timestamp: game day
    :param game_number: game number
    :return: pitchpx.game.game.Game object
    """
    soup = MlbamUtil.find_xml("".join([url, cls.FILENAME]), features)
    return cls._generate_game_object(soup, timestamp, game_number)
@classmethod
def read_xml(cls, url, features, game, players):
    """
    read xml object
    :param url: contents url
    :param features: markup provider
    :param game: MLBAM Game object
    :param players: MLBAM Players object
    :return: pitchpx.box_score.box_score.BoxScore object
    """
    soup = MlbamUtil.find_xml("".join([url, cls.FILENAME]), features)
    return cls._generate_object(soup, game, players)
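# --- Usage sketch only: it chains the read_xml constructors above the way
# _download (below) does. The import paths for Game, Players and BoxScore come
# from the docstrings; the Inning path is inferred; the gameday URL is the one
# used by test_find_xml_200 and may no longer resolve. ---
from datetime import datetime

from pitchpx.game.game import Game
from pitchpx.game.players import Players
from pitchpx.game.inning import Inning            # assumed module path
from pitchpx.box_score.box_score import BoxScore

GID_URL = ('http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/'
           'gid_2016_04_06_lanmlb_sdnmlb_1/')

game = Game.read_xml(GID_URL, 'lxml', datetime(2016, 4, 6), 1)
players = Players.read_xml(GID_URL, 'lxml', game)
innings = Inning.read_xml(GID_URL, 'lxml', game, players)
box_score = BoxScore.read_xml(GID_URL, 'lxml', game, players)

# innings.atbats / innings.pitches are the lists _download flattens into CSV rows.
print(len(innings.atbats), len(innings.pitches))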
def _download(self, timestamp):
    """
    download MLBAM Game Day
    :param timestamp: day
    """
    games, atbats, pitches = [], [], []
    rosters, coaches, umpires = [], [], []
    timestamp_params = {
        'year': str(timestamp.year),
        'month': str(timestamp.month).zfill(2),
        'day': str(timestamp.day).zfill(2),
    }
    logging.info('->- Game data download start({year}/{month}/{day})'.format(**timestamp_params))
    base_url = self.DELIMITER.join([self.url, self.PAGE_URL_GAME_DAY.format(**timestamp_params)])
    html = MlbamUtil.find_xml(base_url, self.parser)
    href = self.PAGE_URL_GAME_PREFIX.format(**timestamp_params)
    for gid in html.find_all('a', href=re.compile(href)):
        gid_path = gid.get_text().strip()
        gid_url = self.DELIMITER.join([base_url, gid_path])
        # read XML & build the per-game objects
        try:
            game = Game.read_xml(gid_url, self.parser, timestamp, MlbAm._get_game_number(gid_path))
            players = Players.read_xml(gid_url, self.parser, game)
            innings = Inning.read_xml(gid_url, self.parser, game, players)
        except MlbAmHttpNotFound as e:
            logging.warning(e.msg)
            continue
        # append to the datasets
        games.append(game.row())
        rosters.extend([roster.row() for roster in players.rosters.values()])
        coaches.extend([coach.row() for coach in players.coaches.values()])
        umpires.extend([umpire.row() for umpire in players.umpires.values()])
        atbats.extend(innings.atbats)
        pitches.extend(innings.pitches)
    # write csv files
    day = "".join([timestamp_params['year'], timestamp_params['month'], timestamp_params['day']])
    for params in (
        {'datasets': games, 'filename': Game.DOWNLOAD_FILE_NAME},
        {'datasets': rosters, 'filename': Players.Player.DOWNLOAD_FILE_NAME},
        {'datasets': coaches, 'filename': Players.Coach.DOWNLOAD_FILE_NAME},
        {'datasets': umpires, 'filename': Players.Umpire.DOWNLOAD_FILE_NAME},
        {'datasets': atbats, 'filename': AtBat.DOWNLOAD_FILE_NAME},
        {'datasets': pitches, 'filename': Pitch.DOWNLOAD_FILE_NAME},
    ):
        self._write_csv(params['datasets'], params['filename'].format(day=day, extension=self.extension))
    time.sleep(2)
    logging.info('-<- Game data download end({year}/{month}/{day})'.format(**timestamp_params))
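# --- Illustrative only: the real PAGE_URL_GAME_DAY / PAGE_URL_GAME_PREFIX
# constants live on the class; the templates below are assumptions inferred
# from the zero-padded gd2.mlb.com paths and gid anchors used elsewhere in
# this listing. ---
from datetime import date

timestamp = date(2016, 4, 6)
timestamp_params = {
    'year': str(timestamp.year),
    'month': str(timestamp.month).zfill(2),
    'day': str(timestamp.day).zfill(2),
}
PAGE_URL_GAME_DAY = 'year_{year}/month_{month}/day_{day}'  # assumed template
PAGE_URL_GAME_PREFIX = 'gid_{year}_{month}_{day}_'         # assumed template
print(PAGE_URL_GAME_DAY.format(**timestamp_params))     # year_2016/month_04/day_06
print(PAGE_URL_GAME_PREFIX.format(**timestamp_params))  # gid_2016_04_06_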
def test_find_xml_404(self):
    """
    Get xml content (status:404, head:default)
    """
    try:
        _ = MlbamUtil.find_xml(
            'http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_chnmlb_anamlb_1/game.xml',
            'lxml',
        )
    except MlbAmHttpNotFound as e:
        self.assertEqual(e.msg, (
            'HTTP Error '
            'url: http://gd2.mlb.com/components/game/mlb/year_2016/month_04/day_06/gid_2016_04_06_chnmlb_anamlb_1/game.xml '
            'status: 404'
        ))
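# --- Not the library's implementation: a self-contained sketch of a helper with
# MlbamUtil.find_xml's observable behaviour, based only on the (url, markup)
# signature and the error message asserted in the 404 test above. requests is an
# assumed dependency, and the exception class is redefined here so the sketch
# runs on its own. ---
import requests
from bs4 import BeautifulSoup


class MlbAmHttpNotFound(Exception):
    """Stand-in for the real pitchpx exception."""

    def __init__(self, msg):
        super().__init__(msg)
        self.msg = msg


def find_xml(url, markup):
    """Fetch url and return a BeautifulSoup tree, or raise if the page is missing."""
    response = requests.get(url)
    if response.status_code != 200:
        raise MlbAmHttpNotFound(
            'HTTP Error url: {url} status: {status}'.format(url=url, status=response.status_code))
    return BeautifulSoup(response.text, markup)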
def _download(self, timestamp):
    """
    download MLBAM Game Day
    :param timestamp: day
    """
    games, atbats, pitches = [], [], []
    rosters, coaches, umpires = [], [], []
    boxscores, actions = [], []
    timestamp_params = {
        'year': str(timestamp.year),
        'month': str(timestamp.month).zfill(2),
        'day': str(timestamp.day).zfill(2),
    }
    logging.info('->- Game data download start({year}/{month}/{day})'.format(**timestamp_params))
    base_url = self.DELIMITER.join([self.url, self.PAGE_URL_GAME_DAY.format(**timestamp_params)])
    html = MlbamUtil.find_xml(base_url, self.parser)
    href = self.PAGE_URL_GAME_PREFIX.format(**timestamp_params)
    for gid in html.find_all('a', href=re.compile(href)):
        gid_path = gid.get_text().strip()
        gid_url = self.DELIMITER.join([base_url, gid_path])
        # read XML & build the per-game objects
        try:
            game = Game.read_xml(gid_url, self.parser, timestamp, MlbAm._get_game_number(gid_path))
            players = Players.read_xml(gid_url, self.parser, game)
            innings = Inning.read_xml(gid_url, self.parser, game, players)
            boxscore = BoxScore.read_xml(gid_url, self.parser, game, players)
        except MlbAmHttpNotFound as e:
            logging.warning(e.msg)
            continue
        # append to the datasets
        games.append(game.row())
        rosters.extend([roster.row() for roster in players.rosters.values()])
        coaches.extend([coach.row() for coach in players.coaches.values()])
        umpires.extend([umpire.row() for umpire in players.umpires.values()])
        atbats.extend(innings.atbats)
        pitches.extend(innings.pitches)
        actions.extend(innings.actions)
        boxscores.append(boxscore.row())
    # write csv files
    day = "".join([timestamp_params['year'], timestamp_params['month'], timestamp_params['day']])
    for params in (
        {'datasets': games, 'filename': Game.DOWNLOAD_FILE_NAME},
        {'datasets': rosters, 'filename': Players.Player.DOWNLOAD_FILE_NAME},
        {'datasets': coaches, 'filename': Players.Coach.DOWNLOAD_FILE_NAME},
        {'datasets': umpires, 'filename': Players.Umpire.DOWNLOAD_FILE_NAME},
        {'datasets': atbats, 'filename': AtBat.DOWNLOAD_FILE_NAME},
        {'datasets': pitches, 'filename': Pitch.DOWNLOAD_FILE_NAME},
        {'datasets': boxscores, 'filename': BoxScore.DOWNLOAD_FILE_NAME},
        {'datasets': actions, 'filename': InningAction.DOWNLOAD_FILE_NAME},
    ):
        self._write_csv(params['datasets'], params['filename'].format(day=day, extension=self.extension))
    time.sleep(2)
    logging.info('-<- Game data download end({year}/{month}/{day})'.format(**timestamp_params))
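# --- Not the library's code: a minimal sketch of what a _write_csv helper could
# look like, assuming each row() call above returns a plain dict and that the
# output directory is configurable (both are assumptions). ---
import csv
import os


def write_csv(datasets, filename, output_dir='.'):
    """Write a list of row dicts to <output_dir>/<filename> with a header row."""
    if not datasets:
        return  # nothing scraped for this dataset on this day
    path = os.path.join(output_dir, filename)
    with open(path, 'w', newline='') as fp:
        writer = csv.DictWriter(fp, fieldnames=sorted(datasets[0].keys()))
        writer.writeheader()
        writer.writerows(datasets)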