def handle(self, *args, **options):
    """Load a complete MLB season into the database.

    Runs the ``load_MLB_structure`` and ``load_players`` commands for
    ``options['year']``, then runs ``load_day`` for every calendar day from
    the first to the last day of the regular season (both inclusive),
    skipping the All-Star Game date.
    """
    year = options['year']

    # load MLB structure
    call_command('load_MLB_structure', year)

    # TODO load past players

    # for now: load current rosters to at least have current players
    call_command('load_players', year)

    # load games from season
    # Look the season calendar up once; only the leading 'YYYY-MM-DD' part
    # of each field is used.
    season = mlbgame.important_dates(year)
    date_format = '%Y-%m-%d'
    first_day = datetime.datetime.strptime(
        season.first_date_seas[:10], date_format)
    last_day = datetime.datetime.strptime(
        season.last_date_seas[:10], date_format)

    # An empty all-star date (presumably a season without an All-Star Game --
    # TODO confirm) simply means there is no day to skip.
    all_star_raw = season.all_star_date[:10]
    all_star_day = (datetime.datetime.strptime(all_star_raw, date_format)
                    if all_star_raw else None)

    one_day = datetime.timedelta(days=1)
    day = first_day
    # Load the current day *before* advancing, so that opening day itself is
    # included in the import.
    while day <= last_day:
        if day != all_star_day:
            call_command('load_day', day.year, day.month, day.day)
        day += one_day

    print('Complete')
Пример #2
0
  def __fetch_important_dates(self):
    """Return the important-dates record for the relevant season.

    Starts from ``mlbgame.important_dates()`` with no arguments. If that
    record has a non-empty ``playoffs_end_date`` that already lies in the
    past, the record for next calendar year is returned instead.
    """
    now = datetime.today()
    current = mlbgame.important_dates()

    playoffs_end = current.playoffs_end_date
    if playoffs_end == '':
      # No playoff end date known: nothing to compare against.
      return current

    if datetime.strptime(playoffs_end, MLB_DATE_FORMAT) < now:
      # The playoffs are over, so look ahead to the following year.
      return mlbgame.important_dates(now.year + 1)

    return current
Пример #3
0
    def crawl_xmls(self, browser):
        """Fetch the per-game XML files for every game in the date span.

        The span runs from ``self.start_date`` (or the first day of the
        ``self.year`` season when it is None) to the last day of that season,
        as produced by ``date_range``. For each game found on each date, every
        file in the fixed list below is passed to ``self.fetch_xml`` with its
        remote URL and a local path under ``xml/<game_id>/``.

        ``browser`` is accepted but not used in this method.
        """
        wanted_files = [
            "boxscore.xml", "rawboxscore.xml", "game_events.xml",
            "linescore.xml", "players.xml", "inning/inning_all.xml", "game.xml"
        ]

        season = mlbgame.important_dates(year=self.year)
        season_end = season.last_date_seas
        if self.start_date is None:
            span_start = season.first_date_seas
        else:
            span_start = self.start_date

        for current in date_range(span_start, season_end):
            # Wrap each scoreboard entry in its own list before combining.
            per_game = [
                [match]
                for match in mlbgame.day(current.year, current.month, current.day)
            ]
            games = mlbgame.combine_games(per_game)

            year = current.year
            month = "{0:02d}".format(current.month)
            day = "{0:02d}".format(current.day)

            for game in games:
                game_id = game.game_id
                local_dir = "xml/" + game_id

                # NOTE(review): only xml/<game_id> is created here; the
                # "inning/" subdirectory is presumably handled by fetch_xml --
                # confirm.
                if not os.path.exists(local_dir):
                    os.makedirs(local_dir)

                remote_dir = "http://gd2.mlb.com/components/game/mlb/year_%s/month_%s/day_%s/gid_%s/" % \
                    (year, month, day, game_id)

                for xml_name in wanted_files:
                    self.fetch_xml(remote_dir + xml_name,
                                   local_dir + "/" + xml_name)
def create_json(input_folder, input_summaries, output_folder):
    for filename in os.listdir(input_folder):
        d = None
        with codecs.open(input_folder + filename) as json_data:
            d = json.load(json_data)
        print 'filename', input_folder + filename
        output = []
        for entry in d:
            datetime_object = datetime.strptime(entry['day'], '%m_%d_%y')
            begin_date = mlbgame.important_dates(
                datetime_object.year).first_date_seas
            begin_date = datetime.strptime(begin_date, '%Y-%m-%dT%H:%M:%S')
            if datetime_object < begin_date:
                print 'datetime_object', datetime_object, filename
                continue

            html_file_name = []
            html_file_name.append(datetime_object.strftime("%Y%m%d"))
            visname_homename = entry['vis_name'].replace(
                " ", "_") + "-" + entry['home_name'].replace(" ", "_")
            visname_homename = visname_homename.replace(
                'D-backs', 'Diamondbacks')
            html_file_name.append(visname_homename)
            html_file_name.append(
                str(entry['vis_line']['team_runs']) + "-" +
                str(entry['home_line']['team_runs']))

            files = glob.glob(input_summaries + "*" + "_".join(html_file_name))
            if len(files) < 1:
                print input_summaries + "*" + "_".join(
                    html_file_name) + " not found"
            elif len(files) > 1:
                print input_summaries + "*" + "_".join(
                    html_file_name) + " multiple found"
            else:
                fname = files[0]
                with codecs.open(fname, encoding='utf-8') as f:
                    content = f.readlines()
                # you may also want to remove whitespace characters like `\n` at the end of each line
                content = [x.strip() for x in content]
                text = " ".join(content)
                words = nltk.word_tokenize(text)
                newtokes = []
                [
                    newtokes.append(toke) if toke[0].isupper()
                    or '-' not in toke else newtokes.extend(
                        toke.replace('-', " - ").split()) for toke in words
                ]
                entry['summary'] = newtokes
                output.append(entry)

        if len(output) > 0:
            with codecs.open(output_folder + 'combined_' + filename,
                             'w+') as outfile:
                json.dump(output, outfile)
            outfile.close()
Пример #5
0
 def test_important_dates(self):
     """Check ``mlbgame.important_dates`` against the 2017 season values.

     Covers the type and value of every attribute, the formatted text
     output, the ``date_format`` / ``str_format`` helpers, and the
     ``ValueError`` raised for a year with no data (2050).
     """
     important_dates = mlbgame.important_dates(2017)
     output = ("Opening Day 2017: Sunday, April 02.\n"
               "Last day of the 1st half: Sunday, July 09.\n"
               "2017 All Star Game: Tuesday, July 11.\n"
               "First day of the 2nd half: Friday, July 14.\n"
               "Last day of the 2017 season: Sunday, October 01.\n"
               "2017 Playoffs start: Tuesday, October 03.\n"
               "2017 Playoffs end: Wednesday, November 01.")
     self.assertRaises(ValueError, lambda: mlbgame.important_dates(2050))

     # (attribute name, expected type, expected value)
     expected = (
         ('organization_id', int, 1),
         ('year', int, 2017),
         ('org_code', str, 'mlb'),
         ('org_type', str, 'S'),
         ('parent_org', str, ''),
         ('parent_abbrev', str, ''),
         ('name_full', str, 'Major League Baseball'),
         ('name_short', str, ''),
         ('name_abbrev', str, 'MLB'),
         ('file_code', str, 'mlb'),
         ('games', int, 162),
         ('first_date_seas', str, '2017-04-02T00:00:00'),
         ('last_date_1sth', str, '2017-07-09T00:00:00'),
         ('first_date_2ndh', str, '2017-07-14T00:00:00'),
         ('last_date_seas', str, '2017-10-01T00:00:00'),
         ('split_season_sw', str, 'N'),
         ('games_1sth', str, ''),
         ('games_2ndh', str, ''),
         ('all_star_sw', str, 'Y'),
         ('all_star_date', str, '2017-07-11T00:00:00'),
         ('playoff_sw', str, 'N'),
         ('playoff_teams', str, ''),
         ('wildcard_sw', str, 'N'),
         ('wildcard_teams', str, ''),
         ('playoff_points_sw', str, 'N'),
         ('point_values', str, ''),
         ('playoffs_start_date', str, '2017-10-03T00:00:00'),
         ('playoffs_end_date', str, '2017-11-01T00:00:00'),
         ('playoff_rounds', str, ''),
         ('playoff_games', str, ''),
     )
     for attr, expected_type, expected_value in expected:
         actual = getattr(important_dates, attr)
         # The attribute name is passed as the message so a failure says
         # which field was wrong.
         self.assertIsInstance(actual, expected_type, attr)
         self.assertEqual(actual, expected_value, attr)

     self.assertEqual(mlbgame.info.date_format('2017-04-02T00:00:00'),
                      'Sunday, April 02')
     self.assertEqual(mlbgame.info.date_format('not_a_date'), '')
     self.assertEqual(important_dates.nice_output(), output)
     self.assertEqual(important_dates.__str__(), output)
     self.assertEqual(mlbgame.info.str_format('test-{0}', [1]), 'test-1')

     # Override the module-level IMPORTANT_DATES template to check that the
     # lookup fails with ValueError, then put the original value back so the
     # override does not leak into other tests.
     original_template = mlbgame.data.IMPORTANT_DATES
     mlbgame.data.IMPORTANT_DATES = '{0}'
     try:
         self.assertRaises(ValueError,
                           lambda: mlbgame.data.get_important_dates(2050))
     finally:
         mlbgame.data.IMPORTANT_DATES = original_template
Пример #6
0
def _team_games_on(team, day):
    """Return ``mlbgame.day`` results for ``day`` filtered on ``team`` as home/away."""
    return mlbgame.day(day.year, day.month, day.day, home=team, away=team)


def search(request, team_searched):
    """Render upcoming games for one team together with a prediction per game.

    Up to 10 games are collected from today and the following six days. If
    none are found, the first six days of next year's season are used
    instead and the template is told there are no current games.

    Args:
        request: the incoming HTTP request, passed through to ``render``.
        team_searched: short team name used to filter ``mlbgame.day``.

    Returns:
        The rendered ``home.html`` response. Its context holds ``data``
        (list of ``(game, predicted short team name)`` pairs), ``today``,
        ``team`` and ``searched``.
    """
    fullNames = {
        "Dodgers": "Los Angeles Dodgers",
        "Indians": "Cleveland Indians",
        "Rays": "Tampa Bay Rays",
        "Twins": "Minnesota Twins",
        "Athletics": "Oakland Athletics",
        "White Sox": "Chicago White Sox",
        "Reds": "Cincinnati Reds",
        "Padres": "San Diego Padres",
        "Cardinals": "St. Louis Cardinals",
        "Cubs": "Chicago Cubs",
        "Brewers": "Milwaukee Brewers",
        "Royals": "Kansas City Royals",
        "Astros": "Houston Astros",
        "Yankees": "New York Yankees",
        "Braves": "Atlanta Braves",
        "Orioles": "Baltimore Orioles",
        "Blue Jays": "Toronto Blue Jays",
        "Giants": "San Francisco Giants",
        "Pirates": "Pittsburgh Pirates",
        "Diamondbacks": "Arizona Diamondbacks",
        "Marlins": "Miami Marlins",
        "Mets": "New York Mets",
        "Mariners": "Seattle Mariners",
        "Rangers": "Texas Rangers",
        "Angels": "Los Angeles Angels",
        "Nationals": "Washington Nationals",
        "Phillies": "Philadelphia Phillies",
        "Red Sox": "Boston Red Sox",
        "Rockies": "Colorado Rockies",
        "Tigers": "Detroit Tigers"
    }

    # Reverse lookup: full name -> short name.
    teamNames = {v: k for k, v in fullNames.items()}

    maxGames = 10
    searched = True
    now = datetime.datetime.now()
    today = now.date()
    areGamesToday = True

    # retrieve today's games
    games = _team_games_on(team_searched, today)[:maxGames]

    # retrieve this week's games if not enough games today
    for dayCount in range(1, 7):
        if len(games) >= maxGames:
            break
        dateToCheck = today + datetime.timedelta(days=dayCount)
        # Keep every game of the day (e.g. both halves of a doubleheader),
        # but never more than the remaining capacity.
        games.extend(
            _team_games_on(team_searched, dateToCheck)[:maxGames - len(games)])

    if not games:
        areGamesToday = False
        # go to next season
        # NOTE(review): important_dates may raise when next year's calendar
        # is not available yet -- confirm how that should be surfaced.
        firstDate = mlbgame.important_dates(now.year + 1).first_date_seas
        openingDay = datetime.datetime.strptime(firstDate[:10],
                                                '%Y-%m-%d').date()
        for dayCount in range(6):
            if len(games) >= maxGames:
                break
            dateToCheck = openingDay + datetime.timedelta(days=dayCount)
            games.extend(
                _team_games_on(team_searched,
                               dateToCheck)[:maxGames - len(games)])

    # Predict each matchup from both participants. The original passed the
    # home team twice, so the away team never influenced the prediction.
    # NOTE(review): assumes predict takes (home, away) in that order --
    # confirm against predict's signature.
    predictions = [
        teamNames[predict(fullNames[game.home_team],
                          fullNames[game.away_team])]
        for game in games
    ]

    zipped = list(zip(games, predictions))

    return render(
        request, 'home.html', {
            'data': zipped,
            'today': areGamesToday,
            'team': team_searched,
            'searched': searched
        })
Пример #7
0
 def set_mlb_dates(self):
     """Store the important-dates record on ``self.mlb_dates``.

     Uses ``self.season`` when it is truthy, otherwise falls back to
     ``self.current_season``.
     """
     season = self.season or self.current_season
     self.mlb_dates = mlbgame.important_dates(season)