Exemplo n.º 1
0
    def __init__(self, year, large_dfs, processors):
        """Set up the data and processors for a single season.

        year: the season to work on; stored as ``self.year`` and passed to
            ``Slicer.slice_all_data`` and ``Loader.load_boxscores``.
        large_dfs: the multi-season data handed to ``Slicer.slice_all_data``.
        processors: an iterable of exactly three items, unpacked in order as
            the boxscore, shots and game processors.
        """
        self.year = year

        # Narrow the multi-season data down to this season, then attach the
        # boxscores that are loaded separately for the season.
        self.data = Slicer.slice_all_data(large_dfs, self.year)
        self.data.boxscores = Loader.load_boxscores(self.year)

        # Unique values of the 'Date' index level, kept for indexing/access.
        boxscore_index = self.data.boxscores.teams.index
        boxscore_dates = boxscore_index.get_level_values('Date').values
        self.dates = pd.unique(boxscore_dates)

        shots_index = self.data.shots.globals.index
        shot_dates = shots_index.get_level_values('Date').values
        self.shots_dates = pd.unique(shot_dates)

        # Processor objects used later to process this season's data.
        (self.boxscore_processor,
         self.shots_processor,
         self.game_processor) = processors
Exemplo n.º 2
0
    def __init__(self, year, large_dfs, processors):
        """Prepare one season's data and the processors that will consume it.

        Parameters:
            year: season identifier; kept on ``self.year`` and used to slice
                ``large_dfs`` and to load that season's boxscores.
            large_dfs: multi-season data passed to ``Slicer.slice_all_data``.
            processors: three-item iterable giving, in order, the boxscore
                processor, the shots processor and the game processor.
        """
        self.year = year

        # Season-specific view of every dataframe, plus the season boxscores.
        self.data = Slicer.slice_all_data(large_dfs, self.year)
        self.data.boxscores = Loader.load_boxscores(self.year)

        # Distinct 'Date' index values for boxscores and for shots.
        team_dates = self.data.boxscores.teams.index.get_level_values('Date')
        self.dates = pd.unique(team_dates.values)
        global_shot_dates = self.data.shots.globals.index.get_level_values('Date')
        self.shots_dates = pd.unique(global_shot_dates.values)

        # Unpack first, then store, the three processor objects.
        boxscore_proc, shots_proc, game_proc = processors
        self.boxscore_processor = boxscore_proc
        self.shots_processor = shots_proc
        self.game_processor = game_proc
Exemplo n.º 3
0
    def process_jobs(self):
        """Run every configured job in order and print the total elapsed time.

        Each entry of ``self.processor_list`` is paired with the entry of
        ``self.param_list`` at the same position and handed to
        ``self.perform_job``. The multi-season dataframes are loaded once with
        ``Loader.load_large_dfs()`` and reused for every job.

        Every params entry is read for the keys ``'path'``, ``'min_year'``,
        ``'max_year'`` and ``'output_format'``.

        The reported time starts before the dataframes are loaded, so it
        includes the loading step as well as the jobs themselves.
        """
        # All output goes through single-argument print(...) calls: they emit
        # the same text under the Python 2 print statement and also parse as
        # the Python 3 print function.
        num_jobs = len(self.processor_list)
        print('Number of Jobs: ' + str(num_jobs))
        start_time = time.time()  # wall-clock start for the final report

        # Load dataframes that span multiple seasons once, up front.
        large_dfs = Loader.load_large_dfs()

        # Number the jobs from 1 for the progress messages.
        jobs = zip(self.processor_list, self.param_list)
        for num, (processors, params) in enumerate(jobs, 1):
            print('Starting Job #' + str(num) + ': ' + str(params['path']))

            # Perform the job with its own params and processor objects.
            self.perform_job(
                large_dfs,
                params['min_year'],
                params['max_year'],
                processors,
                params['path'],
                params['output_format'],
            )

        print("ALL JOBS FINISHED")
        print("Total time: --- %s seconds ---" % (time.time() - start_time))