def load_speeds_for_year(self, year, directory, recalc_field_lengths=False):
        """Load a full calendar year of 5-minute speeds for this station.

        For every day of ``year`` the station's traffic file is looked up in
        ``directory``, the 1-minute speeds of all detectors are averaged,
        short gaps are imputed, and the result is reduced to 288 five-minute
        slots.  A day is stored as 288 ``None`` values when the station has
        no detectors or when reading its traffic file raises ``IOError``.

        Args:
            year: Calendar year to load.
            directory: Directory that contains the daily traffic files.
            recalc_field_lengths: Passed through unchanged to each
                detector's ``load_speeds``.

        Returns:
            ``self.speeds``, an object array with one row per day of the
            year (365, or 366 in a leap year) and 288 columns.
        """
        slots_per_day = 288  # 5-minute slots in one day

        if self._verbose:
            # A single-argument print(...) emits the same text under the
            # Python 2 print statement and the Python 3 print function.
            print("Loading speeds for station  %s" % (self.id,))

        first_day = date(year, 1, 1)
        # Jan 1 and Dec 31 are both part of the year, so the day count is the
        # difference plus one.  Without the "+ 1" the array is one row short
        # and December 31 is never loaded.
        n_days = (date(year, 12, 31) - first_day).days + 1

        # 2D array of speeds; dimensions: date (n_days), timeslot (288)
        self.speeds = empty((n_days, slots_per_day), dtype=object)

        for day in range(n_days):
            current_day = first_day + timedelta(days=day)
            if self._verbose:
                print("    Loading speeds for  %s" % (current_day,))

            if not self.detector_list:
                # If there are no detectors, there are no valid speeds
                day_speeds = [None] * slots_per_day
            else:
                try:
                    traffic_file = self.traffic_filename_from_date(current_day)
                    tr = TrafficReader(path.join(directory, traffic_file))

                    # average 1min speeds across detectors
                    day_speeds = impute.average_multilist(
                        [detector.load_speeds(tr, recalc_field_lengths)
                         for detector in self.detector_list])
                    # "short duration temporal linear regression": impute
                    # gaps up to 3 slots long from adjacent values
                    day_speeds = impute.impute_range(day_speeds,
                                                     impute_length=3,
                                                     input_length=3)
                    # average 1min speeds to 5min speeds
                    day_speeds = impute.average_list(day_speeds, 5)
                    # "short duration temporal linear regression" again, now
                    # on the 5min series
                    day_speeds = impute.impute_range(day_speeds,
                                                     impute_length=3,
                                                     input_length=3)
                    # remove any single missing values by averaging adjacent
                    # values
                    day_speeds = impute.impute1(day_speeds)
                except IOError:
                    # If there is no file for the given day, store a list of
                    # invalid speeds instead of aborting the whole year
                    day_speeds = [None] * slots_per_day

            self.speeds[day, :] = day_speeds

        return self.speeds
    def long_temporal_impute(self):
        """Impute along the time axis of every (station, day) series.

        Each row ``self.speeds[station, day, :]`` is converted to a list,
        passed through ``impute.impute_range`` with ``impute_length=6`` and
        ``input_length=6``, and written back in place.  The method returns
        without touching ``self.speeds`` when the corridor has no stations.
        """
        # an empty corridor has nothing to impute
        if not len(self.station_list):
            return

        # axes of the speed array: (station, day, timeslot)
        speed_grid = self.speeds
        for station_idx in range(speed_grid.shape[0]):
            for day_idx in range(speed_grid.shape[1]):
                day_series = list(speed_grid[station_idx, day_idx, :])
                speed_grid[station_idx, day_idx, :] = impute.impute_range(
                    day_series, impute_length=6, input_length=6)
    def load_speeds(self, traffic_reader, recalc_field_lengths=False):
        """Load one day of 5-minute speeds for this station from a reader.

        The detectors' speeds are averaged, gaps of up to 3 slots are
        imputed, the series is averaged in groups of 5, and remaining single
        gaps are filled with ``impute.impute1``.  The result is stored on
        ``self.speed_list``; nothing is returned.

        Args:
            traffic_reader: Reader object handed to each detector's
                ``load_speeds``.
            recalc_field_lengths: Passed through unchanged to each
                detector's ``load_speeds``.
        """
        # If there are no detectors for this station, give it a speed list of
        # all invalid speeds.
        if not self.detector_list:
            # The populated branch stores its result in ``speed_list``, so
            # that attribute is set here as well.  ``speeds`` is still
            # assigned for callers that relied on the previous behaviour.
            self.speeds = [None] * 288
            self.speed_list = [None] * 288
            return

        # Load every detector exactly once, forwarding the recalc flag, the
        # same way load_speeds_for_year does.  Previously each detector was
        # loaded twice: once with the flag (result discarded) and once
        # without it.
        # NOTE(review): assumes self.detectors() yields the same detectors as
        # self.detector_list -- confirm before relying on this.
        per_detector = [detector.load_speeds(traffic_reader, recalc_field_lengths)
                        for detector in self.detector_list]

        speeds = impute.average_multilist(per_detector)
        speeds = impute.impute_range(speeds, impute_length=3, input_length=3)
        speeds = impute.average_list(speeds, 5)
        self.speed_list = impute.impute1(speeds)
    def spatial_impute(self):
        """Impute along the station axis for every (day, timeslot) pair.

        For each day and timeslot the column ``self.speeds[:, day, timeslot]``
        is converted to a list, passed through ``impute.impute_range`` with
        ``impute_length=4`` and ``input_length=1``, and written back in
        place.  The method returns without touching ``self.speeds`` when the
        corridor has no stations.
        """
        # an empty corridor has nothing to impute
        if not len(self.station_list):
            return

        # axes of the speed array: (station, day, timeslot)
        speed_grid = self.speeds
        for day_idx in range(speed_grid.shape[1]):
            for slot_idx in range(speed_grid.shape[2]):
                # all stations' values for this single day/timeslot
                across_stations = list(speed_grid[:, day_idx, slot_idx])
                speed_grid[:, day_idx, slot_idx] = impute.impute_range(
                    across_stations, impute_length=4, input_length=1)
    def weekly_impute(self):
        """Impute across days that are a whole number of weeks apart.

        For each of the first 7 day offsets, and for every station and
        timeslot, the series made of every seventh day starting at that
        offset is converted to a list, passed through
        ``impute.impute_range`` with ``impute_length=3`` and
        ``input_length=2``, and written back in place.  The method returns
        without touching ``self.speeds`` when the corridor has no stations.
        """
        # an empty corridor has nothing to impute
        if not len(self.station_list):
            return

        # axes of the speed array: (station, day, timeslot)
        speed_grid = self.speeds
        for day_offset in range(7):
            # selects days day_offset, day_offset + 7, day_offset + 14, ...
            every_seventh = slice(day_offset, None, 7)
            for station_idx in range(speed_grid.shape[0]):
                for slot_idx in range(speed_grid.shape[2]):
                    weekly_series = list(
                        speed_grid[station_idx, every_seventh, slot_idx])
                    speed_grid[station_idx, every_seventh, slot_idx] = \
                        impute.impute_range(weekly_series,
                                            impute_length=3,
                                            input_length=2)