Пример #1
0
def get_train_test(test_start, test_stop, model = 'ar'):
    """Load and merge the train and test datasets for a held-out test period.

    Parameters
    ----------
    test_start : str
        Start of the test period, forwarded unchanged to the file-listing
        helpers.
    test_stop : str
        End of the test period, forwarded unchanged to the file-listing
        helpers.
    model : str, default 'ar'
        Selects which family of file-listing helpers is used: 'ar' uses
        ``get_list_of_files_excluding_period`` / ``get_list_of_files``, any
        other value uses the ``*_traditional_model`` variants.

    Returns
    -------
    tuple
        ``(train_dataset, test_dataset)`` as returned by ``merge``.
    """
    print('gets train test data for {} model'.format(model))
    # The original compared the builtin ``type`` against 'ar', which is never
    # equal, so the AR branch was unreachable. The selector is ``model``.
    if model == 'ar':
        files_train = get_list_of_files_excluding_period(test_start, test_stop)
        files_test = get_list_of_files(test_start, test_stop)
    else:
        files_train = get_list_of_files_excluding_period_traditional_model(test_start, test_stop)
        files_test = get_list_of_files_traditional_model(test_start, test_stop)

    print('Detected {} train files and {} test files. Merging might take a while .... '.format(len(files_train), len(files_test)))
    train_dataset = merge(files_train)
    print('finished merging train')
    test_dataset = merge(files_test)
    return train_dataset, test_dataset
Пример #2
0
    def load(self, lat, lon):
        """Merge the files of the configured period and strip NaN rows per pixel.

        The merged dataset is stored on ``self.dataset``. Every
        (latitude, longitude) pair of the grid is then extracted, converted
        to numpy and cleaned of rows containing NaN.

        NOTE(review): the ``lat`` and ``lon`` arguments are overwritten by the
        loop variables and therefore have no effect on the result.
        NOTE(review): ``self.mean`` and ``self.std`` are set to all-zero
        arrays; nothing in this method fills them.

        Returns
        -------
        tuple
            ``(X, y)`` of the last pixel visited by the loops.
        """
        period_files = get_list_of_files(start=self.start, stop=self.stop)
        print("Loading {} files.".format(len(period_files)))
        self.dataset = merge(period_files)

        # One slot per (latitude, longitude, variable).
        grid_shape = (len(self.latitude), len(self.longitude),
                      len(self.variables))
        means = np.zeros(grid_shape)
        stds = np.zeros(grid_shape)

        for lat in self.latitude:
            for lon in self.longitude:
                pixel_ds = get_pixel_from_ds(self.dataset, lat, lon)
                X, y = dataset_to_numpy(pixel_ds, bias=self.bias)
                print('Number of samples prior to removal of nans {}.'.format(
                    len(y)))

                # Stack predictors and response so that a NaN in either one
                # drops the whole row.
                stacked = np.concatenate([X, y], axis=1)
                complete_rows = ~np.isnan(stacked).any(axis=1)
                stacked = stacked[complete_rows]

                # The last column is the response, everything before it the
                # predictors.
                X = stacked[:, :-1]
                y = stacked[:, -1, np.newaxis]

        self.mean = means
        self.std = stds
        return X, y
Пример #3
0
    def load(self):
        """Merge the files of the configured period and min-max scale each pixel.

        NaNs are kept in the data: ``MinMaxScaler`` disregards them in fit and
        maintains them in transform.

        The merged dataset is stored on ``self.dataset``. The per-pixel
        minima and maxima seen by the scaler are stored on ``self.min`` and
        ``self.max``, indexed ``[latitude, longitude, variable]``.

        NOTE(review): the original body referenced an undefined name ``pX``
        and wrote into ``X[:, i, j, :]``, so it could not run. The scaled
        predictors now replace ``X`` for the current pixel; a container for
        the transformed data of all pixels is still missing (pixels may have
        uneven lengths).
        NOTE(review): assumes ``dataset_to_numpy`` returns ``X`` with one
        column per entry in ``self.variables`` - confirm, especially when
        ``self.bias`` is set.

        Returns
        -------
        tuple
            ``(X, y)`` of the last pixel visited, with ``X`` scaled;
            ``(None, None)`` when the grid is empty.
        """
        from sklearn.preprocessing import MinMaxScaler

        files = get_list_of_files(start=self.start, stop=self.stop)
        print("Loading {} files.".format(len(files)))
        self.dataset = merge(files)

        grid_shape = (len(self.latitude), len(self.longitude),
                      len(self.variables))
        mins = np.zeros(grid_shape)
        maxs = np.zeros(grid_shape)

        # Defined up front so that an empty grid does not fail on return.
        X, y = None, None

        for i, lat in enumerate(self.latitude):  # 81
            for j, lon in enumerate(self.longitude):  # 161
                ds = get_pixel_from_ds(self.dataset, lat, lon)
                X, y = dataset_to_numpy(ds, bias=self.bias)
                print('Number of samples prior to removal of nans {}.'.format(
                    len(y)))

                # A fresh scaler per pixel: every pixel is scaled by its own
                # range.
                minmax = MinMaxScaler()
                X = minmax.fit_transform(X)
                mins[i, j, :] = minmax.data_min_.flatten()
                maxs[i, j, :] = minmax.data_max_.flatten()

        self.min = mins
        self.max = maxs
        return X, y
Пример #4
0
def get_train_test(test_start, test_stop, model='ar'):
    """Load and merge the train and test datasets for a held-out test period.

    Parameters
    ----------
    test_start : str
        Start of the test period, forwarded unchanged to the file-listing
        helpers.
    test_stop : str
        End of the test period, forwarded unchanged to the file-listing
        helpers.
    model : str, default 'ar'
        Selects which family of file-listing helpers is used: 'ar' uses
        ``get_list_of_files_excluding_period`` / ``get_list_of_files``, any
        other value uses the ``*_traditional_model`` variants.

    Returns
    -------
    tuple
        ``(train_dataset, test_dataset)`` as returned by ``merge``.
    """
    # The original compared the builtin ``type`` against 'ar', which is never
    # equal, so the AR branch was unreachable. The selector is ``model``.
    if model == 'ar':
        files_train = get_list_of_files_excluding_period(test_start, test_stop)
        files_test = get_list_of_files(test_start, test_stop)
    else:
        files_train = get_list_of_files_excluding_period_traditional_model(
            test_start, test_stop)
        files_test = get_list_of_files_traditional_model(test_start, test_stop)

    train_dataset = merge(files_train)
    test_dataset = merge(files_test)
    return train_dataset, test_dataset
Пример #5
0
    def fit(self):
        """Fit the model for every pixel of the latitude/longitude grid.

        When both ``self.test_start`` and ``self.test_stop`` are set, the test
        files are listed (both endpoints included), merged and stored on
        ``self.test_dataset`` first.

        For every (latitude, longitude) pair ``self.load_transform_fit`` is
        called and its results are stored in grid-shaped arrays, which end up
        on ``self.coeff_matrix``, ``self.mse``, ``self.ase``, ``self.r2``,
        ``self.num_test_samples`` and ``self.num_train_samples``.

        NOTE(review): the sample counts are stored by the names they are
        unpacked under (``num_test`` -> ``num_test_samples``); the original
        stored them swapped. Confirm the return order of
        ``load_transform_fit``.

        Raises
        ------
        NotImplementedError
            If ``self.transform`` is falsy; only the transformed fit exists.
        """
        if self.test_start is not None and self.test_stop is not None:
            files = get_list_of_files_traditional_model(start=self.test_start,
                                                        stop=self.test_stop,
                                                        include_start=True,
                                                        include_stop=True)
            self.test_dataset = merge(files)

        n_lat = len(self.latitude)
        n_lon = len(self.longitude)
        # One coefficient per bias term, environmental variable and lag.
        num_vars = self.bias + len(self.variables) + self.order

        coeff_matrix = np.zeros((n_lat, n_lon, num_vars))
        mse_storage = np.zeros((n_lat, n_lon))
        r2_storage = np.zeros((n_lat, n_lon))
        ase_storage = np.zeros((n_lat, n_lon))
        num_train_samples = np.zeros((n_lat, n_lon))
        num_test_samples = np.zeros((n_lat, n_lon))

        for i, lat in enumerate(self.latitude):
            for j, lon in enumerate(self.longitude):
                if not self.transform:
                    raise NotImplementedError('Implement this shit .... ')

                coeff, mse, ase, r2, num_test, num_train = self.load_transform_fit(
                    lat, lon)
                coeff_matrix[i, j, :] = coeff
                mse_storage[i, j] = mse
                r2_storage[i, j] = r2
                ase_storage[i, j] = ase
                num_train_samples[i, j] = num_train
                num_test_samples[i, j] = num_test
            # Progress after each finished latitude row: pixels done so far
            # out of the actual grid size (previously a hard-coded 81 * 161).
            print('Finished with pixel {}/{}'.format((i + 1) * n_lon,
                                                     n_lat * n_lon))

        self.coeff_matrix = coeff_matrix
        self.mse = mse_storage
        self.ase = ase_storage
        self.r2 = r2_storage
        self.num_test_samples = num_test_samples
        self.num_train_samples = num_train_samples
        return
Пример #6
0
    def load_pixel(self, lat, lon):
        """Load one pixel of the configured period with NaN rows removed.

        The files between ``self.start`` and ``self.stop`` are merged and kept
        on ``self.dataset``; the pixel at (``lat``, ``lon``) is then extracted
        and converted to numpy.

        Returns
        -------
        tuple
            ``(X, y)`` where ``y`` is the last column of the stacked data,
            kept as a column, and ``X`` holds the remaining columns. Rows
            containing a NaN in either are dropped.
        """
        period_files = get_list_of_files(start=self.start, stop=self.stop)
        print("Loading {} files.".format(len(period_files)))
        self.dataset = merge(period_files)

        pixel_ds = get_pixel_from_ds(self.dataset, lat, lon)
        X, y = dataset_to_numpy(pixel_ds, bias=self.bias)
        print('Number of samples prior to removal of nans {}.'.format(len(y)))

        # Stack predictors and response so that a NaN in either one drops the
        # whole row.
        stacked = np.concatenate([X, y], axis=1)
        complete_rows = ~np.isnan(stacked).any(axis=1)
        stacked = stacked[complete_rows]

        return stacked[:, :-1], stacked[:, -1, np.newaxis]
Пример #7
0
def get_xarray_dataset_for_period(start='2012-01-01', stop='2012-01-31'):
    """Read the data of the requested period into one merged dataset.

    Parameters
    ----------
    start : str
        Start of the period (default '2012-01-01').
    stop : str, optional
        End of the period (default '2012-01-31'). When ``None`` the merged
        files are returned without slicing along ``time``.

    Returns
    -------
    data : xr.Dataset
        The merged files, restricted to ``slice(start, stop)`` along ``time``
        whenever ``stop`` is given.
    """
    merged = merge(get_list_of_files(start=start, stop=stop))
    if stop is None:
        return merged
    return merged.sel(time=slice(start, stop))
Пример #8
0
    def __init__(self,
                 start=None,
                 stop=None,
                 test_start=None,
                 test_stop=None,
                 order=1,
                 transform=False,
                 sigmoid=False):
        """Load the training dataset and initialise the model state.

        Parameters
        ----------
        start : str, optional
            Start date of the training data. Format: year-month-date.
        stop : str, optional
            Stop date of the training data. Format: year-month-date.
        test_start : str, optional
            Start date of the test data. When no training period is given,
            the training files come from
            ``get_list_of_files_excluding_period(test_start, test_stop)``.
        test_stop : str, optional
            Stop date of the test data.
        order : int, default 1
            Number of previous time steps included as predictors.
        transform : bool, default False
            When truthy, zero-filled ``mean``/``std`` containers are created
            and the bias term is switched off.
        sigmoid : bool, default False
            Stored on the instance; not used in this method.

        Raises
        ------
        AssertionError
            If ``stop`` is given and ``start < stop`` does not hold.
        ValueError
            If neither a training period nor a test period is given.
        """
        if stop is not None:
            assert start < stop, "Start {} need to be prior to stop {}".format(
                start, stop)

        self.start = start
        self.stop = stop
        self.test_start = test_start
        self.test_stop = test_stop

        no_train_period = start is None and stop is None
        no_test_period = test_start is None and test_stop is None
        full_test_period = test_start is not None and test_stop is not None

        if no_train_period and no_test_period:
            raise ValueError('Something is wrong with')

        if no_train_period and full_test_period:
            # Train on the files listed for everything but the test period.
            train_files = get_list_of_files_excluding_period(
                test_start, test_stop)
            self.dataset = merge(train_files)
        else:
            # The training period decides which files are read.
            self.dataset = get_xarray_dataset_for_period(start=self.start,
                                                         stop=self.stop)

        print('Finished loaded the dataset')
        self.order = order

        self.longitude = self.dataset.longitude.values
        self.latitude = self.dataset.latitude.values
        self.variables = []  # get_list_of_variables_in_ds(self.dataset)

        # Filled in later by fitting / evaluation.
        self.test_dataset = None
        self.coeff_matrix = None
        self.evaluate_ds = None

        self.mse = None
        self.r2 = None
        self.ase = None

        self.num_test_samples = None
        self.num_train_samples = None

        self.transform = transform
        self.sigmoid = sigmoid

        if self.transform:
            # Containers for the transformation: one slot per
            # (latitude, longitude, predictor).
            stats_shape = (len(self.latitude), len(self.longitude),
                           len(self.variables) + self.order)
            self.mean = np.zeros(stats_shape)
            self.std = np.zeros(stats_shape)
        # The bias term is only used when the data is not transformed.
        self.bias = not self.transform

        self.X_train = None
        self.y_train = None
        return
Пример #9
0
    def __init__(self, start = None, stop = None,
                    test_start = None, test_stop = None,
                    train_dataset = None, test_dataset = None,
                    order = 1, transform = False, sigmoid = False,
                    latitude = None, longitude = None, type = 'ar',
                    bias = True):
        """Set up the training and test datasets and initialise the model state.

        Parameters
        ----------
        start : str, optional
            Start date of the training data. Format: year-month-date.
        stop : str, optional
            Stop date of the training data. Format: year-month-date.
        test_start : str, optional
            Start date of the test data.
        test_stop : str, optional
            Stop date of the test data.
        train_dataset : optional
            Already merged training dataset; when given no training files are
            read.
        test_dataset : optional
            Already merged test dataset; when given no test files are read.
        order : int, default 1
            Number of previous time steps included as predictors.
        transform : bool, default False
            When truthy the bias term is forced off.
        sigmoid : bool, default False
            Stored on the instance; not used in this method.
        latitude, longitude : sequence, optional
            Grid coordinates to use; default to the coordinates of the
            training dataset.
        type : str, default 'ar'
            'ar' selects the AR file-listing helpers and the variables
            ``['t2m', 'q', 'r', 'sp']``; any other value selects the
            ``*_traditional_model`` helpers and no variables.
        bias : bool, default True
            Whether a bias term is used; ignored when ``transform`` is truthy.

        Raises
        ------
        AssertionError
            If ``stop`` is given and ``start < stop`` does not hold.
        ValueError
            If no training dataset, training period or test period is given.
        """
        if stop is not None:
            assert start < stop, "Start {} need to be prior to stop {}".format(start, stop)

        self.start = start
        self.stop  = stop

        self.test_start = test_start
        self.test_stop  = test_stop
        self.timer_start = timer()

        self.type = type

        if train_dataset is not None:
            print('Sets training data .... ')
            self.dataset = train_dataset
        else:
            if((start is None and stop is None) and
                    (test_start is not None and test_stop is not None) ):
                # No training period: train on everything but the test period.
                if type == 'ar':
                    files = get_list_of_files_excluding_period(test_start, test_stop)
                else:
                    files = get_list_of_files_excluding_period_traditional_model(test_start, test_stop)
                self.dataset = merge(files)

            elif((start is None and stop is None) and
                    (test_start is None and test_stop is None) ):
                raise ValueError('Either train_dataset, start/stop or '
                                 'test_start/test_stop must be provided.')
            else:
                # The training period decides which files are read. The
                # original passed test_start/test_stop here, which loaded the
                # test period as training data.
                if type == 'ar':
                    files = get_list_of_files(start, stop)
                else:
                    files = get_list_of_files_traditional_model(start, stop)
                self.dataset = merge(files)

        # Always define the attribute, also when no test data is available.
        self.test_dataset = test_dataset
        if test_dataset is not None:
            print('Sets test data')
        elif self.test_start is not None and self.test_stop is not None:
            print('Loads test data -- this should happen')
            if self.type == 'ar':
                files = get_list_of_files(start = self.test_start, stop = self.test_stop,
                            include_start = True, include_stop = True)
            else:
                files = get_list_of_files_traditional_model(start = self.test_start, stop = self.test_stop,
                            include_start = True, include_stop = True)
            self.test_dataset = merge(files)

        print('Finished loaded the dataset')
        self.order = order

        if latitude is None:
            self.latitude = self.dataset.latitude.values
        else:
            self.latitude = np.array(latitude)
        print('sets latitude values to be {}'.format(self.latitude))

        if longitude is None:
            self.longitude = self.dataset.longitude.values
        else:
            self.longitude = np.array(longitude)
        print('sets longitude values to be {}'.format(self.longitude))

        if type == 'ar':
            self.variables = ['t2m', 'q', 'r', 'sp']
        else:
            # traditional model
            self.variables = []
        print('Sets enviornmental variables to {}'.format(self.variables))

        # Filled in later by fitting / evaluation.
        self.coeff_matrix = None
        self.evaluate_ds  = None

        self.mse = None
        self.r2 = None
        self.ase = None

        self.mse_train = None
        self.r2_train = None
        self.ase_train = None

        self.num_test_samples = None
        self.num_train_samples = None

        self.transform   = transform
        self.sigmoid     = sigmoid

        if self.transform:
            # Transformed data is fitted without a bias term.
            self.bias = False
            print("Transform is true, forces bias to be false .. ")
        else:
            self.bias = bias

        self.X_train = None
        self.y_train = None
        return