示例#1
0
def saveAnomaly(data, new, compute=True):
    """
    Save the anomaly of a data array to the processeddir.

    The file name is built by generateFileName from data.name and
    data.dataset with processed='anom'. The saved array is named
    data.name + 'Anom' and carries a copy of the attributes of data
    (passed through _delete_some_attributes) plus a 'statistic' entry.

    :param data: the data array to process; needs name, dataset, attrs
        (presumably an xarray.DataArray -- confirm against callers)
    :param new: if True, the file is written even if it already exists
    :param compute: if True, the anomaly is computed with computeAnomaly.
        If False, data is treated as the anomaly already and saved as such.
    """
    filename = generateFileName(data.name, dataset=data.dataset,
                                processed='anom', suffix='nc')
    path = join(processeddir, filename)

    if exists(path) and not new:
        print(f"{data.name} anomaly already computed")
    else:
        print(f"Compute {data.name} anomaly")

        if compute:
            print(f"Compute and save {data.name} anomaly")

            anom = computeAnomaly(data)
        else:
            print(f"Save {data.name} anomaly")
            # Shallow copy instead of an alias: the renaming and the new
            # attributes below must not leak into the caller's object.
            # The underlying values are shared, so nothing large is copied.
            anom = data.copy(deep=False)

        anom.name = ''.join([data.name, 'Anom'])

        anom.attrs = data.attrs.copy()
        anom.attrs['statistic'] = 'Substracted the monthly Mean.'

        anom.attrs = _delete_some_attributes(anom.attrs)

        anom.to_netcdf(path)
示例#2
0
def computeMeanClimatology(data):
    """
    Return the mean climatology of data.

    The climatology is the mean over time of each group of 'time.<period>',
    where the period is the one reported by _get_period. The result is
    cached as a netCDF file in the processeddir: if the file already
    exists it is read and returned, otherwise it is computed and written.

    If the module level flag reference_period is set, only the data from
    1981-01-01 to 2010-12-31 enters the mean. Otherwise the entire time
    series is used.

    :param data: the data array to process; needs name and dataset
    :returns: the mean climatology
    """
    filename = generateFileName(data.name, dataset=data.dataset,
                                processed='meanclim', suffix='nc')
    path = join(processeddir, filename)

    # cached result available: nothing to compute
    if exists(path):
        print(f"- Read {data.name} climatetology")
        return xr.open_dataarray(path)

    print(f"- Compute {data.name} climatetology")
    period = _get_period(data)
    print(f"- Data has {period} period")

    if reference_period:
        source = data.loc['1981-01-01':'2010-12-31']
    else:
        print("Use the entire time series for the Mean Climatology")
        source = data

    meanclim = source.groupby(f'time.{period}').mean(dim="time")
    meanclim.to_netcdf(path)
    return meanclim
示例#3
0
    def save(self):
        """
        Collect the network metrics of this instance in a DataFrame
        (kept as self.data) and write it to a csv-file in the processeddir.

        The file name is 'network_metrics-' followed by the name that
        generateFileName builds from variable, dataset and processed.
        """
        metrics = {
            'global_transitivity': self.global_transitivity,
            'avelocal_transmissivity': self.avglocal_transitivity,
            'fraction_clusters_size_2': self.frac_cluster_size2,
            'fraction_clusters_size_3': self.frac_cluster_size3,
            'fraction_clusters_size_5': self.frac_cluster_size5,
            'fraction_giant_component': self.frac_giant,
            'average_path_length': self.avg_path_length,
            'hamming_distance': self.hamming_distance,
            'corrected_hamming_distance': self.corrected_hamming_distance,
            'threshold': self.threshold_value,
            'edge_density': self.edge_density_value,
        }
        self.data = pd.DataFrame(metrics)

        base_name = generateFileName(self.variable,
                                     self.dataset,
                                     processed=self.processed,
                                     suffix='csv')
        out_name = 'network_metrics-' + base_name

        # TODO: dynamic naming depending on the methods used
        # (placeholder: neither case changes the file name yet)
        if self.threshold is not None or self.edge_density is not None:
            pass

        self.data.to_csv(join(processeddir, out_name))
示例#4
0
    def save(self, extension='', filename=None):
        """
        Write the first three pca components to a csv-file.

        Each column 'pca1', 'pca2', 'pca3' is the matrix product of
        self.EOFarr with the corresponding row of self.components_,
        indexed by self.time. The DataFrame is also kept as self.df.

        :param extension: string appended to self.processed when the file
            name is generated (only used if filename is None)
        :param filename: file name without suffix; if None, the name is
            built by generateFileName. In both cases 'pca-' is prepended.
        """
        time_index = self.time.to_index()

        projections = {}
        for number in (1, 2, 3):
            projections[f'pca{number}'] = pd.Series(
                np.matmul(self.EOFarr, self.components_[number - 1, :]),
                index=time_index)

        self.df = pd.DataFrame(projections)

        if filename is not None:
            filename = '.'.join((filename, 'csv'))
        else:
            filename = generateFileName(self.variable,
                                        self.dataset,
                                        ''.join((self.processed, extension)),
                                        suffix='csv')

        target = join(processeddir, '-'.join(['pca', filename]))
        self.df.to_csv(target)
示例#5
0
    def read_statistic(self, statistic, variable, dataset='', processed=''):
        """
        Read a statistic from a csv-file in the processeddir.

        The file name is '<statistic>-' followed by the name that
        generateFileName builds from variable, dataset and processed.

        :param statistic: the name of the statistic (prefix of the file)
        :param variable: the name of the variable
        :param dataset: the name of the dataset
        :param processed: the postprocessing that was applied
        :returns: the rows between self.startdate and self.enddate
        """
        base_name = generateFileName(variable, dataset,
                                     processed=processed, suffix="csv")
        csv_path = join(processeddir, '-'.join([statistic, base_name]))

        table = pd.read_csv(csv_path, index_col=0, parse_dates=True)
        self._check_dates(table, f"{variable} - {statistic}")

        return table.loc[self.startdate:self.enddate]
示例#6
0
def toProcessedDir(data, new):
    """
    Write the basic data as netCDF file to the processeddir.

    :param data: the data array to save; needs name and dataset
    :param new: if True, the file is written even if it already exists
    """
    target = join(processeddir,
                  generateFileName(data.name, dataset=data.dataset,
                                   suffix='nc'))

    if not exists(target) or new:
        print(f"save {data.name} in post directory")
        data.to_netcdf(target)
        return

    print(f"{data.name} already saved in post directory")
示例#7
0
    def read_netcdf(self, variable, dataset='', processed='', chunks=None):
        """
        wrapper for xarray.open_dataarray that cuts the data to the time
        window and the region stored on this instance.

        Data with processed == 'meanclim' is returned as it is, without
        date check and without cutting.

        :param variable: the name of the variable
        :param dataset: the name of the dataset
        :param processed: the postprocessing that was applied
        :param chunks: same as for xarray.open_dataarray
        :returns: the (cut) data array
        """
        filename = generateFileName(variable,
                                    dataset,
                                    processed=processed,
                                    suffix="nc")
        data = xr.open_dataarray(join(processeddir, filename), chunks=chunks)

        if processed == 'meanclim':
            return data

        # the file name without the '.nc' suffix is the label for the check
        self._check_dates(data, f'{filename[:-3]}')

        # datasets whose latitude is sliced from lat_min to lat_max
        # (NOTE(review): presumably their latitude axis is ascending)
        min_to_max_lat = ('GODAS', 'ERSSTv5', 'ORAS4', 'NODC', 'NCAR',
                          'GFDL-CM3')

        period = slice(self.startdate, self.enddate)

        if dataset == 'ORAP5':
            # region is selected with a mask on the nav_lat / nav_lon
            # coordinates instead of label based slices
            in_period = data.loc[period, :, :]
            inside_region = ((data.nav_lat > self.lat_min) &
                             (data.nav_lat < self.lat_max) &
                             (data.nav_lon > self.lon_min) &
                             (data.nav_lon < self.lon_max))
            return in_period.where(inside_region, drop=True)

        if dataset in min_to_max_lat:
            lat_range = slice(self.lat_min, self.lat_max)
        else:
            # all remaining datasets are sliced from lat_max to lat_min
            lat_range = slice(self.lat_max, self.lat_min)

        lon_range = slice(self.lon_min, self.lon_max)
        return data.loc[period, lat_range, lon_range]
示例#8
0
def saveNormAnomaly(data, new):
    """
    Compute the normed anomaly of a data array and save it to the
    processeddir.

    The file name is built by generateFileName from data.name and
    data.dataset with processed='normanom'. The saved array is named
    data.name + 'NormAnom' and carries a copy of the attributes of data
    (passed through _delete_some_attributes) plus a 'statistic' entry.

    :param data: the data array to process; needs name, dataset, attrs
    :param new: if True, the normed anomaly is recomputed and the file is
        written even if it already exists
    """
    filename = generateFileName(data.name, dataset=data.dataset,
                                processed='normanom', suffix='nc')
    path = join(processeddir, filename)

    if exists(path) and not new:
        print(f"{data.name} normed anomaly already computed")
    else:
        print(f"Compute {data.name} normed anomaly")
        normanom = computeNormAnomaly(data)

        normanom.name = ''.join([data.name, 'NormAnom'])

        normanom.attrs = data.attrs.copy()
        # Adjacent literals instead of a backslash continuation inside the
        # string: the continuation put the indentation of the source line
        # into the attribute that is written to the file.
        normanom.attrs['statistic'] = (
            'Substracted the monthly Mean. '
            'Divided by the Monthly standard deviation')

        normanom.attrs = _delete_some_attributes(normanom.attrs)
        normanom.to_netcdf(path)