Exemplo n.º 1
0
def load_hawaii_3s_csv():
    """Load the Hawaii 3-second irradiance dataset from its raw CSV files.

    Reads every ``*.txt`` file under ``DATASETS_PATH / 'hawaii_3s'`` with
    Dask, rebuilds a timestamp index from the year / day-of-year / HHMM /
    seconds columns, and returns the ghi/dhi/dni columns at a fixed
    3-second frequency, tz-localized to Hawaii time.
    """
    # TODO: Don't actually need Dask for this. Should remove the dependency.
    import dask.dataframe as ddf
    from dask.diagnostics import ProgressBar

    site = Location(21.31034, -158.08675, tz='HST', altitude=11)
    frame = ddf.read_csv(DATASETS_PATH / 'hawaii_3s' / '*.txt', header=None)
    frame.columns = ['S', 'Y', 'DOY', 'HHMM', 'ghi', 'dhi', 'dni']

    log.info(
        "Reading Hawaii 3-second irradiance CSV files (takes up to 10 minutes)..."
    )

    def _partition_datetimes(part):
        # Join year, day-of-year, HHMM and seconds into one string per row,
        # then parse the whole partition in a single pd.to_datetime call.
        joined = part[['Y', 'DOY', 'HHMM', 'S']].astype(str).apply(
            ' '.join, axis=1)
        return pd.to_datetime(joined, format='%Y %j %H%M %S')

    with ProgressBar():
        stamps = frame.map_partitions(
            _partition_datetimes,
            meta=pd.Series([pd.to_datetime('2019-01-01 00:00:00')]))
        data = frame.set_index(stamps).compute().asfreq('3S')
    data.index = data.index.tz_localize(site.tz)
    return IrradianceDataset(data[['ghi', 'dhi', 'dni']], location=site)
Exemplo n.º 2
0
def load_katherine_5s():
    """Load the Katherine 5-second GHI/DNI irradiance dataset.

    Returns an IrradianceDataset with lowercase ``ghi``/``dni`` columns at
    a fixed 5-second frequency, tz-localized to Darwin time.
    """
    log.info("Reading Katherine 5-second irradiance file...")
    loc = Location(-14.4747,
                   132.3050,
                   altitude=108.0,
                   tz="Australia/Darwin",
                   name='Katherine')
    # Idiom fix: the original wrapped the single read_csv result in
    # pd.concat([...]), which only produced a redundant copy.
    df = pd.read_csv(_find_prefixed_dat_files("KTR"),
                     index_col='Time',
                     parse_dates=True)[['GHI', 'DNI']].asfreq('5S')
    df.index = df.index.tz_localize(loc.tz)
    df.columns = ['ghi', 'dni']
    return IrradianceDataset(df, location=loc)
Exemplo n.º 3
0
def load_darwin_5s():
    """Load the Darwin 5-second GHI/DNI irradiance dataset.

    Returns an IrradianceDataset with lowercase ``ghi``/``dni`` columns at
    a fixed 5-second frequency, tz-localized to Darwin time.
    """
    log.info("Reading Darwin 5-second irradiance file...")
    loc = Location(-12.4417,
                   130.9215,
                   altitude=10.0,
                   tz="Australia/Darwin",
                   name='Darwin')
    # Idiom fix: pd.concat([...]) around a single read_csv was redundant.
    df = pd.read_csv(_find_prefixed_dat_files("DRW"),
                     index_col='Time',
                     parse_dates=True)[['GHI', 'DNI']].asfreq('5S')
    df.index = df.index.tz_localize(loc.tz)
    # BUG FIX: two columns are selected above, but the original assigned a
    # single name ['ghi'], which raises a length-mismatch ValueError.
    # Rename both, matching load_katherine_5s / load_alice_5s.
    df.columns = ['ghi', 'dni']
    return IrradianceDataset(df, location=loc)
Exemplo n.º 4
0
def load_alice_5s():
    """Load the Alice Springs 5-second GHI/DNI irradiance dataset.

    Returns an IrradianceDataset with lowercase ``ghi``/``dni`` columns at
    a fixed 5-second frequency, tz-localized to Darwin time.
    """
    log.info("Reading Alice Springs 5-second irradiance file...")
    site = Location(-23.7624,
                    133.8754,
                    altitude=580.0,
                    tz="Australia/Darwin",
                    name='Alice Springs')
    raw = pd.read_csv(_find_prefixed_dat_files("ASP"),
                      index_col='Time',
                      parse_dates=True)
    data = pd.concat([raw])[['GHI', 'DNI']].asfreq('5S')
    data.index = data.index.tz_localize(site.tz)
    data.columns = ['ghi', 'dni']
    return IrradianceDataset(data, location=site)
Exemplo n.º 5
0
def load_alice_5m():
    """Load the Alice Springs 5-minute weather-station irradiance dataset.

    Returns an IrradianceDataset with ``ghi`` (global horizontal) and
    ``dhi`` (diffuse horizontal) columns at a fixed 5-minute frequency,
    tz-localized to Darwin time.
    """
    log.info("Reading Alice 5-minute irradiance data...")
    loc = Location(-23.7624,
                   133.8754,
                   altitude=580.0,
                   tz="Australia/Darwin",
                   name='Alice Springs')
    df = pd.read_csv(
        DATASETS_PATH / '101-Site_DKA-WeatherStation.csv.gz',
        index_col='Timestamp',
        parse_dates=True)[[
            'DKA.WeatherStation - Global Horizontal Radiation (W/m²)',
            'DKA.WeatherStation - Diffuse Horizontal Radiation (W/m²)'
        ]]
    df.index = df.index.tz_localize(loc.tz)
    # BUG FIX: the second source column is *Diffuse* Horizontal Radiation,
    # so it must be labelled 'dhi', not 'dni' (direct normal) as before.
    df.columns = ['ghi', 'dhi']
    df = df.asfreq('5T')
    return IrradianceDataset(df, location=loc)
Exemplo n.º 6
0
def load_hawaii_3s_hdf():
    """Load the preprocessed Hawaii 3-second irradiance data from HDF5."""
    log.info("Reading Hawaii 3-second irradiance HDF file")
    site = Location(21.31034, -158.08675, tz='HST', altitude=11)
    frame = pd.read_hdf(DATASETS_PATH / 'hawaii_3s.h5', key='data')
    return IrradianceDataset(frame, location=site)
Exemplo n.º 7
0
    def synthesize(self,
                   target_irradiance,
                   chunk_size='D',
                   feature_space=None,
                   sampling_method='weighted'):
        """Synthesize high-resolution irradiance over the target's time span.

        Bootstraps chunks of this object's high-resolution clearness-index
        data (``self.source``) onto the period covered by
        ``target_irradiance``, choosing source chunks whose features (at the
        target's coarser resolution) match the target's features.

        Parameters
        ----------
        target_irradiance : object with ``k_star.ghi`` and ``location``
            Low-resolution series whose span and timezone define the output.
        chunk_size : str, default 'D'
            Pandas offset alias giving the bootstrap chunk length.
        feature_space : callable, optional
            ``feature_space(series, chunk_size) -> features``. Defaults to
            the module-level ``default_feature_space``.
        sampling_method : {'weighted', 'nearest'}
            Chunk-pool selection strategy.

        Returns
        -------
        IrradianceDataset
            Synthesized data at the source's native frequency, assigned the
            target's location.

        Raises
        ------
        ValueError
            If ``sampling_method`` is neither 'weighted' nor 'nearest'.
        """
        if feature_space is None:
            feature_space = default_feature_space

        # Clear-sky index (k*) of GHI for both series.
        target = target_irradiance.k_star.ghi
        # target.index = target.index.tz_localize(None)

        source = self.source.k_star.ghi
        # source.index = source.index.tz_localize(None)

        # Output index: the target's full span, but at the source's (finer)
        # frequency and in the target's timezone.
        out_ix = pd.date_range(target.index[0],
                               target.index[-1],
                               freq=source.index.freq,
                               tz=target.index.tz)

        log.info("Generating feature space")
        # Source features are computed after downsampling the source to the
        # target's frequency so both feature sets are comparable.
        target_features = feature_space(target, chunk_size)
        source_features = feature_space(
            source.resample(target.index.freq).mean(), chunk_size)

        if sampling_method == 'weighted':
            selector = ts_bootstrap.WeightedRandomPoolSelector(
                source_features, target_features)
        elif sampling_method == 'nearest':
            # k=1: always pick the single nearest source chunk.
            selector = ts_bootstrap.KNNPoolSelector(source_features,
                                                    target_features,
                                                    k=1)
        else:
            raise ValueError(
                "Sampling method must be one of 'weighted' or 'nearest'.")

        log.info("Generating high res clearness index samples")

        # Drop all of these columns from the samples: sun-position ('sp_'),
        # clear-sky ('clear_'), clearness-index ('k_') and raw irradiance
        # columns are excluded before bootstrapping.
        sp_cols = list(filter(lambda c: c[:3] == 'sp_', self.source.columns))
        clear_cols = list(
            filter(lambda c: c[:6] == 'clear_', self.source.columns))
        k_cols = list(filter(lambda c: c[:2] == 'k_', self.source.columns))
        irrad_cols = list(
            filter(lambda c: c in ('ghi', 'dhi', 'dni'), self.source.columns))
        drop_cols = sp_cols + clear_cols + irrad_cols + k_cols

        # Re-attach the clearness-index columns under a 'k_' prefix so they
        # are the quantities being bootstrapped.
        k_star = self.source.k_star
        k_star.columns = [f'k_{col}' for col in k_star.columns]

        src = self.source.drop(columns=drop_cols)
        if len(src.columns):
            src = pd.concat([src, k_star], axis=1)
        else:
            # Nothing survived the drop; bootstrap the k* columns alone.
            src = k_star

        out = ts_bootstrap.ts_bootstrap(src,
                                        out_ix,
                                        chunk_size=chunk_size,
                                        pool_selector=selector,
                                        stitch_boundaries=False)

        # Restore the source's native frequency on the synthesized output.
        out = out.asfreq(self.source.index.freq)
        # out.index = out.index.tz_localize(target_irradiance.location.tz)

        return IrradianceDataset(out, location=target_irradiance.location)