Example #1
def timeline_trend_count_SA():
    msg_folder = cnst.GRIDSAT
    fname = 'aggs/gridsat_WA_-65_monthly_count_-40base_1000km2.nc'
    fname2 = 'aggs/gridsat_WA_-40_monthly_count_-40base_1000km2.nc'


    da = xr.open_dataarray(msg_folder + fname)
    da2 = xr.open_dataarray(msg_folder + fname2)
    #[25,33,-28,-10]  , West[15,25,-26,-18]
    da = da.sel(lat=slice(-25,-18), lon=slice(18, 22))# (lat=slice(-28,-10), lon=slice(25, 33))
    da2 = da2.sel(lat=slice(-25,-18), lon=slice(18, 22))  #[25,33,-28,-10]
    #da=da.sel(lat=slice(5,10))
    #da[da==0]=np.nan
    mean = da.mean(dim=['lat', 'lon'])
    mean2 = da2.mean(dim=['lat', 'lon'])
    #mean = mean[(mean['time.month']==8)]
    f = plt.figure(figsize=(10, 6))
    for i in [12, 1]:
        bla = mean[(mean['time.month'] == i)]
        bla.plot(label=str(i), marker='o')
    plt.title('Average number of pixels <= -65C, SouthA 18-25S, 18-22E')
    plt.legend()
    f = plt.figure(figsize=(10, 6))
    for i in [12, 1]:
        bla2 = mean2[(mean2['time.month'] == i)]
        bla2.plot(label=str(i), marker='o')
    plt.title('Average number of pixels <= -40C, SouthA 18-25S, 18-22E')


    plt.legend()
Example #2
def run(params):
    start_time = datetime.now()

    bin_width, filter_bandwidth, theta, shift, signal_field = params

    # Get file paths
    signal_dir = '/scratch/pkittiwi/fg1p/signal_map/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    output_dir = '/scratch/pkittiwi/fg1p/stats_semi/signal/bin{:.2f}/' \
        'fbw{:.2f}/theta{:.1f}/shift{:d}' \
        .format(bin_width, filter_bandwidth, theta, shift)
    signal_file = '{:s}/signal_map_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}.nc'\
        .format(signal_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    output_file = '{:s}/stats_semi_signal_bin{:.2f}_fbw{:.2f}_' \
        'theta{:.1f}_shift{:d}_{:03d}.nc' \
        .format(output_dir, bin_width, filter_bandwidth,
                theta, shift, signal_field)
    mask_file = '/scratch/pkittiwi/fg1p/hera331_fov_mask.nc'

    # Load data to memory and align coordinates
    with xr.open_dataarray(signal_file) as da:
        signal = da.load()
    with xr.open_dataarray(mask_file) as da:
        mask = da.load()
    # Load one noise file to get coordinates.
    noise = xr.open_dataarray(
        '/scratch/pkittiwi/fg1p/noise_map/bin0.08/fbw8.00/theta90.0/shift0/'
        'noise_map_bin0.08_fbw8.00_theta90.0_shift0_333.nc'
    )
    for key, values in noise.coords.items():
        signal.coords[key] = values
        mask.coords[key] = values
    signal, noise, mask = xr.align(signal, noise, mask)

    # Mask observation
    signal = signal.where(mask == 1)

    # Calculate statistic
    out = get_stats(signal)
    out.attrs = {'bin_width': bin_width, 'filter_bandwidth': filter_bandwidth,
                 'theta': theta, 'shift': shift}

    os.makedirs(output_dir, exist_ok=True)
    out.to_netcdf(output_file)

    out.close()

    print('Finish. signal_file = {:s}. output_file = {:s}. '
          'Time spent {:.5f} sec.'
          .format(signal_file, output_file,
                  (datetime.now() - start_time).total_seconds()))
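# A minimal, self-contained sketch of the load-align-mask pattern used in run()
# above; the file names passed in are placeholders, not paths from the original
# script, and get_stats is replaced here by simple mean/std statistics.
import xarray as xr

def masked_signal_stats(signal_file, mask_file):
    # Load both arrays into memory inside context managers so the files close.
    with xr.open_dataarray(signal_file) as da:
        signal = da.load()
    with xr.open_dataarray(mask_file) as da:
        mask = da.load()
    # Align on shared coordinates, then keep only pixels inside the mask.
    signal, mask = xr.align(signal, mask)
    masked = signal.where(mask == 1)
    return float(masked.mean()), float(masked.std())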
Example #3
    def __init__(self, instrument, ref_pb=None):
        """

        Parameters
        ----------
        :param instrument : Instrument
            Instrument configuration
        :param ref_pb : str, optional
            name of the reference passband
        """
        super().__init__(instrument, ref_pb)
        I = self.instrument

        self._spectra = sp = pd.read_hdf(resource_filename(__name__, join("data", "spectra.h5")), 'Z0')
        self._tr_table = trt = xa.open_dataarray(resource_filename(__name__, join("data", "transmission.nc")))
        self._tr_mean = trm = trt.mean(['airmass', 'pwv'])
        self.extinction = interp1d(trm.wavelength, trm, bounds_error=False, fill_value=0.0)
        self.wl = wl = sp.index.values
        self.lte = lte = sp.columns.values
        self._apply_extinction = True

        # Dataframe indices
        # -----------------
        self.ipb = pd.Index(self.instrument.pb_names, name='passband')
        self.iteff = pd.Index(lte, name='teff')

        # Per-passband fluxes
        # -------------------
        self._compute_relative_flux_tables(0, ref_pb)
Example #4
def saveDailyBlobs():
    """
    Converts hourly centre-point convective-core files to daily netcdf files so they can be saved with LSTA daily data
    :return:
    """

    msgfile = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc'
    msg = xr.open_dataarray(msgfile)

    # def first_nozero(array_like, axis):
    #     array_like[array_like<16]= array_like[array_like<16]+24
    #     return np.nanmin(array_like,axis=axis)

    msg.values[msg.values > 75] = np.nan
    msg.values[msg.values == 0] = np.nan

    for m in msg:
        if m['time.hour'].values >= 16:
            m.values[m > 0] = m['time.hour'].values
        else:
            m.values[m > 0] = m['time.hour'].values+24

    ### this is useful, it removes all pixels which got rain twice on a day
    md = msg.resample('24H', base=16, dim='time', skipna=True, how='min')

    md = md[(md['time.month'] >=6) & (md['time.month'] <=9)]

    md.values[md.values>23] = md.values[md.values>23]-24

    md.to_netcdf('/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant_daily.nc')
Example #5
 def setUp(self):
     file = os.path.join(BASE_PATH, 'model', 'GFS_Global_0p25deg_20161219_0600.nc')
     ds = SpatialDataset(NetCDFHandler(file),)
     self.array = xr.open_dataarray(file)
     self.grid = ds.get_grid(
         'Maximum_temperature_height_above_ground_Mixed_intervals_Maximum',
         data_array=self.array)
def composite(h):
    pool = multiprocessing.Pool(processes=8)


    file = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc'

    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] == 17) & (msg['time.minute'] == 0) & (
        msg['time.year'] >= 2006) & (msg['time.year'] <= 2009) & (msg['time.month'] >= 6) ]

    msg = msg.sel(lat=slice(10.5,17.5), lon=slice(-9.5,9.5))

    res = pool.map(file_loop, msg)
    pool.close()

    # res = []
    # for m in msg[0:50]:
    #     r = file_loop(m)
    #     res.append(r)



    res = [x for x in res if x is not None]

    scales = res

    scales = [item for sublist in scales for item in sublist]  # flatten list of lists

    scales = np.concatenate(scales)


    return scales
Example #7
 def test_save_saves_also_grid(self):
     self.array.pp.grid = self.grid
     self.array.pp.save('test.nc')
     opened_array = xr.open_dataarray('test.nc')
     grid_attrs = {attr[7:]: opened_array.attrs[attr]
                   for attr in opened_array.attrs if attr[:7] == 'ppgrid_'}
     opened_grid = GridBuilder(grid_attrs).build_grid()
     self.assertEqual(self.grid, opened_grid)
Example #8
def run_rebuild_iter_1mhz(args):
    process = multiprocessing.current_process().pid
    inf, ouf = args
    print('pid: {:d} ; input file: {:s} ; output file: {:s}'
          .format(process, inf, ouf))
    w_in = xr.open_dataarray(inf)
    w_out = rebuild_iter_1mhz(w_in)
    w_out.to_netcdf(ouf)
Example #9
File: xspline.py  Project: nbren12/gnl
    def load(cls, name, **kwargs):
        Bx = xr.open_dataarray(name, **kwargs)

        spl = cls(knots=Bx.knots, order=Bx.order,
                  bc=Bx.bc, dim=Bx.dim)

        spl._coef = Bx

        return spl
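    # Hypothetical counterpart to load() above, not part of the original
    # xspline.py: it assumes self._coef is the coefficient DataArray and that
    # knots, order, bc and dim are already stored on it, so load() can read
    # them back.
    def save(self, name, **kwargs):
        self._coef.to_netcdf(name, **kwargs)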
Example #10
 def test_load_removes_grid_attrs(self):
     self.array.pp.grid = self.grid
     self.array.pp.save('test.nc')
     non_gridded_array = xr.open_dataarray('test.nc')
     non_gridded_attrs = [attr for attr in non_gridded_array.attrs
                          if attr[:7] == 'ppgrid_']
     self.assertTrue(non_gridded_attrs)
     gridded_array = xr.DataArray.pp.load('test.nc')
     gridded_attrs = [attr for attr in gridded_array.attrs
                      if attr[:7] == 'ppgrid_']
     self.assertFalse(gridded_attrs)
Example #11
def regrid_simpler(cmorph):

    dummy = xr.open_dataset(constants.LSTA_TESTFILE)
    cm = xr.open_dataarray(cmorph)

    out = cmorph.replace('WA_', 'WA_onLSTA_')

    cm_on_lst = dummy.salem.transform(cm)
    enc = {'pr': {'complevel': 5, 'zlib': True}}

    cm_on_lst.to_netcdf(out, encoding=enc, format='NETCDF4')
Example #12
def run(binnum):
    process = current_process().pid
    print('... P{:d}: applying filter {:s}'
          .format(process, filter_files[binnum].split('/')[-1]))

    filter_da = xr.open_dataarray(filter_files[binnum])
    filter_array = filter_da.values
    data_channels = filter_da.attrs['frequency_channels']
    filter_bandwidth = filter_da.attrs['filter_bandwidth']

    # Figure out FFT and filter normalization
    # FFT normalization factor
    x = filter_da.attrs['x']
    y = filter_da.attrs['y']
    f = filter_da.attrs['f']
    dx = x[1] - x[0]
    dy = y[1] - y[0]
    df = f[1] - f[0]
    u = filter_da.attrs['u']
    v = filter_da.attrs['v']
    e = filter_da.attrs['e']
    du = u[1] - u[0]
    dv = v[1] - v[0]
    de = e[1] - e[0]
    fft_norm = dx * dy * df
    ifft_norm = du * dv * de * filter_array.size
    # Filter normalization factor
    filter_volume = np.sum(filter_array.size * du * dv * de)
    filter_integral = np.sum(np.abs(filter_array) ** 2 * du * dv * de)
    filter_norm = np.sqrt(filter_volume / filter_integral)
    # Apply filter
    filtered_data = apply_filter(
        data_array[data_channels], filter_array,
        fft_multiplier=fft_norm, ifft_multiplier=ifft_norm,
        output_multiplier=filter_norm, apply_window_func=args.apply_window_func,
        invert_filter=False
    ).real

    out_da_attrs = filter_da.attrs
    out_da_attrs.pop('x')
    out_da_attrs.pop('y')
    out_da_attrs.pop('f')
    out_da_attrs['kx'] = filter_da.kx.values
    out_da_attrs['ky'] = filter_da.ky.values
    out_da_attrs['kz'] = filter_da.kz.values
    out_da = xr.DataArray(
        filtered_data, dims=['f', 'y', 'x'], coords={'f': f, 'y': y, 'x': x},
        attrs=out_da_attrs
    )
    outfile = '{:s}/signal_cube_filtered_fbw{:.2f}MHz_{:03d}_bin{:03d}.nc'\
        .format(args.output_directory, filter_bandwidth / 1e6,
                field_num, binnum)
    out_da.to_netcdf(outfile)
Example #13
def composite():
    pool = multiprocessing.Pool(processes=4)

    file = constants.MCS_POINTS_DOM

    msg = xr.open_dataarray(file)
    msg = msg[  (msg['time.minute'] == 0) & (
        msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6)] #(msg['time.hour'] >= 17) &

    msg = msg.sel(lat=slice(10.2, 17), lon=slice(-9.5, 9.5))

    res = pool.map(file_loop, msg)
    pool.close()

    # for m in msg[0:10]:
    #     file_loop(m)
    #
    # return

    res = [x for x in res if x is not None]

    cell = []
    surface = []
    hour = []

    for r in res:
        cell.append(r[0])
        surface.append(r[1])
        hour.append(r[2])
    cell = [item for sublist in cell for item in sublist]  # flatten list of lists
    surface = [item for sublist in surface for item in sublist]  # flatten list of lists
    hour = [item for sublist in hour for item in sublist]  # flatten list of lists

    cell = np.array(cell, dtype=float)
    surface = np.array(surface, dtype=float)
    hour = np.array(hour, dtype=float)

    # Build the finite-surface mask once so cell, surface and hour stay aligned.
    finite = np.isfinite(surface)
    cell = cell[finite]
    hour = hour[finite]
    surface = surface[finite]

    dic = {'cell': cell,
           'surface': surface,
           'hour' : hour }

    pkl.dump(dic,
             open("/users/global/cornkle/figs/LSTA-bullshit/scales/new/dominant_scales_save/scatter_scales.p", "wb"))
    print('Successfully written scatter_scales save file')
Example #14
def blobs():
    #file = '/users/global/cornkle/MCSfiles/blob_map_30km_-67_JJAS_points.nc'
    file = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc'
    fpath = '/users/global/cornkle/data/pythonWorkspace/proj_CEH/topo/gtopo_1min_afr.nc'
    msg = xr.open_dataarray(file)
    msg = msg.sel(lat=slice(10, 20), lon=slice(-10, 10))
    msg = msg[ (msg['time.month'] >= 6 )  ]
    msg = msg.where(msg > 6)
    msg.values[msg.values>6] = 1
    msg = msg.sum(dim='time')


    map = msg.salem.get_map(cmap='viridis')
    top = xr.open_dataarray(fpath)
    f = plt.figure()
    z = map.set_topography(top, relief_factor=1.4)
    map.set_contour(z, levels=(200,400,600,800), cmap='Reds' )

    map.set_data(msg)
    map.visualize(title='Blobs and topo')

    msg = msg.sum(dim='lon')
    f = plt.figure()
    msg.plot()
def get_previous_hours_msg(date, ehour, refhour):

    # tdic = {18 : ('36 hours', '15 hours'),
    #         19 : ('37 hours', '16 hours'),
    #         20: ('38 hours', '17 hours'),
    #         21: ('39 hours', '18 hours'),
    #         22: ('40 hours', '19 hours'),
    #         23: ('41 hours', '20 hours'),
    #         0: ('42 hours', '21 hours'),
    #         3: ('45 hours', '24 hours'),
    #         6: ('48 hours', '27 hours')}
    # before = pd.Timedelta(tdic[date.hour][0])
    # before2 = pd.Timedelta(tdic[date.hour][1])
    date = date.replace(hour=refhour)

    if ehour > 0:
        edate = date + pd.Timedelta(str(ehour) + ' hours')
    else:
        edate = date - pd.Timedelta(str(np.abs(ehour)) + ' hours')
        #edate = edate.replace(hour=ehour)

    t1 = edate - pd.Timedelta('1 hours')
    t2 = edate + pd.Timedelta('1 hours')

    file = cnst.MCS_15K# MCS_15K #_POINTS_DOM
    msg = xr.open_dataarray(file)
    try:
        msg = msg.sel(time=slice(t1.strftime("%Y-%m-%dT%H"), t2.strftime("%Y-%m-%dT%H")))
    except OverflowError:
        return None

    #print(prev_time.strftime("%Y-%m-%dT%H"), date.strftime("%Y-%m-%dT%H"))
    pos = np.where((msg.values <= -40) ) #(msg.values >= 5) & (msg.values < 65)) # #

    out = np.zeros_like(msg)
    out[pos] = 1
    out = np.sum(out, axis=0)
    out[out>0]=1
    # if np.sum(out>1) != 0:
    #     'Stop!!!'
    #     pdb.set_trace()

    msg = msg.sum(axis=0)*0
    xout = msg.copy()
    xout.name = 'probs'
    xout.values = out

    return xout
def create_ancils():


    dummy = xr.open_dataarray(dummy_grid)

    ds = xr.Dataset(attrs=dummy.attrs)
    dummy = dummy.isel(grid_longitude_t=slice(box[0], box[1]), grid_latitude_t=slice(box[2], box[3]))

    files = glob.glob(ancils + '*.nc')
    for f in files:
        varsdat = xr.open_dataset(f, decode_times=False)

        if 'pseudo' in varsdat.keys():

            varsdat = varsdat.isel(rlon=slice(box[0], box[1]), rlat=slice(box[2], box[3]))

            data = varsdat['field1391'].values[0, 0, :,:].squeeze() # time, plant type, y, x

            if 'past' in f:
                var = 'veg_past'
            elif 'current' in f:
                var = 'veg_current'
            else:
                print('Ancils not found')
                return
            ds[var] = xr.DataArray(data, coords={'false_latitude': dummy.grid_latitude_t.values,
                                          'false_longitude': dummy.grid_longitude_t.values,
                                          'true_latitude': (
                                          ['false_latitude', 'false_longitude'], dummy.latitude_t.values),
                                          'true_longitude': (
                                          ['false_latitude', 'false_longitude'], dummy.longitude_t.values)},
                                  dims=['false_latitude', 'false_longitude'])

        if 'ht' in varsdat.keys():

            varsdat = varsdat.isel(rlon=slice(box[0], box[1]), rlat=slice(box[2], box[3]))
            data = varsdat['ht'].values[0, 0, :, :].squeeze()  # time, plant type, y, x

            ds['topo'] = xr.DataArray(data, coords={'false_latitude': dummy.grid_latitude_t.values,
                                                 'false_longitude': dummy.grid_longitude_t.values,
                                                 'true_latitude': (
                                                     ['false_latitude', 'false_longitude'], dummy.latitude_t.values),
                                                 'true_longitude': (
                                                     ['false_latitude', 'false_longitude'], dummy.longitude_t.values)},
                                   dims=['false_latitude', 'false_longitude'])

    ds.to_netcdf(out+'ancils/ancils_vera.nc')
Example #17
def t_trend_slice():
    #file = '/users/global/cornkle/data/ERA-I monthly/ERA-WA-Monthly-2mTemp.nc'
    file = '/localscratch/wllf030/cornkle/ERA-I/monthly/old/ERA-Int-Monthly-2mTemp.nc'

    fpath = '/users/global/cornkle/figs/CLOVER/months/'


    dam = xr.open_dataarray(file)
    lower = 9
    higher = 11

    da = dam[(dam['time.month']>=lower) & (dam['time.month']<=higher)]
    da = da.sel(longitude=slice(-18,51), latitude=slice(36, -37))
    da = da.groupby('time.year').mean(axis=0)

    lons = da.longitude
    lats = np.flip(da.latitude.values, axis=0)

    # define a function to compute a linear trend of a timeseries
    def linear_trend(x):

        #pf = np.polyfit(np.arange(len(x)), x, 1)
        pf, slope, intercept, p, ind = mk.test(np.arange(len(x)), x.squeeze().values, eps=0.001, alpha=0.01, Ha='upordown')

        # we need to return a dataarray or else xarray's groupby won't be happy

        if ind == 1:
            issig = slope
        else:
            issig = np.nan

        return xr.DataArray(issig, )

    # stack lat and lon into a single dimension called allpoints
    stacked = da.stack(allpoints=['latitude','longitude'])
    # apply the function over allpoints to calculate the trend at each point
    trend = stacked.groupby('allpoints').apply(linear_trend)
    # unstack back to lat lon coordinates
    trend_unstacked = trend.unstack('allpoints')

    trend_unstacked = trend_unstacked*10.  # convert to warming per decade
    da2 = xr.DataArray(trend_unstacked, coords=[lats, lons], dims=['latitude', 'longitude'])

    fp = fpath + 'ttrend_'+str(lower).zfill(2)+'-'+str(higher).zfill(2)+'.png'

    up.quick_map_salem(da2, vmin=-0.4, vmax=0.4, cmap='RdBu_r', save=fp)  #

    plt.close('all')
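# A simpler sketch of the same stack -> groupby('allpoints') -> unstack pattern,
# using the plain np.polyfit slope that is commented out above instead of the
# Mann-Kendall test; `da` stands for the (time, latitude, longitude) array
# selected in t_trend_slice().
import numpy as np
import xarray as xr

def polyfit_trend_map(da):
    def linear_trend(x):
        slope = np.polyfit(np.arange(len(x)), x.values, 1)[0]
        return xr.DataArray(slope)

    stacked = da.stack(allpoints=['latitude', 'longitude'])
    trend = stacked.groupby('allpoints').apply(linear_trend)
    return trend.unstack('allpoints')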
Example #18
def timeline_trend_count():
    msg_folder = cnst.GRIDSAT
    fname = 'aggs/gridsat_WA_-70_monthly_count_-40base_1000km2.nc'

    da = xr.open_dataarray(msg_folder + fname)
    da = da.sel(lat=slice(4.5,8), lon=slice(-10, 15))
    #da=da.sel(lat=slice(5,10))
    #da[da==0]=np.nan
    mean = da.mean(dim=['lat', 'lon'])
    #mean = mean[(mean['time.month']==8)]
    f= plt.figure(figsize=(10,6))
    for i in range(3,6):
        bla = mean[(mean['time.month'] == i)]
        bla.plot(label=str(i), marker='o')
    plt.title('Average number of pixels <= -70C, 4.5-8N')
    plt.legend()
Example #19
def timeline_trend_mean():
    msg_folder = '/users/global/cornkle/data/OBS/gridsat/gridsat_netcdf/'
    fname = 'gridsat_WA_-70_monthly.nc'

    da = xr.open_dataarray(msg_folder + fname)
    da=da.sel(lat=slice(5,7), lon=slice(-17,20))
    da[da==0]=np.nan
    mean = da.mean(dim=['lat', 'lon'])
    #mean = mean[(mean['time.month']==8)]
    f= plt.figure(figsize=(10,6))
    for i in range(4,6):
        bla = mean[(mean['time.month'] == i)]
        bla.plot(label=str(i), marker='o')
    plt.title('Monthly mean temperature of pixels <= -70C, 5-7N')
    plt.legend()
    plt.ylim(-78,-71)
def run(fieldnum):
    unmask_cube = fits.getdata(
        '/data6/piyanat/projects/fg1p/mc_cubes_heraxx/hera331/'
        'hera331_mc_cube_p{:03d}.fits'.format(fieldnum)
    )
    input_dir = '/data6/piyanat/projects/fg1p/masked_cubes_heraxx/m1/' \
                '{:d}MHz/p{:03d}'.format(args.bw, fieldnum)
    s_arr0 = np.empty((4, nbins, 2, 50))
    s_arr1 = np.empty((4, nbins, 2, 50))
    s_f0 = np.empty(nbins)
    for binnum in range(nbins):
        data_da = xr.open_dataarray(
            '{:s}/masked_cube_hera331_p{:03d}_m1_bw{:d}MHz_bin{:02d}_fpad.nc'
            .format(input_dir, fieldnum, args.bw, binnum)
        )

        fov_window = get_fov_window(data_da)
        f_window = get_f_window(data_da)
        full_mask = fov_window[None, :, :] * f_window[:, None, None]
        f_mask = np.ones_like(full_mask, dtype=bool) * f_window[:, None, None]
        fov_mask = np.ones_like(full_mask, dtype=bool) * fov_window[None, :, :]

        s_arr1[0, binnum] = cal_pdf(data_da.values.ravel())
        s_arr1[1, binnum] = cal_pdf(data_da.values[f_mask].ravel())
        s_arr1[2, binnum] = cal_pdf(data_da.values[fov_mask].ravel())
        s_arr1[3, binnum] = cal_pdf(data_da.values[full_mask].ravel())

        ch_cut = slice(0 + (binnum * nf), (nf * 2) + (binnum * nf))
        unmask_data = unmask_cube[::-1][ch_cut][::-1]
        s_arr0[0, binnum] = cal_pdf(unmask_data.ravel())
        s_arr0[1, binnum] = cal_pdf(unmask_data[f_mask].ravel())
        s_arr0[2, binnum] = cal_pdf(unmask_data[fov_mask].ravel())
        s_arr0[3, binnum] = cal_pdf(unmask_data[full_mask].ravel())

        s_f0[binnum] = data_da.f0.values

    s_ds = xr.Dataset(
        {'original': (['cut', 'f0', 'val', 'val_bin'], s_arr0),
         'fg_masked': (['cut', 'f0', 'val', 'val_bin'], s_arr1)},
        coords={'cut': np.array(['none', 'freq', 'fov', 'all']),
                'f0': s_f0,
                'val': ['pdf', 'bin_center'],
                'val_bin': np.arange(50)}
    )
    s_ds.to_netcdf('/data6/piyanat/projects/fg1p/stats/'
                   '{:d}MHz/pdf_hera331_masked_cube_p{:03d}_bw{:d}MHz.nc'
                   .format(args.bw, fieldnum, args.bw))
Example #21
def get_previous_hours(date):

    tdic = {18 : ('36 hours', '15 hours'),
            19 : ('37 hours', '16 hours'),
            20: ('38 hours', '17 hours'),
            21: ('39 hours', '18 hours'),
            22: ('40 hours', '19 hours'),
            23: ('41 hours', '20 hours'),
            0: ('42 hours', '21 hours'),
            3: ('45 hours', '24 hours'),
            6: ('48 hours', '27 hours')}
    before = pd.Timedelta(tdic[date.hour][0])
    before2 = pd.Timedelta(tdic[date.hour][1])




    #before2 = pd.Timedelta('15 minutes')

    t1 = date - before
    t2 = date - before2

    file = constants.MCS_15K# MCS_15K #_POINTS_DOM
    msg = xr.open_dataarray(file)
    try:
        msg = msg.sel(time=slice(t1.strftime("%Y-%m-%dT%H"), t2.strftime("%Y-%m-%dT%H")))
    except OverflowError:
        return None

    #print(prev_time.strftime("%Y-%m-%dT%H"), date.strftime("%Y-%m-%dT%H"))
    pos = np.where((msg.values <= -70) ) #(msg.values >= 5) & (msg.values < 65)) # #

    out = np.zeros_like(msg)
    out[pos] = 1
    out = np.sum(out, axis=0)
    out[out>0]=1
    if np.sum(out > 1) != 0:
        print('Stop!!!')  # sanity check: out should only contain 0s and 1s
        pdb.set_trace()

    msg = msg.sum(axis=0)*0
    xout = msg.copy()
    xout.name = 'probs'
    xout.values = out

    return xout
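# The zeros_like / np.where / sum sequence above can be collapsed into plain
# xarray operations; a sketch of the equivalent result (up to dtype), not the
# author's original code:
def threshold_probs(msg, threshold=-70):
    # 1 where any time step in the slice is at or below the threshold, else 0.
    probs = (msg <= threshold).any(dim='time').astype(int)
    probs.name = 'probs'
    return probs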
Example #22
def composite(h):
    #pool = multiprocessing.Pool(processes=8)
    file = constants.MCS_CENTRE70 #MCS_POINTS_DOM

    hour = h

    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] >= 18 ) & (msg['time.hour'] <= 21 )  & (msg['time.minute'] == 0) & (
        msg['time.year'] >= 2008) & (msg['time.year'] <= 2010) & (msg['time.month'] >=6) ]

    msg = msg.sel(lat=slice(10.9,19.5), lon=slice(-9.8,9.8))

    dic = u_parallelise.run_arrays(7,file_loop,msg[0:50],['ano', 'regional', 'cnt',  'prob', 'pcnt']) #'rano', 'rregional', 'rcnt',

    for k in dic.keys():
       dic[k] = np.nansum(dic[k], axis=0)

    pkl.dump(dic, open("/users/global/cornkle/figs/LSTA-bullshit/corrected_LSTA/system_scale/composite_backtrack_"+str(hour).zfill(2)+".p", "wb"))
Example #23
def t_mean():
    # file = '/users/global/cornkle/data/ERA-I monthly/ERA-WA-Monthly-2mTemp.nc'
    file = '/users/global/cornkle/data/ERA-I monthly/ERA-Int-Monthly-2mTemp.nc'

    fpath = '/users/global/cornkle/figs/gap_filling_Tgrad/months/'


    dam = xr.open_dataarray(file)
    months = np.arange(1, 13)

    for m in months:
        da = dam[(dam['time.month'] == m)]
        da = da.sel(longitude=slice(-18, 51), latitude=slice(36, -37))
        da = da.mean(axis=0)-273.15

        fp = fpath + 'tmean_' + str(m).zfill(2) + '.png'

        up.quick_map_salem(da, levels=np.arange(20,41,2), cmap='jet', save=fp)
def composite(h, eh):
    #pool = multiprocessing.Pool(processes=8)

    file = cnst.MCS_CENTRE70

    hour = h

    msg = xr.open_dataarray(file)

    msg = msg[(msg['time.hour'] == hour ) & (msg['time.minute'] == 0) & (
        msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >=6) ]

    msg = msg.sel(lat=slice(10.9,19), lon=slice(-9.8,9.8))
    msg.attrs['eh'] = eh
    msg.attrs['refhour'] = h
    dic = {}
    #ipdb.set_trace()
    for ids in range(0,len(msg), 50):
        dic = u_parallelise.era_run_arrays(1,file_loop,msg[ids:ids+50], dic) #'rano', 'rregional', 'rcnt',

    # res = []
    # for mm in msg:
    #     out =file_loop(mm)
    #     res.append(out)

    print('Returned from parallel')

    # `dic` is filled in place by u_parallelise.era_run_arrays above. The
    # aggregation below only applies to the commented-out serial loop, which
    # returns a list `res`; it is kept here, commented out, for that case.
    # res = [x for x in res if x is not None]
    # rres = []
    # dic_names = (res[0])[1]
    # for r in res:
    #     rres.append(np.array(r[0]))
    # vars = np.array(rres)
    # for id, l in enumerate(dic_names):
    #     dic[l] = np.nansum(np.squeeze(vars[:, id, ...]), axis=0)
    # for k in dic.keys():
    #    dic[k] = np.nansum(dic[k], axis=0)


    pkl.dump(dic, open(cnst.network_data + "figs/LSTA-bullshit/AGU/composite_backtrack"+str(eh) + "UTCERA"+str(hour).zfill(2)+".p", "wb"))
    print('Dumped file')
def composite(h, eh):
    #pool = multiprocessing.Pool(processes=8)


    file = cnst.MCS_CENTRE70

    hour = h

    msg = xr.open_dataarray(file)

    msg = msg[(msg['time.hour'] == hour ) & (msg['time.minute'] == 0) & (
        msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >=6) ]

    msg = msg.sel(lat=slice(10.9,19), lon=slice(-9.8,9.8))
    msg.attrs['eh'] = eh
    msg.attrs['refhour'] = h
    dic = OrderedDict()
    for sk in ['lsta', 'cnt', 'cntp', 'cntm', 'probmsg']:
        dic[sk] = None  # pre-create the keys; they are overwritten with summed arrays below

    res = []
    for mm in msg:
        out =file_loop(mm)
        res.append(out)

    print('Returned from parallel')

    res = [x for x in res if x is not None]

    rres = []
    dic_names = (res[0])[1]
    for r in res:
        rres.append(np.array(r[0]))

    vars = np.array(rres)
    for id, l in enumerate(dic_names):
            dic[l] = np.nansum(np.squeeze(vars[:,id,...]), axis=0)
    # for k in dic.keys():
    #    dic[k] = np.nansum(dic[k], axis=0)


    pkl.dump(dic, open(cnst.network_data + "figs/LSTA-bullshit/AGU/composite_backtrack"+str(eh) + "UTCERA"+str(hour).zfill(2)+".p", "wb"))
    print('Dumped file')
Example #26
def do_mask(i):
    process = current_process().pid
    if args.verbose:
        print('... P{:d}: applying filter {:s}'
              .format(process, filter_files[i].split('/')[-1]))

    filter_da = xr.open_dataarray(filter_files[i])
    filter_array = filter_da.values
    data_channels = filter_da.attrs['frequency_channels']
    image_channel = int(np.floor(filter_da.shape[0] / 2))

    # Figure out FFT and filter normalization
    # FFT normalization factor
    x = filter_da.attrs['x']
    y = filter_da.attrs['y']
    f = filter_da.attrs['f']
    dx = x[1] - x[0]
    dy = y[1] - y[0]
    df = f[1] - f[0]
    u = filter_da.attrs['u']
    v = filter_da.attrs['v']
    e = filter_da.attrs['e']
    du = u[1] - u[0]
    dv = v[1] - v[0]
    de = e[1] - e[0]
    fft_norm = dx * dy * df
    ifft_norm = du * dv * de * filter_array.size
    # Filter normalization factor
    filter_volume = np.sum(filter_array.size * du * dv * de)
    filter_integral = np.sum(np.abs(filter_array) ** 2 * du * dv * de)
    filter_norm = np.sqrt(filter_volume / filter_integral)
    # Apply filter
    filtered_data = apply_filter(
        data_array[data_channels], filter_array,
        fft_multiplier=fft_norm, ifft_multiplier=ifft_norm,
        output_multiplier=filter_norm, apply_window_func=args.apply_window_func,
        invert_filter=False
    ).real

    # Select and store the center channel of the filtered data array
    filtered_data_array[data_channels[image_channel]] = \
        filtered_data[image_channel]
Example #27
def regrid(cmorph):

    dummy = xr.open_dataset(constants.LSTA_TESTFILE)
    cm = xr.open_dataarray(cmorph)

    out = cmorph.replace('WA_', 'WA_onLSTA_')

    arrays = []
    for c in cm:

        c_on_lsta = dummy.salem.transform(c)
        arrays.append(c_on_lsta)

    astack = np.stack(arrays, axis=0)
    da = xr.DataArray(astack, coords={'time': cm.time,
                                  'lat': dummy.lat,
                                  'lon': dummy.lon},
                      dims=['time', 'lat', 'lon'])  # .isel(time=0)

    da.to_netcdf(out)
def composite(h):
    #pool = multiprocessing.Pool(processes=8)


    file = constants.MCS_CENTRE70

    hour = h

    msg = xr.open_dataarray(file)
    # keep only the 17-19 UTC snapshots
    msg = msg[((msg['time.hour'] >= 17) & (msg['time.hour'] <= 19)) & ((msg['time.minute'] == 0) & (
        msg['time.year'] >= 2008) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6))]

    msg = msg.sel(lat=slice(10.9,19), lon=slice(-9.8,9.8))

    dic = u_parallelise.era_run_arrays(5,file_loop,msg) #'rano', 'rregional', 'rcnt',

    # for k in dic.keys():
    #    dic[k] = np.nansum(dic[k], axis=0)


    pkl.dump(dic, open("/users/global/cornkle/figs/LSTA-bullshit/corrected_LSTA/system_scale/doug/composite_backtrack_ERA_pl_"+str(hour).zfill(2)+".p", "wb"))
Example #29
def rinexobs(fn, ofn=None):
    """
    Program overview:
    1) scan the whole file for the header and other information using scan(lines)
    2) each epoch is read and the information is put in a 4-D xarray.DataArray
    3) rinexobs can be sped up if a premade NetCDF (or legacy HDF5) file is
       provided, and it can save the parsed RINEX data to NetCDF. The header
       will be returned only if specified.

    rinexobs() returns the data in a 4-D xarray.DataArray, [Parameter, Sat #, time, data/loss of lock/signal strength]
    """
    # open file, get header info, possibly speed up reading data with a premade h5 file
    fn = Path(fn).expanduser()
    with fn.open('r') as f:
        tic = time()
        lines = f.read().splitlines(True)
        header, version, headlines, headlength, obstimes, sats, svset = scan(lines)
        print(fn, 'is a RINEX', version, 'file.', fn.stat().st_size//1000, 'kB.')
        if fn.suffix == '.nc':
            data = xarray.open_dataarray(str(fn), group='OBS')
        elif fn.suffix == '.h5':
            logging.warning('HDF5 is deprecated in this program, please use NetCDF format')
            import pandas
            data = pandas.read_hdf(fn, key='OBS')
        else:
            data = processBlocks(lines, header, obstimes, svset, headlines, headlength, sats)

        print("finished in {:.2f} seconds".format(time()-tic))

    # write a NetCDF file if specified
    if ofn:
        ofn = Path(ofn).expanduser()
        print('saving OBS data to', ofn)
        if ofn.is_file():
            wmode = 'a'
        else:
            wmode = 'w'
        data.to_netcdf(ofn, group='OBS', mode=wmode)

    return data, header
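# Hypothetical usage of rinexobs(), with made-up file names: parse a RINEX
# observation file and cache the observations to NetCDF for faster re-loading.
if __name__ == '__main__':
    obs, hdr = rinexobs('site0010.17o', ofn='site0010.17o.nc')
    print(obs.shape)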
Example #30
def composite(h):
    pool = multiprocessing.Pool(processes=8)


    file = '/users/global/cornkle/MCSfiles/blob_map_allscales_-50_JJAS_points_dominant.nc'

    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] == h) & (msg['time.minute'] == 0) & (
        msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) & (msg['time.month'] >= 6) ]

    msg = msg.sel(lat=slice(10.5,17.5), lon=slice(-9.5,9.5))

    res = pool.map(file_loop, msg)
    pool.close()

    # for m in msg[0:50]:
    #     file_loop(m)
    #
    # return

    res = [x for x in res if x is not None]

    blobs = []
    scales = []
    temp = []

    for r in res:
        scales.append(r[0])
        temp.append(r[1])
        blobs.append(r[2])



    blobs = [item for sublist in blobs for item in sublist]  # flatten list of lists
    scales = [item for sublist in scales for item in sublist]  # flatten list of lists
    temp = [item for sublist in temp for item in sublist]


    return blobs, scales, temp
Example #31
def attach_hydro(n, costs, ppl):
    if 'hydro' not in snakemake.config['renewable']: return
    c = snakemake.config['renewable']['hydro']
    carriers = c.get('carriers', ['ror', 'PHS', 'hydro'])

    _add_missing_carriers_from_costs(n, costs, carriers)

    ppl = ppl.query('carrier == "hydro"').reset_index(drop=True)\
             .rename(index=lambda s: str(s) + ' hydro')
    ror = ppl.query('technology == "Run-Of-River"')
    phs = ppl.query('technology == "Pumped Storage"')
    hydro = ppl.query('technology == "Reservoir"')

    country = ppl['bus'].map(n.buses.country).rename("country")

    inflow_idx = ror.index | hydro.index
    if not inflow_idx.empty:
        dist_key = ppl.loc[inflow_idx,
                           'p_nom'].groupby(country).transform(normed)

        with xr.open_dataarray(snakemake.input.profile_hydro) as inflow:
            inflow_countries = pd.Index(country[inflow_idx])
            missing_c = (inflow_countries.unique().difference(
                inflow.indexes['countries']))
            assert missing_c.empty, (
                f"'{snakemake.input.profile_hydro}' is missing "
                f"inflow time-series for at least one country: {', '.join(missing_c)}"
            )

            inflow_t = (inflow.sel(countries=inflow_countries).rename({
                'countries':
                'name'
            }).assign_coords(name=inflow_idx).transpose(
                'time', 'name').to_pandas().multiply(dist_key, axis=1))

    if 'ror' in carriers and not ror.empty:
        n.madd("Generator",
               ror.index,
               carrier='ror',
               bus=ror['bus'],
               p_nom=ror['p_nom'],
               efficiency=costs.at['ror', 'efficiency'],
               capital_cost=costs.at['ror', 'capital_cost'],
               weight=ror['p_nom'],
               p_max_pu=(inflow_t[ror.index].divide(
                   ror['p_nom'], axis=1).where(lambda df: df <= 1., other=1.)))

    if 'PHS' in carriers and not phs.empty:
        # fill missing max hours to config value and
        # assume no natural inflow due to lack of data
        phs = phs.replace({'max_hours': {0: c['PHS_max_hours']}})
        n.madd('StorageUnit',
               phs.index,
               carrier='PHS',
               bus=phs['bus'],
               p_nom=phs['p_nom'],
               capital_cost=costs.at['PHS', 'capital_cost'],
               max_hours=phs['max_hours'],
               efficiency_store=np.sqrt(costs.at['PHS', 'efficiency']),
               efficiency_dispatch=np.sqrt(costs.at['PHS', 'efficiency']),
               cyclic_state_of_charge=True)

    if 'hydro' in carriers and not hydro.empty:
        hydro_max_hours = c.get('hydro_max_hours')
        hydro_stats = pd.read_csv(snakemake.input.hydro_capacities,
                                  comment="#",
                                  na_values='-',
                                  index_col=0)
        e_target = hydro_stats["E_store[TWh]"].clip(lower=0.2) * 1e6
        e_installed = hydro.eval('p_nom * max_hours').groupby(
            hydro.country).sum()
        e_missing = e_target - e_installed
        missing_mh_i = hydro.query('max_hours == 0').index

        if hydro_max_hours == 'energy_capacity_totals_by_country':
            # watch out some p_nom values like IE's are totally underrepresented
            max_hours_country = e_missing / \
                                hydro.loc[missing_mh_i].groupby('country').p_nom.sum()

        elif hydro_max_hours == 'estimate_by_large_installations':
            max_hours_country = hydro_stats['E_store[TWh]'] * 1e3 / \
                                hydro_stats['p_nom_discharge[GW]']

        missing_countries = (pd.Index(hydro['country'].unique()).difference(
            max_hours_country.dropna().index))
        if not missing_countries.empty:
            logger.warning(
                "Assuming max_hours=6 for hydro reservoirs in the countries: {}"
                .format(", ".join(missing_countries)))
        hydro_max_hours = hydro.max_hours.where(
            hydro.max_hours > 0,
            hydro.country.map(max_hours_country)).fillna(6)

        n.madd(
            'StorageUnit',
            hydro.index,
            carrier='hydro',
            bus=hydro['bus'],
            p_nom=hydro['p_nom'],
            max_hours=hydro_max_hours,
            capital_cost=(costs.at['hydro', 'capital_cost']
                          if c.get('hydro_capital_cost') else 0.),
            marginal_cost=costs.at['hydro', 'marginal_cost'],
            p_max_pu=1.,  # dispatch
            p_min_pu=0.,  # store
            efficiency_dispatch=costs.at['hydro', 'efficiency'],
            efficiency_store=0.,
            cyclic_state_of_charge=True,
            inflow=inflow_t.loc[:, hydro.index])
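# `normed` is used above but not shown in this snippet; a plausible one-line
# helper (an assumption, not necessarily the project's own definition):
def normed(s):
    # Scale a pandas Series so its values sum to 1 (per-country weights here).
    return s / s.sum()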
def transform(da, transform_type, workdir):
    """Transform data to be more normal using either boxcox or log transform.

    The transform is performed separately for each month, since the regression model is fit for each month.

    Parameters
    ----------
    da : xarray.DataArray
        Untransformed dataarray
    transform_type : str
        'boxcox' or 'log'
    workdir : str
        Where to save the boxcox parameters

    Returns
    -------
    da_t : xarray.DataArray
        Transformed dataarray

    """

    # Set all non-positive precip values to trace
    tmp = da.values
    tmp[tmp <= 0] = 1e-24
    da.values = tmp

    if transform_type == 'boxcox':
        lam_save_name = '%s/boxcox_lambda.nc' % workdir
        if os.path.isfile(lam_save_name):
            da_lam = xr.open_dataarray(lam_save_name)
        else:
            ntime, nlat, nlon = da.shape
            box_lam = np.nan * np.ones((12, nlat, nlon))
            for mo in range(1, 13):
                print('calculating lambda for month %i' % mo)
                for ct1 in range(nlat):
                    for ct2 in range(nlon):
                        this_ts = da.isel({
                            'time': da['time.month'] == mo,
                            'lat': ct1,
                            'lon': ct2
                        })

                        if (np.isnan((this_ts.values).astype(float))).all():
                            continue
                        _, lam = boxcox(this_ts)
                        box_lam[mo - 1, ct1, ct2] = np.min(
                            (lam, 1)
                        )  # set ceiling at 1, since pr is positively skewed

            # save to netcdf
            da_lam = xr.DataArray(data=box_lam,
                                  dims=('month', 'lat', 'lon'),
                                  coords={
                                      'month': np.arange(1, 13),
                                      'lat': da.lat,
                                      'lon': da.lon
                                  })
            da_lam.to_netcdf(lam_save_name)

        # transform data, separately for each month
        da_t = []
        for mo in range(1, 13):
            x_t = boxcox_forward(da.sel({'time': da['time.month'] == mo}),
                                 da_lam.sel({'month': mo}))
            da_t.append(x_t)
        da_t = xr.concat(da_t, dim='time')
        da_t = da_t.sortby('time')

    elif transform_type == 'log':
        da_t = np.log(da)
    else:
        raise NotImplementedError(
            'No other transforms besides Box-Cox and log')

    return da_t
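# boxcox_forward() is called above but not shown in this snippet; a plausible
# sketch of the standard Box-Cox transform it presumably applies, broadcasting
# the per-pixel lambda over time (an assumption, not the original code):
import numpy as np
import xarray as xr

def boxcox_forward(x, lam):
    # (x**lam - 1)/lam for lam != 0, log(x) for lam == 0. xr.where evaluates
    # both branches, so the lam == 0 division only triggers a harmless warning.
    return xr.where(lam == 0, np.log(x), (x ** lam - 1.0) / lam)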
Example #33
        # remove this dimension
        da = da.squeeze()
        if not load_data_lazily:
            da.load()
        combined_ds.append(da)

    return combined_ds


# lazily load the whole dataset
ds_whole = read_e5_data(test_startyear, test_endyear, variables=variables)
# this is now a lazy dask array. do not do any operations on this array outside the data generator below.
# if we do operations before, it will severely slow down the data loading throughout the training.

# load normalization weights
norm_mean = xr.open_dataarray(norm_weights_filenamebase + '_mean.nc').values
norm_std = xr.open_dataarray(norm_weights_filenamebase + '_std.nc').values

n_data = ds_whole[0].shape[0]
N_train = n_data // time_resolution_hours
n_valid = int(N_train * valid_split)

Nlat, Nlon = ds_whole[0].shape[1:3]

Nlat = Nlat // 2  # only NH

n_channels_out = len(variables)

n_channels_in = n_channels_out

param_string = f'{modelname}_{train_startyear}-{train_endyear}'
Example #34
    "/global/cscratch1/sd/qnicolas/wrfdata/saved/gw.wrf.2D.60lev.500m.3km/wrfout_d01_1970-01-01_00_00_00"
)

x_kwargs = {'center': 314, 'flip_x': False, 'dx': 10}
ghats_topo = change_coords_sfc(ghats_ideal.HGT[0, 0], **x_kwargs)

# Define nb frames
print("Total nb of times: ", len(ghats_ideal.Time))
nframes = len(ghats_ideal.Time)
time_disc = 24  # frames per day
fig, ax = plt.subplots(1, 1, figsize=(15, 5))

# Animate
if movietype == 'isentropes':
    ghats_ideal_theta_z = xr.open_dataarray(
        "/global/cscratch1/sd/qnicolas/wrfdata/saved/gw.wrf.2D.60lev.500m.3km/diags/wrf.THETA.zinterp.days0-10.nc"
    )[:, 2:]
    levels = np.array(
        ghats_ideal_theta_z.isel(
            Time=0,
            distance_from_mtn=0).sel(z=np.arange(1000., 20000., 1000.)))

    def update(i):
        print(i)
        ax.cla()
        ghats_topo.plot(ax=ax, color='k', linewidth=2.)
        ghats_ideal_theta_z.isel(Time=i).plot.contour(ax=ax, levels=levels)
        ax.set_ylim(0., 20000.)
        ax.set_xlabel("Distance from mountain peak (km)")
        ax.set_ylabel("Altitude (m)")
        ax.set_title("Isentropes, Time = %i days %02i h" %
Example #35
sample_coor_m = sample_coor.resample(time='MS').sum()

out_nc = Path(r'H:\CMIP6 - Biased\pr_gamma\nc')
#%%
for l in ls:
    bias_p = sorted(list(l.iterdir()))
    print(l.name)
    print('------------')
    for path in bias_p:
        print(path.name)
        cor = pd.read_csv(list(Path(path).iterdir())[0], header=None)
        # print(cor.shape)
        cor_nc = to_xarray(cor, sample_coor_m.coords)
        cor_nc.to_netcdf(
            ut.save_file(out_nc / l.name /
                         ('Biased_' + path.name + '_2015_2100.nc')))

# open_and_format([sce585_path[3], sce585_path[-3]])
#%%
nc_path = sorted(list(Path(r'H:\CMIP6 - Biased\pr_gamma\nc\ssp245').iterdir()))
mf_ds_arr = [
    xr.open_dataarray(p).assign_coords(id=i + 1) for i, p in enumerate(nc_path)
]
mf_ds = xr.concat(mf_ds_arr, dim='id')
#%%
ut.sim_plot(mf_ds.isel(time=0), col='id', col_wrap=4, add_colorbar=False)
#%%
near_p = Path(r'H:\CMIP6 - Test\cdbc\new\Bias Corrected Rainfall 2015.csv')
cor = pd.read_csv(near_p, header=None)
cor_nc2 = to_xarray(cor, ut.select_year(sample_coor_m, 2015, 2031).coords)
Example #36
def main():
    # Parse arguments
    parser = argparse.ArgumentParser()
    parser.add_argument("config", help="Name of the config file.")
    parser.add_argument("-t",
                        "--train",
                        action="store_true",
                        help="Run neural network training.")
    parser.add_argument("-i",
                        "--interp",
                        action="store_true",
                        help="Run interpretation.")
    parser.add_argument("-p",
                        "--plot",
                        action="store_true",
                        help="Plot interpretation results.")
    args = parser.parse_args()
    if not exists(args.config):
        raise FileNotFoundError(args.config + " not found.")
    with open(args.config, "r") as config_file:
        config = yaml.load(config_file, Loader=yaml.Loader)
    # Load training data
    print(
        f"Loading training data period: {config['train_start_date']} to {config['train_end_date']}"
    )
    data_input = {}
    output = {}
    out_max = {}
    labels = {}
    meta = {}
    meta_df = {}
    input_combined = {}
    input_scaled = {}
    scale_values = {}
    predictions = {}
    modes = ["train", "val", "test"]
    # Load training, validation, and testing data
    for mode in modes:
        data_input[mode], output[mode], meta[mode] = load_patch_files(
            config[mode + "_start_date"], config[mode + "_end_date"],
            config["data_path"], config["input_variables"],
            config["output_variables"], config["meta_variables"],
            config["patch_radius"])
        input_combined[mode] = combine_patch_data(data_input[mode],
                                                  config["input_variables"])
        if mode == "train":
            input_scaled[mode], scale_values[mode] = min_max_scale(
                input_combined[mode])
        else:
            input_scaled[mode], scale_values[mode] = min_max_scale(
                input_combined[mode], scale_values["train"])
        out_max[mode] = storm_max_value(
            output[mode][config["output_variables"][0]], meta[mode]["masks"])
        meta_df[mode] = get_meta_scalars(meta[mode])
        print(meta_df[mode].columns)
        if config["classifier"]:
            labels[mode] = np.where(
                out_max[mode] >= config["classifier_threshold"], 1, 0)
        else:
            labels[mode] = out_max[mode]
    if not exists(config["out_path"]):
        makedirs(config["out_path"])
    scale_values["train"].to_csv(join(config["out_path"], "scale_values.csv"),
                                 index_label="variable")
    if "get_visible_devices" in dir(tf.config.experimental):
        gpus = tf.config.experimental.get_visible_devices("GPU")
    else:
        gpus = tf.config.get_visible_devices("GPU")
    for device in gpus:
        tf.config.experimental.set_memory_growth(device, True)
    models = {}
    neuron_activations = {}
    neuron_scores = {}
    saliency = {}
    if args.train:
        print("Begin model training")
        for mode in modes:
            predictions[mode] = pd.DataFrame(0,
                                             index=meta_df[mode].index,
                                             columns=list(
                                                 config["models"].keys()))
            predictions[mode] = pd.merge(meta_df[mode],
                                         predictions[mode],
                                         left_index=True,
                                         right_index=True)
        for model_name, model_config in config["models"].items():
            model_out_path = join(config["out_path"], model_name)
            if not exists(model_out_path):
                makedirs(model_out_path)
            scale_values["train"].to_csv(join(
                model_out_path, "scale_values_" + model_name + ".csv"),
                                         index_label="variable")
            models[model_name] = BaseConvNet(**model_config)
            models[model_name].fit(input_scaled["train"].values,
                                   labels["train"],
                                   val_x=input_scaled["val"].values,
                                   val_y=labels["val"])
            models[model_name].save_model(model_out_path, model_name)
            for mode in modes:
                predictions[mode].loc[:,
                                      model_name] = models[model_name].predict(
                                          input_scaled[mode].values)
        for mode in modes:
            predictions[mode].to_csv(join(config["out_path"],
                                          f"predictions_{mode}.csv"),
                                     index_label="index")
        print("Calculate metrics")
        if config["classifier"]:
            model_scores = classifier_metrics(
                labels["test"],
                predictions["test"][list(config["models"].keys())])
            model_scores.to_csv(join(config["out_path"],
                                     "model_test_scores.csv"),
                                index_label="model_name")
    if args.interp:
        for model_name, model_config in config["models"].items():
            if model_name not in models.keys():
                model_out_path = join(config["out_path"], model_name)
                models[model_name] = load_conv_net(model_out_path, model_name)
            neuron_columns = [
                f"neuron_{n:03d}"
                for n in range(models[model_name].dense_neurons)
            ]
            neuron_activations[model_name] = {}
            neuron_scores[model_name] = pd.DataFrame(0,
                                                     columns=neuron_columns,
                                                     index=modes)
            saliency[model_name] = {}
            for mode in modes:
                neuron_activations[model_name][mode] = pd.merge(
                    meta_df[mode],
                    pd.DataFrame(0,
                                 columns=neuron_columns,
                                 index=meta_df[mode].index),
                    left_index=True,
                    right_index=True)
                neuron_activations[model_name][
                    mode].loc[:, neuron_columns] = models[
                        model_name].output_hidden_layer(
                            input_scaled[mode].values)
                neuron_activations[model_name][mode].to_csv(
                    join(config["out_path"],
                         f"neuron_activations_{model_name}_{mode}.csv"),
                    index_label="index")
                saliency[model_name][mode] = models[model_name].saliency(
                    input_scaled[mode])

                saliency[model_name][mode].to_netcdf(
                    join(config["out_path"],
                         f"neuron_saliency_{model_name}_{mode}.nc"),
                    encoding={
                        "saliency": {
                            "zlib": True,
                            "complevel": 4,
                            "shuffle": True,
                            "least_significant_digit": 3
                        }
                    })
                if config["classifier"]:
                    neuron_scores[model_name].loc[mode] = score_neurons(
                        labels[mode], neuron_activations[model_name][mode]
                        [neuron_columns].values)
                else:
                    neuron_scores[model_name].loc[mode] = score_neurons(
                        labels[mode],
                        neuron_activations[model_name][mode]
                        [neuron_columns].values,
                        metric="r")
            neuron_scores[model_name].to_csv(join(
                config["out_path"], f"neuron_scores_{model_name}.csv"),
                                             index_label="mode")
    if args.plot:
        print("Begin plotting")
        if "plot_kwargs" not in config.keys():
            config["plot_kwargs"] = {}
        for model_name, model_config in config["models"].items():
            print(model_name)
            if model_name not in models.keys():
                model_out_path = join(config["out_path"], model_name)
                models[model_name] = load_conv_net(model_out_path, model_name)
                neuron_activations[model_name] = {}
                neuron_scores[model_name] = pd.read_csv(join(
                    config["out_path"], f"neuron_scores_{model_name}.csv"),
                                                        index_col="mode")
                saliency[model_name] = {}
            for mode in modes:
                print(mode)
                if mode not in neuron_activations[model_name].keys():
                    neuron_activations[model_name][mode] = pd.read_csv(
                        join(config["out_path"],
                             f"neuron_activations_{model_name}_{mode}.csv"),
                        index_col="index")
                    saliency[model_name][mode] = xr.open_dataarray(
                        join(config["out_path"],
                             f"neuron_saliency_{model_name}_{mode}.nc"))
                for variable_name in config["input_variables"]:
                    print(variable_name)
                    if variable_name not in config["plot_kwargs"].keys():
                        plot_kwargs = None
                    else:
                        plot_kwargs = config["plot_kwargs"][variable_name]
                    plot_neuron_composites(
                        config["out_path"],
                        model_name + "_" + mode,
                        input_combined[mode],
                        neuron_activations[model_name][mode].values,
                        neuron_scores[model_name].loc[mode].values,
                        variable_name,
                        plot_kwargs=plot_kwargs)
                    plot_saliency_composites(
                        config["out_path"], model_name + "_" + mode,
                        saliency[model_name][mode],
                        neuron_activations[model_name][mode].values,
                        neuron_scores[model_name].loc[mode].values,
                        variable_name)
                    plot_top_activations(
                        config["out_path"],
                        model_name + "_" + mode,
                        input_combined[mode],
                        meta_df[mode],
                        neuron_activations[model_name][mode],
                        neuron_scores[model_name].loc[mode].values,
                        saliency[model_name][mode],
                        variable_name,
                        plot_kwargs=plot_kwargs)
    return
Example #37
def arima_and_ystar(acause,
                    agg_version,
                    arima_version,
                    smoothing,
                    years,
                    measure,
                    intercept_shift,
                    gbd_round_id,
                    draws,
                    decay,
                    dryrun=False,
                    no_correction=False,
                    past_version="best",
                    no_arima=False,
                    **kwargs):
    r"""Samples mortality residuals from an ARIMA and forms
    $y^* = \hat{y} + \hat{\epsilon}$.

    :param str acause: name of the target acause to aggregate to.
    :param str agg_version: name of the aggregate version.
    :param str arima_version: name of the arima version.
    :param list[str] smoothing: what dimensions to smooth over during the ARIMA
        step.
    :param fbd_core.argparse.YearRange years: a container for the three years
        which define our forecast.
    :param int draws: number of draws to take.
    :param bool dryrun: dryrun flag. This is a test run if True.
    :param bool no_correction: skip the log bias correction if True.
    """
    logger.debug("Opening: {}".format(FILEPATH))
    y_hat = xr.open_dataarray(str(FILEPATH))

    # GK intercept shift
    y_hat = gis.intercept_shift_at_draw(y_hat, acause, past_version,
                                        gbd_round_id, years, draws)
    save_xr(y_hat, FILEPATH, root_dir="scratch", metric="rate", space="log")

    y_past = _get_y_past(acause,
                         years,
                         measure,
                         gbd_round_id,
                         past_version=past_version)

    past_years = years.past_years

    if not no_arima:
        # ARIMA for everything except NTDs
        logger.info("Computing epsilon_past.")
        epsilon_past_with_scenarios_and_draws = (
            y_past.loc[dict(year_id=past_years)] -
            y_hat.loc[dict(year_id=past_years)])
        epsilon_past = epsilon_past_with_scenarios_and_draws.loc[dict(
            scenario=0)].mean("draw")

        try:
            epsilon_hat = xr.open_dataarray(str(FILEPATH))
        except:
            epsilon_hat = _draw_epsilons(epsilon_past,
                                         draws,
                                         smoothing,
                                         years,
                                         acause,
                                         decay,
                                         gbd_round_id=gbd_round_id)
        if not dryrun:
            logger.info("Saving epsilon_hat to {}".format(FILEPATH))
            _save_netcdf(epsilon_hat, FILEPATH)
        y_star = _get_y_star(y_hat, epsilon_hat, years).copy()

    else:
        # no arima for ntds
        y_star = y_hat
        y_star.name = "value"

    # intercept shift and bias
    if intercept_shift:
        y_star = _intercept_shift(acause,
                                  y_star,
                                  years,
                                  measure,
                                  gbd_round_id,
                                  draws=draws,
                                  no_arima=no_arima,
                                  past_version=past_version)
    if not no_correction:
        y_star = xr.ufuncs.log(bias_exp(y_star))

    if not dryrun:
        logger.info("Saving y_star to {}".format(FILEPATH))
        _save_netcdf(y_star, FILEPATH)
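A minimal sketch of the y* = y_hat + epsilon_hat step above, assuming y_hat and epsilon_hat are xarray DataArrays of log-rates sharing year_id (and any draw/location) coordinates; the names are illustrative, not the actual _get_y_star implementation:

import xarray as xr

def y_star_sketch(y_hat, epsilon_hat, forecast_years):
    """Add ARIMA-sampled residuals to the predicted log-rates for the forecast years."""
    # xarray aligns and broadcasts on the shared dims (year_id, location_id, draw, ...)
    y_hat_fut = y_hat.sel(year_id=forecast_years)
    eps_fut = epsilon_hat.sel(year_id=forecast_years)
    return (y_hat_fut + eps_fut).rename("value")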
Example #38
0
def _get_modeled_y_hat(acause, version, measure, period, gbd_round_id, draws):
    """Gets mortality data for a modeled acause.

    For modeled causes, if the data is split by sex, then it is assumed that it
    is in log rate space. If the data is not split by sex, then it is assumed
    that it is in normal rate space.

    :param str acause: acause for a modeled acause.
    :param str version: name of the mortality or yld version which modeled this
        acause.
    :return xarray.DataArray: the mortality or yld data for acause.
    """
    if period == "past":
        input_file = FILEPATH / "{}.nc".format(acause)
        y_hat_exp = xr.open_dataset(str(input_file))["value"] + FLOOR
        y_hat_exp = resample(y_hat_exp, draws)
        y_hat = xr.ufuncs.log(y_hat_exp)
        y_hat.coords["acause"] = acause
    else:
        try:
            logger.info(
                "No children. y_hat is from mort/yld file {}".format(FILEPATH))
            # Because the data is modeled and not split by sex, it is saved in
            # normal rate space. Log it.
            y_hat_exp = xr.open_dataarray(str(FILEPATH))
            y_hat_exp = resample(y_hat_exp, draws)
            y_hat = xr.ufuncs.log(y_hat_exp + FLOOR)
            # some of the yld files are missing acause, so add that info
            y_hat.coords["acause"] = acause

        except IOError:  # Modeled data is split by sex.
            input_files = [
                FILES for FILES in POTENTIAL_FILES if FILES.exists()
            ]
            logger.info("Input results are split by sex. Files are {}".format(
                input_files))

            if len(input_files) == 1:
                logger.info("This is a sex specific cause. Gotta give it a "
                            "real coordinate on sex.")
                if "female" in input_files[0].as_posix():
                    sex_id = 2
                else:
                    sex_id = 1
                dataarray_one_sex = xr.open_dataarray(
                    str(input_files[0]), drop_variables=["measure", "cov"])
                dataarray_one_sex = resample(dataarray_one_sex, draws)
                new_vals = np.expand_dims(dataarray_one_sex.values, 0)
                new_dims = ["sex_id"] + list(dataarray_one_sex.dims)
                logger.info("New dimensions: {}".format(new_dims))
                new_coords = ([[sex_id]] + [
                    coord.values for coord in list(
                        dataarray_one_sex.coords.indexes.values())
                ])
                y_hat = xr.DataArray(
                    new_vals, dims=new_dims,
                    coords=new_coords).to_dataset(name="value")
                y_hat.coords["acause"] = acause

            elif len(input_files) == 2:
                y_hat = xr.open_mfdataset(
                    [str(input_file) for input_file in input_files],
                    concat_dim="sex_id",
                    drop_variables=["measure", "cov"])
                y_hat = resample(y_hat[list(y_hat.data_vars.keys())[0]], draws)

            else:
                logger.error((
                    "{} has no modeled mortality/ylds for version {}. ruh-roh."
                ).format(acause, version))
                raise Exception("Modeled acause has no saved results.")
            # if data are split by sex, they are in log space. convert back to
            # regular space to add the floor
            y_hat = xr.ufuncs.log(xr.ufuncs.exp(y_hat) + FLOOR)
    return y_hat
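For the single-sex branch above, DataArray.expand_dims offers a more idiomatic route to the same result as the manual np.expand_dims/DataArray construction; a hedged sketch reusing the names from the example:

dataarray_one_sex = xr.open_dataarray(
    str(input_files[0]), drop_variables=["measure", "cov"])
dataarray_one_sex = resample(dataarray_one_sex, draws)
# expand_dims adds a real sex_id dimension and coordinate in one call
y_hat = (dataarray_one_sex
         .expand_dims({"sex_id": [sex_id]})
         .to_dataset(name="value"))
y_hat.coords["acause"] = acause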
Example #39
0
def _get_aggregated_y_hat(acause, version, measure, period, gbd_round_id):
    """Gets expected value of cause specific mortality rates.

    For aggregate causes, it is assumed that the data is not split by sex and
    is saved in log rate space.

    When the children are added to form the aggregated acause result, the
    summation happens in normal space. Therefore, we must exponentiate the
    children's rates, add them up, and log them to get an aggregated
    y_hat in log rate space.

    The resulting y_hat is in log rate space.

    :param str acause: name of the target acause to aggregate to.
    :param str version: name of the aggregation version.
    :return xarray.DataArray: The expected value of the cause specific
        mortality rate.
    """
    # connect to db and read in cause hierarchy
    engine = db.db_engine(NAME, database=DATABASE)
    session = sessionmaker(bind=engine)()
    all_causes = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID)[[
        "acause", "cause_id", "parent_id"
    ]]
    # subset to just fatal causes
    cause_strategy_set = get_strategy_set(session, FATAL_GK_STRATEGY_ID,
                                          CAUSE_HIERARCHY_ID)
    cause_hierarchy = get_hierarchy(session, "cause", CAUSE_HIERARCHY_ID)
    cause_tree, node_map = subset_fatal.make_hierarchy_tree(
        cause_hierarchy, 294, "cause_id")
    fatal_subset = subset_fatal.include_up_hierarchy(
        cause_tree, node_map, cause_strategy_set["cause_id"].values)
    fatal_causes = all_causes[all_causes.cause_id.isin(fatal_subset)]

    cause_id = fatal_causes[fatal_causes.acause == acause].cause_id.values[0]
    children = fatal_causes.query(
        "parent_id == {}".format(cause_id))["acause"].values
    logger.info("y_hat is a sum of children: {}".format(children))

    # Create a list of child acause files which are not external causes and
    # check to make sure all the ones we want to sum up are actually present.
    potential_child_files = [
        FBDPath("/{gri}/{p}/{m}/{v}/{c}_hat.nc".format(gri=gbd_round_id,
                                                       p=period,
                                                       m=measure,
                                                       v=version,
                                                       c=child),
                root_dir="scratch") for child in children
        if child not in ("_all", "_none")
    ]
    child_files = [
        str(child_file) for child_file in potential_child_files
        if child_file.exists()
    ]
    if len(potential_child_files) != len(child_files):
        logger.error("You are missing files, bud. {} vs {}".format(
            potential_child_files, child_files))
        raise Exception("Missing y_hat files!")
    logger.debug("Summing these files: {}".format(child_files))

    exp_y_hat_sum = None
    for child_file in child_files:
        logger.info("Adding {}".format(child_file))
        exp_y_hat = xr.ufuncs.exp(
            xr.open_dataarray(child_file, drop_variables=["measure", "cov"]))
        if exp_y_hat_sum is None:
            exp_y_hat_sum = exp_y_hat
        else:
            exp_y_hat_broadcasted = xr.broadcast(exp_y_hat_sum, exp_y_hat)
            exp_y_hat_broadcasted = [
                data.fillna(0.) for data in exp_y_hat_broadcasted
            ]
            exp_y_hat_sum = sum(exp_y_hat_broadcasted)
    y_hat = xr.ufuncs.log(exp_y_hat_sum)
    y_hat.coords["acause"] = acause
    return y_hat
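The aggregation above (exponentiate the children, sum in normal space, log the result) can also be written as a single aligned sum; a minimal sketch assuming the child files hold log-rate DataArrays:

import numpy as np
import xarray as xr

def aggregate_child_log_rates(child_files):
    """Sum child log-rates in normal space and return the parent log-rate."""
    exp_children = [np.exp(xr.open_dataarray(f)) for f in child_files]
    # outer-align on the shared coords and treat non-overlapping cells as zero
    exp_children = [da.fillna(0.) for da in xr.align(*exp_children, join="outer")]
    return np.log(sum(exp_children))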
Example #40
0
 def setUp(self):
     self.algorithm = NewAssimilation()
     state_path = os.path.join(DATA_PATH, 'test_state.nc')
     self.state = xr.open_dataarray(state_path)
     obs_path = os.path.join(DATA_PATH, 'test_single_obs.nc')
     self.obs = xr.open_dataset(obs_path)
Example #41
0
def trend_all():

    srfc = cnst.ERA5_MONTHLY_SRFC_SYNOP  #cnst.ERA_MONTHLY_SRFC_SYNOP
    pl = cnst.ERA5_MONTHLY_PL_SYNOP  #cnst.ERA_MONTHLY_PL_SYNOP
    mcs = cnst.GRIDSAT + 'aggs/gridsat_WA_-70_monthly_mean_5000km2.nc'

    fpath = cnst.network_data + 'figs/CLOVER/months/ERA5_WA/'

    box = [-18, 30, 0, 25]  #  [-18,40,0,25] #

    da = xr.open_dataset(pl)  #xr.open_dataset(pl)
    #da = xr.decode_cf(da)
    da = u_darrays.flip_lat(da)
    da = da.sel(longitude=slice(box[0], box[1]),
                latitude=slice(box[2], box[3]))
    da2 = xr.open_dataset(srfc)  #xr.open_dataset(srfc)
    #da2 = xr.decode_cf(da2)
    da2 = u_darrays.flip_lat(da2)
    da2 = da2.sel(longitude=slice(box[0], box[1]),
                  latitude=slice(box[2], box[3]))
    da3 = xr.open_dataarray(mcs) * 100
    da3 = da3.sel(lon=slice(box[0], box[1]), lat=slice(box[2], box[3]))

    lons = da.longitude
    lats = da.latitude

    press = da2['tcwv']
    press = press[press['time.hour'] == 12]
    #press.values = press.values#*1000
    low_press = 925
    up_press = 650
    mid_press = 700

    q = da['q'].sel(level=slice(low_press - 20, low_press)).mean('level')
    q = q[q['time.hour'] == 12]
    t2d = da2['t2m']  #['t2m']
    #t2d = da['t'].sel(level=slice(800, 850)).mean('level')
    t2d = t2d[t2d['time.hour'] == 12]

    sh = da2['sshf']
    sh = sh[sh['time.hour'] == 12] / -3600

    # theta_low = u_met.theta_e(da.level.values, da['t'].sel(level=low_press), da['q'].sel(level=low_press))
    # theta_high = u_met.theta_e(da.level.values, da['t'].sel(level=mid_press), da['q'].sel(level=mid_press))
    #
    # theta_e = theta_low - theta_high

    u600 = da['u'].sel(level=slice(up_press - 20, up_press)).mean('level')
    u600 = u600[u600['time.hour'] == 12]
    v600 = da['v'].sel(level=slice(up_press - 20, up_press)).mean('level')
    v600 = v600[v600['time.hour'] == 12]

    ws600 = u_met.u_v_to_ws_wd(u600, v600)

    u800 = da['u'].sel(level=slice(low_press - 20, low_press)).mean('level')
    u800 = u800[u800['time.hour'] == 12]

    v800 = da['v'].sel(level=slice(low_press - 20, low_press)).mean('level')
    v800 = v800[v800['time.hour'] == 12]

    shear_u = u600 - u800  #u600-
    shear_v = v600 - v800  # v600-

    ws_shear = u_met.u_v_to_ws_wd(shear_u.values, shear_v.values)
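    # Note: u_v_to_ws_wd is an external helper assumed here to return
    # (wind speed, wind direction); the equivalent numpy computation would be roughly
    #   ws = np.hypot(u, v)                            # speed in m s-1
    #   wd = np.degrees(np.arctan2(-u, -v)) % 360.     # meteorological direction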

    ws_600 = t2d.copy(deep=True)
    ws_600.name = 'ws'

    ws_600.values = ws600[0]

    # shear = t2d.copy(deep=True)
    # shear.name = 'shear'
    # shear.values = ws_shear[0]

    shear = sh

    u6 = shear_u  #u800
    v6 = shear_v  #v800

    q.values = q.values * 1000

    grid = t2d.salem.grid.regrid(factor=1)
    t2 = t2d  # grid.lookup_transform(t2d)
    tir = grid.lookup_transform(da3)  #t2d.salem.lookup_transform(da3['tir']) #

    grid = grid.to_dataset()
    tir = xr.DataArray(tir,
                       coords=[da3['time'], grid['y'], grid['x']],
                       dims=['time', 'latitude', 'longitude'])

    months = [
        4, (3, 5), (6, 8), (9, 11)
    ]  # alternatives tried: single months 1-12, lists like [3, 4, 5, 6, 9, 10, 11], or season tuples such as (12, 2)

    dicm = {}
    dicmean = {}

    for m in months:
        method = 'mk'

        if type(m) == int:
            m = [m]

        sig = True

        t2trend, t2mean = calc_trend(t2,
                                     m,
                                     method=method,
                                     sig=sig,
                                     hour=12,
                                     wilks=False)  #hour=12,
        t2_mean = t2mean.mean(axis=0)

        tirtrend, tirmean = calc_trend(tir,
                                       m,
                                       method=method,
                                       sig=sig,
                                       wilks=False)

        tirm_mean = tirmean.mean(axis=0)

        qtrend, qmean = calc_trend(q,
                                   m,
                                   method=method,
                                   sig=sig,
                                   hour=12,
                                   wilks=False)  #hour=12,
        q_mean = qmean.mean(axis=0)

        sheartrend, shearmean = calc_trend(shear,
                                           m,
                                           method=method,
                                           sig=sig,
                                           hour=12,
                                           wilks=False)  #hour=12,
        shear_mean = shearmean.mean(axis=0)

        #ipdb.set_trace()

        presstrend, pressmean = calc_trend(press,
                                           m,
                                           method=method,
                                           sig=sig,
                                           hour=12,
                                           wilks=False)  #hour=12,
        press_mean = pressmean.mean(axis=0)

        u6trend, u6mean = calc_trend(u6,
                                     m,
                                     method=method,
                                     sig=sig,
                                     hour=12,
                                     wilks=False)  #hour=12,
        u6_mean = u6mean.mean(axis=0)
        v6trend, v6mean = calc_trend(v6,
                                     m,
                                     method=method,
                                     sig=sig,
                                     hour=12,
                                     wilks=False)  #hour=12,
        v6_mean = v6mean.mean(axis=0)

        # thetatrend, thetamean = calc_trend(theta_e, m, method=method, sig=sig, hour=12,wilks=False) #hour=12,
        # theta_mean = thetamean.mean(axis=0)

        t2trend_unstacked = t2trend * 10.  # warming over decade
        qtrend_unstacked = qtrend * 10.  # warming over decade
        sheartrend_unstacked = sheartrend * 10.  # warming over decade
        u6trend_unstacked = u6trend * 10
        v6trend_unstacked = v6trend * 10
        presstrend_unstacked = presstrend * 10
        # thetatrend_unstacked = thetatrend * 10

        tirtrend_unstacked = (
            (tirtrend.values) * 10. / tirm_mean.values) * 100.
        #ipdb.set_trace()
        tirtrend_out = xr.DataArray(tirtrend_unstacked,
                                    coords=[grid['y'], grid['x']],
                                    dims=['latitude', 'longitude'])
        tirtrend_out.name = 'tir'
        #tirmean_out = xr.DataArray(tirm_mean, coords=[grid['y'], grid['x']], dims=['latitude','longitude'])

        dicm[m[0]] = tirtrend_out
        dicmean[m[0]] = tirm_mean

        t_da = t2trend_unstacked
        q_da = qtrend_unstacked
        s_da = sheartrend_unstacked
        ti_da = tirtrend_out
        tcwv_da = presstrend_unstacked
        # theta_da  = thetatrend_unstacked

        if len(m) == 1:
            fp = fpath + 'use/ERA5_trend_synop_WA_sig_poly_tcwv_1991_skt_' + str(
                m[0]).zfill(2) + '.png'
        else:
            fp = fpath + 'use/ERA5_trend_synop_WA_sig_poly_tcwv_1991_skt_' + str(
                m[0]).zfill(2) + '-' + str(m[1]).zfill(2) + '.png'
        map = shear.salem.get_map()
        ti_da = t2d.salem.transform(ti_da)

        f = plt.figure(figsize=(15, 8), dpi=300)

        # transform their coordinates to the map reference system and plot the arrows
        xx, yy = map.grid.transform(shear.longitude.values,
                                    shear.latitude.values,
                                    crs=shear.salem.grid.proj)

        xx, yy = np.meshgrid(xx, yy)
        # Quiver only every 2nd grid point
        u = u6trend_unstacked.values[1::2, 1::2]
        v = v6trend_unstacked.values[1::2, 1::2]

        # Quiver only every 2nd grid point
        uu = u6_mean.values[1::2, 1::2]
        vv = v6_mean.values[1::2, 1::2]

        xx = xx[1::2, 1::2]
        yy = yy[1::2, 1::2]

        pdic = {
            'tlin': (t2_mean.values - 273.15).astype(np.float64),
            'tmean': (t2_mean.values - 273.15).astype(np.float64),
            'qmean': (q_mean.values).astype(np.float64),
            'qlin': q_da.values,
            'shearlin': s_da.values,
            'u': u,
            'v': v,
            'xx': xx,
            'yy': yy,
            'tirmean': tirm_mean,
        }

        pkl.dump(
            dicm,
            open(
                cnst.network_data +
                'data/CLOVER/saves/storm_frac_synop12UTC_WA.p', 'wb'))

        ax1 = f.add_subplot(221)
        map.set_data(t_da.values, interp='linear')  # interp='linear'

        map.set_contour((t2_mean.values - 273.15).astype(np.float64),
                        interp='linear',
                        colors='k',
                        linewidths=0.5,
                        levels=[20, 23, 26, 29, 32, 35])
        map.set_plot_params(
            levels=[-0.5, -0.4, -0.3, -0.2, 0.2, 0.3, 0.4, 0.5],
            cmap='RdBu_r',
            extend='both')  # levels=np.arange(-0.5,0.51,0.1),

        #map.set_contour((t2_mean.values).astype(np.float64), interp='linear', colors='k', linewidths=0.5, levels=np.linspace(800,925,8))
        #map.set_plot_params(levels=[-0.5,-0.4,-0.3,-0.2,-0.1,-0.05,-0.02, 0.02,0.05,0.1,0.2,0.3,0.4,0.5], cmap='RdBu_r', extend='both')  # levels=np.arange(-0.5,0.51,0.1),

        dic = map.visualize(ax=ax1,
                            title='2m temperature trend | contours: mean T',
                            cbar_title='K decade-1')
        contours = dic['contour'][0]
        plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f')

        ax2 = f.add_subplot(222)
        map.set_data(tcwv_da.values, interp='linear')  # interp='linear'
        map.set_contour((press_mean.values).astype(np.float64),
                        interp='linear',
                        colors='k',
                        levels=[20, 30, 40, 50, 60],
                        linewidths=0.5)  #[6,8,10,12,14,16]
        map.set_plot_params(
            levels=[-1.2, -1, -0.8, -0.6, -0.4, 0.4, 0.6, 0.8, 1, 1.2],
            cmap='RdBu',
            extend='both'
        )  # levels=np.arange(-0.5,0.51,0.1), [-0.6,-0.4,-0.2,0.2,0.4,0.6]

        dic = map.visualize(
            ax=ax2,
            title='Total column water vapour trend | contours: mean TCWV',
            cbar_title='kg m-2 decade-1')
        contours = dic['contour'][0]
        plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f')

        ax3 = f.add_subplot(223)
        map.set_data(s_da.values, interp='linear')  # interp='linear'
        map.set_contour(shear_mean.values,
                        interp='linear',
                        colors='k',
                        levels=np.arange(10, 150,
                                         8))  #, levels=np.arange(50,300,8),

        map.set_plot_params(levels=np.array(
            [-0.8, -0.6, -0.4, -0.2, -0.1, 0.1, 0.2, 0.4, 0.6, 0.8]) * 10,
                            cmap='RdBu_r',
                            extend='both')  # levels=np.arange(-0.5,0.51,0.1)
        dic = map.visualize(ax=ax3,
                            title='Sensible heat flux trend and mean',
                            cbar_title='W m-2 decade-1')
        contours = dic['contour'][0]
        plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f')
        # qu = ax3.quiver(xx, yy, u, v, scale=60, width=0.002)
        #
        # qk = plt.quiverkey(qu, 0.4, 0.03, 1, '1 m s$^{-1}$',
        #                    labelpos='E', coordinates='figure')

        ax4 = f.add_subplot(224)
        map.set_contour((tirm_mean),
                        interp='linear',
                        levels=[0.1, 0.5, 1, 2.5],
                        colors='k',
                        linewidths=0.5)
        #.values).astype(np.float64)

        ti_da.values[ti_da.values == 0] = np.nan
        map.set_data(ti_da)  #
        coord = [18, 25, -28, -20]
        geom = shpg.box(coord[0], coord[2], coord[1], coord[3])
        #map.set_geometry(geom, zorder=99, color='darkorange', linewidth=3, linestyle='--', alpha=0.3)

        map.set_plot_params(
            cmap='viridis', extend='both', levels=np.arange(
                10, 51,
                10))  # levels=np.arange(20,101,20)  #np.arange(20,101,20)
        dic = map.visualize(ax=ax4,
                            title='-70C cloud cover change | >5000km2',
                            cbar_title='$\%$ decade-1',
                            addcbar=True)
        contours = dic['contour'][0]
        plt.clabel(contours, inline=True, fontsize=7, fmt='%1.1f')

        plt.tight_layout()
        plt.savefig(fp)
        plt.close('all')

    pkl.dump(
        dicm,
        open(
            cnst.network_data + 'data/CLOVER/saves/storm_frac_synop12UTC_WA.p',
            'wb'))

    pkl.dump(
        dicmean,
        open(
            cnst.network_data +
            'data/CLOVER/saves/storm_frac_mean_synop12UTC_WA.p', 'wb'))
Example #42
0
def dissertation_plot_contour_by_group_individual_bar(setting,
                                                      z_dim="annual_roi",
                                                      mkt='TW'):
    # verify setting
    if setting not in ("compact", "general"):
        raise ValueError("unknown setting: {}".format(setting))

    # verify z_dim
    if z_dim not in ('SPA_c', 'daily_VSS', 'annual_roi'):
        raise ValueError('unknown z_dim:{}'.format(z_dim))

    # parameters
    start_date, end_date = dt.date(2005, 1, 3), dt.date(2018, 12, 28)
    interval = "{}_{}".format(start_date.strftime("%Y%m%d"),
                              end_date.strftime("%Y%m%d"))

    max_portfolio_sizes = (5, )
    window_sizes = range(50, 240 + 10, 10)
    # alpha
    alpha_pcts = [v for v in range(50, 100, 5)]
    # alphas = ["{:.2f}".format(v / 100.) for v in range(50, 100, 5)]
    # set_indices = [1, ]
    set_indices = [1, 2, 3]

    name = "report_SPSP_CVaR_whole_dissertation_{}_{}_{}.nc".format(
        setting, start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d"))

    # read report file
    xarr = xr.open_dataarray(os.path.join(pp.DATA_DIR, name))
    print(xarr)

    import matplotlib as mpl
    import matplotlib.pyplot as plt
    # set global font
    plt.rcParams['font.family'] = 'serif'
    plt.rcParams['font.serif'] = (['Times New Roman'] +
                                  plt.rcParams['font.serif'])

    # figure size in inches
    fig = plt.figure(figsize=(16, 12), facecolor='white')

    # alpha
    xlim = (50, 95)
    # rolling window size
    ylim = (50, 240)

    group_names = ['{}G{}'.format(mkt, idx + 1) for idx in range(6)]
    for gdx, group_name in enumerate(group_names):
        # x-axis, alpha, y-axis:  window_sizes
        ax = fig.add_subplot(2, 3, gdx + 1, xlim=xlim, ylim=ylim)
        ax.set_title(group_name, y=1.02, fontsize=18)

        # labelpad - number of points between the axis and its label
        ax.set_xlabel(r'$\alpha$', fontsize=14, labelpad=-2)
        ax.set_ylabel(r'$h$', fontsize=14, labelpad=-2)
        ax.tick_params(labelsize=10, pad=1)
        ax.set_xticks(alpha_pcts)
        ax.set_xticklabels(alpha_pcts, fontsize=10)
        ax.set_yticks(window_sizes)
        ax.set_yticklabels(window_sizes, fontsize=10)

        # X: alpha_pcts, Y: window size
        Xs, Ys = np.meshgrid(alpha_pcts, window_sizes)
        Zs = np.zeros_like(Xs, dtype=float)
        n_row, n_col = Xs.shape

        # get z-value
        for rdx in range(n_row):
            for cdx in range(n_col):
                alpha, win_size = Xs[rdx, cdx], Ys[rdx, cdx]
                z_values = xarr.loc[interval, group_name,
                                    set_indices,  # all scenarios
                                    5, win_size, "{:.2f}".format(alpha / 100.),
                                    z_dim]
                mean = z_values.mean()
                if z_dim == 'daily_VSS':
                    Zs[rdx, cdx] = float(mean) * 1e5
                else:
                    Zs[rdx, cdx] = float(mean) * 1e2

        # print(Zs)
        lower, high = np.floor(np.min(Zs)), np.ceil(np.max(Zs))
        print(group_name, " z_range:", lower, high)
        if z_dim == 'annual_roi':
            for _ in range(4):
                if (lower * 10) % 4:
                    lower -= 0.1
            for _ in range(4):
                if (high * 10) % 4:
                    high += 0.1
            print(group_name, "fixed z_range:", lower, high)
            cm_norm = mpl.colors.Normalize(vmin=lower - 0.1,
                                           vmax=high + 0.1,
                                           clip=False)
            color_range = np.arange(lower, high, 0.4)
        elif z_dim == 'daily_VSS':
            print('z_dim:', z_dim)
            cm_norm = mpl.colors.Normalize(vmin=lower - 0.5,
                                           vmax=high + 0.5,
                                           clip=False)
            color_range = np.arange(lower, high)
        elif z_dim == 'SPA_c':
            print('z_dim:', z_dim)
            Zs[Zs > 10] = 11
            cm_norm = mpl.colors.Normalize(vmin=0, vmax=12, clip=False)
            color_range = np.arange(0, 12)

        # contour, projecting on z
        cset = ax.contourf(Xs,
                           Ys,
                           Zs,
                           cmap=plt.cm.coolwarm,
                           norm=cm_norm,
                           levels=color_range)
        # color bar

        if z_dim == 'annual_roi':
            cbar = fig.colorbar(cset, ax=ax)
            cbar.ax.tick_params(labelsize=12)
            cbar_label_name = "Annual return (%)"
        elif z_dim == 'daily_VSS':
            cbar = fig.colorbar(cset, ax=ax)
            cbar.ax.tick_params(labelsize=12)
            cbar_label_name = r"Daily VSS (10$^{-5}$)"
        elif z_dim == 'SPA_c':
            ticks = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, ">10"]
            cbar = fig.colorbar(cset, ax=ax, ticks=np.arange(12))
            cbar.ax.tick_params(labelsize=12)
            cbar_label_name = "SPA (%)"
            cbar.set_ticklabels(ticks)

        cbar.set_label(cbar_label_name, labelpad=1, size=18)

    fig_path = os.path.join(pp.TMP_DIR, "{}_{}.pdf".format(mkt, z_dim))
    plt.savefig(fig_path, dpi=240, format='pdf')
    plt.show()
Example #43
0
xsave = str(int(xv))

outanomDir = outPath+ft+'/%(m)s/'+va+'/'+str(pl)+'/daily/anom/'
anomfname = 'daily_anomalies.y'+ysave+'.x'+xsave+'.nc'
outmmeDir = outPath+ft+'/MME/'+va+'/'+str(pl)+'/daily/anom/'
if not os.path.isdir(outmmeDir):
    os.makedirs(outmmeDir)
# Update file names
anomfname = starttime+'.'+endtime+'.'+anomfname

modellist = ['30LCESM1', '46LCESM1', 'CCSM4', 'FIMr1p1', 'GEFS',
             'GEM', 'GEOS_V2p1', 'NESM']
# create an empty multi-model ensemble file made up of
# days from startS and endS and leadtime of up to 45 days
# Read in one model to get leadtime coords
_moddir = outanomDir % {'m': modellist[0]}  # use one model's directory to read coords
da = xr.open_dataarray(_moddir+anomfname)
_dates = pd.date_range(starttime, endtime, freq='D')
_L = [ pd.Timedelta(12,'h') + pd.Timedelta(days=i) for i in range(45) ]
x = np.empty((len(modellist), len(_dates), len(_L)))
x.fill(np.nan)
mme_ds = xr.DataArray(x, coords={'X': da.X, 'L': da.L, 'Y': da.Y,
                                 'P': da.P, 'S': _dates, 'model': modellist},
                      dims=['model', 'S', 'L'])
# Populate mme_da
for i, model in enumerate(modellist):
    _moddir = outanomDir % {'m':model}
    da = xr.open_dataarray(_moddir+anomfname)
    da = da.mean(dim='M')
    # Find indices to populate start date
    idates = np.ones(len(da.S), dtype=np.int16)
    for j in range(len(idates)):
Example #44
0
alpha = xr.open_mfdataset(era_path + "/eff_ws/era5_alpha_NZ_*.nc",
                          chunks={'time':
                                  46})  #.sel(time=slice('1997','2020'))

# load windpark data
windparks = pd.read_csv(nz_path + "/windparks_NZ.csv",
                        delimiter=';',
                        parse_dates=['commissioning'])
# calculate specific power of turbines (rated power per rotor swept area, in W/m2)
windparks['sp'] = windparks.turb_cap * 10**6 / (windparks.d_rotor**2 * np.pi /
                                                4)
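# e.g. a hypothetical 3 MW turbine with a 90 m rotor:
#   3 * 10**6 / (90**2 * np.pi / 4) ~ 472 W/m2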

# with GWA
outfile = results_path + '/windpower_??_ERA5_GWA.nc'

if results_path + '/windpower_NZ_ERA5_GWA.nc' not in glob.glob(outfile):
    print('calculating ERA5 NZ GWA')
    if GWA == "3":
        GWA = xr.open_rasterio(nz_path + '/GWA/GWA3_NZ100m.tif')
    else:
        GWA = xr.open_dataarray(nz_path + '/GWA/GWA2_NZ100m.nc')
    wps = windpower_simulation_era5(
        wind.wh100, alpha.alpha, windparks.Height.values,
        windparks.Capacity.values, windparks.sp.values,
        windparks.Longitude.values, windparks.Latitude.values,
        windparks.commissioning.values, startyear, GWA, startGWA, endGWA)
    # save as netcdf
    wps.drop(['x', 'y'
              ]).to_dataset(name='wp').to_netcdf(results_path +
                                                 "/windpower_NZ_ERA5_GWA.nc")
Example #45
0
@numba.njit
def nanmean(v, w):
    return _nanmean(v, w)


filepath = '/net/so4/landclim/bverena/large_files/data_small.nc'

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

print(rank)

# open data
# Every rank reads the whole file. This is probably not optimal; ideally one rank would read the file and send each variable to a different rank.
print(f'open data')
data = xr.open_dataarray(filepath)
"""
# subset more for speedup of first tests
print(f'subset even more because very large dataset')
data = data[:,::10,:,:]
"""
shape = np.shape(data)

# create a mask of nans
mask = ~np.isnan(data)  # nan values have zero weight (i.e. are False)

# gapfilling the missing values with spatiotemporal mean
print('gapfilling missing values with spatiotemporal mean')
tic = datetime.now()
var = rank
result = _nanmean(data.values[var, :, :, :], mask.values[var, :, :, :])
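A hedged sketch of the "one rank reads, then distributes" alternative mentioned in the comment above; the variable-per-rank layout and names are assumptions, not part of the original script:

from mpi4py import MPI
import numpy as np
import xarray as xr

comm = MPI.COMM_WORLD
rank = comm.Get_rank()

if rank == 0:
    data = xr.open_dataarray(filepath)              # filepath as defined above
    for dest in range(1, comm.Get_size()):
        # send one variable slab (first axis assumed to index variables) to each rank
        comm.send(data.values[dest, :, :, :], dest=dest, tag=dest)
    my_var = data.values[0, :, :, :]
else:
    my_var = comm.recv(source=0, tag=rank)

my_mask = ~np.isnan(my_var)                         # NaNs get zero weight
result = _nanmean(my_var, my_mask)                  # _nanmean as defined above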
Example #46
0
# da[8, :, :] = da[7, :, :]

# assign the time stamps:
da.coords['time'] = pd.date_range(start='1/1/2017',
                                  end='29/12/2019',
                                  freq='12D')

# test the  bfast4openeo function:
breaks = bfast4openeo(da)

# plot ther result:
plt.imshow(breaks.values)
# save the result:
breaks.to_netcdf('offline_bfastPy_output.nc')
# load the results:
breaks = xr.open_dataarray('offline_bfastPy_output.nc')

breaks = breaks.sortby('x')
breaks = breaks.sortby('y')

aoi = breaks.sel(y=slice(9611198, 9611750), x=slice(742740, 743297))
aoi.plot()
# ------------
start_monitor = datetime(2019, 1, 1)
end_monitor = datetime(2019, 12, 31)
# get dates from monitoring period:
dates = pd.date_range(start='1/1/2017', end='29/12/2019', freq='12D')
dates_mon = dates[dates.slice_indexer(start_monitor, end_monitor)]
# convert to the fraction of the year:
frac_of_year = np.array(dates_mon.year + dates_mon.dayofyear / 365.)
# convert aoi values to the fraction of the year:
Example #47
0
    object_file = args.objects_file.replace('.nc', '')

    if 'objects' not in object_file:
        raise Exception("Expected an objects file (filename containing `.objects.`)")

    base_name, objects_mask = object_file.split('.objects.')

    out_filename = FN_FORMAT.format(
        base_name=base_name, objects_name=objects_mask
    )

    fn_objects = "{}.nc".format(object_file)
    if not os.path.exists(fn_objects):
        raise Exception("Couldn't find objects file `{}`".format(fn_objects))
    objects = xr.open_dataarray(fn_objects, decode_times=False)

    ds = main(objects)

    ds.attrs['input_name'] = args.objects_file
    ds.attrs['mask'] = objects_mask

    ds.to_netcdf(out_filename)
    print("Wrote output to `{}`".format(out_filename))

    if args.make_plot:
        import matplotlib
        matplotlib.use("Agg")
        import matplotlib.pyplot as plt
        import topology.plots.overview
Example #48
0
def symbol_statistics(start_date=dt.date(1990, 1, 1),
                      end_date=dt.date(2017, 12, 31)):
    """
    Compute summary statistics for the returns of the specified stocks.
    """
    import csv
    import json
    import statsmodels.tsa.stattools as tsa_tools
    import scipy.stats as spstats
    import portfolio_programming.statistics.risk_adjusted as risk_adj
    import arch.bootstrap.multiple_comparison as arch_comp

    symbols = json.load(open(os.path.join(pp.DATA_DIR,
                                          'DJIA_symbols_20170901.json')))
    data_xarr = xr.open_dataarray(os.path.join(pp.DATA_DIR,
                                               'DJIA_symbols_20170901.nc'))

    with open(os.path.join(pp.TMP_DIR,
                           'DJIA_symbols_20170901_stat.csv'), 'w') as csv_file:
        fields = ["rank", 'symbol', 'start_date', 'end_date', "n_data",
                  "cum_roi", "annual_roi", "roi_mu", "std", "skew", "ex_kurt",
                  "Sharpe", "Sortino", "JB", "worst_ADF", "SPA_c"]

        writer = csv.DictWriter(csv_file, fieldnames=fields)
        writer.writeheader()

        for sdx, symbol in enumerate(symbols):
            rois = data_xarr.loc[start_date:end_date, symbol, 'simple_roi']
            trans_dates = rois.get_index('trans_date')
            rois = rois.data # to numpy
            rois = rois[~np.isnan(rois)] # filter the nan
            n_roi = len(rois)
            rois[0] = 0
            cumulative_roi = float((1 + rois).prod() - 1)
            annual_roi = float(np.power(cumulative_roi + 1, 1. / 10) - 1)

            sharpe = risk_adj.Sharpe(rois)
            sortino = risk_adj.Sortino_full(rois)[0]
            jb = spstats.jarque_bera(rois)[1]

            # worst case of the ADF test
            adf_c = tsa_tools.adfuller(rois, regression='c')[1]
            adf_ct = tsa_tools.adfuller(rois, regression='ct')[1]
            adf_ctt = tsa_tools.adfuller(rois, regression='ctt')[1]
            adf_nc = tsa_tools.adfuller(rois, regression='nc')[1]
            adf = max(adf_c, adf_ct, adf_ctt, adf_nc)

            spa_value = 0
            for _ in range(5):
                spa = arch_comp.SPA(rois, np.zeros(n_roi), reps=1000)
                spa.seed(np.random.randint(0, 2 ** 31 - 1))
                spa.compute()
                # preserve the worst p-value
                if spa.pvalues[1] > spa_value:
                    spa_value = spa.pvalues[1]

            writer.writerow({
                "rank": sdx + 1,
                "symbol": symbol,
                "start_date": trans_dates[0].strftime("%Y-%m-%d"),
                "end_date": trans_dates[-1].strftime("%Y-%m-%d"),
                "n_data": n_roi,
                "cum_roi": cumulative_roi,
                "annual_roi": annual_roi,
                "roi_mu": float(rois.mean()),
                "std": float(rois.std(ddof=1)),
                "skew": spstats.skew(rois, bias=False),
                "ex_kurt": spstats.kurtosis(rois, bias=False),
                "Sharpe": sharpe,
                "Sortino": sortino,
                "JB": jb,
                "worst_ADF": adf,
                "SPA_c": spa_value,
            })
            print("[{}/{}] {}, cum_roi:{:.2%}".format(
                sdx + 1, len(symbols),
                symbol, cumulative_roi))
Example #49
0
def choose_block(parameter_dir, varnames, percentile_threshold=97):
    """Calculate a block size for all variables, months, and locations using the Wilks (1997) JClim formula.

    Parameters
    ----------
    parameter_dir : str
        Parent directory for parameter files
    varnames : list
        List of (standard) variable names to be considered, i.e. ['tas', 'pr', 'slp']
    percentile_threshold : float
        The percentile of estimated blocks to use universally.

    Returns
    -------
    block_use : int
        Suggested block size in years
    block_use_mo : int
        Suggested block size in months

    """

    # Initialize with smallest block (in years)
    block_use = 1

    for this_varname in varnames:
        this_dir = '%s/%s' % (parameter_dir, this_varname)
        fname = '%s/residual.nc' % this_dir

        da = xr.open_dataarray(fname)
        _, nlat, nlon = np.shape(da)

        has_data = ~np.isnan(da[-1, ...].values)

        datavec = da.values[:, has_data]

        # We want to know the extent to which there is year-to-year memory (not seasonal)
        # Calculate block size for each month, gridbox

        ntime, nbox = np.shape(datavec)

        block_est = np.empty((12, nbox))

        def rhs(L):
            return (n - L + 1)**((2 / 3) * (1 - n_eff / n))

        for i in range(12):
            for j in range(nbox):
                this_ts = datavec[i::12, j]
                # estimate rho
                rho = np.corrcoef(this_ts[1:], this_ts[:-1])[0, 1]
                n = len(this_ts)

                # Wilks equation is implicit, so need to solve iteratively
                n_eff = n * (1 - rho) / (1 + rho)
                # As per Wilks 1997, start with a guess of L = sqrt(n)
                L = int(np.sqrt(n))

                while L > rhs(L):
                    L -= 1

                while L < rhs(L):
                    L += 1

                if L > rhs(L):
                    L -= 1

                block_est[i, j] = L

        new_block = np.percentile(block_est.flatten(), percentile_threshold)
        if new_block > block_use:
            block_use = new_block

    print('Block size: %d years' % block_use)
    block_use_mo = block_use * 12  # switch to months

    return block_use, block_use_mo
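A self-contained sketch of the implicit Wilks (1997) relation solved above, L = (n - L + 1)**((2/3) * (1 - n_eff/n)) with n_eff = n * (1 - rho) / (1 + rho), for a single synthetic AR(1) series (illustration only):

import numpy as np

def wilks_block_length(ts):
    """Iteratively solve the Wilks block-length equation for one time series."""
    rho = np.corrcoef(ts[1:], ts[:-1])[0, 1]
    n = len(ts)
    n_eff = n * (1 - rho) / (1 + rho)
    rhs = lambda L: (n - L + 1) ** ((2 / 3) * (1 - n_eff / n))
    L = int(np.sqrt(n))                # same starting guess as above
    while L > rhs(L):
        L -= 1
    while L < rhs(L):
        L += 1
    return L - 1 if L > rhs(L) else L

rng = np.random.default_rng(0)
ar1 = np.zeros(60)
for t in range(1, 60):                 # 60 "years" of AR(1) noise with rho ~ 0.5
    ar1[t] = 0.5 * ar1[t - 1] + rng.standard_normal()
print(wilks_block_length(ar1))         # typically a block of a few years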
Example #50
0
def grdfilter(grid, **kwargs):
    """
    Filter a grid in the space (or time) domain.

    Filter a grid file in the time domain using one of the selected convolution
    or non-convolution isotropic or rectangular filters and compute distances
    using Cartesian or Spherical geometries. The output grid file can
    optionally be generated as a sub-region of the input (via *region*) and/or
    with new increment (via *spacing*) or registration (via *toggle*). In this
    way, one may have "extra space" in the input data so that the edges will
    not be used and the output can be within one half-width of the input edges.
    If the filter is low-pass, then the output may be less frequently sampled
    than the input.

    Full option list at :gmt-docs:`grdfilter.html`

    {aliases}

    Parameters
    ----------
    grid : str or xarray.DataArray
        The file name of the input grid or the grid loaded as a DataArray.
    outgrid : str or None
        The name of the output netCDF file with extension .nc to store the grid
        in.
    filter : str
        ``xwidth[/width2][modifiers]``.
        Name of the filter type you wish to apply, followed by the width:
        b: Box Car; c: Cosine Arch; g: Gaussian; o: Operator; m: Median;
        p: Maximum Likelihood probability; h: histogram
        Example: F='m600' for a median filter with width of 600
    distance : str
        Distance *flag* tells how grid (x,y) relates to filter width as
        follows:

        p: grid (px,py) with *width* an odd number of pixels; Cartesian
        distances.

        0: grid (x,y) same units as *width*, Cartesian distances.

        1: grid (x,y) in degrees, *width* in kilometers, Cartesian distances.

        2: grid (x,y) in degrees, *width* in km, dx scaled by cos(middle y),
        Cartesian distances.

        The above options are fastest because they allow weight matrix to be
        computed only once. The next three options are slower because they
        recompute weights for each latitude.

        3: grid (x,y) in degrees, *width* in km, dx scaled by cosine(y),
        Cartesian distance calculation.

        4: grid (x,y) in degrees, *width* in km, Spherical distance
        calculation.

        5: grid (x,y) in Mercator ``projection='m1'`` img units, *width* in km,
        Spherical distance calculation.

    spacing : str
        ``xinc[+e|n][/yinc[+e|n]]``.
        x_inc [and optionally y_inc] is the grid spacing.
    nans : str or float
        ``i|p|r``.
        Determine how NaN-values in the input grid affects the filtered output.
    {R}
    toggle : bool
        Toggle the node registration for the output grid so as to become the
        opposite of the input grid. [Default gives the same registration as the
        input grid].
    {V}

    Returns
    -------
    ret: xarray.DataArray or None
        Return type depends on whether the *outgrid* parameter is set:
        - xarray.DataArray if *outgrid* is not set
        - None if *outgrid* is set (grid output will be stored in *outgrid*)

    Examples
    --------
    >>> import os
    >>> import pygmt

    >>> # Apply a filter of 600km (full width) to the @earth_relief_30m file
    >>> # and return a filtered field (saved as netcdf)
    >>> pygmt.grdfilter(
    ...     grid="@earth_relief_30m",
    ...     filter="m600",
    ...     distance="4",
    ...     region=[150, 250, 10, 40],
    ...     spacing=0.5,
    ...     outgrid="filtered_pacific.nc",
    ... )
    >>> os.remove("filtered_pacific.nc")  # cleanup file

    >>> # Apply a gaussian smoothing filter of 600 km in the input data array,
    >>> # and returns a filtered data array with the smoothed field.
    >>> grid = pygmt.datasets.load_earth_relief()
    >>> smooth_field = pygmt.grdfilter(grid=grid, filter="g600", distance="4")

    """
    kind = data_kind(grid)

    with GMTTempFile(suffix=".nc") as tmpfile:
        with Session() as lib:
            if kind == "file":
                file_context = dummy_context(grid)
            elif kind == "grid":
                file_context = lib.virtualfile_from_grid(grid)
            else:
                raise GMTInvalidInput("Unrecognized data type: {}".format(type(grid)))

            with file_context as infile:
                if "G" not in kwargs.keys():  # if outgrid is unset, output to tempfile
                    kwargs.update({"G": tmpfile.name})
                outgrid = kwargs["G"]
                arg_str = " ".join([infile, build_arg_string(kwargs)])
                lib.call_module("grdfilter", arg_str)

        if outgrid == tmpfile.name:  # if user did not set outgrid, return DataArray
            with xr.open_dataarray(outgrid) as dataarray:
                result = dataarray.load()
                _ = result.gmt  # load GMTDataArray accessor information
        else:
            result = None  # if user sets an outgrid, return None

        return result
Example #51
0
 def setUp(self):
     state_path = os.path.join(DATA_PATH, 'test_state.nc')
     self.state = xr.open_dataarray(state_path)
     obs_path = os.path.join(DATA_PATH, 'test_single_obs.nc')
     self.obs = xr.open_dataset(obs_path)
     self.operator = BernoulliOperator()
Example #52
0
def grdcut(grid, **kwargs):
    """
    Extract subregion from a grid.

    Produce a new *outgrid* file which is a subregion of *grid*. The
    subregion is specified with *region*; the specified range must not exceed
    the range of *grid* (but see *extend*). If in doubt, run
    :meth:`pygmt.grdinfo` to check range. Alternatively, define the subregion
    indirectly via a range check on the node values or via distances from a
    given point. Finally, you can give *projection* for oblique projections to
    determine the corresponding rectangular *region* setting that will give a
    grid that fully covers the oblique domain.

    Full option list at :gmt-docs:`grdcut.html`

    {aliases}

    Parameters
    ----------
    grid : str or xarray.DataArray
        The file name of the input grid or the grid loaded as a DataArray.
    outgrid : str or None
        The name of the output netCDF file with extension .nc to store the grid
        in.
    {J}
    {R}
    extend : bool or int or float
        Allow grid to be extended if new *region* exceeds existing boundaries.
        Give a value to initialize nodes outside current region.
    circ_subregion : str
        ``'lon/lat/radius[unit][+n]'``.
        Specify an origin (*lon* and *lat*) and *radius*; append a distance
        *unit* and we determine the corresponding rectangular region so that
        all grid nodes on or inside the circle are contained in the subset.
        If **+n** is appended we set all nodes outside the circle to NaN.
    z_subregion : str
        ``'[min/max][+n|N|r]'``.
        Determine a new rectangular region so that all nodes outside this
        region are also outside the given z-range [-inf/+inf]. To indicate no
        limit on *min* or *max* only, specify a hyphen (-). Normally, any NaNs
        encountered are simply skipped and not considered in the
        range-decision. Append **+n** to consider a NaN to be outside the given
        z-range. This means the new subset will be NaN-free. Alternatively,
        append **+r** to consider NaNs to be within the data range. In this
        case we stop shrinking the boundaries once a NaN is found [Default
        simply skips NaNs when making the range decision]. Finally, if your
        core subset grid is surrounded by rows and/or columns that are all
        NaNs, append **+N** to strip off such columns before (optionally)
        considering the range of the core subset for further reduction of the
        area.

    {V}

    Returns
    -------
    ret: xarray.DataArray or None
        Return type depends on whether the *outgrid* parameter is set:

        - xarray.DataArray if *outgrid* is not set
        - None if *outgrid* is set (grid output will be stored in *outgrid*)
    """
    kind = data_kind(grid)

    with GMTTempFile(suffix=".nc") as tmpfile:
        with Session() as lib:
            if kind == "file":
                file_context = dummy_context(grid)
            elif kind == "grid":
                file_context = lib.virtualfile_from_grid(grid)
            else:
                raise GMTInvalidInput("Unrecognized data type: {}".format(type(grid)))

            with file_context as infile:
                if "G" not in kwargs.keys():  # if outgrid is unset, output to tempfile
                    kwargs.update({"G": tmpfile.name})
                outgrid = kwargs["G"]
                arg_str = " ".join([infile, build_arg_string(kwargs)])
                lib.call_module("grdcut", arg_str)

        if outgrid == tmpfile.name:  # if user did not set outgrid, return DataArray
            with xr.open_dataarray(outgrid) as dataarray:
                result = dataarray.load()
                _ = result.gmt  # load GMTDataArray accessor information
        else:
            result = None  # if user sets an outgrid, return None

        return result
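A short usage sketch for grdcut, analogous to the grdfilter example above (the remote grid name follows the same @earth_relief convention):

import pygmt

# return the subregion as an xarray.DataArray
subset = pygmt.grdcut(grid="@earth_relief_30m", region=[120, 160, 30, 60])

# or write it to a netCDF file instead (returns None)
pygmt.grdcut(grid="@earth_relief_30m", region=[120, 160, 30, 60],
             outgrid="pacific_subset.nc")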
Example #53
0
def stocksp_cor15_plot_2d_contour_by_alpha(setting, z_dim="cum_roi"):
    """
    The 2 x 5 contour diagrams in the paper are generated by this function.
    """

    # verify setting
    if setting not in ("compact", "general"):
        raise ValueError("unknown setting: {}".format(setting))

    # verify z_dim
    if z_dim not in ('cum_roi', 'daily_VSS', 'annual_roi'):
        raise ValueError('unknown z_dim:{}'.format(z_dim))

    # parameters
    start_date, end_date = dt.date(2005, 1, 3), dt.date(2014, 12, 31)
    max_portfolio_sizes = range(5, 50 + 5, 5)
    window_sizes = range(60, 240 + 10, 10)
    alphas = ["{:.2f}".format(v / 100.) for v in range(50, 100, 5)]
    set_indices = [1, 2, 3]

    name = "report_SPSP_CVaR_whole_{}_{}_{}.nc".format(
        setting, start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d"))

    # read report file
    xarr = xr.open_dataarray(os.path.join(pp.DATA_DIR, name))

    import matplotlib as mpl
    import matplotlib.pyplot as plt

    # figure size in inches
    fig = plt.figure(figsize=(64, 48), facecolor='white')

    # set color range
    if z_dim == 'cum_roi':
        cm_norm = mpl.colors.Normalize(vmin=-100, vmax=300, clip=False)
        color_range = np.arange(-100, 300 + 10, 20)
    elif z_dim == "daily_VSS":
        cm_norm = mpl.colors.Normalize(vmin=0, vmax=4, clip=False)
        color_range = np.arange(0, 4 + 0.2, 0.3)
    elif z_dim == 'annual_roi':
        pass

    xlim = (5, 50)
    ylim = (60, 240)
    for adx, alpha in enumerate(alphas):
        # x-axis, max_portfolio_size, y-axis:  window_sizes
        ax = fig.add_subplot(2, 5, adx + 1, xlim=xlim, ylim=ylim)

        ax.set_title(r'$\alpha$ = {:.0%}'.format(float(alpha)),
                     y=1.02,
                     fontsize=18)
        # labelpad - number of points between the axis and its label
        ax.set_xlabel(r'$M$',
                      fontsize=14,
                      labelpad=-2,
                      fontname="Times New Roman")
        ax.set_ylabel(r'$h$',
                      fontsize=14,
                      labelpad=-2,
                      fontname="Times New Roman")
        ax.tick_params(labelsize=10, pad=1)
        ax.set_xticks(max_portfolio_sizes)
        ax.set_xticklabels(max_portfolio_sizes,
                           fontsize=10,
                           fontname="Times New Roman")
        ax.set_yticks(window_sizes)
        ax.set_yticklabels(window_sizes,
                           fontsize=10,
                           fontname="Times New Roman")

        Xs, Ys = np.meshgrid(max_portfolio_sizes, window_sizes)
        Zs = np.zeros_like(Xs, dtype=float)
        n_row, n_col = Xs.shape

        for rdx in range(n_row):
            for cdx in range(n_col):
                n_symbol, win_size = Xs[rdx, cdx], Ys[rdx, cdx]
                z_values = xarr.loc[
                    "{}_{}".format(start_date.strftime("%Y%m%d"),
                                   end_date.strftime("%Y%m%d")), set_indices,
                    n_symbol, win_size, alpha, z_dim]
                mean = z_values.mean()
                Zs[rdx, cdx] = float(mean) * 100.

        print("Z_dim:", z_dim)
        print("z_range:", np.min(Zs), np.max(Zs))
        print(Zs)
        # contour, projecting on z
        cset = ax.contourf(Xs,
                           Ys,
                           Zs,
                           cmap=plt.cm.coolwarm,
                           norm=cm_norm,
                           levels=color_range)

    # share color bar,  rect [left, bottom, width, height]
    cbar_ax = fig.add_axes([0.92, 0.125, 0.015, 0.75])
    # print fig.get_axes()
    cbar = fig.colorbar(cset,
                        ax=fig.get_axes(),
                        cax=cbar_ax,
                        ticks=color_range)

    cbar.ax.tick_params(labelsize=12)
    if z_dim == "cum_roi":
        cbar_label_name = "Average cumulative returns (%)"
    elif z_dim == "daily_VSS":
        cbar_label_name = "Average daily VSS (%)"

    cbar.set_label(cbar_label_name,
                   labelpad=1,
                   size=20,
                   fontname="Times New Roman")

    plt.show()
Example #54
0
        for n, i in enumerate(tqdm.tqdm(idxs_window)):
            obj_mask = da_.where(da_ == i, other=0)
            y_mean = obj_mask.yt.where(obj_mask).mean()

            d = (y_mean - y_min) / (y_max - y_min)
            obj_mask.max(dim="yt").plot.contour(
                y="zt", ax=ax, add_colorbar=False, levels=[0.5], alpha=d
            )

        ax.set_aspect(1)


if __name__ == "__main__":
    import argparse

    argparser = argparse.ArgumentParser(__doc__)
    argparser.add_argument("objects_filename")
    argparser.add_argument("--frac", default=0.9, type=float)
    argparser.add_argument("--lx", default=10e3, type=float)

    args = argparser.parse_args()

    da = xr.open_dataarray(args.objects_filename, decode_times=False)

    plot_outline(da=da, lx=args.lx, frac=args.frac)

    fn_out = args.objects_filename.replace(".nc", ".outlines.png")
    plt.savefig(fn_out)
    print("Saved plot to {}".format(fn_out))
Example #55
0
    def make_pwqd_TEMP_files(self):
        """ quadratically detrends annually averaged TEMP field at each point for selected 250 year segments of CTRL or LPD simulations
        pwqd : `point wise quadratically detrended`
        """
        if self.run == 'ctrl':
            path = f'{path_prace}/ctrl_rect'
            interp = '.interp900x602'
            mf_fn = f'{path}/TEMP_PD_yrly_*.interp900x602.nc'
            trange = np.arange(50, 300)
            km = 42
            z = 'depth_t'
        elif self.run == 'lpd':
            path = f'{path_prace}/lpd'
            interp = ''
            mf_fn = f'{path}/ocn_yrly_TEMP_PD_*.nc'
            trange = np.arange(0, 250)
            km = 60
            z = 'z_t'

        # concatenate yearly files
        yrly_TEMP_file = f'{path}/TEMP_yrly{interp}.nc'
        try:
            #             assert 1==0
            assert os.path.exists(yrly_TEMP_file)
        except:
            print('making yrly TEMP file')
            da = xr.open_mfdataset(mf_fn, concat_dim='time').TEMP
            da = da.isel(time=trange)
            da.assign_coords(time=da.time.values).to_netcdf(yrly_TEMP_file)
            da.close()

        # calculating detrended TEMP field for each vertical level b/c of memory limitations
        for k in tqdm(range(km)):
            fn = f'{path}/TEMP_yrly_pwqd_{k:02d}{interp}.nc'
            try:
                #                 assert 1==0
                assert os.path.exists(fn)
            except:
                da_k = xr.open_dataarray(yrly_TEMP_file,
                                         decode_times=False).isel({z: k})
                da_pwqd_k = da_k - xr_quadtrend(da_k)
                da_pwqd_k.to_netcdf(fn)
                da_pwqd_k.close()
        # concatenating
        print(f'{path}/TEMP_yrly_pwqd_*{interp}.nc')
        da_pwqd = xr.open_mfdataset(f'{path}/TEMP_yrly_pwqd_*{interp}.nc',
                                    concat_dim=['depth_t'],
                                    chunks={'time': 1})
        if self.run == 'ctrl':
            da_pwqd = da_pwqd.assign_coords(time=np.arange(51, 301))
        elif self.run == 'lpd':
            da_pwqd = da_pwqd.assign_coords(time=np.arange(154, 404))

#         da = xr.open_dataarray(yrly_TEMP_file, decode_times=False)
#         da_pwqd = da - xr_quadtrend(da)

# writing out files for individual years
        print(da_pwqd.time)
        for i, y in tqdm(enumerate(da_pwqd.time)):  # 9 mins for ctrl
            da_pwqd.isel(time=i).to_netcdf(
                f'{path}/TEMP_pwqd_yrly_{int(y.values):04d}{interp}.nc')

        return
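A minimal sketch of the pointwise quadratic detrending that xr_quadtrend is assumed to perform, using xarray's built-in polynomial fitting (illustration only, not the project's helper):

import xarray as xr

def quad_detrend(da, dim="time"):
    """Subtract a degree-2 polynomial fitted along `dim` at every grid point."""
    fit = da.polyfit(dim=dim, deg=2)
    trend = xr.polyval(da[dim], fit.polyfit_coefficients)
    return da - trend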
Example #56
0
def load_earth_relief(resolution="01d", region=None, registration=None):
    """
    Load Earth relief grids (topography and bathymetry) in various resolutions.

    The grids are downloaded to a user data directory
    (usually ``~/.gmt/server/earth/earth_relief/``) the first time you invoke
    this function. Afterwards, it will load the grid from the data directory.
    So you'll need an internet connection the first time around.

    These grids can also be accessed by passing in the file name
    ``'@earth_relief_rru[_reg]'`` to any grid plotting/processing function.
    Refer to :gmt-docs:`datasets/remote-data.html` for more details.

    Parameters
    ----------
    resolution : str
        The grid resolution. The suffix ``d``, ``m`` and ``s`` stand for
        arc-degree, arc-minute and arc-second. It can be ``'01d'``, ``'30m'``,
        ``'20m'``, ``'15m'``, ``'10m'``, ``'06m'``, ``'05m'``, ``'04m'``,
        ``'03m'``, ``'02m'``, ``'01m'``, ``'30s'``, ``'15s'``, ``'03s'``,
        or ``'01s'``.

    region : str or list
        The subregion of the grid to load. Required for Earth relief grids with
        resolutions <= 05m.

    registration : str
        Grid registration type. Either ``pixel`` for pixel registration or
        ``gridline`` for gridline registration. Default is ``None``, where
        a pixel-registered grid is returned unless only the
        gridline-registered grid is available.

    Returns
    -------
    grid : xarray.DataArray
        The Earth relief grid. Coordinates are latitude and longitude in
        degrees. Relief is in meters.

    Notes
    -----
    The DataArray doesn't support slice operations for Earth relief data with
    resolutions higher than "05m", which are stored as smaller tiles.

    Examples
    --------

    >>> # load the default grid (pixel-registered 01d grid)
    >>> grid = load_earth_relief()
    >>> # load the 30m grid with "gridline" registration
    >>> grid = load_earth_relief("30m", registration="gridline")
    >>> # load high-resolution grid for a specific region
    >>> grid = load_earth_relief(
    ...     "05m", region=[120, 160, 30, 60], registration="gridline"
    ... )
    """

    # earth relief data stored as single grids for low resolutions
    non_tiled_resolutions = ["01d", "30m", "20m", "15m", "10m", "06m"]
    # earth relief data stored as tiles for high resolutions
    tiled_resolutions = [
        "05m", "04m", "03m", "02m", "01m", "30s", "15s", "03s", "01s"
    ]

    if registration in ("pixel", "gridline", None):
        # If None, let GMT decide on Pixel/Gridline type
        reg = f"_{registration[0]}" if registration else ""
    else:
        raise GMTInvalidInput(
            f"Invalid grid registration: {registration}, should be either "
            "'pixel', 'gridline' or None. Default is None, where a "
            "pixel-registered grid is returned unless only the "
            "gridline-registered grid is available.")

    # different ways to load tiled and non-tiled earth relief data
    if resolution in non_tiled_resolutions:
        if region is not None:
            raise NotImplementedError(
                f"'region' is not supported for Earth relief resolution '{resolution}'"
            )
        fname = which(f"@earth_relief_{resolution}{reg}", download="a")
        with xr.open_dataarray(fname) as dataarray:
            grid = dataarray.load()
            _ = grid.gmt  # load GMTDataArray accessor information
    elif resolution in tiled_resolutions:
        # Tiled grids can't be sliced.
        # See https://github.com/GenericMappingTools/pygmt/issues/524
        if region is None:
            raise GMTInvalidInput(
                f"'region' is required for Earth relief resolution '{resolution}'"
            )
        grid = grdcut(f"@earth_relief_{resolution}{reg}", region=region)
    else:
        raise GMTInvalidInput(
            f'Invalid Earth relief resolution "{resolution}"')

    # Add some metadata to the grid
    grid.name = "elevation"
    grid.attrs["long_name"] = "elevation relative to the geoid"
    grid.attrs["units"] = "meters"
    grid.attrs["vertical_datum"] = "EMG96"
    grid.attrs["horizontal_datum"] = "WGS84"
    # Remove the actual range because it gets outdated when indexing the grid,
    # which causes problems when exporting it to netCDF for usage on the
    # command-line.
    grid.attrs.pop("actual_range")
    for coord in grid.coords:
        grid[coord].attrs.pop("actual_range")
    return grid
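A brief usage sketch of the function above (hedged: it assumes pygmt's remote-dataset machinery is available and that the grids can be downloaded on first use; the resolutions and region values are illustrative, mirroring the docstring examples):

# coarse, non-tiled grid: loaded as a whole, so 'region' must be omitted
grid_coarse = load_earth_relief(resolution="01d", registration="gridline")
print(grid_coarse.attrs["long_name"], grid_coarse.attrs["units"])

# fine, tiled grid: 'region' is mandatory and grdcut performs the subsetting
grid_fine = load_earth_relief("05m", region=[120, 160, 30, 60])
print(grid_fine.shape)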
Example #57
def plot_yearly_2d_contour_by_alpha(setting, z_dim="cum_roi"):
    # verify setting
    if setting not in ("compact", "general"):
        raise ValueError("unknown setting: {}".format(setting))

    start_date, end_date = dt.date(2005, 1, 3), dt.date(2017, 12, 29)
    name = "report_SPSP_CVaR_yearly_{}_{}_{}.nc".format(
        setting, start_date.strftime("%Y%m%d"), end_date.strftime("%Y%m%d"))

    # yearly interval
    years = [[dt.date(2005, 1, 3), dt.date(2005, 12, 30)],
             [dt.date(2006, 1, 2), dt.date(2006, 12, 29)],
             [dt.date(2007, 1, 2), dt.date(2007, 12, 31)],
             [dt.date(2008, 1, 2), dt.date(2008, 12, 31)],
             [dt.date(2009, 1, 5), dt.date(2009, 12, 31)],
             [dt.date(2010, 1, 4), dt.date(2010, 12, 31)],
             [dt.date(2011, 1, 3), dt.date(2011, 12, 30)],
             [dt.date(2012, 1, 2), dt.date(2012, 12, 28)],
             [dt.date(2013, 1, 2), dt.date(2013, 12, 31)],
             [dt.date(2014, 1, 2), dt.date(2014, 12, 31)],
             [dt.date(2015, 1, 5), dt.date(2015, 12, 31)],
             [dt.date(2016, 1, 4), dt.date(2016, 12, 30)],
             [dt.date(2017, 1, 3), dt.date(2017, 12, 29)]]

    # read report file
    # pass the path directly so xarray manages and closes the file handle
    xarr = xr.open_dataarray(os.path.join(pp.DATA_DIR, name))

    # parameters
    max_portfolio_sizes = range(5, 50 + 5, 5)
    window_sizes = range(60, 240 + 10, 10)
    alphas = ["{:.2f}".format(v / 100.) for v in range(50, 100, 5)]
    set_indices = [1, 2, 3]

    import matplotlib as mpl
    import matplotlib.pyplot as plt

    for start, end in years:
        # figure size in inches
        fig = plt.figure(figsize=(64, 48), facecolor='white')
        fig.suptitle(
            'TAIEX_20050103_50largest_listed_market_cap {} {}-{}'.format(
                setting, start.strftime("%Y-%m-%d"), end.strftime("%Y-%m-%d")),
            fontsize=20)

        xlim = (5, 50)
        ylim = (60, 240)
        for adx, alpha in enumerate(alphas):
            # x-axis, max_portfolio_size, y-axis:  window_sizes
            ax = fig.add_subplot(2, 5, adx + 1, xlim=xlim, ylim=ylim)

            ax.set_title(r'$\alpha$ = {:.0%}'.format(float(alpha)),
                         y=1.02,
                         fontsize=18)
            # labelpad - number of points between the axis and its label
            ax.set_xlabel(r'$M$',
                          fontsize=14,
                          labelpad=-2,
                          fontname="Times New Roman")
            ax.set_ylabel(r'$h$',
                          fontsize=14,
                          labelpad=-2,
                          fontname="Times New Roman")
            ax.tick_params(labelsize=10, pad=1)
            ax.set_xticks(max_portfolio_sizes)
            ax.set_xticklabels(max_portfolio_sizes,
                               fontsize=10,
                               fontname="Times New Roman")
            ax.set_yticks(window_sizes)
            ax.set_yticklabels(window_sizes,
                               fontsize=10,
                               fontname="Times New Roman")

            Xs, Ys = np.meshgrid(max_portfolio_sizes, window_sizes)
            Zs = np.zeros_like(Xs, dtype=float)  # plain float: np.float is removed in NumPy >= 1.24
            n_row, n_col = Xs.shape

            for rdx in range(n_row):
                for cdx in range(n_col):
                    n_symbol, win_size = Xs[rdx, cdx], Ys[rdx, cdx]
                    z_values = xarr.loc["{}_{}".format(
                        start.strftime("%Y%m%d"), end.strftime("%Y%m%d")),
                                        set_indices, n_symbol, win_size, alpha,
                                        z_dim]
                    mean = z_values.mean()
                    Zs[rdx, cdx] = float(mean) * 100.
                    # if Zs[rdx, cdx] > 10:
                    #     Zs[rdx, cdx] = 10.5

            print("Z_dim:", z_dim)
            print("z_range:", np.min(Zs), np.max(Zs))
            z_min = int(np.floor(np.min(Zs)))
            z_max = int(np.ceil(np.max(Zs)))

            # set color range (computed for every z_dim so contourf and the
            # shared colorbar below always receive a valid normalization)
            cm_norm = mpl.colors.Normalize(vmin=z_min, vmax=z_max, clip=False)
            color_range = np.arange(z_min, z_max + 1)

            # contour, projecting on z
            cset = ax.contourf(Xs,
                               Ys,
                               Zs,
                               cmap=plt.cm.coolwarm,
                               norm=cm_norm,
                               levels=color_range)

        # share color bar,  rect [left, bottom, width, height]
        cbar_ax = fig.add_axes([0.92, 0.125, 0.015, 0.75])
        # print fig.get_axes()
        cbar = fig.colorbar(cset,
                            ax=fig.get_axes(),
                            cax=cbar_ax,
                            ticks=color_range)

        cbar.ax.tick_params(labelsize=12)
        if z_dim == "cum_roi":
            cbar_label_name = "Average cumulative returns (%)"
        elif z_dim == "daily_VSS":
            cbar_label_name = "Average daily VSS (%)"

        cbar.set_label(cbar_label_name,
                       labelpad=1,
                       size=20,
                       fontname="Times New Roman")
        fig_path = os.path.join(
            pp.TMP_DIR,
            'SPSP_CVaR_cum_roi_yearly_{}_{}.png'.format(setting, start.year))
        fig.set_size_inches(16, 9)
        plt.savefig(fig_path, dpi=240, format='png')

    plt.show()
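The nested rdx/cdx loops above select one (portfolio size, window size) cell at a time; the sketch below is a vectorized alternative, under the assumption that the report array's dimensions follow the same ordering as the `.loc` call (interval, set index, portfolio size, window size, alpha, statistic) and that the set-index dimension comes first after the scalar selections:

def mean_surface(xarr, interval_key, set_indices, sizes, windows, alpha, z_dim):
    # label-based selection of the whole (set, size, window) block at once
    sub = xarr.loc[interval_key, set_indices, list(sizes), list(windows),
                   alpha, z_dim]
    # average over the trial sets, then transpose so rows index window sizes
    # and columns index portfolio sizes, matching the meshgrid orientation
    return sub.mean(axis=0).values.T * 100.

# e.g. Zs = mean_surface(xarr, "20050103_20051230", set_indices,
#                        max_portfolio_sizes, window_sizes, alpha, z_dim)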
Example #58
import xarray as xr
import numpy as np
import matplotlib
from faceted import faceted
from matplotlib import ticker
matplotlib.rcParams['mathtext.fontset'] = 'cm'
import matplotlib.pyplot as plt
from matplotlib.lines import Line2D
# matplotlib.rc('text', usetex=True)
from lighten_color import lighten_color

# NOTE: recent xarray releases require combine='nested' whenever concat_dim is given
lam_ds = xr.open_mfdataset(
    '/work/bnm/buoyant_entrainment/data/lam/rho_u_v_w/slice*.nc',
    combine='nested', concat_dim='t')
lam_omega = xr.open_dataarray(
    '/work/bnm/buoyant_entrainment/data/lam/vort_phi/azi_lam_vort.nc')
lam_mask = xr.open_dataarray(
    '/work/bnm/buoyant_entrainment/data/lam/mask/laminar_mask.nc',
    engine='scipy')
lam_circ = xr.open_dataarray(
    '/work/bnm/buoyant_entrainment/data/lam/mask/circ.nc')
lam_azi_omega = xr.open_mfdataset(
    '/work/bnm/buoyant_entrainment/data/lam/azi_vort_phi/lam*.nc',
    combine='nested', concat_dim='t').omega_phi

turb_ds = xr.open_mfdataset(
    '/work/bnm/buoyant_entrainment/data/turb/rho_u_v_w/slice*.nc',
    combine='nested', concat_dim='t')
turb_omega = xr.open_mfdataset(
    '/work/bnm/buoyant_entrainment/data/turb/vort_phi/turb*.nc',
    combine='nested', concat_dim='t').omega_phi
Example #59
import numpy as np
import xarray as xr
import pdb

fpath = '/localscratch/wllf030/cornkle/obs_data/blob_maps_MSG/'
file = fpath + 'blob_map_90km_sum_18UTC.nc'
file2 = fpath + 'blob_map_30km_sum_18UTC.nc'
file3 = fpath + 'blob_map_90km_sum_3UTC.nc'
file4 = fpath + 'blob_map_30km_sum_3UTC.nc'
tpath = '/users/global/cornkle/data/pythonWorkspace/proj_CEH/topo/gtopo_1min_afr.nc'
spath = '/users/global/cornkle/C_paper/wavelet/figs/paper/'

diff30 = fpath + 'blob_map_30km_18-3UTRC_diff.nc'
diff90 = fpath + 'blob_map_90km_18-3UTRC_diff.nc'

ds = xr.open_dataarray(file)
top = xr.open_dataarray(tpath)
ds2 = xr.open_dataarray(file2)
ds3 = xr.open_dataarray(file3)
ds4 = xr.open_dataarray(file4)

d30diff = xr.open_dataarray(diff30)
d90diff = xr.open_dataarray(diff90)

ds.name = '100k'
ds2.name = '30k'

ds = ds.sel(lon=slice(-17.5, 20),
            lat=slice(4.5, 20))  # lake chad lon=slice(10,20), lat=slice(10,15)
ds2 = ds2.sel(lon=slice(-17.5, 20),
              lat=slice(4.5, 20))  # volta lon=slice(-10,8), lat=slice(4,10)
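The '18-3UTRC_diff' file names suggest that the difference maps are simply the 18UTC minus the 3UTC sums; if so, they could be recomputed directly from the arrays already loaded (an assumption based on the file names only, not verified against the data):

# hedged sketch: recompute the day-night differences over the same subregion
d30diff_check = ds2 - ds4.sel(lon=slice(-17.5, 20), lat=slice(4.5, 20))
d90diff_check = ds - ds3.sel(lon=slice(-17.5, 20), lat=slice(4.5, 20))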
Example #60
def composite(h):
    pool = multiprocessing.Pool(processes=1)

    file = constants.MCS_POINTS_DOM
    hour = h

    msg = xr.open_dataarray(file)
    msg = msg[(msg['time.hour'] == h) & (msg['time.minute'] == 0) &
              (msg['time.year'] >= 2006) & (msg['time.year'] <= 2010) &
              (msg['time.month'] >= 6)]

    msg = msg.sel(lat=slice(10.2, 17), lon=slice(-9.5, 9.5))

    res = pool.map(file_loop, msg)
    pool.close()

    # for m in msg[0:50]:
    #     file_loop(m)
    #
    # return

    res = [x for x in res if x is not None]

    blobs = []
    scales = []
    sign = []
    signt = []

    for r in res:
        blobs.append(r[0])
        scales.append(r[1])
        sign.append(r[2])

    blobs = [item for sublist in blobs
             for item in sublist]  # flatten list of lists
    scales = [item for sublist in scales
              for item in sublist]  # flatten list of lists
    sign = [item for sublist in sign
            for item in sublist]  # flatten list of lists

    blobs = np.array(blobs, dtype=float)

    blobs = blobs[np.isfinite(blobs)]

    scales = np.array(scales, dtype=float)
    scales = scales[np.isfinite(scales)]

    print(np.unique(blobs), len(np.unique(blobs)))

    weight_blobs = np.ones_like(blobs) / float(len(blobs))
    weight_scales = np.ones_like(scales) / float(len(scales))

    histb, hb = np.histogram(blobs,
                             bins=np.arange(-200, 201, 20),
                             weights=weight_blobs)
    hists, hs = np.histogram(scales,
                             bins=np.arange(-200, 201, 20),
                             weights=weight_scales)

    histbc, hb = np.histogram(blobs, bins=np.arange(-200, 201, 20))
    histsc, hs = np.histogram(scales, bins=np.arange(-200, 201, 20))

    print('Number of blobs:', blobs.size)

    # f = plt.figure()
    # plt.bar(hb[0:-1], histb, align='edge', width=hb[1::]-hb[0:-1],edgecolor='k')
    #
    # f = plt.figure()
    # plt.bar(hs[0:-1], hists, align='edge', width=hb[1::]-hb[0:-1],edgecolor='k')

    # f = plt.figure()
    # plt.bar(hs[0:-1], histb-hists, align='edge', width=hb[1::]-hb[0:-1],edgecolor='k')

    return histb, hists, hb, blobs.size, histbc, histsc
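The commented-out bar plots inside composite() hint at how the returned histograms were inspected; below is a minimal usage sketch (hedged: it assumes the surrounding module already provides the helpers composite() relies on, namely constants, file_loop, multiprocessing, xr, np and plt):

def plot_composite(hour=18):
    histb, hists, hb, n_blobs, histbc, histsc = composite(hour)
    width = hb[1:] - hb[:-1]
    plt.figure()
    plt.bar(hb[:-1], histb, align='edge', width=width, edgecolor='k',
            label='blobs (n={})'.format(n_blobs))
    plt.bar(hb[:-1], hists, align='edge', width=width, edgecolor='k',
            alpha=0.5, label='scales')
    plt.legend()
    plt.show()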