Example no. 1
0
def add_extra_time_axes(ds_in, file_name):
    """Add auxiliary time coordinates to an xarray dataset in place.

    Attaches day-of-year, month, year and season coordinates (plus their
    monotonically increasing 'sequential' counterparts) so that operations
    such as ``groupby('seasons').mean('time')`` work even with the model's
    360_day calendar, which xarray cannot decode natively.

    Parameters
    ----------
    ds_in : xarray.Dataset
        Dataset with a ``time`` coordinate carrying ``units`` and either a
        ``calendar_type`` or a ``calendar`` attribute. Modified in place.
    file_name : str
        Source file name; used to infer the output frequency
        ('monthly' vs 'daily') stored in ``ds_in.attrs['data_type']``.
    """

    # Record the sampling frequency; cal.month_to_season needs it below.
    if 'atmos_monthly' in file_name:
        ds_in.attrs['data_type'] = 'monthly'
    elif 'atmos_daily' in file_name:
        ds_in.attrs['data_type'] = 'daily'

    # Different outputs name the calendar attribute differently; fall back
    # from 'calendar_type' to the CF-standard 'calendar'. Catch only
    # AttributeError — the previous bare `except:` could mask real errors.
    try:
        calendar_type = ds_in.time.calendar_type
    except AttributeError:
        calendar_type = ds_in.time.calendar

    date_arr = cal.day_number_to_date(ds_in.time,
                                      calendar_type=calendar_type,
                                      units_in=ds_in.time.units)

    seasons_arr = cal.month_to_season(date_arr.month, ds_in.attrs['data_type'])

    # Recurring coordinates: values repeat every year.
    ds_in.coords['dayofyear'] = (('time'), date_arr.dayofyear)
    ds_in.coords['months'] = (('time'), date_arr.month)
    ds_in.coords['years'] = (('time'), date_arr.year)
    ds_in.coords['seasons'] = (('time'), seasons_arr)

    # Sequential coordinates: strictly increasing across the whole record,
    # useful for grouping over consecutive (not climatological) periods.
    ds_in.coords['seq_days'] = (('time'),
                                cal.recurring_to_sequential(
                                    date_arr.dayofyear))
    ds_in.coords['seq_months'] = (('time'),
                                  cal.recurring_to_sequential(date_arr.month))
    ds_in.coords['seq_years'] = (('time'),
                                 cal.recurring_to_sequential(date_arr.year))
    ds_in.coords['seq_seasons'] = (('time'),
                                   cal.recurring_to_sequential(seasons_arr))
Example no. 2
0
def create_time_arr(num_years, is_climatology, time_spacing):
    """Construct a 360-day-calendar time axis with interval bounds.

    Returns a tuple ``(time_arr, day_number, ntime, time_units, time_bounds)``
    where ``time_arr`` holds calendar dates, ``day_number`` the raw day
    offsets, ``ntime`` the axis length, ``time_units`` the CF units string,
    and ``time_bounds`` an ``(ntime, 2)`` array of lower/upper interval edges.
    """
    if is_climatology:
        if num_years != 1.:
            print(
                'note that for climatology file only one year is required, so setting num_years=1.'
            )
        num_days = 360.
        num_years = 1.
        # Mid-interval day numbers: drop the leading edge of the linspace and
        # shift back by half a step.
        day_number = np.linspace(0, num_days, time_spacing +
                                 1)[1:] - (num_days / (2. * time_spacing))
        # Year zero in the units string marks the file as a climatology.
        time_units = 'days since 0000-01-01 00:00:00.0'
        print(
            'when creating a climatology file, the year of the time units must be zero. This is how the model knows it is a climatology.'
        )
    else:
        num_days = num_years * 360.
        day_number = np.linspace(0, num_days, time_spacing + 1)
        time_units = 'days since 0001-01-01 00:00:00.0'

    # Bounds are symmetric: each interval extends half a step either side.
    half_step = (day_number[1] - day_number[0]) / 2.
    time_bounds = np.column_stack((day_number - half_step,
                                   day_number + half_step))

    time_arr = day_number_to_date(day_number)

    return time_arr, day_number, len(time_arr), time_units, time_bounds
Example no. 3
0
def create_time_arr(num_years, is_climatology, time_spacing):
    """Construct a 360-day-calendar time axis (older variant without bounds).

    Returns ``(time_arr, day_number, ntime, time_units)``. The Python 2
    ``print`` statements were converted to ``print()`` calls so the code is
    valid Python 3, consistent with the other examples in this file.
    """

    if is_climatology:
        if num_years != 1.:
            print('note that for climatology file only one year is required, so setting num_years=1.')
        num_days = 360.
        num_years = 1.
        # Mid-interval day numbers: drop the leading edge of the linspace and
        # shift back by half a step.
        day_number = np.linspace(0, num_days, time_spacing +
                                 1)[1:] - (num_days / (2. * time_spacing))
        # Year zero in the units string marks the file as a climatology.
        time_units = 'days since 0000-01-01 00:00:00.0'
        print('when creating a climatology file, the year of the time units must be zero. This is how the model knows it is a climatology.')
    else:
        num_days = num_years * 360.
        day_number = np.linspace(0, num_days, time_spacing + 1)
        time_units = 'days since 0001-01-01 00:00:00.0'

    time_arr = day_number_to_date(day_number)
    ntime = len(time_arr)

    return time_arr, day_number, ntime, time_units
Example no. 4
0
def read_data(base_dir,
              exp_name,
              start_file,
              end_file,
              avg_or_daily,
              topo_present,
              model='fms13',
              file_name=None):
    """Open a run of model netCDF output files as one xarray dataset and
    attach auxiliary time coordinates (day-of-year, month, season, ...).

    Parameters
    ----------
    base_dir, exp_name : str
        Files are expected under ``base_dir/exp_name/runNNN``.
    start_file, end_file : int
        First and last run-directory numbers (inclusive).
    avg_or_daily : str
        'monthly' or 'daily'; selects the atmos_* file and season grouping.
    topo_present : bool
        Whether to read the height-interpolated file variants.
    model : str, optional
        Only 'fms13' is supported.
    file_name : optional
        Unused; kept for interface compatibility.

    Returns
    -------
    (thd_data, time_arr, size_list)

    Raises
    ------
    EOFError
        If any expected file is missing or suspiciously small.
    ValueError
        If ``model`` is not 'fms13' (previously this crashed later with a
        NameError on ``da_3d``).
    """

    if model != 'fms13':
        # Fail fast with a clear message instead of a NameError below.
        raise ValueError('unsupported model: %s' % model)

    # Run directories are zero-padded: run001, run002, ...
    files_temp = [
        base_dir + '/' + exp_name + '/run%03d' % m
        for m in range(start_file, end_file + 1)
    ]
    if topo_present:
        if avg_or_daily == 'monthly':
            extra = '_interp_new_height.nc'
        else:
            extra = '_interp_new_height_temp.nc'
    else:
        extra = '.nc'

    thd_string = '/atmos_' + avg_or_daily + extra
    thd_files = [s + thd_string for s in files_temp]

    thd_files_exist = [os.path.isfile(s) for s in thd_files]

    print(thd_files[0])

    # Check existence BEFORE sizing: the original called os.path.getsize
    # first, so a missing file raised FileNotFoundError and the intended
    # EOFError (listing which files are missing) was never reached.
    if not all(thd_files_exist):
        raise EOFError('EXITING BECAUSE OF MISSING FILES', [
            thd_files[elem] for elem in range(len(thd_files_exist))
            if not thd_files_exist[elem]
        ])

    thd_file_size = [os.path.getsize(s) for s in thd_files]

    # A run that died part-way leaves a file much smaller than the rest;
    # compare against the modal file size.
    # NOTE(review): scipy >= 1.11 returns a scalar from stats.mode, so
    # `.mode[0]` would fail there — confirm the pinned scipy version.
    mode_file_size = stats.mode(thd_file_size).mode[0]

    thd_files_too_small = [
        size < 0.75 * mode_file_size for size in thd_file_size
    ]

    if np.any(thd_files_too_small):
        raise EOFError('EXITING BECAUSE OF FILE TOO SMALL', [
            thd_files[elem] for elem in range(len(thd_files_exist))
            if thd_files_too_small[elem]
        ])

    size_list = init(thd_files[0])

    # The model's 360-day calendar cannot be decoded by netCDF libraries,
    # hence decode_times=False. Chunk sizes keep dask memory use manageable.
    try:
        da_3d = xar.open_mfdataset(
            thd_files,
            decode_times=False,
            chunks={
                'time': size_list['ntime'],
                'lon': size_list['nlons'] // 4,
                'lat': size_list['nlats'] // 2
            })
    except Exception:
        # Some outputs name the time dimension 'xofyear' instead of 'time';
        # retry chunking on that name. (Narrowed from a bare `except:`.)
        da_3d = xar.open_mfdataset(
            thd_files,
            decode_times=False,
            chunks={
                'xofyear': size_list['ntime'],
                'lon': size_list['nlons'] // 4,
                'lat': size_list['nlats'] // 2
            })

    # Normalise the time dimension name. `inplace=True` was removed from
    # xarray's rename, so rebind the returned dataset instead.
    for old_name, new_name in {'xofyear': 'time'}.items():
        try:
            da_3d = da_3d.rename({old_name: new_name})
        except ValueError:
            # Dimension already called 'time' — nothing to rename.
            pass

    time_arr = da_3d.time
    date_arr = cal.day_number_to_date(time_arr)

    # Axis coordinates: the unique values each groupby target can take.
    da_3d.coords['dayofyear_ax'] = (('dayofyear_ax'),
                                    np.unique(date_arr.dayofyear))
    da_3d.coords['months_ax'] = (('months_ax'), np.unique(date_arr.month))
    da_3d.coords['seasons_ax'] = (('seasons_ax'), np.arange(4))
    da_3d.coords['years_ax'] = (('years_ax'), date_arr.year)
    da_3d.coords['all_time_ax'] = (('all_time_ax'), np.arange(1))

    # Recurring per-timestep coordinates (repeat every year).
    da_3d.coords['dayofyear'] = (('time'), date_arr.dayofyear)
    da_3d.coords['months'] = (('time'), date_arr.month)
    da_3d.coords['years'] = (('time'), date_arr.year)

    seasons_arr = cal.month_to_season(date_arr.month, avg_or_daily)

    da_3d.coords['seasons'] = (('time'), seasons_arr)

    two_months_arr = cal.month_to_two_months(date_arr.month, avg_or_daily)

    da_3d.coords['two_months'] = (('time'), two_months_arr)
    da_3d.coords['two_months_ax'] = (('two_months_ax'),
                                     np.unique(two_months_arr))

    # All-ones coordinate so a groupby over it yields the full-record mean.
    da_3d.coords['all_time'] = (('time'), time_arr / time_arr)

    # Sequential coordinates: strictly increasing across the whole record.
    da_3d.coords['seq_months'] = (('time'), date_arr.month + 12. *
                                  ((date_arr.year - np.min(date_arr.year))))

    da_3d.coords['seq_seasons'] = (('time'),
                                   cal.recurring_to_sequential(seasons_arr))

    da_3d.coords['seq_all_time'] = (('time'), list(range(len(time_arr))))

    da_3d.coords['seq_days'] = (('time'),
                                cal.recurring_to_sequential(
                                    date_arr.dayofyear))

    da_3d.coords['seq_seasons_ax'] = (
        ('seq_seasons_ax'), np.mod(np.min(da_3d.seq_seasons.values), 4) +
        np.arange(len(np.unique(da_3d.seq_seasons.values))))

    da_3d.attrs['exp_name'] = exp_name
    da_3d.attrs['start_file'] = start_file
    da_3d.attrs['end_file'] = end_file
    da_3d.attrs['data_type'] = avg_or_daily

    # If there is no combined precipitation field, build one from its
    # convective and large-scale components when both are present.
    try:
        da_3d['precipitation']
    except KeyError:
        try:
            print('aggregating rain')
            da_3d['convection_rain']
            da_3d['condensation_rain']
        except KeyError:
            print('no precip output present')
        else:
            da_3d['precip'] = (('time', 'lat',
                                'lon'), da_3d['convection_rain'] +
                               da_3d['condensation_rain'])
            print('done aggregating rain')

    thd_data = da_3d

    return thd_data, time_arr, size_list
Example no. 5
0
def read_data(base_dir, exp_name, start_file, end_file, avg_or_daily,
              topo_present):
    """Open a run of model output files (older interface) and attach
    time-derived coordinates computed arithmetically from the 360-day axis.

    Python 2 ``print`` statements were converted to ``print()`` calls so the
    code is valid Python 3, consistent with the other examples in this file.

    Returns ``(thd_data, time_arr, size_list)``. Exits the process if any
    expected input file is missing.
    """

    # Run directories here are unpadded ('run7'), unlike the 'run%03d'
    # convention used by the fms13 variant elsewhere in this file.
    files_temp = [
        base_dir + '/' + exp_name + '/run%d' % m
        for m in range(start_file, end_file + 1)
    ]
    if topo_present:
        extra = '_interp_new_model_lev.nc'
    else:
        extra = '.nc'

    thd_string = '/atmos_' + avg_or_daily + extra

    thd_files = [s + thd_string for s in files_temp]

    thd_files_exist = [os.path.isfile(s) for s in thd_files]

    print(thd_files[0])

    if not all(thd_files_exist):
        print('WARNING missing files', [
            thd_files[elem] for elem in range(len(thd_files_exist))
            if not thd_files_exist[elem]
        ])
        print('EXITING BECAUSE OF MISSING FILES')
        # Exit non-zero so shell callers can detect the failure (was exit(0),
        # which reported success on an error path).
        sys.exit(1)

    size_list = init(thd_files[0])

    # 360-day calendar: netCDF libraries cannot decode it, so keep raw times.
    da_3d = xarray.open_mfdataset(
        thd_files,
        decode_times=False,
        chunks={
            'time': size_list['ntime'],
            'lon': size_list['nlons'] // 4,
            'lat': size_list['nlats'] // 2
        })

    time_arr = da_3d.time
    date_arr = cal.day_number_to_date(time_arr)

    # Axis coordinates (unique values for groupby targets). Day-of-year and
    # seasons are derived arithmetically assuming 360-day years / 30-day months.
    da_3d.coords['dayofyear_ax'] = (('dayofyear_ax'),
                                    np.unique(np.mod(np.floor(time_arr), 360)))
    da_3d.coords['months_ax'] = (('months_ax'), np.unique(date_arr.month))
    da_3d.coords['seasons_ax'] = (('seasons_ax'), np.arange(4))

    da_3d.coords['dayofyear'] = (('time'), np.mod(np.floor(time_arr), 360))
    da_3d.coords['months'] = (('time'), date_arr.month)
    da_3d.coords['years'] = (('time'), date_arr.year)
    # Seasons as DJF=0..SON=3; the +30-day shift aligns season boundaries.
    da_3d.coords['seasons'] = (('time'), np.mod((da_3d.time + 30.) // 90, 4))
    da_3d.coords['offset_seasons'] = (('time'),
                                      np.mod((da_3d.time - 30.) // 90, 4))

    # All-ones coordinate so a groupby over it yields the full-record mean.
    da_3d.coords['all_time'] = (('time'), time_arr / time_arr)

    # Sequential (strictly increasing) month/season counters.
    da_3d.coords['seq_months'] = (('time'), time_arr // 30 + 1)
    da_3d.coords['seq_seasons'] = (('time'), (da_3d.time + 30.) // 90)

    da_3d.coords['seq_seasons_ax'] = (
        ('seq_seasons_ax'), np.mod(np.min(da_3d.seq_seasons.values), 4) +
        np.arange(len(np.unique(da_3d.seq_seasons.values))))

    da_3d.attrs['exp_name'] = exp_name

    # If there is no combined precipitation field, build one from its
    # convective and large-scale components when both are present.
    try:
        da_3d['precipitation']
    except KeyError:
        try:
            print('aggregating rain')
            da_3d['convection_rain']
            da_3d['condensation_rain']
        except KeyError:
            print('no precip output present')
        else:
            da_3d['precip'] = (('time', 'lat',
                                'lon'), da_3d['convection_rain'] +
                               da_3d['condensation_rain'])
            print('done aggregating rain')

    thd_data = da_3d

    return thd_data, time_arr, size_list
Example no. 6
0
def main():
    """Create boundary-condition climatology files from AMIP SST and
    sea-ice-concentration data.

    For each variable in ``output_name_list`` the raw netCDF input is read,
    averaged into a 12-month climatology (optionally annual-mean or DJF-only),
    optionally perturbed with an SST anomaly, and written to a netCDF file
    via ``cts.output_to_file``.
    """

    base_directory = '/scratch/sit204/sst_amip_files/'

    amip_data_version = 'amip_data_version_1_1_0'  #s 'amip_data_version_1_1_0' or 'amip_data_version_1_0_0'

    # Maps input variable name -> prefix used for the output variable/file.
    output_name_list = {'tosbcs': 'sst', 'siconc': 'siconc'}
    #Note that we are using the bcs (boundary conditions) input4MIPs files, as instructed.
    # The theory is that by using the bcs files (which are mid-month values) the time-average
    # of the interpolated bcs files should be equal to the time-average data provided in 'tos'
    # files, not the 'tosbcs'. See http://www-pcmdi.llnl.gov/projects/amip/AMIP2EXPDSN/BCS/amip2bcs.php
    # and http://www-pcmdi.llnl.gov/projects/amip/AMIP2EXPDSN/BCS/amipbc_dwnld_files/360x180/v1.0.0/nc/readme_nc

    add_anomaly = False
    # NOTE(review): `anomaly_type` is commented out, so setting
    # `add_anomaly = True` would raise NameError at the add_sst_anomaly call
    # below — define anomaly_type before enabling it.
    #     anomaly_type='el_nino'
    months_to_include = 'all'

    for variable_name in list(output_name_list.keys()):

        # Archive layout and averaging strategy differ per data version.
        if amip_data_version == 'amip_data_version_1_0_0':
            nfiles = 50
            folder_name = '/1950_1999/'
            filename_prefix = 'amipbc_sst_360x180_19'
            sst_all = np.zeros((nfiles, 12, 180, 360))
            do_annual_mean = True
        elif amip_data_version == 'amip_data_version_1_1_0':
            nfiles = 1
            folder_name = ''
            filename_prefix = variable_name + '_input4MIPs_SSTsAndSeaIce_CMIP_PCMDI-AMIP-1-1-0_gs1x1_187001-201512'
            do_annual_mean = False
        elif amip_data_version == 'hadgem_t_surf':
            nfiles = 1
            folder_name = ''
            filename_prefix = 'ts_clim'
            do_annual_mean = False

        for file_tick in range(nfiles):

            # Multi-file archives are split per year (1950 + offset).
            if nfiles != 1:
                filename = filename_prefix + str(file_tick + 50)
            else:
                filename = filename_prefix

            resolution_file = Dataset(
                base_directory + amip_data_version + '/' + folder_name + '/' +
                filename + '.nc', 'r')

            # Coordinate variable names differ between data versions.
            try:
                lons = resolution_file.variables['longitude'][:]
                lats = resolution_file.variables['latitude'][:]
            except KeyError:
                lons = resolution_file.variables['lon'][:]
                lats = resolution_file.variables['lat'][:]

            sst_in = resolution_file.variables[variable_name][:]

            # Multi-file versions accumulate into the pre-allocated array;
            # single-file versions just keep the data (NameError: sst_all was
            # never allocated; IndexError: shape mismatch on the first pass).
            try:
                sst_all[file_tick, :, :, :] = sst_in
            except NameError:
                sst_all = sst_in
            except IndexError:
                sst_all = sst_in

        # Cell-boundary coordinates, again with version-dependent names.
        # The bare `except:` blocks are narrowed to KeyError so unrelated
        # errors are not silently swallowed.
        try:
            no_latb_lonb = False
            lonbs = resolution_file.variables['bounds_longitude'][:]
            latbs = resolution_file.variables['bounds_latitude'][:]
        except KeyError:
            try:
                lonbs = resolution_file.variables['lon_bnds'][:]
                latbs = resolution_file.variables['lat_bnds'][:]
            except KeyError:
                no_latb_lonb = True

        nlon = lons.shape[0]
        nlat = lats.shape[0]

        if not no_latb_lonb:
            # Convert (n, 2) lower/upper bound pairs into n+1 edge values.
            nlonb = lonbs.shape[0]
            nlatb = latbs.shape[0]

            lonbs_adjusted = np.zeros(nlonb + 1)
            latbs_adjusted = np.zeros(nlatb + 1)

            lonbs_adjusted[0:nlonb] = lonbs[:, 0]
            lonbs_adjusted[nlonb] = lonbs[-1, 1]

            latbs_adjusted[0:nlatb] = latbs[:, 0]
            latbs_adjusted[nlatb] = latbs[-1, 1]
        else:
            latbs_adjusted = None
            lonbs_adjusted = None

        try:
            day_number = resolution_file.variables['time'][:]
        except KeyError:
            # File has no time axis: assume 12 monthly records.
            day_number = np.ones(12)

        time_arr = day_number_to_date(day_number,
                                      calendar_type='gregorian',
                                      units_in='days since 1870-1-1')
        # Output time axis: mid-month days of an idealised 360-day year.
        time_arr_adj = np.arange(15, 360, 30)

        annual_mean_name = ''

        # 4-D data is (file, month, lat, lon): just average over files.
        if len(sst_all.shape) == 4:
            sst_in = np.mean(sst_all, axis=0)
        else:
            sst_in = np.zeros((12, nlat, nlon))

            if months_to_include == 'all':
                # Climatological mean for each calendar month.
                for month_tick in np.arange(1, 13, 1):
                    month_idx = np.where(time_arr.month == month_tick)[0]
                    sst_in[month_tick - 1, :, :] = np.mean(
                        sst_all[month_idx, :, :], axis=0)

            elif months_to_include == 'DJF':
                # Use the DJF-mean field for every month of the year.
                djf_idx = np.where(
                    np.logical_or(
                        np.logical_or(time_arr.month == 1,
                                      time_arr.month == 2),
                        time_arr.month == 12))
                djf_mean = np.mean(sst_all[djf_idx[0], ...], axis=0)
                for month_tick in np.arange(1, 13, 1):
                    sst_in[month_tick - 1, ...] = djf_mean
                annual_mean_name = '_djf'

            elif months_to_include == 'only_month_available':
                # Input holds a single month: replicate it across the year.
                for month_tick in np.arange(1, 13, 1):
                    month_idx = np.where(time_arr.month == month_tick)[0]
                    sst_in[month_tick - 1, :, :] = sst_all

        if do_annual_mean:
            # Replace every month with the annual-mean field.
            sst_in_am = np.mean(sst_in, axis=0)
            sst_in = np.zeros((12, nlat, nlon))
            for month_tick in np.arange(1, 13, 1):
                sst_in[month_tick - 1, :, :] = sst_in_am
            annual_mean_name = '_am'

        if add_anomaly and variable_name == 'tosbcs':
            sst_in, shifted_lons = add_sst_anomaly(sst_in, anomaly_type)
            anom_name = '_' + anomaly_type
        else:
            anom_name = ''

        # Surface fields have no vertical axis.
        p_full = None
        p_half = None

        npfull = None
        nphalf = None

        #Find grid and time numbers

        ntime = time_arr.day.shape[0]
        if not no_latb_lonb:
            nlonb = lonbs_adjusted.shape[0]
            nlatb = latbs_adjusted.shape[0]

        #Output it to a netcdf file.
        variable_name = output_name_list[
            variable_name] + annual_mean_name + '_clim_' + amip_data_version[
                0:5] + anom_name
        file_name = variable_name + '_' + amip_data_version + '.nc'

        number_dict = {}
        number_dict['nlat'] = nlat
        number_dict['nlon'] = nlon
        number_dict['npfull'] = npfull
        number_dict['nphalf'] = nphalf
        number_dict['ntime'] = ntime

        if not no_latb_lonb:
            number_dict['nlatb'] = nlatb
            number_dict['nlonb'] = nlonb

        # Year zero in the units marks the output as a climatology.
        time_units = 'days since 0000-01-01 00:00:00.0'

        cts.output_to_file(sst_in, lats, lons, latbs_adjusted, lonbs_adjusted,
                           p_full, p_half, time_arr_adj, time_units, file_name,
                           variable_name, number_dict)
Example no. 7
0
def time_average(array, settings, time_dim, time_arr):
    """Average an array over time, optionally restricted to a season or month.

    Parameters
    ----------
    array : ndarray
        Data with a time axis at position ``time_dim``.
    settings : sequence
        ``settings[0]``: bool, whether to average at all (if False the input
        is returned unchanged); ``settings[1]``/``settings[2]``: start/end
        indices for 'all_time' averaging; ``settings[5]``: averaging type —
        0 or 'all_time', a season ('djf', 'mam', 'jja', 'son') or a month
        ('jan' .. 'dec').
    time_dim : int
        Index of the time axis (0, or 1 for 5-D arrays).
    time_arr : array-like
        Raw day numbers, converted to dates via ``cal.day_number_to_date``
        for the season/month selections (unused for 'all_time').

    Returns
    -------
    ndarray or int
        The time-mean array, or -1 if the averaging request is inconsistent
        (unsupported ndim/time_dim combination or unknown averaging type —
        the original raised UnboundLocalError for an unknown type).
    """
    if not settings[0]:
        return array

    ndim = len(array.shape)
    start = settings[1]
    end = settings[2]
    averaging_type = settings[5]

    # 0 is a legacy alias for averaging over the whole record.
    if averaging_type == 0:
        averaging_type = 'all_time'

    if averaging_type == 'all_time':
        # Slice the requested index window along the time axis, then average.
        if ndim == 4 and time_dim == 0:
            array_sub = array[start:end, :, :, :]
        elif ndim == 3 and time_dim == 0:
            array_sub = array[start:end, :, :]
        elif ndim == 5 and time_dim == 1:
            array_sub = array[:, start:end, :, :]
        else:
            print('Something not right with time averaging')
            return -1
        return np.mean(array_sub, axis=time_dim)

    # Month numbers selected by each season/month averaging type; replaces
    # the original 16-way chain of near-identical `if` blocks.
    months_for_type = {
        'djf': (12, 1, 2),
        'mam': (3, 4, 5),
        'jja': (6, 7, 8),
        'son': (9, 10, 11),
        'jan': (1,), 'feb': (2,), 'mar': (3,), 'apr': (4,),
        'may': (5,), 'jun': (6,), 'jul': (7,), 'aug': (8,),
        'sep': (9,), 'oct': (10,), 'nov': (11,), 'dec': (12,),
    }

    if averaging_type not in months_for_type:
        # Previously an unknown type fell through and raised
        # UnboundLocalError on time_mean; report and bail out instead.
        print('Something not right with time averaging')
        return -1

    date_arr = cal.day_number_to_date(time_arr)
    time_idx = np.isin(date_arr.month, months_for_type[averaging_type])

    # NOTE(review): as in the original, the boolean index is applied to the
    # FIRST axis, which is only correct when time_dim == 0 — confirm callers.
    array_sub = array[time_idx, ...]
    return np.mean(array_sub, axis=time_dim)