Example #1
def pickle_files(f, city='toronto'):
    """
    Open all .nc files listed in f as xr.Datasets and pickle them into the
    /export/data/scratch/tropomi_rc/day_pkl/[city] directory under the date
    of the orbit.

    Args:
        f (str): file name of the inventory of city orbits.
        city (str): city of interest.

    >>> toronto_files = os.path.join(inventories, 'toronto/toronto_inventory.txt')
    >>> pickle_files(toronto_files, city='toronto')
    """
    # Create dictionary of city orbits sorted by date
    dict_of_city_orbits = create_city_orbits_by_date(f)
    dates = list(dict_of_city_orbits.keys())

    start_time = time.time()
    fdir = tropomi_pkl_day  # directory to store daily pickle files
    i = 1  # counter

    # Get list of pkl files
    fpath = os.path.join(fdir, '{}/*'.format(city))
    pkl_list = sorted(glob.glob(fpath))
    date_list = []

    for file in pkl_list:
        date_list.append(file[-8:])  # last 8 chars of file name are YYYYMMDD

    for date in dates:
        if date not in date_list:
            start_time_iter = time.time()

            # Read all .nc files for a date into an xr.Dataset
            ds = ot.dsread(date, city=city)

            # Save pickled file to /export/data/scratch/tropomi_rc/day_pkl/city
            pkl_path = os.path.join(fdir, city)
            output_file = os.path.join(pkl_path, date)

            # Pickle files
            with open(output_file, 'wb') as outfile:
                print('Pickling %s' % date)
                pickle.dump(ds, outfile)

            print("[%s] --- %s seconds ---" %
                  (i, (time.time() - start_time_iter)))

            i += 1

    end_time = time.time()
    hours, rem = divmod(end_time - start_time, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Total time elapsed:{:0>2}:{:0>2}:{:05.2f}".format(
        int(hours), int(minutes), seconds))
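The doctest above shows how to pickle the orbits; a minimal sketch of reloading a single pickled day, assuming `tropomi_pkl_day` is configured as in the module (the date '20200501' is a hypothetical example):

# Hypothetical reload of one pickled day ('20200501' is an assumed date)
with open(os.path.join(tropomi_pkl_day, 'toronto', '20200501'), 'rb') as infile:
    day_ds = pickle.load(infile)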
Example #2
def pickle_files(f):
    """
    Pickle all .nc files listed in f into the ../pkl directory.

    Args:
        f (str): file name of inventory of Toronto orbits.
    """
    # Create dictionary of Toronto orbits sorted by date
    dict_of_toronto_orbits = create_toronto_orbits_by_date(f)
    dates = list(dict_of_toronto_orbits.keys())

    start_time = time.time()
    fdir = tropomi_pkl  # directory to store pickle files
    i = 1  # counter

    # Get list of pkl files
    fpath = os.path.join(fdir, '*')
    pkl_list = sorted(glob.glob(fpath))
    date_list = []

    for file in pkl_list:
        date_list.append(file[-8:])

    for date in dates:
        if date not in date_list:
            start_time_iter = time.time()

            nc_pattern = '*__%s*.nc' % date  # avoid shadowing the inventory argument f

            # Read all .nc files for a date into an xr.Dataset
            ds = ot.dsread(nc_pattern)

            output_file = os.path.join(fdir, date)
            # Pickle files
            # with bz2.BZ2File(output_file + '.pbz2', 'w') as outfile:
            #     print('Pickling %s' % date)
            #     cPickle.dump(ds, outfile)
            with open(output_file, 'wb') as outfile:
                print('Pickling %s' % date)
                pickle.dump(ds, outfile)

            print("[%s] --- %s seconds ---" %
                  (i, (time.time() - start_time_iter)))

            i += 1

    end_time = time.time()
    hours, rem = divmod(end_time - start_time, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Total time elapsed:{:0>2}:{:0>2}:{:05.2f}".format(
        int(hours), int(minutes), seconds))
Example #3
        week_num_start and week_num_end must be odd and even,
        respectively.

    Returns:
        bool: True if the week number of ds falls within week_num_range.
    """

    week_one, week_two = week_num_range
    if (week_one % 2 != 1) or (week_two % 2 != 0) or (week_two - week_one != 1):
        raise ValueError('First entry of week_num_range must be an odd int, '
                         'second entry must be an even int, and the difference '
                         'between the first and second entry must be 1.')

    ds_week = pd.to_datetime(ds.time.data).week  # ISO week number of observation

    return week_one <= ds_week <= week_two
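To make the parity rule concrete, a small self-contained sketch of the validation logic above:

# Sketch: valid week_num_range values are (odd, odd + 1) pairs.
for week_num_range in [(19, 20), (18, 19), (19, 21)]:
    week_one, week_two = week_num_range
    valid = (week_one % 2 == 1) and (week_two % 2 == 0) and (week_two - week_one == 1)
    print(week_num_range, 'valid' if valid else 'invalid')
# prints: (19, 20) valid; (18, 19) invalid (parity); (19, 21) invalid (difference)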


if __name__ == '__main__':
    # f='/export/data/scratch/tropomi/no2/S5P_OFFL_L2__NO2____20200502T080302_20200502T094432_13222_01_010302_20200504T005011.nc'
    f = '*__20200505*_*.nc'
    g = '*__20200504*_*.nc'
    ds1 = ot.dsread(f)
    ds2 = ot.dsread(g)
    # ds1 = aggregate_tropomi(ot.dsread(f))
    # ds2 = aggregate_tropomi(ot.dsread(g))
Example #4
def add_wind(f, city='toronto'):
    """
    Return the NO2 dataset for date f over city, with eastward and northward
    wind components matched to the lat/lon/time of each TROPOMI observation.

    Args:
        f (str): date string of the form YYYYMMDD.
        city (str): city of interest.

    Returns:
        no2 (xr.Dataset): dataset of NO2 TVCD with eastward (u) and northward (v)
            wind components.

    >>> no2 = add_wind('20200501', 'toronto')
    """

    start_time = time.time()

    # Load city limits
    w, e, s, n = poi.get_plot_limits(city=city, extent=1, res=0)

    # Load dataset
    no2 = ot.dsread(f, city)
    # Subset NO2 dataset over +-1 deg lat/lon around the city
    no2 = no2.where((no2.longitude >= w) & (no2.longitude <= e) &
                    (no2.latitude >= s) & (no2.latitude <= n),
                    drop=True)
    if no2.nitrogendioxide_tropospheric_column.size == 0:
        return None
    no2 = no2.rename({'time': 'measurement_time'})  # rename time
    # create u-component variable
    no2['u'] = (['sounding'], np.zeros([no2.sounding.size]))
    # create v-component variable
    no2['v'] = (['sounding'], np.zeros([no2.sounding.size]))

    # Load wind
    f_str = '*' + f + '*'
    fpath = os.path.join(winds, f_str)
    for file in glob.glob(fpath):
        wind = xr.open_dataset(file)
        # Interpolate wind onto the TROPOMI sounding coordinates
        # (assumes a single wind file matches the date pattern)
        interp_wind = wind.interp(lat=no2.latitude,
                                  lon=no2.longitude,
                                  method='linear')
        interp_wind = interp_wind.dropna(dim='sounding')

    # iterate over each observation and add the matching u/v wind components to no2
    for i in range(len(no2.scanline)):
        print('Reading scanline', i)
        # Load timestamp of observation
        t_obs = pd.to_datetime(no2.scanline[i].values)
        hour = t_obs.hour
        lat, lon = no2.latitude.values[i], no2.longitude.values[i]
        # load hourly-averaged winds for the observation hour
        winds_from_hour = interp_wind.isel(time=hour)

        for j in range(len(winds_from_hour.U850)):
            # add u/v wind components at the matching lat/lon
            if ((winds_from_hour.lon.values[j] == lon)
                    and (winds_from_hour.lat.values[j] == lat)):
                no2.u[i] += winds_from_hour.U850.values[j]
                no2.v[i] += winds_from_hour.V850.values[j]

    # pickle files
    fdir = os.path.join(winds_pkl, city)
    filename = f + '_raw'
    output_file = os.path.join(fdir, filename)
    with open(output_file, 'wb') as outfile:
        print('Pickling %s' % f)
        pickle.dump(no2, outfile)
    return no2
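A sketch of reloading the pickled result, assuming `winds_pkl` is configured as above (the date matches the doctest example):

# Hypothetical reload of the wind-matched NO2 dataset pickled by add_wind
with open(os.path.join(winds_pkl, 'toronto', '20200501_raw'), 'rb') as infile:
    no2_reloaded = pickle.load(infile)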
Example #5
start, end, calendar_week = gf.get_files(year=year, calendar_week=week_num)
 
try:
    file_list = open('inventory.txt', 'r')
except FileNotFoundError:
    print('Could not find inventory.txt (perhaps the file name does not match).')
    sys.exit()

ds_list = []

total_start_time = time.time()

for test_file in file_list:
    test_file = test_file.strip()
    start_time = time.time()
    ds_list.append(ot.dsread(test_file))
    print("--- %s seconds ---" % (time.time() - start_time))
print('Total time: %s seconds' % (time.time() - total_start_time))
print(len(ds_list))
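As a design note, a `with` block would close inventory.txt deterministically once reading is done; a minimal equivalent sketch:

# Equivalent sketch using a context manager (file closed automatically):
# with open('inventory.txt', 'r') as file_list:
#     ds_list = [ot.dsread(line.strip()) for line in file_list]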


# date_of_interest = '20200505'
# ds_list = []
# d = pd.to_datetime(ds[0].time.data).week