def pickle_files(f, city='toronto'):
    """
    Open all .nc files written in f as xr.Datasets and pickle them into the
    /export/data/scratch/tropomi_rc/day_pkl/[city] directory, one pickle
    file per date of orbit.

    Dates that already have a pickle file in the city directory are skipped,
    so the function can be re-run to pick up only new dates.

    Args:
        f (str): file name of inventory of city orbits.
        city (str): city of interest; also the name of the sub-directory of
            tropomi_pkl_day in which the pickle files are stored.

    >>> toronto_files = os.path.join(inventories, 'toronto/toronto_inventory.txt')
    >>> pickle_files(toronto_files, city='toronto')
    """
    # Create dictionary of city orbits sorted by date
    dict_of_city_orbits = create_city_orbits_by_date(f)

    start_time = time.time()

    # Directory holding the daily pickle files of this city. It is built
    # once with os.path.join so that the directory we scan for existing
    # pickles and the directory we write to are always the same, whether or
    # not tropomi_pkl_day ends with a path separator.
    pkl_dir = os.path.join(tropomi_pkl_day, city)

    # Pickle files are named after their date, so the last 8 characters
    # (YYYYMMDD) of each existing path identify an already-pickled date.
    # A set keeps the membership test below O(1).
    pickled_dates = {
        path[-8:] for path in glob.glob(os.path.join(pkl_dir, '*'))
    }

    i = 1  # counter of dates pickled during this call
    for date in dict_of_city_orbits:
        if date in pickled_dates:
            continue

        start_time_iter = time.time()

        # Read all .nc files for a date into a single dataset
        ds = ot.dsread(date, city=city)

        # Save pickled file under the name of the date
        output_file = os.path.join(pkl_dir, date)
        with open(output_file, 'wb') as outfile:
            print('Pickling %s' % date)
            pickle.dump(ds, outfile)

        print("[%s] --- %s seconds ---" %
              (i, (time.time() - start_time_iter)))
        i += 1

    end_time = time.time()
    hours, rem = divmod(end_time - start_time, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Total time elapsed:{:0>2}:{:0>2}:{:05.2f}".format(
        int(hours), int(minutes), seconds))
def pickle_files(f):
    """
    Pickle all .nc files written in f into the pickle directory
    (tropomi_pkl), one pickle file per date of orbit.

    Dates that already have a pickle file in that directory are skipped.

    Args:
        f (str): file name of inventory of Toronto orbits.
    """
    # Create dictionary of Toronto orbits sorted by date. The inventory
    # passed by the caller is used, not a module-level default.
    dict_of_toronto_orbits = create_toronto_orbits_by_date(f)

    start_time = time.time()
    fdir = tropomi_pkl  # directory to store pickle files

    # Pickle files are written directly into fdir and named after their
    # date, so the existing ones are looked up in fdir itself; the last 8
    # characters (YYYYMMDD) of each path identify an already-pickled date.
    pickled_dates = {
        path[-8:] for path in glob.glob(os.path.join(fdir, '*'))
    }

    i = 1  # counter of dates pickled during this call
    for date in dict_of_toronto_orbits:
        if date in pickled_dates:
            continue

        start_time_iter = time.time()

        # Read all .nc files for a date into a single dataset. A separate
        # name is used for the pattern so the parameter f is not clobbered.
        nc_pattern = '*__%s*.nc' % date
        ds = ot.dsread(nc_pattern)

        # Pickle files
        output_file = os.path.join(fdir, date)
        with open(output_file, 'wb') as outfile:
            print('Pickling %s' % date)
            pickle.dump(ds, outfile)

        print("[%s] --- %s seconds ---" %
              (i, (time.time() - start_time_iter)))
        i += 1

    end_time = time.time()
    hours, rem = divmod(end_time - start_time, 3600)
    minutes, seconds = divmod(rem, 60)
    print("Total time elapsed:{:0>2}:{:0>2}:{:05.2f}".format(
        int(hours), int(minutes), seconds))
week_num start and week_num_end must be odd and even, respectively. Returns: Boolean if week number of ds is in the week_num_range. """ week_one, week_two = week_num_range if (week_one % 2 != 1) or (week_two % 2 != 0) or (week_two - week_one != 1): return ValueError('First entry of week_num_range must be an odd int, \ second entry must be an even int, and the difference between \ first and second entry must be 1.') ds_week = pd.to_datetime(ds.time.data).week if (week_one <= ds_week) and (ds_week <= week_two): return True else: return False if __name__ == '__main__': # f='/export/data/scratch/tropomi/no2/S5P_OFFL_L2__NO2____20200502T080302_20200502T094432_13222_01_010302_20200504T005011.nc' f = '*__20200505*_*.nc' g = '*__20200504*_*.nc' ds1 = ot.dsread(f) ds2 = ot.dsread(g) # ds1 = aggregate_tropomi(ot.dsread(f)) # ds2 = aggregate_tropomi(ot.dsread(g))
def add_wind(f, city='toronto'):
    """
    Return a dataset for date f over city with wind data that matches
    lat/lon/time of TROPOMI observation, and pickle that dataset.

    Args:
        f (str): date string of the form YYYYMMDD.
        city (str): city of interest.

    Returns:
        no2 (xr.Dataset): dataset of NO2 TVCD with eastward (u) and
            northward (v) wind components, or None if no observation is
            left after subsetting to the city limits.

    >>> no2 = add_wind('20200501', 'toronto')
    """
    # NOTE(review): start_time is assigned but never read in this function.
    start_time = time.time()

    # Load city limits (west, east, south, north)
    w, e, s, n = poi.get_plot_limits(city=city, extent=1, res=0)

    # Load dataset
    no2 = ot.dsread(f, city)

    # Subset NO2 dataset over +-1 deg lat/lon around the city
    no2 = no2.where((no2.longitude >= w) &
                    (no2.longitude <= e) &
                    (no2.latitude >= s) &
                    (no2.latitude <= n), drop=True)

    # Nothing to do when no observation falls inside the city limits
    if no2.nitrogendioxide_tropospheric_column.size == 0:
        return None

    no2 = no2.rename({'time': 'measurement_time'})  # rename time

    # create u-component variable, zero-initialised along 'sounding'
    no2['u'] = (['sounding'], np.zeros([no2.sounding.size]))
    # create v-component variable, zero-initialised along 'sounding'
    no2['v'] = (['sounding'], np.zeros([no2.sounding.size]))

    # Load wind: any file in the winds directory whose name contains the date
    f_str = '*' + f + '*'
    fpath = os.path.join(winds, f_str)
    # NOTE(review): the loop keeps only the last matching file, and if the
    # glob matches nothing `wind` is never assigned, so the interp call
    # below would fail -- confirm exactly one wind file exists per date.
    for file in glob.glob(fpath):
        wind = xr.open_dataset(file)

    # Interpolate the wind field to the lat/lon of the NO2 observations and
    # drop the entries along 'sounding' that contain NaN.
    interp_wind = wind.interp(lat=no2.latitude, lon=no2.longitude,
                              method='linear')
    interp_wind = interp_wind.dropna(dim='sounding')

    # iterate over each observation and add the u/v wind components to no2
    # NOTE(review): the loop runs over no2.scanline while u, v, latitude and
    # longitude are indexed with the same i -- presumably scanline and
    # sounding have the same length here; TODO confirm.
    for i in range(len(no2.scanline)):
        print('Reading scanline', i)
        # Load timestamp of observation
        t_obs = pd.to_datetime(no2.scanline[i].values)
        hour = t_obs.hour
        lat, lon = no2.latitude.values[i], no2.longitude.values[i]

        # load averaged winds from hour (the hour is used as a positional
        # index into the wind 'time' dimension)
        winds_from_hour = interp_wind.isel(time=hour)

        # Entries were dropped by dropna above, so the interpolated winds
        # are matched back to the observation by exact lat/lon equality.
        for j in range(len(winds_from_hour.U850)):
            # add uv- wind components to matching lat/lon/timestamp
            if ((winds_from_hour.lon.values[j] == lon) and
                    (winds_from_hour.lat.values[j] == lat)):
                no2.u[i] += winds_from_hour.U850.values[j]
                no2.v[i] += winds_from_hour.V850.values[j]

    # pickle files: written to <winds_pkl><city>/<f>_raw
    fdir = winds_pkl + city + '/'
    filename = f + '_raw'
    output_file = os.path.join(fdir, filename)
    with open(output_file, 'wb') as outfile:
        print('Pickling %s' % f)
        pickle.dump(no2, outfile)

    return no2
# Get start/end dates and the calendar week for the requested year and week.
# NOTE(review): presumably gf.get_files also produces inventory.txt, which
# is read below -- confirm against gf.
start, end, calendar_week = gf.get_files(year=year, calendar_week=week_num)

# Open the inventory of file names; without it there is nothing to read.
try:
    file_list = open('inventory.txt', 'r')
except OSError:
    print('Did not find a text file containing file names (perhaps name does not match)')
    sys.exit()

ds_list = []
startiest_time = time.time()

# The with-statement closes the inventory once every line has been read.
with file_list:
    for test_file in file_list:
        test_file = test_file.strip()
        if not test_file:
            # skip blank lines (e.g. a trailing newline in the inventory)
            continue
        start_time = time.time()
        ds_list.append(ot.dsread(test_file))
        print("--- %s seconds ---" % (time.time() - start_time))

# Interpolate the elapsed time into the message; passing it as a second
# argument to print would leave the literal '%s' in the output.
print('Total time: %s' % (time.time() - startiest_time))
print(len(ds_list))