def test_calc_grid_standard(lons, lats): shape = np.meshgrid(lons,lats)[0].shape target_grid = calc_grid(lons, lats, decimals=4) assert type(target_grid) == dict assert target_grid['mask'].shape == shape
def vic2nc(options, global_atts, domain_dict, fields): """ Convert ascii VIC files to netCDF format""" # determine run mode if (options['memory_mode'] == 'standard') \ and (options['chunksize'] in ['all', 'All', 'ALL', 0]): memory_mode = 'big_memory' else: memory_mode = options['memory_mode'] print("\n-------------------------------") print("Configuration File Options") print("-------------OPTIONS-------------") for pair in options.items(): print("{0}: {1}".format(*pair)) print('Fields: {0}'.format(", ".join(fields.keys()))) if domain_dict: print("-------------DOMAIN--------------") for pair in domain_dict.items(): print("{0}: {1}".format(*pair)) print("--------GLOBAL_ATTRIBUTES--------") for pair in global_atts.items(): print("{0}: {1}".format(*pair)) print("--------RUN MODE--------") print('Memory Mode: {0}'.format(memory_mode)) if memory_mode == 'standard': print('Chunksize={0}'.format(options['chunksize'])) print("---------------------------------\n") # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Make output directory if not os.path.exists(options['out_directory']): os.makedirs(options['out_directory']) # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Make pairs (i.e. find inds) files = glob(options['input_files']) points = get_file_coords(files) # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Get target grid information if domain_dict: domain = read_domain(domain_dict) target_grid_file = path.split(domain_dict['filename'])[1] global_atts['target_grid_file'] = target_grid_file else: # must be a regular grid, build from file names domain = calc_grid(points.get_lats(), points.get_lons()) target_grid_file = None domain_dict = {'y_x_dims': ['lat', 'lon']} # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Get grid index locations points = get_grid_inds(domain, points) # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Get timestamps if options['input_file_format'].lower() == 'ascii': if ('bin_start_date' in options and 'bin_end_date' in options and 'bin_dt_sec' in options): vic_datelist, vic_ordtime = make_dates( options['bin_start_date'], options['bin_end_date'], options['bin_dt_sec'], calendar=options['calendar']) else: vic_datelist = get_dates(files[0]) vic_ordtime = date2num(vic_datelist, TIMEUNITS, calendar=options['calendar']) elif options['input_file_format'].lower() in ['binary', 'netcdf']: vic_datelist, vic_ordtime = make_dates(options['bin_start_date'], options['bin_end_date'], options['bin_dt_sec'], calendar=options['calendar']) else: raise ValueError('Unknown input file format: {}. Valid options are \ ascii or binary'.format(options['input_file_format'])) # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Determine time segmentation if options['start_date']: start_date = datetime.strptime(options['start_date'], TIMESTAMPFORM) if start_date < vic_datelist[0]: print("WARNING: Start date in configuration file is before " "first date in file.") start_date = vic_datelist[0] print('WARNING: New start date is {0}'.format(start_date)) else: start_date = vic_datelist[0] if options['end_date']: end_date = datetime.strptime(options['end_date'], TIMESTAMPFORM) if end_date > vic_datelist[-1]: print("WARNING: End date in configuration file is after " "last date in file.") end_date = vic_datelist[-1] print('WARNING: New end date is {0}'.format(end_date)) else: end_date = vic_datelist[-1] # Ordinal Time start_ord = date2num(start_date, TIMEUNITS, calendar=options['calendar']) end_ord = date2num(end_date, TIMEUNITS, calendar=options['calendar']) print("netCDF Start Date: {0}".format(start_date)) print("netCDF End Date: {0}".format(end_date)) segment_dates = [] if options['time_segment'] == 'day': # calendar insensitive num_segments = np.ceil(end_ord - start_ord) if start_date.hour == 0: segment_dates = num2date(np.arange(start_ord, end_ord + 1, 1), TIMEUNITS, calendar=options['calendar']) else: # allow start at time other than 0 temp = [start_ord].append(np.arange(np.ceil(start_ord), end_ord + 1, 1)) segment_dates = num2date(temp, TIMEUNITS, calendar=options['calendar']) elif options['time_segment'] == 'month': num_segments = (end_date.year - start_date.year) * 12 \ + end_date.month - start_date.month + 1 month = start_date.month year = start_date.year for i in pyrange(num_segments + 1): segment_dates.append(datetime(year, month, 1)) month += 1 if month == 13: month = 1 year += 1 elif options['time_segment'] == 'year': num_segments = end_date.year - start_date.year + 1 year = start_date.year for i in pyrange(num_segments + 1): segment_dates.append(datetime(year, 1, 1)) year += 1 elif options['time_segment'] == 'decade': num_segments = (end_date.year - start_date.year) / 10 + 1 year = start_date.year for i in pyrange(num_segments + 1): segment_dates.append(datetime(year, 1, 1)) year += 10 elif options['time_segment'] == 'all': num_segments = 1 segment_dates = [start_date, end_date] else: raise ValueError('Unknown timesegment options \ {0}'.format(options['time_segment'])) print("Number of files: {0}".format(len(segment_dates) - 1)) assert len(segment_dates) == num_segments + 1 # Make sure the first and last dates are start/end_date segment_dates[0] = start_date segment_dates[-1] = end_date + timedelta(minutes=1) # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Setup Segments segments = deque() for num in pyrange(num_segments): # Segment time bounds t0 = segment_dates[num] t1 = segment_dates[num + 1] # Get segment inds i0 = bisect_left(vic_datelist, t0) i1 = bisect_left(vic_datelist, t1) # Make segment filename (with path) if options['time_segment'] == 'day': filename = "{0}.{1}.nc".format(options['out_file_prefix'], t0.strftime('%Y-%m-%d')) elif options['time_segment'] == 'month': filename = "{0}.{1}.nc".format(options['out_file_prefix'], t0.strftime('%Y-%m')) elif options['time_segment'] == 'year': filename = "{0}.{1}.nc".format(options['out_file_prefix'], t0.strftime('%Y')) elif options['time_segment'] == 'all': filename = "{0}.{1}-{2}.nc".format(options['out_file_prefix'], t0.strftime('%Y%m%d'), t1.strftime('%Y%m%d')) filename = path.join(options['out_directory'], filename) # Setup segment and initialize netcdf segment = Segment(num, i0, i1, options['out_file_format'], filename, memory_mode=memory_mode) segment.nc_globals(**global_atts) segment.nc_time(t0, t1, vic_ordtime, options['calendar']) segment.nc_dimensions(snow_bands=options['snow_bands'], veg_tiles=options['veg_tiles'], soil_layers=options['soil_layers']) segment.nc_domain(domain) segment.nc_fields(fields, domain_dict['y_x_dims'], options['precision']) print(repr(segment)) segments.append(segment) # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # # Get column numbers and names (will help speed up reading) names = [] usecols = [] dtypes = [] bin_dtypes = [] bin_mults = [] if options['precision'] == 'double': prec = NC_DOUBLE else: prec = NC_FLOAT for name, field in fields.items(): if not np.isscalar(field['column']): # multiple levels for i, col in enumerate(field['column']): names.append(name + str(i)) usecols.append(col) if 'type' in field: if type(field['type']) == list: dtypes.extend(field['type']) else: dtypes.extend([field['type']] * len(field['column'])) else: dtypes.append([prec] * len(field['column'])) if options['input_file_format'].lower() == 'binary': if 'bin_dtype' in field: if type(field['bin_dtype']) == list: bin_dtypes.extend(field['bin_dtype']) else: bin_dtypes.extend([field['bin_dtype']] * len(field['column'])) else: raise ValueError('bin_dtype not in field: {}'.format(name)) if 'bin_mult' in field: if type(field['bin_mult']) == list: bin_mults.extend(field['bin_mult']) else: bin_mults.extend([field['bin_mult']] * len(field['column'])) else: bin_mults.extend([1.0] * len(field['column'])) else: # no levels names.append(name) usecols.append(field['column']) if 'type' in field: dtypes.append(field['type']) else: dtypes.append(prec) if options['input_file_format'].lower() == 'binary': if 'bin_dtype' in field: bin_dtypes.append(field['bin_dtype']) else: raise ValueError('bin_dtype not in field: {}'.format(name)) if 'bin_mult' in field: bin_mults.append(field['bin_mult']) else: bin_mults.append(1.0) print('setting point attributes (fileformat, names, usecols, and dtypes)') # pandas.read_table does not 'honor' the order of the columns in usecols # it simply uses them in ascending order. So the names need to be sorted # the same way. For example, if the columns in the VIC file are: # 3: prcp; 4: evap; 5: runoff; 6; baseflow; 7: sm1; 8: sm2; 9: sm3; 10: swe # and this is parsed from the configuration file as # usecols = [3, 4, 5, 6, 10, 7, 8, 9] # names=['prcp', 'evap', 'runoff', 'baseflow', 'swe', 'sm1', 'sm2', 'sm3'] # then without sorting, the netcdf file will have the wrong variables: # nc_swe will contain sm1, nc_sm1 will contain sm2, nc_sm2: sm3 and # nc_swe: sm3 # the following will ensure that the names are sorted in increasing column # order. Note that sorted(usecols) is not strictly necessary, since # apparently that is done in read_table, but it keeps the names and columns # in the same order names = [x for (y, x) in sorted(pyzip(usecols, names))] usecols = sorted(usecols) points.set_names(names) points.set_usecols(usecols) points.set_dtypes(dtypes) # set binary attributes if options['input_file_format'].lower() == 'binary': points.set_bin_dtypes(bin_dtypes) points.set_bin_mults(bin_mults) points.set_fileformat(options['input_file_format']) print('done') # ---------------------------------------------------------------- # # ---------------------------------------------------------------- # if memory_mode == 'big_memory': # ------------------------------------------------------------ # # run in big memory mode for i, segment in enumerate(segments): segments[i].allocate() while points: point = points.popleft() point.open() point.read() point.close() for segment in segments: segment.nc_add_data_to_array(point) for segment in segments: segment.nc_write_data_from_array() segment.nc_close() # ------------------------------------------------------------ # elif memory_mode == 'standard': # ------------------------------------------------------------ # # Open VIC files and put data into netcdfs chunk = Plist() while points: point = points.popleft() point.open() point.read() point.close() chunk.append(point) if len(chunk) > int(options['chunksize']) or len(points) == 0: for segment in segments: segment.nc_add_data_standard(chunk) chunk = Plist() del point # ------------------------------------------------------------ # # ------------------------------------------------------------ # # Close the netcdf files for segment in segments: segment.nc_close() # ------------------------------------------------------------ # elif memory_mode == 'original': # ------------------------------------------------------------ # # Run in original memory mode (a.k.a. vic2nc.c mode) # Open all files for point in points: point.open() while segments: segment = segments.popleft() segment.allocate() count = segment.count for point in points: point.read(count) segment.nc_add_data_to_array(point) segment.nc_write_data_from_array() segment.nc_close() for point in points: point.close() # ------------------------------------------------------------ # return
def test_calc_grid_standard(lons, lats): shape = np.meshgrid(lons, lats)[0].shape target_grid = calc_grid(lons, lats, decimals=4) assert type(target_grid) == dict assert target_grid['mask'].shape == shape