def _initialize_secondary(self):
    """Helper method to initialize sources from the secondary source file"""
    units = PointSourceContainer.units
    try:
        secondary_sources = open(self.secondary_file_name)
    except IOError:
        err_string = "Unable to open file {0}".format(self.secondary_file_name)
        raise IOError(err_string)
    secondary_list = []
    headers = secondary_sources.readline()
    line = secondary_sources.readline()
    while line != "":
        line_list = line.split(',')
        name = line_list[0]
        parent = line_list[1]
        parentSource = getattr(self, parent)
        lat = float(line_list[2])
        lon = float(line_list[3])
        emissions = float(line_list[4])
        yearly_emissions = {2014: Units.SciVal(emissions, units),
                            2015: Units.SciVal(emissions, units),
                            2016: Units.SciVal(emissions, units)}
        height = float(line_list[5])
        source = PST.PointSource(name, lat, lon, name, emissions, yearly_emissions, height)
        setattr(self, name, source)
        secondary_list.append(source)
        parentSource.secondary.append(source)
        line = secondary_sources.readline()
    secondary_sources.close()
    return secondary_list
def _initialize_sources(self):
    """Helper method to initialize sources from the main csv file"""
    units = PointSourceContainer.units
    source_list = []
    if not os.path.exists(self.file_name):
        raise IOError("The file %s does not exist" % self.file_name)
    try:
        source_file = open(self.file_name)
    except:
        print "Unable to open %s: see exception raised" % self.file_name
        raise
    file_heading = source_file.readline()
    next_line = source_file.readline()
    while next_line != "":
        temp = next_line.split(',')
        name = temp[0]
        loc = temp[1]
        lon = float(temp[2])
        lat = float(temp[3])
        emissions_2014 = float(temp[4])
        emissions_2015 = float(temp[5])
        emissions_2016 = float(temp[6])
        average_emissions = (emissions_2014 + emissions_2015 + emissions_2016) * (1. / 3.)
        yearly_emissions = {2014: Units.SciVal(emissions_2014, units),
                            2015: Units.SciVal(emissions_2015, units),
                            2016: Units.SciVal(emissions_2016, units)}
        var_name = temp[7]
        height = float(temp[8])
        monthly_factors = map(float, temp[9:21])
        full_name = '{0}, {1}'.format(name, loc)
        source = PST.PointSource(var_name, lat, lon, full_name, average_emissions, yearly_emissions, height)
        source.monthly_factors = monthly_factors
        setattr(self, var_name, source)
        source_list.append(source)
        next_line = source_file.readline()
    source_file.close()
    return source_list
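# Illustrative note (not part of the original data files): the column layout
# assumed by _initialize_sources above, inferred from the parsing code, is
#   name, location, lon, lat, emissions_2014, emissions_2015, emissions_2016,
#   var_name, height, followed by 12 monthly scaling factors.
# A hypothetical row would look like:
#   "Example Plant,Somewhere,-87.0,39.0,10.0,10.5,9.8,Example,150.0,1.0,...,1.0"
# (the "..." stands in for the remaining monthly factors).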
def wind_arrow(overpass, lat=None, lon=None, size=0.02, sources=['MERRA', 'ECMWF', 'GEM', 'Average']):
    """Creates a string which draws wind arrows in the final KML file"""
    arrows = ""
    input_lat = str(lat) if lat else "None"
    input_lon = str(lon) if lon else "None"
    if overpass.MERRA == overpass.ECMWF:
        w = PST.AllWinds(overpass)
        try:
            w.get_winds()
            wind_source = w
        except:
            wind_source = overpass
    else:
        # use winds from overpass instance
        wind_source = overpass
    MERRA_updated = wind_source.MERRA
    MERRA_beginning = wind_source.MERRA_beginning
    GEM = wind_source.GEM
    ECMWF_old = wind_source.ECMWF_old
    ECMWF = wind_source.ECMWF
    avg = wind_source.Average
    avg_old = wind_source.Average_old
    avg_beginning = wind_source.Average_beginning
    if ECMWF.speed > 0.001:
        ECMWF_old = PST.Wind((0, 0), 0)
        avg_old = PST.Wind((0, 0), 0)
    source_map = {'MERRA': (MERRA_updated, 'MERRA', 'MERRA'),
                  'ECMWF': (ECMWF, "ECMWF", "ECMWF"),
                  'Average': (avg, "Average", "Average"),
                  'GEM': (GEM, 'GEM', 'GEM')}
    winds = [source_map[src] for src in sources]
    wind_files = set([])
    lat = overpass.lat if lat is None else lat
    lon = overpass.lon if lon is None else lon
    # try:
    #     gem_new = ReadWinds.get_gem_highres(overpass.time)
    # except IOError:
    #     pass
    # except:
    #     raise
    # else:
    #     winds.append((gem_new, 'GEM Forecast', 'GEM'))
    # try:
    #     ecmwf_new = ReadWinds.get_ecmwf_highres(overpass.time, True)[0]
    # except IOError:
    #     pass
    # except:
    #     raise
    # else:
    #     winds.append((ecmwf_new, 'ECMWF (0.3 degree)', 'ECMWF'))
    winds_to_show = [w for w in winds if w[0].speed > 0.001]
    winds_to_show = sorted(winds_to_show, key=lambda l: l[0].speed, reverse=True)
    for index, (wind, wind_source, wind_arrow_source) in enumerate(winds_to_show):
        speed = wind.speed
        north = lat + speed * size
        south = lat - speed * size
        east = lon + speed * size
        west = lon - speed * size
        heading = -90 - wind.bearing
        arrow = Formats.Arrow_Format.format(windsource=wind_source,
                                            description=str(wind),
                                            N=north, S=south, E=east, W=west,
                                            rotation=heading,
                                            arrow=wind_arrow_source,
                                            draworder=index + 20)
        wind_file = os.path.join(data.kml_images, 'arrow_%s.png' % wind_arrow_source)
        wind_files.add(wind_file)
        arrows += arrow
    return Overlay(arrows, list(wind_files))
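# Hedged usage sketch for wind_arrow (the overpass argument is assumed to be a
# PST.Overpass with MERRA/ECMWF/GEM/Average winds attached, as built below):
def _example_wind_arrows(overpass):
    # Draw only the MERRA and Average arrows, centred on the source location.
    return wind_arrow(overpass, size=0.02, sources=['MERRA', 'Average'])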
def _Initialize(self, csv_name):
    """Creates Overpass objects for each overpass, labelled as
    {PointSource short name}{YYYYMMDD}; for example, Gavin20160207"""
    if not os.path.exists(csv_name):
        raise IOError("File %s does not exist" % csv_name)
    try:
        csv = open(csv_name, 'r')
    except:
        print "Could not open %s: See message" % csv_name
        raise
    header = csv.readline()
    line = csv.readline()
    list = line.split(',')
    overpass_list = []
    while line != "":
        try:
            key = list[0]
            source = Sources.Sources[key]
        except AttributeError:
            raise AttributeError("Attribute {0} does not exist in module Sources.\nCurrent line of csv: {1}; in csv {2}".format(key, line, self.source_file))
        if len(list) == 18:
            try:
                date = map(int, list[1].split('-'))
                year, month, day, hour, minute = date
                dt = datetime.datetime(year, month, day, hour, minute)
                time = PST.Time(dt, source)
                merra_vector = map(float, list[2:4])
                ecmwf_old_vector = map(float, list[4:6])
                ecmwf_vector = (0, 0)
                gem_vector = map(float, list[6:8])
                avg_vector = map(float, list[8:10])
                surf_vector = map(float, list[10:12])
                stability_class = list[12]
                a = float(list[13].strip('\n'))
                stability_corrected = list[14]
                a_corrected = float(list[15])
                lite = list[16]
                full = list[17].strip('\n').strip('\r')
            except IndexError:
                raise
            except Exception as exc:
                print "An unexpected exception was raised: see exception"
                raise
        else:
            try:
                date = map(int, list[1].split('-'))
                year, month, day, hour, minute = date
                dt = datetime.datetime(year, month, day, hour, minute)
                time = PST.Time(dt, source)
                merra_beginning = map(float, list[2:4])
                merra_middle = map(float, list[4:6])
                merra_interp = map(float, list[6:8])
                ecmwf_old_vector = map(float, list[8:10])
                ecmwf_vector = map(float, list[10:12])
                gem_vector = map(float, list[12:14])
                avg_vector = map(float, list[14:16])
                surf_vector = map(float, list[16:18])
                stability_class = list[18]
                a = float(list[19].strip('\n'))
                stability_corrected = list[20]
                a_corrected = float(list[21])
                lite = list[22]
                full = list[23].strip('\n').strip('\r')
            except IndexError:
                raise
            except Exception as exc:
                print "An unexpected exception was raised: see exception"
                raise
        try:
            overpass = PST.Overpass(time)
            overpass.height = source.height
            overpass.MERRA_beginning = PST.Wind(merra_beginning, source.height)
            overpass.MERRA = PST.Wind(merra_middle, source.height)
            overpass.MERRA_interp = PST.Wind(merra_interp, source.height)
            overpass.ECMWF = PST.Wind(ecmwf_vector, source.height)
            overpass.ECMWF_old = PST.Wind(ecmwf_old_vector, source.height)
            overpass.GEM = PST.Wind(gem_vector, source.height)
            overpass.Average_beginning = 0.5 * (overpass.MERRA_beginning + overpass.ECMWF)
            overpass.Average = 0.5 * (overpass.MERRA + overpass.ECMWF)
            overpass.Average_interp = 0.5 * (overpass.MERRA_interp + overpass.ECMWF)
            overpass.Average_old = 0.5 * (overpass.MERRA + overpass.ECMWF_old)
            overpass.Average_hybrid = PST.Wind.construct(overpass.Average.speed, overpass.Average_beginning.bearing, overpass.height)
            overpass.surface = PST.Wind(surf_vector, source.height)
            overpass.a_elevated = PST.Stability(overpass.Average.speed, 0.).a
            overpass.a_old = a
            overpass.a = a_corrected
            overpass.stability_old = stability_class
            overpass.stability = stability_corrected
            overpass.source = source
            overpass.FullFile = full
            overpass.LiteFile = lite
            try:
                obs_mode = full.split('/')[-1].split('_')[1][-2:]
            except:
                obs_mode = ''
            overpass.observation_mode = obs_mode
            overpass_name = source.short + time.strf8
            setattr(self, overpass_name, overpass)
            self.all.append(overpass)
            Sources.Sources[key].Overpasses.append(overpass)
            overpass_list.append(overpass)
            line = csv.readline()
            list = line.split(',')
            overpass.secondary = source.secondary
        except:
            raise
    csv.close()
    return overpass_list
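# Hedged usage sketch: after _Initialize runs, each overpass is reachable as an
# attribute named <source short name><YYYYMMDD> (e.g. Gavin20160207, per the
# docstring above) and through the 'all' list; 'container' is a hypothetical
# instance of this class.
#
#   for op in container.all:
#       print op.source.short, op.Average.speed, op.a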
def Model(overpass, f_plume=0.10, f_background=0.01, offset=3000., y_max_positive=50.e3, y_max_negative=50.e3, y_min_negative=0., y_min_positive=0., direction='y', wind_adjustment=0., wind_sources=['Average'], smooth=False, surface_stability=True, stability="new", temporal_factors=False, bias_correction='corrected', LocalBackground=PlumeModel.InBackground, LocalInPlume=PlumeModel.InPlume, co2_source='xco2', custom_wind=None, snr_strong_co2_min=None, chi_squared_max=None, albedo_min=None, albedo_max=None, outcome_flags={1, 2}, surface_pressure_max=None, surface_pressure_min=None, background_average=None, secondary_sources=False, fixed_secondary_sources=[], x_max=75.e3, scatter_plot=False, force_winds=None, units=Units.output_units, sza_adjustments=True, weighted=False, uncertainty=True): """Computes model enhancements for an overpass, and compares them to observed data. The behaviour is specified by the many default arguments. This function does everything needed including calculating model enhancements, but delegates the actual comparison to the ModelFunctions module. See the keyword list document for an explanation of the optional arguments """ ## First all the default arguments are dealt with, and then move on to actually classifying data and fitting model # Args to pass to full_file.quality(i,**kwargs) method quality_args = { 'chi_squared_max': chi_squared_max, 'snr_strong_co2_min': snr_strong_co2_min, 'albedo_min': albedo_min, 'albedo_max': albedo_max, 'outcome_flags': outcome_flags, 'surface_pressure_min': surface_pressure_min, 'surface_pressure_max': surface_pressure_max } bg_kwargs = { 'background_factor': f_background, 'ymax_positive': y_max_positive, 'ymax_negative': y_max_negative, 'ymin_negative': y_min_negative, 'ymin_positive': y_min_positive, 'offset': offset, 'sign': direction } plume_kwargs = {'xmax': x_max, 'plume_factor': f_plume} if secondary_sources == False: secondary = [] secondary_sources = False elif secondary_sources == True: secondary = overpass.source.secondary secondary_sources = True elif hasattr(secondary_sources, "__iter__"): secondary = secondary_sources secondary_sources = True else: raise TypeError("Invalid argument '{}' for secondary_sources.\ Must be True, False, or iterable collection of PointSources".format( secondary_sources)) # bias correction: if bias_correction not in File.allowed_bias_correction: raise ValueError( "bias correction must be one of {0}; given {1}".format( ', '.join(File.allowed_bias_correction), bias_correction)) if not (co2_source == 'xco2' or co2_source == 'co2_column'): raise ValueError( "co2_source must be one of 'xco2' or 'co2_column' not '{0}'". 
format(co2_source)) co2 = "{0}_{1}".format(bias_correction, co2_source) if smooth: co2 = 'smoothed_' + co2 # get emissions for the overpass and make sure they're in g/s F = overpass.get_emissions(temporal_factors=temporal_factors) F.convert(Units._model_units) # atmospheric stability parameter if surface_stability == True: if stability == "new": a = overpass.a elif stability == "old": a = overpass.a_old elif surface_stability == False: a = overpass.a_elevated else: a = surface_stability secondary_emissions = [ second.get_emissions(overpass)(Units._model_units) for second in secondary ] all_emissions = [F(units)] + [em(units) for em in secondary_emissions] total_emissions = F(units) for second in secondary_emissions: total_emissions += second(units) emissions_info = ', '.join([str(emi) for emi in all_emissions]) sources_info = ', '.join( [src for src in [overpass.short] + [s.short for s in secondary]]) print "Using reported emissions:", emissions_info print "For sources:", sources_info print "Total emissions:", total_emissions print "Using atmospheric stability parameter a={0}".format(a) print "Opening and reading full file" full_file = File.full(overpass) # force_winds is an override for forcing the model to run with a given list of Wind instances. # Useful for running the model with many wind adjustments if force_winds is None: all_winds = PST.AllWinds(overpass) try: WindSources, wind_labels = all_winds.parse(wind_sources) WindSources = [wnd.rotate(wind_adjustment) for wnd in WindSources] except Exception as exc: WindSources, wind_labels = [], [] print "Exception raised and ignored:", exc try: custom_winds, custom_labels = all_winds.add_custom(custom_wind) except ValueError as ve: print "custom_wind value was not valid. See message:", ve except Exception as sxc: print "Unexpected error occured:", sxc else: WindSources.extend(custom_winds) wind_labels.extend(custom_labels) else: WindSources = force_winds wind_labels = [str(wind) for wind in force_winds] if sza_adjustments: print "Assigning a sign to the sensor zenith angle" full_file.sign_zenith_angle(overpass) # filtered_data has the same fields as full_file but only the points that pass the quality filter filtered_data = full_file.filter(**quality_args) print "Classifying points" in_plume_objects = [] background_objects = [] model_enhancement_lists = [] model_alpha_lists = [] fixed_enhancement_lists = [] for wind in WindSources: # set wind.height to the weighted (by emissions) average height height_list = [overpass.height ] + [source.height for source in secondary] mean_height = numpy.average(height_list, weights=all_emissions) wind.height = mean_height plume_data = File.File() background_data = File.File() model_enhancements = [] # will be column vector [ [V1], [V2], ... 
] model_alpha = [] # will be matrix A from math docs fixed_enhancements = [] # list of enhancements from fixed sources # we already have a, F determined u = wind.speed # the offset (x,y) in wind basis components from the source to the center plume (highest enhancement) x_offset, y_offset = filtered_data.get_offset(overpass, wind) # same as above offset but for each secondary source secondary_offsets = filtered_data.get_secondary_offset( overpass, wind, secondary_sources=secondary) # emissons of all the fixed secondary sources fixed_emissions = [ fixed.get_emissions(overpass, temporal_factors=temporal_factors)( Units._model_units) for fixed in fixed_secondary_sources ] for i in range(len(filtered_data)): coordinate = Geometry.CoordGeom(wind) x, y = coordinate.coord_to_wind_basis( overpass.lat, overpass.lon, filtered_data.retrieval_latitude[i], filtered_data.retrieval_longitude[i]) dist = Geometry.CoordGeom.cartesian_distance((x, y), (x_offset, y_offset)) sza = Geometry.SZA(filtered_data, wind) # check if point is in background or in plume, including secondary sources in_background = PlumeModel.InBackground(x, y, dist, u, F, a, **bg_kwargs) in_plume = PlumeModel.InPlume(x, y, u, F, a, **plume_kwargs) if secondary_sources: for ind, secondary_src in enumerate(secondary): SZA_secondary = Geometry.SZA(filtered_data, wind) xs, ys = Geometry.CoordGeom(wind).coord_to_wind_basis( secondary_src.lat, secondary_src.lon, filtered_data.retrieval_latitude[i], filtered_data.retrieval_longitude[i]) secondary_dist = Geometry.CoordGeom.cartesian_distance( (xs, ys), secondary_offsets[ind]) in_secondary_plume = PlumeModel.InPlume( xs, ys, u, 1., a, **plume_kwargs) in_secondary_background = PlumeModel.InBackground( xs, ys, secondary_dist, u, 1., a, **bg_kwargs) # logic for when to consider it in-plume/background with secondary sources in_plume = in_plume or in_secondary_plume in_background = in_background and in_secondary_background if in_background: background_data.append(filtered_data, i) if in_plume: total_enhancement = 0. #enhancements from all sources plume_data.append(filtered_data, i) # get enhancement from main source, then add secondary sources if sza_adjustments: main_enhancement = sza.V(x, y, u, F, a, i) else: main_enhancement = PlumeModel.V(x, y, u, F, a) alpha = [main_enhancement / F] total_enhancement += main_enhancement for ind, secondary_src in enumerate(secondary): xs, ys = Geometry.CoordGeom(wind).coord_to_wind_basis( secondary_src.lat, secondary_src.lon, filtered_data.retrieval_latitude[i], filtered_data.retrieval_longitude[i]) F_secondary = secondary_emissions[ind] if sza_adjustments: secondary_enhancement = sza.V(xs, ys, u, F_secondary, a, i) else: secondary_enhancement = PlumeModel.V( xs, ys, u, F_secondary, a) total_enhancement += secondary_enhancement alpha.append(secondary_enhancement / F_secondary) model_enhancements.append([total_enhancement]) model_alpha.append(alpha) fixed_enhancement = 0. 
for ind, fixed in enumerate(fixed_secondary_sources): xs, ys = Geometry.CoordGeom(wind).coord_to_wind_basis( fixed.lat, fixed.lon, filtered_data.retrieval_latitude[i], filtered_data.retrieval_longitude[i]) F_fixed = fixed_emissions[ind] if sza: source_enhancement = sza.V(xs, ys, u, F_fixed, a, i) else: source_enhancement = PlumeModel.V( xs, ys, u, F_fixed, a) fixed_enhancement += source_enhancement fixed_enhancements.append(fixed_enhancement) in_plume_objects.append(plume_data) background_objects.append(background_data) model_enhancement_lists.append(numpy.array(model_enhancements)) model_alpha_lists.append(numpy.array(model_alpha)) fixed_enhancement_lists.append(numpy.array(fixed_enhancements)) all_results = [] for (k, wind) in enumerate(WindSources): print '' print wind_labels[k] defaults = ModelFunctions.results_defaults() defaults.co2_attribute = co2 defaults.co2_name = co2_source defaults.output_units = units in_plume_objects[k].xco2_uncert = numpy.array( in_plume_objects[k].xco2_uncert) try: results = ModelFunctions.interpret_results( in_plume_objects[k], background_objects[k], model_enhancement_lists[k], model_alpha_lists[k], fixed_enhancement_lists[k], overpass, wind, defaults, float(total_emissions), weights=weighted, background_average=background_average, uncertainty=uncertainty) # results is tuple (scale factor, estimated emissions, number of plume points, correlation) except: results = defaults.null_value # print "An exception was raised: see message following" # print exc raise all_results.append(results) if secondary_sources: print '\nEmissions are ordered as', ', '.join( [overpass.short] + map(lambda s: s.short, secondary)) print '' # make scatter plot if it's asked for, using last wind source if scatter_plot: if type(scatter_plot) != str: raise TypeError( "Keyword scatter_plot must be a string corresponding to a valid path. Given '{0}'" .format(scatter_plot)) try: sc_location = ModelFunctions.make_scatter_plot( numpy.array((model_enhancement_lists[k]) / numpy.mean(background_objects[k].k)).flatten(), in_plume_objects[k][co2], scatter_plot) except: raise else: print "Scatter plot made and saved as {0}".format(scatter_plot) return all_results
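# Hedged usage sketch for Model: run the plume model for one overpass with the
# default quality filters, using only the averaged wind.  'overpass' is assumed
# to be a PST.Overpass built by the csv reader above.
def _example_model_run(overpass):
    results = Model(overpass, wind_sources=['Average'], secondary_sources=False,
                    units=Units.output_units)
    # results holds one entry per wind source, as assembled in all_results above.
    return results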
def get_new_ecmwf(time, interp=True, return_stability=True, return_surface=False): """Reads 0.75 degree 6h ECMWF data""" source = time.source print "\nGathering ECMWF data..." grib_file = _new_ecmwf_fmt step = _new_ecmwf_step year = time.year month = time.month day = time.day hour = time.hour minute = time.minute tlon = source.lon % 360 tlat = source.lat hour_minus = int(step * (time.decimaltime // step)) delta_minus = dt.timedelta(hours=(hour_minus - hour), minutes=-minute) hour_plus = hour_minus + step delta_plus = dt.timedelta(hours=hour_plus - hour, minutes=-minute) time_minus = PST.Time(time.datetimeobj + delta_minus, source) time_plus = PST.Time(time.datetimeobj + delta_plus, source) decimal_hour = hour + minute / 60. file_minus = time_minus.strftime(grib_file) file_plus = time_plus.strftime(grib_file) time_closest = time.round(step * 3600) file_closest = time_closest.strftime(grib_file) if interp: files = [file_minus, file_plus] hours = [hour_minus, hour_plus] else: files = [file_closest] hours = [decimal_hour] u_interp = [] v_interp = [] usurf_interp = [] vsurf_interp = [] cloud_interp = [] for file_name in files: try: grib = pygrib.open(file_name) except IOError: raise IOError("Could not open file {0}".format(file_name)) except: print "Unexpected Error follows:" raise u_list = grib.select(name='U component of wind') v_list = grib.select(name='V component of wind') Psurface = grib.select(name='Surface pressure')[0].values cloud = grib.select(name="Total cloud cover")[0].values u_surf_list = grib.select(name="10 metre U wind component")[0].values v_surf_list = grib.select(name="10 metre V wind component")[0].values A = u_list[0].pv[:61] / 100. B = u_list[1].pv[61:] lat_ax = u_list[0].latlons()[0][:, 0] lon_ax = u_list[0].latlons()[1][0, :] lat0 = lat_ax[0] lat1 = lat_ax[1] lon0 = lon_ax[0] lon1 = lon_ax[1] dlat = lat1 - lat0 dlon = lon1 - lon0 lat_row = int((tlat - lat0) // dlat) lon_col = int((tlon - lon0) // dlon) lat_used = lat_ax[lat_row] lon_used = lon_ax[lon_col] lat_error = lat_used - tlat lon_error = lon_used - tlon # check if lat, lon are too far from the expected lat, lon if lat_error > 1. or lon_error > 1.: raise ValueError( "Lat/lon disagree by more than the resolution: Errors are ({0}, {1})" .format(lat_error, lon_error)) # print "Using value at ({0}, {1})".format(lat_row,lon_col) stack = source.height H = 7000. p0 = Psurface[lat_row, lon_col] / 100. H_list = [] i = 0 prev = 0 while i < 60: A0 = A[i] A1 = A[i + 1] B0 = B[i] B1 = B[i + 1] Pk0 = A0 + B0 * p0 Pk1 = A1 + B1 * p0 Pk = 0.5 * (Pk0 + Pk1) z = H * math.log(p0 / Pk) if z < stack: h1 = z h2 = prev break i += 1 prev = z else: h1 = z h2 = stack i -= 1 u1 = u_list[i].values[lat_row, lon_col] u2 = u_list[i - 1].values[lat_row, lon_col] v1 = v_list[i].values[lat_row, lon_col] v2 = v_list[i - 1].values[lat_row, lon_col] u = numpy.interp(stack, [h1, h2], [u1, u2]) u_interp.append(u) v = numpy.interp(stack, [h1, h2], [v1, v2]) v_interp.append(v) cloud_fraction = cloud[lat_row, lon_col] cloud_interp.append(cloud_fraction) u_surf = u_surf_list[lat_row, lon_col] usurf_interp.append(u_surf) v_surf = v_surf_list[lat_row, lon_col] vsurf_interp.append(v_surf) u_final = numpy.interp(decimal_hour, hours, u_interp) v_final = numpy.interp(decimal_hour, hours, v_interp) usurf_final = numpy.interp(decimal_hour, hours, usurf_interp) vsurf_final = numpy.interp(decimal_hour, hours, vsurf_interp) cloud_final = numpy.interp(decimal_hour, hours, cloud_interp) surface_wind = PST.Wind((usurf_final, vsurf_final), 0.) 
stability = PST.Stability(surface_wind.speed, cloud_final) return_values = [PST.Wind((u_final, v_final), stack)] if return_stability: return_values.append(stability) if return_surface: return_values.append(surface_wind) return return_values
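# Hedged usage sketch for get_new_ecmwf: 'time' is assumed to be a PST.Time
# whose source carries lat, lon, and stack height.  With the default flags the
# reader returns [wind, stability].
def _example_new_ecmwf(time):
    wind, stability = get_new_ecmwf(time, interp=True, return_stability=True)
    return wind.speed, stability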
def find(sources, lat_threshold=_lat_threshold, lon_threshold=_lon_threshold, min_date=None, max_date=None, download=True, min_points=0): """searches all OCO-2 lite files for soundings close to each source in sources. Writes a text file summarizing the overpass (number of observations, time, file names, etc), and writes overpass data to a CSV file. option download will download ECMWF data for the overpass date if we don't already have that data downloaded """ print "Starting search for %d sources" % len(sources) print "Lat threshold: %s" % lat_threshold print "Lon threshold: %s" % lon_threshold print "Minimum points found: %d" % min_points print "Sources being searched:" for src in sources: print src.short print "" if min_date==None and max_date==None: year_list = ['2014','2015','2016'] elif min_date==None: year_list = map(str, range(2014,max_date.year+1)) elif max_date == None: year_list = map(str, range(min_date.year, 2017)) else: year_list = map(str, range(min_date.year, max_date.year + 1)) if min_date==None: min_date = dt.datetime(2014,1,1,0,0) if max_date==None: max_date = dt.datetime(2016,12,31,23,59) fnames = [_summary_file_loc.format(source.short) for source in sources] for s in fnames: dir = os.path.dirname(s) if not os.path.exists(dir): os.mkdir(dir) # use 'with' context manager so if this crashes or is stopped you don't lose all the data! with open(_csv_save_loc,'w') as csv_output: csv_output.write(PST.Overpass.header) with nested(*[open(nm,"w") for nm in fnames]) as open_files: summary_files = {source.short: open_files[i] for i,source in enumerate(sources)} for name,file in summary_files.items(): file.write('Overpasses for %s' % name) file.write('Fields are ID, Date [Year Month Day Hour Minute Second Millisecond], Number of soundings found, Filename\n') for year in year_list: print year for lite_name in os.listdir(lite_dir.format(year)): date = dt.datetime.strptime(lite_name.split('_')[2],'%Y%m%d') if min_date<=date<=max_date: # dict of name:[# nadir, # glint, # target, # other] overpasses all_overpasses={source.short:[0,0,0,0] for source in sources} lite_file = os.path.join(lite_dir.format(year),lite_name) lite_data = File.lite(lite_file) print lite_file lats = lite_data.latitude lons = lite_data.longitude times = lite_data.date lite_id = lite_data.sounding_id mode = lite_data.observation_mode found = [] close_indices = [] for k in range(len(lite_data)): sounding_lat = lats[k] sounding_lon = lons[k] for source in sources: key = source.short lat = source.lat lon = source.lon dlat = abs(sounding_lat-lat) % 360. dlon = abs(sounding_lon-lon) % 360. 
if dlat<=lat_threshold and dlon<=lon_threshold: if not key in found: close_indices.append(k) found.append(key) print("Found overpass for %s: dlat=%f, dlon=%f" % (source.short,dlat, dlon)) else: pass if mode[k]=="ND": all_overpasses[key][0]+=1 elif mode[k]=="GL": all_overpasses[key][1]+=1 elif mode[k]=="TG": all_overpasses[key][2]+=1 else: all_overpasses[key][3]+=1 for (i,source_name) in zip(close_indices,found): print "Processing %s overpass" % source_name nadir,glint,tg,other = all_overpasses[source_name] total_points = nadir+glint+tg+other if total_points<min_points: print "Only {0} points".format(total_points) else: id = lite_id[i] id_info = 'ID: {0}'.format(id) type_info = 'Nadir: {0}, Glint: {1}, Target: {2}, Transition: {3}'.format(nadir,glint,tg,other) source = Sources.Sources[source_name] time = PST.Time(None,source,time_string=str(times[i])) if download: ECMWF.download(time) full = lite_data.full_file(i) overpass = PST.Overpass.new(source, time, full, lite_file) file_info = 'File: {0}, {1}'.format(lite_file, full) overpass_info=', '.join([id_info,str(times[i]),type_info,file_info]) if full!="": csv_output.write(overpass.write()) summary_file = summary_files[source_name] summary_file.write(overpass_info+'\n') print("Done") print("Saved overpass information to {0}".format(_csv_save_loc)) return _csv_save_loc
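# Hedged usage sketch for find: restrict the search to 2015 and skip the ECMWF
# download step.  'Gavin' is a source short name that appears elsewhere in this
# module's docstrings; any key of Sources.Sources would do.
def _example_overpass_search():
    srcs = [Sources.Sources['Gavin']]
    return find(srcs, min_date=dt.datetime(2015, 1, 1),
                max_date=dt.datetime(2015, 12, 31), download=False)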
def get_merra(time, print_time=False, action='middle'): """Reads 3h average MERRA data""" Source = time.source print "\nGathering MERRA data..." file_times = [ "01:30", "04:30", "07:30", "10:30", "13:30", "16:30", "19:30", "22:30" ] filename = time.strftime(_merra_fmt) t_lon = Source.lon t_lat = Source.lat if action == 'middle': time_indices = [int((time.decimaltime) // _merra_step)] hours = [time.decimaltime] elif action == 'beginning': time_indices = [int((time.decimaltime - 1.5) // _merra_step)] hours = [time.decimaltime] elif action == 'interpolate': tfloor = int((time.decimaltime - 1.5) // _merra_step) time_indices = [tfloor, tfloor + 1] hours = [(_merra_step * h + 1.5) for h in time_indices] else: raise ValueError('Invalid option "%s" for "action"' % action) if print_time: ftimes = [file_times[i] for i in time_indices] print "Time {0}; using file for time {1}".format( time.datetimeobj, ', '.join(ftimes)) if not os.path.exists(filename): raise IOError("File {0} does not exist".format(filename)) u_interp = [] v_interp = [] for time_index in time_indices: try: merra = netcdf.open(filename) except IOError: raise except: print "Unexpected Error Encountered:" raise U = merra.U V = merra.V lat0 = U.lat[0] lon0 = U.lon[0] lat1 = U.lat[1] lon1 = U.lon[1] dlat = lat1 - lat0 dlon = lon1 - lon0 lat_row = int((t_lat - lat0) // dlat) lon_col = int((t_lon - lon0) // dlon) lat_used = U.lat[lat_row] lon_used = U.lon[lon_col] lat_error = lat_used - t_lat lon_error = lon_used - t_lon if lat_error > dlat or lon_error > dlon: raise ValueError( "Lat/lon disagree by more than the resolution: Errors are ({0}, {1})" .format(lat_error, lon_error)) U = U[time_index, :, lat_row, lon_col] V = V[time_index, :, lat_row, lon_col] Heights = merra.H[time_index, :, lat_row, lon_col] stack = Source.height H_list = list(Heights) i = 0 while i < len(H_list) and H_list[i] > stack: i += 1 if i == len(H_list): i = -1 u = U[i] v = V[i] # print("Using height {0}".format(H_list[i])) else: h1 = H_list[i] h2 = H_list[i - 1] u1 = U[i] u2 = U[i - 1] v1 = V[i] v2 = V[i - 1] # print("Interpolating between heights {0} and {1} for a stack height of {2}".format(h1,h2,stack)) u = numpy.interp(stack, [h1, h2], [u1, u2]) v = numpy.interp(stack, [h1, h2], [v1, v2]) u_interp.append(u) v_interp.append(v) if len(u_interp) == 1: u_final = u_interp[0] v_final = v_interp[0] else: u_final = numpy.interp(time.decimaltime, hours, u_interp) v_final = numpy.interp(time.decimaltime, hours, v_interp) return PST.Wind((u_final, v_final), stack)
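# Hedged usage sketch for get_merra: compare the three 'action' modes for the
# same overpass time ('time' is assumed to be a PST.Time as above).
def _example_merra_read(time):
    middle = get_merra(time, action='middle')
    beginning = get_merra(time, action='beginning')
    interpolated = get_merra(time, action='interpolate')
    return middle, beginning, interpolated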
def get_surface(time, interp=True): """Reads just surface wind from 0.75 degree 6h ECMWF data""" source = time.source print "\nGathering ECMWF data..." grib_file = _new_ecmwf_fmt step = _new_ecmwf_step year = time.year month = time.month day = time.day hour = time.hour minute = time.minute tlon = source.lon % 360 tlat = source.lat hour_minus = int(step * (time.decimaltime // step)) delta_minus = dt.timedelta(hours=(hour_minus - hour), minutes=-minute) hour_plus = hour_minus + step delta_plus = dt.timedelta(hours=hour_plus - hour, minutes=-minute) time_minus = time + delta_minus time_plus = time + delta_plus decimal_hour = hour + minute / 60. file_minus = time_minus.strftime(grib_file) file_plus = time_plus.strftime(grib_file) time_closest = time.round(step * 3600) file_closest = time_closest.strftime(grib_file) if interp: files = [file_minus, file_plus] hours = [hour_minus, hour_plus] else: files = [file_closest] hours = [decimal_hour] usurf_interp = [] vsurf_interp = [] for file_name in files: try: grib = pygrib.open(file_name) except IOError: raise IOError("Could not open file {0}".format(file_name)) except: print "Unexpected Error follows:" raise u_surf_dataset = grib.select(name="10 metre U wind component")[0] u_surf_list = u_surf_dataset.values v_surf_list = grib.select(name="10 metre V wind component")[0].values lat_ax = u_surf_dataset.latlons()[0][:, 0] lon_ax = u_surf_dataset.latlons()[1][0, :] lat0 = lat_ax[0] lat1 = lat_ax[1] lon0 = lon_ax[0] lon1 = lon_ax[1] dlat = lat1 - lat0 dlon = lon1 - lon0 lat_row = int((tlat - lat0) // dlat) lon_col = int((tlon - lon0) // dlon) lat_used = lat_ax[lat_row] lon_used = lon_ax[lon_col] lat_error = lat_used - tlat lon_error = lon_used - tlon if lat_error > 1. or lon_error > 1.: raise ValueError( "Lat/lon disagree by more than the resolution: Errors are ({0}, {1})" .format(lat_error, lon_error)) u_surf = u_surf_list[lat_row, lon_col] v_surf = v_surf_list[lat_row, lon_col] usurf_interp.append(u_surf) vsurf_interp.append(v_surf) u = numpy.interp(decimal_hour, hours, usurf_interp) v = numpy.interp(decimal_hour, hours, vsurf_interp) return PST.Wind((u, v), 0.)
def get_ecmwf(time, surface=True, interp=True, print_time=False, return_stability=True): """Reads 1 degree 3h interpolated ECMWF data""" source = time.source print "\nGathering ECMWF data..." step = _ecmwf_step time_step = 3600 * step year = time.year month = time.month day = time.day hour = time.hour minute = time.minute decimal_hour = hour + float(minute) / 60. time_closest = time.round(time_step) file_closest = time_closest.strftime(_ecmwf_fmt) hour_minus = int(step * (time.decimaltime // step)) delta_minus = dt.timedelta(hours=(hour_minus - hour), minutes=-minute) hour_plus = hour_minus + step delta_plus = dt.timedelta(hours=hour_plus - hour, minutes=-minute) time_minus = time + delta_minus time_plus = time + delta_plus file_minus = time_minus.strftime(_ecmwf_fmt) file_plus = time_plus.strftime(_ecmwf_fmt) if interp: files = [file_minus, file_plus] hours = [hour_minus, hour_plus] else: files = [file_closest] hours = [decimal_hour] u_interp = [] v_interp = [] uSurface_interp = [] vSurface_interp = [] if print_time: print "Time {0}; using files {1}".format(time.datetimeobj, files) for file_name in files: try: grib = pygrib.open(file_name) except IOError: raise IOError("Could not open file {0}".format(file_name)) except: print "Unexpected Error follows:" raise u_list = grib.select(name='U component of wind') v_list = grib.select(name='V component of wind') Psurface = grib.select(name='Surface pressure')[0].values cloud = grib[304].values A = u_list[0].pv[:61] / 100. B = u_list[1].pv[61:] lat_ax = u_list[0].latlons()[0][:, 0] lon_ax = u_list[0].latlons()[1][0, :] lat0 = lat_ax[0] lat1 = lat_ax[1] lon0 = lon_ax[0] lon1 = lon_ax[1] dlat = lat1 - lat0 dlon = lon1 - lon0 lat_row = int((source.lat - lat0) // dlat) lon_col = int((source.lon - lon0) // dlon) lat_used = lat_ax[lat_row] lon_used = lon_ax[lon_col] lat_error = lat_used - source.lat lon_error = lon_used - source.lon if lat_error > 1. or lon_error > 1.: raise ValueError( "Lat/lon disagree by more than the resolution: Errors are ({0}, {1})" .format(lat_error, lon_error)) stack = source.height H = 7000. p0 = Psurface[lat_row, lon_col] / 100. 
H_list = [] i = 0 prev = 0 while i < 60: A0 = A[i] A1 = A[i + 1] B0 = B[i] B1 = B[i + 1] Pk0 = A0 + B0 * p0 Pk1 = A1 + B1 * p0 Pk = 0.5 * (Pk0 + Pk1) z = H * math.log(p0 / Pk) if z < stack: h1 = z h2 = prev break i += 1 prev = z i = min(59, i) if i == 59: h1 = stack h2 = stack u1 = u_list[i].values[lat_row, lon_col] u2 = u_list[i - 1].values[lat_row, lon_col] v1 = v_list[i].values[lat_row, lon_col] v2 = v_list[i - 1].values[lat_row, lon_col] # print("Interpolating between heights {0} and {1} for a stack height of {2}".format(h1,h2,stack)) u = numpy.interp(stack, [h1, h2], [u1, u2]) v = numpy.interp(stack, [h1, h2], [v1, v2]) uSurface = grib.select( name="10 metre U wind component")[0].values[lat_row, lon_col] vSurface = grib.select( name="10 metre V wind component")[0].values[lat_row, lon_col] u_interp.append(u) v_interp.append(v) uSurface_interp.append(uSurface) vSurface_interp.append(vSurface) cloudFraction = cloud[lat_row, lon_col] u_final = numpy.interp(decimal_hour, hours, u_interp) v_final = numpy.interp(decimal_hour, hours, v_interp) uSurface_final = numpy.interp(decimal_hour, hours, uSurface_interp) vSurface_final = numpy.interp(decimal_hour, hours, vSurface_interp) if surface: stability = PST.Stability( PST.Wind((uSurface_final, vSurface_final), stack).speed, cloudFraction) else: stability = PST.Stability( PST.Wind((u_final, v_final), stack).speed, cloudFraction) if return_stability: return [PST.Wind((u_final, v_final), stack), stability] else: return PST.Wind((u_final, v_final), stack)
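# Hedged usage sketch for get_ecmwf: with return_stability=True (the default)
# the reader returns [wind, stability]; with False it returns just the wind.
def _example_coarse_ecmwf(time):
    wind, stability = get_ecmwf(time, surface=True, interp=True)
    return wind, stability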
def get_gem_highres(time): """Reads 1h Gem Forecast data""" print "\nGathering GEM data..." t_lon = 360. + time.lon if time.lon < 0. else time.lon t_lat = time.lat year = time.year month = time.month day = time.day hour = time.hour minute = time.minute decimal_hour = time.decimaltime hour_minus = time.hour hour_plus = time.hour + 1 time_minus = PST.Time(dt.datetime(year, month, day, hour_minus, 0), time.source) time_plus = PST.Time(time_minus.datetimeobj + dt.timedelta(hours=1), time.source) hour_string_minus = '00' if time_minus.hour < 12 else '12' hour_string_plus = '00' if time_plus.hour < 12 else '12' file_minus = time_minus.strftime(_gemh_fmt).replace("AM", "00").replace( "PM", "12") file_plus = time_plus.strftime(_gemh_fmt).replace("AM", "00").replace( "PM", "12") if (not os.path.isfile(file_minus)) and (not os.path.isfile(file_plus)): err = IOError( "Neither file on either side of the sounding time exists. Unable to read wind data" ) raise err elif (not os.path.isfile(file_minus)) and os.path.isfile(file_plus): print( "File before in time does not exist, but file after in time does; using only time after sounding" ) files = [file_plus, file_plus] elif (not os.path.isfile(file_plus)) and os.path.isfile(file_minus): print( "File after in time does not exist, but file before in time does; using only time before sounding" ) files = [file_minus, file_minus] else: files = [file_minus, file_plus] u_interp = [] v_interp = [] for fname in files: try: rpn = fstd.open(fname) except IOError as e: print "Unable to open file {0}".format(fname) raise e try: U = rpn.UU V = rpn.VV GZ = rpn.GZ except AttributeError as error: print("Unable to read all fields -- see exception raised") print error # <Var 'GZ'> has 160 levels, but U, V only have 80. In tests opening files, # <LogHybrid> from GZ has extra levels compared to it from U and V; test to make sure of this if not numpy.array_equal(U.axes[2][:], GZ.axes[2][1::2]): raise ValueError( "<LogHybrid> from GZ and UU, VV variables don't match up. Look closer at this file" ) return PST.Wind((0, 0), 0) lats = U.lat[:] lons = U.lon[:] lat_row = 0 lon_col = 0 while lats[lat_row] < t_lat: lat_row += 1 while lons[lon_col] < t_lon: lon_col += 1 lat_row -= 1 lon_col -= 1 used_lat = lats[lat_row] used_lon = lons[lon_col] lat_error = abs(t_lat - used_lat) lon_error = abs(t_lon - used_lon) if lat_error > abs(lats[1] - lats[0]) or lon_error > abs(lons[1] - lons[0]): print "Source position :", (t_lat, t_lon) print "Calculated position:", (used_lat, used_lon) raise ValueError( "Rounding error: latitude or longitude was off by more than their resolution" ) U = U[0, 0, :, lat_row, lon_col] V = V[0, 0, :, lat_row, lon_col] GZ = GZ[0, 0, 1::2, lat_row, lon_col] i = 0 while i < len(GZ): if GZ[i] < time.height: h1 = GZ[i] h2 = GZ[i - 1] break i += 1 else: h1 = h2 = time.height u1 = U[i] / 3.6 u2 = U[i - 1] / 3.6 v1 = V[i] / 3.6 v2 = V[i - 1] / 3.6 u = numpy.interp(time.height, [h1, h2], [u1, u2]) v = numpy.interp(time.height, [h1, h2], [v1, v2]) u_interp.append(u) v_interp.append(v) u, v = map( lambda x: numpy.interp(decimal_hour, [hour_minus, hour_plus], x), [u_interp, v_interp]) return PST.Wind((u, v), time.height)
def get_gem(time, interp=True, print_time=False):
    """Reads 6h GEM data"""
    print "\nGathering GEM data..."
    t_lon = time.lon % 360
    t_lat = time.lat
    year = time.year
    month = time.month
    day = time.day
    hour = time.hour
    minute = time.minute
    step = _gem_step
    time_step = 3600 * step
    decimal_hour = hour + minute / 60.
    hour_minus = int(6 * (time.decimaltime // 6))
    delta_minus = dt.timedelta(hours=hour_minus - hour, minutes=-minute)
    hour_plus = hour_minus + 6
    delta_plus = dt.timedelta(hours=hour_plus - hour, minutes=-minute)
    time_minus = time + delta_minus
    time_plus = time + delta_plus
    file_minus = time_minus.strftime(_gem_fmt)
    file_plus = time_plus.strftime(_gem_fmt)
    time_closest = time.round(time_step)
    file_closest = time_closest.strftime(_gem_fmt)
    if interp:
        files = [file_minus, file_plus]
        hours = [hour_minus, hour_plus]
    else:
        files = [file_closest]
        hours = [decimal_hour]
    u_interp = []
    v_interp = []
    if print_time:
        print "Time {0}; using files {1}".format(time.datetimeobj, files)
    for fname in files:
        if not os.path.exists(fname):
            raise IOError("File {0} does not exist".format(fname))
        try:
            rpn = fstd.open(fname)
        except IOError:
            raise IOError("Could not open {0} for GEM data\n".format(fname))
        try:
            U = rpn.UU
            V = rpn.VV
            Heights = rpn.GZ
            lats = U.lat[:]
            lons = U.lon[:]
        except AttributeError:
            raise AttributeError("Could not read U, V, GZ, lat, and lon from file {0}".format(fname))
        lat_row = 0
        lon_col = 0
        while lats[lat_row] < t_lat:
            lat_row += 1
        while lons[lon_col] < t_lon:
            lon_col += 1
        lat_row -= 1
        lon_col -= 1
        used_lat = lats[lat_row]
        used_lon = lons[lon_col]
        lat_error = t_lat - used_lat
        lon_error = t_lon - used_lon
        if lat_error > 1. or lon_error > 1.:
            raise ValueError("Position error: ({0},{1})".format(lat_error, lon_error))
        U = U[0, 0, :, lat_row, lon_col]
        V = V[0, 0, :, lat_row, lon_col]
        Heights = Heights[0, 0, :, lat_row, lon_col]
        stack = time.height
        H_list = list(Heights)
        i = 0
        while i < len(H_list) and H_list[i] > stack:
            i += 1
        i = min(i, len(H_list) - 1)
        h1 = H_list[i]
        h2 = H_list[i - 1]
        u1 = U[i] / 3.6
        u2 = U[i - 1] / 3.6
        v1 = V[i] / 3.6
        v2 = V[i - 1] / 3.6
        u = numpy.interp(stack, [h1, h2], [u1, u2])
        v = numpy.interp(stack, [h1, h2], [v1, v2])
        u_interp.append(u)
        v_interp.append(v)
    u = numpy.interp(decimal_hour, hours, u_interp)
    v = numpy.interp(decimal_hour, hours, v_interp)
    return PST.Wind((u, v), stack)
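# Hedged usage sketch for get_gem: winds at the source stack height, linearly
# interpolated in time between the two bracketing 6 h GEM files.
def _example_gem_read(time):
    return get_gem(time, interp=True, print_time=True)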
def get_ecmwf_highres(time, surface=True): """Reads 0.3 degree ECMWF data""" print "Gathering ECMWF data..." source = time.source step = 1 year = time.year month = time.month day = time.day hour = time.hour minute = time.minute hour_minus = hour hour_plus = hour + 1 time_minus = PST.Time(dt.datetime(year, month, day, hour_minus, 0), source) time_plus = PST.Time(time_minus.datetimeobj + dt.timedelta(hours=1), source) file_minus = time_minus.strftime(_ecmwfh_fmt) file_plus = time_plus.strftime(_ecmwfh_fmt) u_time_interpolate = [] v_time_interpolate = [] u_surface_interp = [] v_surface_interp = [] cloud_fractions = [] lat_resolution, lon_resolution = (0.3, 0.3) for file_name in [file_minus, file_plus]: if not os.path.isfile(file_name): raise IOError('File {0} does not exist'.format(file_name)) try: ecmwf = pygrib.open(file_name) except IOError: print("IOError: Could not open file {0}".format(file_name)) raise except Excption as exc: print "Unexpected error occured" raise exc try: UU = ecmwf.select(name='U component of wind') VV = ecmwf.select(name='V component of wind') P_surface = ecmwf.select(name='Surface pressure')[0] P_surface.expand_grid(0) P_surface = numpy.reshape(P_surface.values[:55800], (150, 372)) cloud_fraction = ecmwf.select(name="Total cloud cover")[0].values U_surface = ecmwf.select( name="10 metre U wind component")[0].values[:-1] V_surface = ecmwf.select( name="10 metre V wind component")[0].values[:-1] except AttributeError: raise AttributeError( "File {0} is missing some expected attributes".format( file_name)) # This data is missing part of the last row ( 33.3 to 33.0 degrees latitude for -109.8 to -64.8 degrees longitude) # so ignore the last row of the data (last 151 values) U = [] V = [] levels = len(UU) half_levels = levels + 1 try: for level in range(len(UU)): UU[level].expand_grid(0) U.append(numpy.reshape((UU[level].values)[:55800], (150, 372))) VV[level].expand_grid(0) V.append(numpy.reshape((VV[level].values)[:55800], (150, 372))) latitudes = numpy.reshape(UU[0].latitudes[:55800], (150, 372))[:, 0] longitudes = numpy.reshape(UU[0].longitudes[:55800], (150, 372))[0] - 360. except: raise IndexError( "Problem reshaping U, V, lat, lon arrays in file {0}".format( file_name)) U = numpy.array(U) V = numpy.array(V) A = UU[0].pv[:half_levels] A /= 100. B = VV[0].pv[half_levels:] lat_row = int((latitudes[0] - time.lat) // lat_resolution) lon_col = int((time.lon - longitudes[0]) // lon_resolution) lat_check = abs(latitudes[lat_row] - time.lat) < 0.3 lon_check = abs(longitudes[lon_col] - time.lon) < 0.3 coord = (latitudes[lat_row], longitudes[lon_col]) if not lat_check or not lon_check: raise ValueError( "Latitude and longitude points are further than the resolution away from the source. Source position ({0},{1}); calculated as ({2},{3})" .format(time.lat, time.lon, latitudes[lat_row], longitudes[lon_col])) p0 = P_surface[lat_row, lon_col] H = 7000. z_prev = 0. 
k = 0 while k < (levels - 1): A0 = A[k] A1 = A[k + 1] A0 = A[k] A1 = A[k + 1] B0 = B[k] B1 = B[k + 1] Pk0 = A0 + B0 * p0 Pk1 = A1 + B1 * p0 Pk = 0.5 * (Pk0 + Pk1) z = H * math.log(p0 / Pk) if z < time.height: h1 = z h2 = z_prev break k += 1 z_prev = z else: h1 = h2 = time.height u1 = U[k][lat_row, lon_col] u2 = U[k - 1][lat_row, lon_col] v1 = V[k][lat_row, lon_col] v2 = V[k - 1][lat_row, lon_col] u_surface = U_surface[lat_row, lon_col] v_surface = V_surface[lat_row, lon_col] u_interpolated = numpy.interp(time.height, [h1, h2], [u1, u2]) v_interpolated = numpy.interp(time.height, [h1, h2], [v1, v2]) u_time_interpolate.append(u_interpolated) v_time_interpolate.append(v_interpolated) u_surface_interp.append(u_surface) v_surface_interp.append(v_surface) cloud_fractions.append(cloud_fraction[lat_row, lon_col]) u = numpy.interp(time.decimaltime, [hour_minus, hour_plus], u_time_interpolate) v = numpy.interp(time.decimaltime, [hour_minus, hour_plus], v_time_interpolate) # print u,v u_surface = numpy.interp(time.decimaltime, [hour_minus, hour_plus], u_surface_interp) v_surface = numpy.interp(time.decimaltime, [hour_minus, hour_plus], v_surface_interp) surface_wind = PST.Wind( (u_surface, v_surface), 10.) if surface else PST.Wind( (u, v), time.height) wind = PST.Wind((u, v), time.height) if surface: stability = PST.Stability(surface_wind.speed, min(cloud_fractions)) else: stability = PST.Stability(wind.speed, min(cloud_fractions)) return (wind, stability)
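# Hedged sketch of the model-level height calculation shared by the ECMWF
# readers above: half-level pressures are A + B * p0 (with A already converted
# to hPa), the full-level pressure is the mean of adjacent half levels, and
# height is approximated with a fixed scale height via z = H * ln(p0 / Pk).
# A, B, and p0 here stand in for the values read from the GRIB files.
def _model_level_heights(A, B, p0, H=7000.):
    heights = []
    for k in range(len(A) - 1):
        Pk = 0.5 * ((A[k] + B[k] * p0) + (A[k + 1] + B[k + 1] * p0))
        heights.append(H * math.log(p0 / Pk))
    return heights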
            pro = node.probability_vector[current_tag]
        else:
            pro = 0
        if pro < μ_min:  # judged to be an illegal (anomalous) transition
            count_intrusion += 1
            print(pro)
    if (count_intrusion / N) > lambda_min:
        print("Sequence %s is anomalous!" % sequence, "anomaly score:", count_intrusion / N)
    else:
        print("Sequence %s is normal." % sequence, "anomaly score:", count_intrusion / N)


if __name__ == '__main__':
    txt = 'pst_data.txt'
    pkl = 'pst_result.pkl'
    tree = PST.gen_tree(txt)
    PST.draw_pst(tree)
    sequence_list = ["abcab", "bcabbbca"]
    print('---------------------------------- computing anomaly scores -------------------------------')
    for sequence in sequence_list:
        detection_sequence(sequence, tree)

    # test operate code mapping
    source_code_list = ['login', 'trans1', 'trans2', 'trans3', 'logout']
    ope_code_dict = Properties("opecode_mapper.properties").getProperties()
    str_transed = ""
    for source_code in source_code_list:
        if ope_code_dict.get(source_code):
            str_transed += ope_code_dict.get(source_code)