def _get_argument_parser():
    # Open the datafile
    df = SciFile()
    df.import_data([args['file']])
    # This sectorfiles argument should be reduced to a single string rather than a list...
    sectfile = sectorfile.open(args['sectorfiles'], scifile=df)
    sector = sectfile.open_sector(args['sector'], scifile=df)
    # Get the product
    pf = productfile.open_product(df.source_name, args['product'], scifile=df)
    geoips_only = False
    # Set this in check_if_testonly. May want to pass argument at some point,
    # so leave pass through of geoips_only.
    # if not os.getenv('GEOIPS_OPERATIONAL_USER'):
    #     geoips_only = True
    # Call process
    process(df, sector,
            productlist=args['productlist'],
            outdir=args['product_outpath'],
            nofinal=args['nofinal'],
            forcereprocess=args['forcereprocess'],
            sectorfile=sectfile,
            geoips_only=geoips_only)
# MLS 20160108 Make sure start time is 9h prior, so we get appropriate sectorfiles.
[args['start_datetime'], args['end_datetime']] = time_range_defaults(args['start_datetime'],
                                                                     args['end_datetime'],
                                                                     args['num_hours_back_to_start'],
                                                                     args['num_hours_to_check'],
                                                                     )
if args['queue'] and args['queue'].lower() == 'none':
    args['queue'] = None

# Should fix this so that either we use dynamic_templates here
# or sectorfile.open() uses 'templatefiles'.
# I prefer the second solution.
args['start_datetime'] = args['start_datetime'] - timedelta(hours=9)
combined_sf = sectorfile.open(dynamic_templates=args['templatefiles'], **args)
# Don't want the 9h prior for downloading, just for getting dynamic sectorfiles.
args['start_datetime'] = args['start_datetime'] + timedelta(hours=9)

bigindent = '\n' + ' ' * 60

try:
    log.info('')
    combined_sf.check_sectorfile()
    log.info('')
except SectorFileError:
    raise

# downloader(args['data_type'], args['host_type'], sector_file=combined_sf, **args)
dyn_start_dt = df.start_datetime - timedelta(hours=12)
dyn_end_dt = df.end_datetime
if df.start_datetime == df.end_datetime:
    dyn_end_dt = df.end_datetime + timedelta(hours=2)

if not args['sectorlist']:
    args['sectorlist'] = []
    for ds in df.datasets.values():
        # Accumulate predicted sectors across all datasets in the file.
        args['sectorlist'] += predict_sectors(ds.platform_name, ds.source_name,
                                              dyn_start_dt, dyn_end_dt)

DATETIMES['after_opendatafile'] = datetime.utcnow()
print_mem_usage('After reading metadata')

# Create combined sectorfile for requested sectors
sectfile = sectorfile.open(sectorlist=args['sectorlist'],
                           productlist=args['productlist'],
                           sectorfiles=args['sectorfiles'],
                           dynamic_templates=args['templatefiles'],
                           tc=args['tc'],
                           volcano=args['volcano'],
                           allstatic=args['allstatic'],
                           alldynamic=args['alldynamic'],
                           start_datetime=dyn_start_dt,
                           end_datetime=df.end_datetime,
                           actual_datetime=df.start_datetime,
                           scifile=df,
                           quiet=True)
log.info('\n\n')

# Determine which sectors are needed and which aren't
if args['sectorlist']:
    req_sects = sorted(args['sectorlist'])
    non_req_sects = sorted(list(set(sectfile.sectornames()) ^ set(req_sects)))
    log.info('\tRequested sectors: {0}'.format(req_sects))
    log.info('\n')
    log.info('\tUnused sectors: {0}'.format(non_req_sects))
    log.info('\n\n')
log.info('\t\tSectors from files: ')
def latency_products(
        start_datetime,
        end_datetime,
        sensor,
        satellites,
        data_providers,
        overall=False,
        verbose=True,
):
    log.info('\n\n')
    total_hours = (end_datetime - start_datetime).days * 24 + (
        end_datetime - start_datetime).seconds // 3600
    log.info(str(total_hours) + ' hours')
    sector_file = sectorfile.open(allstatic=True,
                                  alldynamic=True,
                                  start_datetime=start_datetime,
                                  end_datetime=end_datetime,
                                  one_per_sector=True)
    allfiles = []
    for sat in satellites:
        log.info('Trying sat ' + sat + ' sensor: ' + sensor)
        currdata_providers = data_providers
        currchannels = channels  # NOTE: 'channels' is not a parameter of this function and 'currchannels' is unused below
        if not data_providers:
            currdata_providers = ['*']
        for data_provider in currdata_providers:
            sectors = sector_file.getsectors()
            totalnum = len(sectors)
            num = 0
            for sector in sectors:
                foundsome = False
                num += 1
                try:
                    productnames = sector.products[sensor]
                except KeyError:
                    # Skip if not defined
                    continue
                for productname in productnames:
                    # log.info('    Trying product ' + productname + ' from sector ' + sector.name)
                    try:
                        product = productfile.open_product(sensor, productname)
                    except AttributeError:
                        # Skip if not defined
                        continue
                    # Check operational, not local. Probably should make this an option?
                    os.environ['GEOIPSFINAL'] = os.getenv('OpsGEOIPSFINAL')
                    # print(os.getenv('GEOIPSFINAL'))
                    currfiles = ProductFileName.list_range_of_files(
                        sat,
                        sensor,
                        start_datetime,
                        end_datetime,
                        sector,
                        product,
                        datetime_wildcards={'%H': '*%H', '%M': '*', '%S': '*'},
                        data_provider=data_provider,
                        coverage='*',
                        intensity='*',
                        extra='*',
                        ext='*',
                    )
                    if currfiles:
                        foundsome = True
                        allfiles += currfiles
                if foundsome:
                    log.info('    Listing all products from sector ' + sector.name +
                             ', ' + str(num) + ' of ' + str(totalnum) +
                             ', found ' + str(len(allfiles)) + ' files so far')
    if overall:
        totalsize, totalnum = calc_latency(allfiles,
                                           fileclass='ProductFileName',
                                           verbose=verbose,
                                           classkeys={
                                               'overall': ['sensorname', 'dataprovider'],
                                           })
    else:
        totalsize, totalnum = calc_latency(
            allfiles,
            fileclass='ProductFileName',
            verbose=verbose,
            classkeys={
                'individual': ['sensorname', 'productname', 'satname',
                               'sectorname', 'dataprovider'],
                'overallproduct': ['sensorname', 'productname', 'dataprovider'],
                'overallsector': ['sensorname', 'sectorname', 'dataprovider'],
                'overall': ['sensorname', 'dataprovider'],
            })
    log.interactive('Total size on disk for ' + str(totalnum) + ' products: ' +
                    convert_bytes(totalsize) + ': sensor: ' + sensor +
                    ' satellites: ' + ', '.join(satellites))
    return totalsize, totalnum
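
# Hedged aside (not part of the original module): the whole-hour arithmetic in
# latency_products above can also be written with timedelta.total_seconds(); for
# non-negative spans, days * 24 + seconds // 3600 equals total_seconds() // 3600.
# The helper name "whole_hours" is hypothetical and only illustrates the equivalence.
def whole_hours(start_dt, end_dt):
    """Number of complete hours between two datetimes."""
    return int((end_dt - start_dt).total_seconds() // 3600)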
def process_overpass(
        satellite,
        sensor,
        productlist,
        sectorlist,
        sectorfiles,
        extra_dirs,
        sector_file,
        datelist,
        hourlist=None,
        data_outpath=None,
        product_outpath=None,
        list=False,
        clean=False,
        forceclean=False,
        download=False,
        queue=None,
        mp_max_cpus=1,
        allstatic=True,
        alldynamic=True,
        tc=False,
        volcano=False,
        quiet=False,
        start_datetime=None,
        end_datetime=None,
):
    if quiet:
        log.setLevel(35)
    log.interactive('')
    opasses = []
    old_opasses = []
    overall_start_dt = None
    overall_end_dt = None
    single = False
    both = False
    if sectorlist:
        single = True
        both = False
    if hourlist is None:
        for datestr in datelist:
            if sectorlist:
                log.interactive(
                    'Checking for overpasses $GEOIPS/geoips/process_overpass.py ' +
                    satellite + ' ' + sensor + ' ' + datestr + ' -s "' +
                    ' '.join(sectorlist) + '" --all')
            else:
                log.interactive(
                    'Checking for overpasses $GEOIPS/geoips/process_overpass.py ' +
                    satellite + ' ' + sensor + ' ' + datestr + ' --all')
            sys.stdout.write('.')
            sys.stdout.flush()
            start_dt = datetime.strptime(datestr + '0000', '%Y%m%d%H%M')
            end_dt = datetime.strptime(datestr + '2359', '%Y%m%d%H%M')
            opasses.extend(
                pass_prediction([satellite], [sensor],
                                sector_file,
                                sectorlist,
                                start_dt - timedelta(minutes=15),
                                end_dt + timedelta(minutes=15),
                                single=single,
                                both=both,
                                force=True,
                                quiet=quiet))
        sys.stdout.write('\n')
        if opasses and len(opasses) < 200 and len(opasses) != 0:
            log.interactive('Available overpasses: ' + bigindent +
                            bigindent.join(sorted(str(val) for val in opasses)) + '\n')
        elif opasses:
            log.interactive(str(len(opasses)) + ' available overpasses, not listing\n')
        return opasses
    else:
        hourstart = hourlist[0]
        if len(hourlist) == 1:
            hourend = hourlist[0]
        else:
            hourend = hourlist[-1]
        for datestr in datelist:
            if sectorlist and hourlist:
                log.interactive(
                    'Checking for overpasses for $GEOIPS/geoips/process_overpass.py ' +
                    satellite + ' ' + sensor + ' ' + datestr + ' -H "' +
                    ' '.join(hourlist) + '" -s "' + ' '.join(sectorlist) + '" --all')
            else:
                log.interactive(
                    'Checking for overpasses for $GEOIPS/geoips/process_overpass.py ' +
                    satellite + ' ' + sensor + ' ' + datestr + ' --all')
            sys.stdout.write('.')
            sys.stdout.flush()
            start_dt = datetime.strptime(datestr + hourstart + '00', '%Y%m%d%H%M')
            start_dt = start_dt - timedelta(minutes=15)
            if overall_start_dt is None or overall_start_dt > start_dt:
                overall_start_dt = start_dt
            end_dt = datetime.strptime(datestr + hourend + '59', '%Y%m%d%H%M')
            end_dt = end_dt + timedelta(minutes=15)
            if overall_end_dt is None or overall_end_dt < end_dt:
                overall_end_dt = end_dt
            opasses.extend(
                pass_prediction([satellite], [sensor],
                                sector_file,
                                sectorlist,
                                start_dt,
                                end_dt,
                                single=single,
                                force=True,
                                quiet=quiet))
        sys.stdout.write('\n')
        if opasses and len(opasses) < 20:
            log.interactive('Available overpasses: ' + bigindent +
                            bigindent.join(sorted(str(val) for val in opasses)) + '\n\n')
        elif opasses:
            log.interactive(str(len(opasses)) + ' available overpasses, not listing\n\n')

    # Start 8h before start time to make sure we can get the
    # sector file entry before
    if sensor != 'modis':
        overall_start_dt = overall_start_dt - timedelta(minutes=480)
    log.info('Overall start and end times: ' + str(overall_start_dt) + ' to ' +
             str(overall_end_dt))
    if download:
        log.interactive('queue: ' + str(queue) + '\n\n')
        data_type = default_data_type[sensor]
        host_type = default_host_type[data_type]
        # Can't we do something to minimize the copy/paste done here? Hard to maintain...
        if (data_type, host_type) in non_qsubbed:
            for opass in opasses:
                log.info('sectorfiles: ' + str(sectorfiles))
                sector_file = sectorfile.open(allstatic=allstatic,
                                              alldynamic=alldynamic,
                                              tc=tc,
                                              start_datetime=opass.startdt - timedelta(hours=6),
                                              end_datetime=opass.enddt,
                                              one_per_sector=True)
                if not sectorfiles:
                    currsectorfiles = sector_file.names
                else:
                    currsectorfiles = sectorfiles
                log.info('currsectorfiles: ' + str(currsectorfiles))
                log.interactive('Downloading opass: ' + str(opass) + '\n\n')
                si = SatSensorInfo(satellite, sensor)
                # If they are very long files (ie, full orbit), make
                # sure we get the file before the overpass time
                startdt = opass.startdt - timedelta(minutes=si.mins_per_file)
                downloader(
                    data_type,
                    host_type,
                    sector_file=sector_file,
                    sectorlist=sectorlist,
                    sectorfiles=currsectorfiles,
                    productlist=productlist,
                    data_outpath=data_outpath,
                    product_outpath=product_outpath,
                    start_datetime=startdt,
                    end_datetime=opass.enddt,
                    queue=queue,
                    allstatic=allstatic,
                    alldynamic=alldynamic,
                    tc=tc,
                    volcano=volcano,
                    # max_connections=8,
                    max_wait_seconds=None,
                )
                time.sleep(5)
        else:
            log.interactive(sectorfiles)
            downloader(
                data_type,
                host_type,
                sector_file=sector_file,
                sectorlist=sectorlist,
                sectorfiles=sectorfiles,
                productlist=productlist,
                data_outpath=data_outpath,
                product_outpath=product_outpath,
                start_datetime=overall_start_dt,
                end_datetime=overall_end_dt,
                queue=queue,
                allstatic=allstatic,
                alldynamic=alldynamic,
                tc=tc,
                opasses=opasses,
                # max_connections=8,
                max_wait_seconds=None,
            )
            time.sleep(5)

    all_files = []
    # Reverse=True for newest first
    all_files = sorted(find_available_data_files(opasses, start_dt, satellite, sensor,
                                                 extra_dirs),
                       reverse=True)
    log.info('Done sorting default')
    # shell()
    if productlist and 'near-constant-contrast' in productlist:
        log.info('    Checking near-constant-contrast files')
        # Reverse=True for newest first
        all_files = sorted(find_available_data_files(opasses, start_dt, satellite, sensor,
                                                     extra_dirs, prodtype='ncc'),
                           reverse=True)
    file_str = '\n\t'.join(all_files)
    log.info('Files found in current search time for %s: \n\t%s' % (str(opasses), file_str))
    if not all_files:
        log.info('No files available in directories listed above')
        log.info('To check alternate directories, you can call (replace /sb2/viirs and '
                 '/sb1/viirs with the paths where data files are available): ')
        infostr = ''
        if productlist:
            infostr += '-p ' + "'" + ' '.join(productlist) + "'"
        if sectorlist:
            infostr += '-s ' + "'" + ' '.join(sectorlist) + "'"
        log.info("process_overpass.py %s %s '%s' -d '/sb2/viirs /sb1/viirs' %s -H '%s'" %
                 (satellite, sensor, ' '.join(datelist), infostr, ' '.join(hourlist)))
        return None
    try:
        for opass in opasses:
            currdatelist = []
            day_count = (opass.enddt - opass.startdt).days + 1
            for dt in (opass.startdt + timedelta(n) for n in range(day_count)):
                currdatelist.append(dt.strftime('%Y%m%d'))
            log.info('Checking for existing products to clean... clean: ' + str(clean) +
                     ' forceclean: ' + str(forceclean))
            find_existing_products(sensor, sector_file, opass.actualsectornames,
                                   productlist, currdatelist, clean, forceclean)
    except ProductError as resp:
        log.error(str(resp) + ' Check spelling?')
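
# Hedged aside (not part of the original module): the per-overpass day expansion used in
# process_overpass above, factored into a standalone sketch. The helper name
# "dates_spanned" is hypothetical; it reproduces the day_count / strftime('%Y%m%d') logic.
def dates_spanned(start_dt, end_dt):
    """Return YYYYMMDD strings for every day from start_dt through end_dt inclusive."""
    from datetime import timedelta
    day_count = (end_dt - start_dt).days + 1
    return [(start_dt + timedelta(days=n)).strftime('%Y%m%d') for n in range(day_count)]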
cmd_args.addarg(file)
# We only need to pass the sectorfile that actually contains this
# sector - don't pass all sectorfiles that we are using in gendriver
# (which could include all dynamic sectorfiles, and all static...)
start_dt = dt - timedelta(hours=9)
# if not fileobj.end_dt:
#     end_dt = fileobj.start_dt + timedelta(minutes=15)
# else:
#     end_dt = fileobj.end_dt
end_dt = dt + timedelta(minutes=15)
actual_dt = start_dt
currsf = sectorfile.open(sectorfiles=sectorfiles,
                         allstatic=allstatic,
                         alldynamic=alldynamic,
                         tc=tc,
                         volcano=volcano,
                         start_datetime=start_dt,
                         end_datetime=end_dt,
                         actual_datetime=actual_dt,
                         sectorlist=currsectorlist,
                         one_per_sector=True)
if currsf.names:
    cmd_args.addopt('sectorfiles', ' '.join(currsf.names))
if productlist:
    cmd_args.addopt('productlist', ' '.join(productlist))
if currsectorlist:
    cmd_args.addopt('sectorlist', ' '.join(currsectorlist))
if tc:
    cmd_args.addopt('tc')
if volcano:
    cmd_args.addopt('volcano')
if allstatic:
    cmd_args.addopt('allstatic')
if alldynamic:
    cmd_args.addopt('alldynamic')
cmd_args.addopt('forcereprocess')
def global_stitched(full_xarray, area_def):
    ''' Stitching geostationary and polar imagery into a single product

        NOTE in geoips/geoimg/plot/prototypealg.py
            scifile is converted to xarray BEFORE being passed
            sector is converted to area_def BEFORE being passed
        from geoips2.geoips1_utils.scifile import xarray_from_scifile
        from geoips2.geoips1_utils.sector import area_def_from_sector
    '''
    # Run stuff like this to produce a global stitched image over time:
    # ./geoips/geoips/driver.py /data/20190529.11/goes16/20190529.1100/ -s globalglobal -p global-stitched
    # ./geoips/geoips/driver.py /data/20190529.11/goes17/20190529.1100/ -s globalglobal -p global-stitched
    # ./geoips/geoips/driver.py /data/20190529.11/ahi/20190529.1100/ -s globalglobal -p global-stitched
    # ./geoips/geoips/driver.py /data/20190529.11/meteo11EU/20190529.1100/ -s globalglobal -p global-stitched
    # ./geoips/geoips/driver.py /data/20190529.11/meteo8EU/20190529.1100/ -s globalglobal -p global-stitched
    # ./geoips/geoips/driver.py /data/20190529.11/npp/20190529.1100/ -s globalarctic -p global-stitched
    # ./geoips/geoips/driver.py /data/20190529.11/jpss/20190529.1100/ -s globalantarctic -p global-stitched

    # MLS5 Turn this off completely for now. We can move the return farther and farther down the line as we go!
    # return

    if full_xarray.source_name in SAT_CONFIG['platforms']['roi'].keys():
        roi = SAT_CONFIG['platforms']['roi'][full_xarray.source_name]
        full_xarray.attrs['interpolation_radius_of_influence'] = roi

    varname = SAT_CONFIG['platforms']['merge_channels'][full_xarray.source_name]
    prodname = SAT_CONFIG['platforms']['merge_channels']['product_name']

    from geoips2.xarray_utils.data import sector_xarray_dataset
    sect_xarray = sector_xarray_dataset(full_xarray, area_def, [varname])
    if sect_xarray is None:
        LOG.info('NO COVERAGE, SKIPPING')
        return None
    sect_xarray.attrs['sector_name'] = area_def.area_id
    sect_xarray[prodname] = sect_xarray[varname]

    from geoips2.xarray_utils.interpolation import interp_nearest
    [interp_data] = interp_nearest(area_def, sect_xarray, varlist=[prodname])

    import xarray
    alg_xarray = xarray.Dataset()
    target_lons, target_lats = area_def.get_lonlats()
    alg_xarray[prodname] = xarray.DataArray(interp_data)
    alg_xarray['latitude'] = xarray.DataArray(target_lats)
    alg_xarray['longitude'] = xarray.DataArray(target_lons)
    alg_xarray.attrs = sect_xarray.attrs.copy()

    from geoips2.filenames.product_filenames import netcdf_write_filename
    # Use %H%M time format so we don't try to match seconds.
    ncdf_fname = netcdf_write_filename(gpaths['PRECALCULATED_DATA_PATH'],
                                       product_name=prodname,
                                       source_name=alg_xarray.source_name,
                                       platform_name=alg_xarray.platform_name,
                                       sector_name=area_def.area_id,
                                       product_datetime=alg_xarray.start_datetime,
                                       time_format='%H%M')
    from geoips2.xarray_utils.outputs import write_xarray_netcdf
    write_xarray_netcdf(alg_xarray, ncdf_fname)

    if alg_xarray.source_name in SAT_CONFIG['platforms']['run_on_sources']:
        from geoips2.data_manipulations.merge import get_matching_files, merge_data
        import geoips.sectorfile as sectorfile
        for primary_sector_name in SAT_CONFIG['sectors'].keys():
            # Use %H%M time format so we don't try to match seconds, and so we match
            # the written filename above.
            match_fnames = get_matching_files(
                primary_sector_name,
                subsector_names=SAT_CONFIG['sectors'][primary_sector_name]['merge_lines'],
                platforms=SAT_CONFIG['platforms']['merge_platforms'],
                sources=SAT_CONFIG['platforms']['merge_sources'],
                max_time_diffs=SAT_CONFIG['platforms']['merge_max_time_diffs'],
                basedir=gpaths['PRECALCULATED_DATA_PATH'],
                merge_datetime=full_xarray.start_datetime,
                product_name=prodname,
                time_format='%H%M')

            # Open the primary_sector_name to get the lat/lons and resolution of the overall sector
            main_sector = sectorfile.open(
                sectorlist=[primary_sector_name]).open_sector(primary_sector_name)
            sectlons, sectlats = main_sector.area_definition.get_lonlats()

            finaldata, attrs = merge_data(
                match_fnames,
                sectlons.shape,
                variable_name=prodname,
                merge_samples=SAT_CONFIG['sectors'][primary_sector_name]['merge_samples'],
                merge_lines=SAT_CONFIG['sectors'][primary_sector_name]['merge_lines'])

            stitched_xarray = xarray.Dataset()
            stitched_xarray[prodname] = xarray.DataArray(finaldata).astype(numpy.float32)
            latdims = list(stitched_xarray.dims.keys())[0]
            londims = list(stitched_xarray.dims.keys())[1]
            stitched_xarray['latitude'] = xarray.DataArray(sectlats[:, 0],
                                                           dims=latdims).astype(numpy.float32)
            stitched_xarray['longitude'] = xarray.DataArray(sectlons[0, :],
                                                            dims=londims).astype(numpy.float32)
            stitched_xarray.attrs = attrs
            stitched_xarray.attrs['start_datetime'] = full_xarray.start_datetime
            stitched_xarray.attrs['platform_name'] = 'stitched'
            stitched_xarray.attrs['source_name'] = 'stitched'
            stitched_xarray.attrs['data_provider'] = 'stitched'

            # ## from geoips2.output_formats.image import plot_image, set_plotting_params, coverage
            # ## set_plotting_params(geoimg_obj,
            # ##                     finaldata,
            # ##                     cmap_name='Blues',
            # ##                     title_str='This is\nMy Title',
            # ##                     is_final=True)

            from geoips2.data_manipulations.info import percent_unmasked
            from geoips2.xarray_utils.outputs import output_geoips_fname
            web_fname = output_geoips_fname(area_def, stitched_xarray, prodname,
                                            percent_unmasked(finaldata))
            web_fname_clear = output_geoips_fname(area_def, stitched_xarray,
                                                  prodname + 'Clear',
                                                  percent_unmasked(finaldata))

            from geoips2.image_utils.mpl_utils import create_figure_and_main_ax_and_mapobj
            from geoips2.image_utils.mpl_utils import set_matplotlib_colors_standard
            from geoips2.image_utils.mpl_utils import plot_image, save_image, plot_overlays
            from geoips2.image_utils.mpl_utils import get_title_string_from_objects, set_title
            from geoips2.image_utils.mpl_utils import create_colorbar

            # Create matplotlib figure and main axis, where the main image will be plotted
            fig, main_ax, mapobj = create_figure_and_main_ax_and_mapobj(
                area_def.x_size, area_def.y_size, area_def)

            # Create the matplotlib color info dict - the fields in this dictionary (cmap, norm,
            # boundaries, etc) will be used in plot_image to ensure the image matches the colorbar.
            mpl_colors_info = set_matplotlib_colors_standard(
                data_range=[finaldata.min(), finaldata.max()],
                cmap_name=None,
                cbar_label=None)

            # Plot the actual data on a basemap or cartopy instance
            plot_image(main_ax, finaldata, mapobj, mpl_colors_info=mpl_colors_info)

            # Save the clean image with no gridlines or coastlines
            save_image(fig, web_fname_clear, is_final=False)

            # Set the title for final image
            title_string = get_title_string_from_objects(area_def, sect_xarray, prodname)
            set_title(main_ax, title_string, area_def.y_size)

            # Create the colorbar to match the mpl_colors
            create_colorbar(fig, mpl_colors_info)

            # Plot gridlines and boundaries overlays
            plot_overlays(mapobj, main_ax, area_def,
                          boundaries_info=None, gridlines_info=None)

            # Save the final image
            save_image(fig, web_fname, is_final=True)

            from geoips2.filenames.product_filenames import netcdf_write_filename
            del stitched_xarray.attrs['start_datetime']
            stitched_xarray.attrs['valid_datetime'] = full_xarray.start_datetime
            # Use %H%M time format so we don't try to match seconds.
            ncdf_fname = netcdf_write_filename(gpaths['MAPROOMDROPBOX'],
                                               product_name=prodname,
                                               source_name=full_xarray.source_name,
                                               platform_name=full_xarray.platform_name,
                                               sector_name=area_def.area_id,
                                               product_datetime=stitched_xarray.valid_datetime,
                                               time_format='%H%M')
            from geoips2.xarray_utils.outputs import write_xarray_netcdf
            write_xarray_netcdf(full_xarray, ncdf_fname)
    else:
        LOG.info('SKIPPING Not specified to run on %s, exiting', alg_xarray.source_name)
        return None
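
# Hedged aside (not part of the original module): a minimal sketch of the Dataset
# construction pattern global_stitched relies on, with explicit dimension names so the
# product, latitude, and longitude arrays are guaranteed to align. The function name
# "build_product_dataset" and the dim names 'lines'/'samples' are assumptions for
# illustration only, not the GeoIPS API.
def build_product_dataset(data2d, lats2d, lons2d, product_name):
    """Wrap a 2D product array and matching 2D lat/lon arrays in an xarray.Dataset."""
    import numpy
    import xarray
    ds = xarray.Dataset()
    ds[product_name] = xarray.DataArray(numpy.asarray(data2d, dtype=numpy.float32),
                                        dims=('lines', 'samples'))
    ds['latitude'] = xarray.DataArray(numpy.asarray(lats2d, dtype=numpy.float32),
                                      dims=('lines', 'samples'))
    ds['longitude'] = xarray.DataArray(numpy.asarray(lons2d, dtype=numpy.float32),
                                       dims=('lines', 'samples'))
    return ds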