# Exemplo n.º 1 (scraped example header from a code-sharing site; score: 0)
def get_hrrr_variable(DATE,
                      variable,
                      fxx=0,
                      model='hrrr',
                      field='sfc',
                      removeFile=True,
                      value_only=False,
                      verbose=True,
                      outDIR='./'):
    """
    Uses cURL to grab just one variable from a HRRR grib2 file on the MesoWest
    HRRR archive.

    Past days are served from the Pando archive; requests for today are
    redirected to NOMADS (operational 'hrrr') or downloaded in full from the
    ESRL FTP site (experimental 'hrrrX').

    Input:
        DATE - the datetime(year, month, day, hour) for the HRRR file you want
               This must be in UTC, obviously.
        variable - a string describing the variable you are looking for.
                   Refer to the .idx files here: https://api.mesowest.utah.edu/archive/HRRR/
                   You want to put the variable short name and the level information
                   For example, for 2m temperature: 'TMP:2 m above ground'
        fxx - the forecast hour you desire. Default is the analysis hour.
        model - the model you want. Options include ['hrrr', 'hrrrX', 'hrrrAK']
        field - the file type your variable is in. Options include ['sfc', 'prs']
        removeFile - True will remove the grib2 file after downloaded. False will not.
        value_only - Only return the values. Fastest return speed if set to True, when all you need is the value.
                     Return Time .75-1 Second if False, .2 seconds if True.
        verbose - prints some stuff out
        outDIR - directory the temporary grib2 download is written to.

    Return:
        A dict with key 'value' and, unless value_only is True, also 'lat',
        'lon', 'valid', 'anlys', and 'msg'. On failure every entry is np.nan.
    """
    # Model directory names on the archive differ from the model name itself.
    # NOTE(review): an unrecognized model leaves model_dir unset, which raises
    # NameError when the Pando URL is built below -- confirm callers only pass
    # the three documented options.
    if model == 'hrrr':
        model_dir = 'oper'
    elif model == 'hrrrX':
        model_dir = 'exp'
    elif model == 'hrrrAK':
        model_dir = 'alaska'

    # Temp file name has to be very unique, else when we use multiprocessing we
    # might accidentally delete files before we are done with them.
    outfile = '%stemp_%04d%02d%02d%02d_f%02d_%s.grib2' % (
        outDIR, DATE.year, DATE.month, DATE.day, DATE.hour, fxx, variable[:3])

    if verbose is True:
        print outfile

    # Dear User,
    # Only HRRR files for the previous day have been transferred to Pando.
    # That means if you are requesting data for today, you need to get it from
    # the NOMADS website. Good news, it's an easy fix. All we need to do is
    # redirect you to the NOMADS URLs. I'll check that the date you are
    # requesting is not for today's date. If it is, then I'll send you to
    # NOMADS. Deal? :)
    #                                                    -Sincerely, Brian
    UTC = datetime.utcnow()  # the current date in UTC
    if DATE < datetime(UTC.year, UTC.month, UTC.day):
        # Get HRRR from Pando
        if verbose is True:
            print "Oh, good, you requested a date that should be on Pando."
        # URL for the grib2.idx file
        fileidx = 'https://api.mesowest.utah.edu/archive/HRRR/%s/%s/%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2.idx' \
                    % (model_dir, field, DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
        # URL for the grib2 file (located on PANDO S3 archive)
        pandofile = 'https://pando-rgw01.chpc.utah.edu/HRRR/%s/%s/%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2' \
                    % (model_dir, field, DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
    else:
        # Get operational HRRR from NOMADS
        # NOTE(review): a today's-date request for model 'hrrrAK' matches
        # neither branch below, so fileidx/pandofile stay unbound and the
        # final except clause itself raises NameError -- confirm intended.
        if model == 'hrrr':
            if verbose is True:
                print "\n-----------------------------------------------------------------------"
                print "!! Hey! You are requesting a date that is not on the Pando archive  !!"
                print "!! That's ok, I'll redirect you to the NOMADS server. :)            !!"
                print "-----------------------------------------------------------------------\n"
            # URL for the grib2 idx file
            fileidx = 'http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2.idx' \
                        % (DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
            # URL for the grib2 file (located on NOMADS server)
            pandofile = 'http://nomads.ncep.noaa.gov/pub/data/nccf/com/hrrr/prod/hrrr.%04d%02d%02d/%s.t%02dz.wrf%sf%02d.grib2' \
                        % (DATE.year, DATE.month, DATE.day, model, DATE.hour, field, fxx)
        # or, get experimental HRRR from ESRL
        elif model == 'hrrrX':
            print "\n-----------------------------------------------------------------------"
            print "!! Need to download today's Experimental HRRRfrom ESRL via FTP !!"
            print "!! Have to get the full file, and then will have to sift through each field"
            print "-----------------------------------------------------------------------\n"
            import sys
            sys.path.append(
                '/uufs/chpc.utah.edu/common/home/u0553130/pyBKB_v2/')
            from BB_MesoWest.get_token import get_ESRL_credentials
            user, password = get_ESRL_credentials()
            # ESRL files are named by run time (YYJJJHH00) plus forecast (FF00)
            ESRL_file = datetime.strftime(DATE, '%y%j%H00') + '%02d00' % (fxx)
            ftp = FTP('gsdftp.fsl.noaa.gov')
            ftp.login(user, password)
            ftp.cwd('hrrr/conus/wrftwo')

            # What is the initialized hour and forecast?
            hour = ESRL_file[5:7]
            forecast = ESRL_file[9:11]

            # Save the file similar to the standard hrrr file naming convention
            # except insert an X to represent that this is the experimental version
            OUTDIR = './'
            NEWFILE = 'hrrrX.t%sz.wrfsfcf%s.grib2' % (hour, forecast)
            if os.path.isfile(OUTDIR + NEWFILE):
                # NOTE(review): on this path the FTP connection opened above
                # is never closed (ftp.quit() only runs after a download).
                print "looks like that file already exists", OUTDIR + NEWFILE
            else:
                print "Downloading:", OUTDIR + NEWFILE
                ftp.retrbinary('RETR ' + ESRL_file,
                               open(OUTDIR + NEWFILE, 'wb').write)
                ftp.quit()
                print "Finished Downloading"
                # Build a wgrib2 inventory so we can locate the field number.
                os.system('wgrib2 ' + OUTDIR + NEWFILE +
                          ' -t -var -lev -ftime > ' + OUTDIR + NEWFILE +
                          '.idx')
            # Scan the inventory for the requested variable and remember its
            # grib message number. NOTE(review): if the pattern never matches,
            # 'number' stays unbound and grbs[number] below raises NameError.
            # Also, idxpage is never explicitly closed.
            idxpage = open(OUTDIR + NEWFILE + '.idx')
            lines = idxpage.readlines()
            gcnt = 0
            for g in lines:
                expr = re.compile(variable)
                if expr.search(g):
                    if verbose is True:
                        print 'matched a variable', g
                    parts = g.split(':')
                    number = int(parts[0])
                    if verbose is True:
                        print 'grib field number:', number
                gcnt += 1
            # 3) Get data from the file, using pygrib
            grbs = pygrib.open(OUTDIR + NEWFILE)
            if value_only is True:
                value = grbs[number].values
                # (Remove the temporary file)
                #    ?? Is it possible to push the data straight from curl to ??
                #    ?? pygrib, without writing/removing a temp file? and     ??
                #    ?? would that speed up this process?                     ??
                if removeFile is True:
                    os.system('rm -f %s' % (OUTDIR + NEWFILE))
                    os.system('rm -f %s' % (OUTDIR + NEWFILE + '.idx'))
                return {'value': value}

            else:
                value, lat, lon = grbs[number].data()
                validDATE = grbs[number].validDate
                anlysDATE = grbs[number].analDate
                msg = str(grbs[number])

                # 4) Remove the temporary file
                if removeFile == True:
                    os.system('rm -f %s' % (OUTDIR + NEWFILE))
                    os.system('rm -f %s' % (OUTDIR + NEWFILE + '.idx'))

                # 5) Return some import stuff from the file
                return {
                    'model': model,
                    'value': value,
                    'lat': lat,
                    'lon': lon,
                    'valid': validDATE,
                    'anlys': anlysDATE,
                    'msg': msg
                }

    try:
        # 0) Read in the grib2.idx file
        try:
            # ?? Ignore ssl certificate (else urllib2.openurl wont work).
            #    Depends on your version of python.
            #    See here:
            #    http://stackoverflow.com/questions/19268548/python-ignore-certicate-validation-urllib2
            ctx = ssl.create_default_context()
            ctx.check_hostname = False
            ctx.verify_mode = ssl.CERT_NONE
            idxpage = urllib2.urlopen(fileidx, context=ctx)
        except:
            # Older pythons have no 'context' keyword; retry without it.
            idxpage = urllib2.urlopen(fileidx)

        lines = idxpage.readlines()

        # 1) Find the byte range for the variable. Need to first find where the
        #    variable is located. Keep a count (gcnt) so we can get the end
        #    byte range from the next line.
        gcnt = 0
        for g in lines:
            expr = re.compile(variable)
            if expr.search(g):
                if verbose is True:
                    print 'matched a variable', g
                parts = g.split(':')
                rangestart = parts[1]
                parts = lines[gcnt + 1].split(':')
                rangeend = int(parts[1]) - 1
                if verbose is True:
                    print 'range:', rangestart, rangeend
                byte_range = str(rangestart) + '-' + str(rangeend)
                # 2) When the byte range is discovered, use cURL to download.
                os.system('curl -s -o %s --range %s %s' %
                          (outfile, byte_range, pandofile))
            gcnt += 1

        # 3) Get data from the file, using pygrib
        grbs = pygrib.open(outfile)
        if value_only is True:
            value = grbs[1].values
            # (Remove the temporary file)
            #    ?? Is it possible to push the data straight from curl to ??
            #    ?? pygrib, without writing/removing a temp file? and     ??
            #    ?? would that speed up this process?                     ??
            if removeFile is True:
                os.system('rm -f %s' % (outfile))
            return {'value': value}

        else:
            value, lat, lon = grbs[1].data()
            validDATE = grbs[1].validDate
            anlysDATE = grbs[1].analDate
            msg = str(grbs[1])

            # 4) Remove the temporary file
            if removeFile == True:
                os.system('rm -f %s' % (outfile))

            # 5) Return some import stuff from the file
            return {
                'value': value,
                'lat': lat,
                'lon': lon,
                'valid': validDATE,
                'anlys': anlysDATE,
                'msg': msg
            }

    except:
        # NOTE(review): bare except hides the actual failure (bad URL, no
        # variable match, pygrib error). Everything collapses into the
        # all-NaN return below.
        print " ! Could not get the file:", pandofile
        print " ! Is the variable right?", variable
        print " ! Does the file exist?", fileidx
        return {
            'value': np.nan,
            'lat': np.nan,
            'lon': np.nan,
            'valid': np.nan,
            'anlys': np.nan,
            'msg': np.nan
        }
# Directory to save the downloads. Create it if it doesn't exist.
OUTDIR = '/uufs/chpc.utah.edu/common/home/horel-group/archive/%04d%02d%02d/BB_test/models/hrrrAK/' % (yesterday.year, yesterday.month, yesterday.day)
if not os.path.exists(OUTDIR):
    os.makedirs(OUTDIR)
    # Permissions rwxr-xr-x:
    #   user   -> read, write, execute
    #   group  -> read, execute
    #   others -> read, execute
    dir_mode = (stat.S_IRWXU |
                stat.S_IRGRP | stat.S_IXGRP |
                stat.S_IROTH | stat.S_IXOTH)
    os.chmod(OUTDIR, dir_mode)

# Credentials for logging into ESRL FTP database
user, password = get_ESRL_credentials()

# rclone config file
config_file = '/scratch/local/mesohorse/.rclone.conf'  # meso1 mesohorse user
# ----------------------------------------------------------------------------


def create_idx(for_this_file, put_here):
    """
    Create a .idx file and move to horel-group/archive/HRRR

    Input:
        for_this_file - full path of the grib2 file to inventory.
        put_here      - subdirectory, relative to horel-group/archive/,
                        where the .idx file should land.

    NOTE(review): the body visible here only builds the destination
    directory and the .idx file name; the step that actually writes the
    index (e.g. a wgrib2 call) appears to be missing/truncated -- confirm
    against the original source.
    """
    # Keep just the file name, dropping any leading directories.
    file_name = for_this_file.split('/')[-1]
    idx_dir = '/uufs/chpc.utah.edu/common/home/horel-group/archive/' + put_here
    if not os.path.exists(idx_dir):
        os.makedirs(idx_dir)
    # Full destination path for the index file.
    idx_name = idx_dir + file_name + '.idx'
# Exemplo n.º 3 (scraped example header from a code-sharing site; score: 0)
def get_grib2(model, model_params, DIR, idx=True):
    """
    Download EXPERIMENTAL HRRR from NOAA ESRL via FTP
    ftp://gsdftp.fsl.noaa.gov/

    Files on the FTP site are only available for the last day.

    Input:
        model        - [hrrrX, hrrrakX]
                       NOTE(review): the code below branches on 'hrrrak'
                       (no trailing X) -- confirm the spelling callers use;
                       any other value leaves ESRL/png unset (NameError).
        model_params - A dictionary of the model parameters:
                       {'hours': range(0,24),
                        'fxx':{'sfc':range(0,18),
                               'prs':range(0,1)}}
        DIR   - Where should I save the files?
        idx   - Should I download/create an .idx file?
    """

    # Credentials for logging into ESRL FTP database
    user, password = get_ESRL_credentials()

    # Models are named slightly different on ESRL than on NOMADS. I want to
    # preserve my naming convention for the files downloaded from ESRL.
    # NOTE(review): 'png' is assigned here but never used in this function.
    if model == 'hrrrX':
        ESRL = 'hrrr/conus/'
        png = True
    elif model == 'hrrrak':
        ESRL = 'hrrr_ak/alaska/'
        png = False

    # For every requested field type, get a list of the available files
    requested_fields = [
        T for T in model_params['fxx'].keys()
        if len(model_params['fxx'][T]) > 0
    ]
    for field in requested_fields:
        # One short-lived connection just to list the directory contents.
        ftp = FTP('gsdftp.fsl.noaa.gov')
        ftp.login(user, password)
        if field == 'sfc':
            ftp.cwd('%s/wrftwo' % ESRL)
        elif field == 'prs':
            ftp.cwd('%s/wrfprs' % ESRL)
        elif field == 'nat':
            ftp.cwd('%s/wrfnat' % ESRL)
        # Get a list of the files...
        ftp_filenames = ftp.nlst()
        ftp.quit()

        ## Filter list of files for files named with only digits
        ## (ESRL run files are named YYJJJHHMM + FF00; py2 filter -> list)
        ftp_filenames = filter(lambda x: x.isdigit(), ftp_filenames)

        ## Filter list of files by if the fxx was requested
        ## (characters [-4:-2] of the name are the forecast hour)
        ftp_filenames = filter(
            lambda x: int(x[-4:-2]) in model_params['fxx'][field],
            ftp_filenames)

        ## Extract from each ftp_filenames the run datetime and forecast hour
        ftp_filenames_EXTRACT = map(
            lambda x: (datetime.strptime(x[:-4], '%y%j%H%M'), x[-4:-2]),
            ftp_filenames)

        ## List the path and new file name for each ftp_filenames_EXTRACT
        ftp_filenames_NEW = map(
            lambda x: '%s/%s/%s/%s.t%02dz.wrf%sf%s.grib2' % (model, field, x[
                0].strftime('%Y%m%d'), model, x[0].hour, field, x[1]),
            ftp_filenames_EXTRACT)

        ## Filter the ftp file list if the file has not been downloaded
        ## (re-download anything missing or suspiciously small, < ~50 MB)
        not_on_Pando = map(
            lambda x: not os.path.isfile(DIR + x) or os.path.getsize(DIR + x) <
            5 * 10e6, ftp_filenames_NEW)
        ftp_filenames = [
            ftp_filenames[d] for d in range(len(not_on_Pando))
            if not_on_Pando[d]
        ]

        # Download each file, only if the file name is made of a digits
        for i in ftp_filenames:
            # I only have success downloading if I login to the FTP site each
            # time I download a file.
            ftp = FTP('gsdftp.fsl.noaa.gov')
            ftp.login(user, password)
            if field == 'sfc':
                ftp.cwd('%s/wrftwo' % ESRL)
            elif field == 'prs':
                ftp.cwd('%s/wrfprs' % ESRL)
            elif field == 'nat':
                ftp.cwd('%s/wrfnat' % ESRL)
            print "logged in for", i
            # What is the file's initialized hour and forecast?
            DATE = datetime.strptime(i[:-4], '%y%j%H%M')
            fxx = int(i[-4:-2])

            # Where should I put this file?
            PATH = '%s/%s/%s/' % (model, field, DATE.strftime('%Y%m%d'))
            FILE = '%s.t%02dz.wrf%sf%02d.grib2' % (model, DATE.hour, field,
                                                   fxx)
            print DIR + PATH + FILE

            # If the destination DIR path does not exist, then create it
            if not os.path.exists(DIR + PATH):
                os.makedirs(DIR + PATH)

            print "Downloading:", DIR + PATH + FILE
            ftp.retrbinary('RETR ' + i, open(DIR + PATH + FILE, 'wb').write)
            print "Saved:", DIR + PATH + FILE
            ftp.quit()

            # Create the .idx file
            # NOTE(review): called with one argument, but the create_idx
            # defined earlier in this file takes two (for_this_file,
            # put_here) -- confirm which create_idx this is meant to call.
            if idx:
                create_idx(DIR + PATH + FILE)