Exemplo n.º 1
0
def process_file(ifile,suffix,average=False,forcedaily=False,mask=True,xlist=[]):

    try:
        d = cdms2.open(ifile)
    except:
        print "Error opening file", ifile
        usage()
        sys.exit(1)

    hcrit = 0.5 # Critical value of Heavyside function for inclusion.
    ofilelist = []

    for vn in d.variables:
        var = d.variables[vn]
        # Need to check whether it really has a stash_item to skip coordinate variables

        # Note: need to match both item and section number
        if not hasattr(var,'stash_item'):
            continue
        item_code = var.stash_section[0]*1000 + var.stash_item[0]
        if item_code in xlist:
            print "Skipping", item_code
            continue

        grid = var.getGrid()
        time = var.getTime()
        timevals = np.array(time[:])
        if forcedaily:
            # Work around cdms error in times
            for k in range(len(time)):
                timevals[k] = round(timevals[k],1)

        umvar = stashvar.StashVar(item_code,var.stash_model[0])
        vname = umvar.name
        print vname, var[0,0,0,0]

        # Create filename from variable name and cell_methods,
        # checking for name collisions
        if suffix:
            ofile = "%s_%s.nc" % (umvar.uniquename, suffix)
        else:
            ofile = "%s.nc" % umvar.uniquename
        if ofile in ofilelist:
            raise Exception("Duplicate file name %s" % ofile)
        ofilelist.append(ofile)

    #  If output file exists then append to it, otherwise create a new file
        try:
            file = cdms2.openDataset(ofile, 'r+')
            newv = file.variables[vname]
            newtime = newv.getTime()
        except cdms2.error.CDMSError:
            file = cdms2.createDataset(ofile)
        # Stop it creating the bounds_latitude, bounds_longitude variables
            cdms2.setAutoBounds("off")

            # By default get names like latitude0, longitude1
            # Need this awkwardness to get the variable/dimension name set correctly
            # Is there a way to change the name cdms uses after 
            # newlat = newgrid.getLatitude() ????
            newlat = file.createAxis('lat', grid.getLatitude()[:])
            newlat.standard_name = "latitude"
            newlat.axis = "Y"
            newlat.units = 'degrees_north'
            newlon = file.createAxis('lon', grid.getLongitude()[:])
            newlon.standard_name = "longitude"
            newlon.axis = "X"
            newlon.units = 'degrees_east'

            order = var.getOrder()
            if order[1] == 'z':
                lev = var.getLevel()
                if len(lev) > 1:
                    newlev = file.createAxis('lev', lev[:])
                    for attr in ('standard_name', 'units', 'positive', 'axis'):
                        if hasattr(lev,attr):
                            setattr(newlev, attr, getattr(lev,attr))
                else:
                    newlev = None
            else:
                # Pseudo-dimension
                pdim = var.getAxis(1)
                if len(pdim) > 1:
                    newlev = file.createAxis('pseudo', pdim[:])
                else:
                    newlev = None

            newtime = file.createAxis('time', None, cdms2.Unlimited)
            newtime.standard_name = "time"
            newtime.units = time.units # "days since " + `baseyear` + "-01-01 00:00"
            newtime.setCalendar(time.getCalendar())
            newtime.axis = "T"

            if var.dtype == np.dtype('int32'):
                vtype = cdms2.CdInt
                missval = -2147483647
            else:
                vtype = cdms2.CdFloat
                missval = 1.e20

            if newlev:
                newv = file.createVariable(vname, vtype, (newtime, newlev, newlat, newlon))
            else:
                newv = file.createVariable(vname, vtype, (newtime, newlat, newlon))
            for attr in ("standard_name", "long_name", "units"):
                if hasattr(umvar, attr):
                    newv.setattribute(attr, getattr(umvar,attr))
            newv.missing_value = missval
            newv.stash_section=var.stash_section[0] 
            newv.stash_item=var.stash_item[0] 
            newv._FillValue = missval

            try:
                newv.units = var.units
            except AttributeError:
                pass

        # Get appropriate file position
        # Uses 360 day calendar, all with same base time so must be 30 days on.
        k = len(newtime)
        # float needed here to get the later logical tests to work properly
        avetime = float(MV.average(timevals[:])) # Works in either case
        if k>0:
            if average:
                # if newtime[-1] != (avetime - 30):
                # For Gregorian calendar relax this a bit
                # Sometimes get differences slightly > 31
                if not 28 <= avetime - newtime[-1] <= 31.5:
                    raise error, "Times not consecutive %f %f %f" % (newtime[-1], avetime, timevals[0])
            else:
                if k > 1:
                    # Need a better test that works when k = 1. This is just a
                    # temporary workaround
                    if not np.allclose( newtime[-1] + (newtime[-1]-newtime[-2]), timevals[0] ):
                        raise error, "Times not consecutive %f %f " % (newtime[-1], timevals[0])

        if (30201 <= item_code <= 30303) and mask:
            # P LEV/UV GRID with missing values treated as zero.
            # Needs to be corrected by Heavyside fn
            heavyside = d.variables['psag']
            # Check variable code as well as the name.
            if heavyside.stash_item[0] != 301 or heavyside.stash_section[0] != 30:
                raise error, "Heavyside variable code mismatch"

        if average:
            newtime[k] = avetime
            if var.shape[1] > 1:
                # multiple levels
                newv[k] = MV.average(var[:],axis=0).astype(np.float32)
            else:
                # single level
                newv[k] = MV.average(var[:],axis=0)[0].astype(np.float32)
        else:
            for i in range(len(timevals)):
                if var.shape[1] > 1:
                    # Multi-level
                    if (30201 <= item_code <= 30303) and mask:
                        newv[k+i] = np.where( np.greater(heavyside[i], hcrit), var[i]/heavyside[0], newv.getMissing())
                    else:
                        newv[k+i] = var[i]
                else:
                    newv[k+i] = var[i,0]

                newtime[k+i] = timevals[i]

        file.close()
Exemplo n.º 2
0
    newv.stash_item = var.stash_item[0]
    newv.missing_value = missval
    newv._FillValue = missval

    try:
        newv.units = var.units
    except AttributeError:
        pass

file.history += "\n%s: Processed %s" % (datetime.datetime.today().strftime('%Y-%m-%d %H:%M'), ifile)

# Get appropriate file position
# Uses 360 day calendar, all with same base time so must be 30 days on.
k = len(newtime)
# float needed here to get the later logical tests to work properly
avetime = float(MV.average(timevals[:])) # Works in either case
if k>0:
    if average:
        #if newtime[-1] != (avetime - 30):
        # For Gregorian calendar relax this a bit
        # Sometimes get differences slightly > 31
        if not 28 <= avetime - newtime[-1] <= 31.5:
            raise error, "Times not consecutive %f %f %f" % (newtime[-1], avetime, timevals[0])
    else:
        if k > 1:
            # Need a better test that works when k = 1. This is just a
            # temporary workaround
            # For monthly data
            if 27 < newtime[-1] - newtime[-2] < 32:
                if not 27 < timevals[0] - newtime[-1] < 32:
                    raise error, "Monthly times not consecutive %f %f " % (newtime[-1], timevals[0])