Пример #1
0
def calc_moy(ddirout, deb, fin, pays, niveau, types, sat, prod, res_temp, res,
             varname, shape):
    """Compute per-district statistics of a gridded NetCDF variable.

    The first file (alphabetical order) matching ``*<res_temp>.nc`` in
    ``ddirDB/<types>/<sat>/<prod>/<res>`` is read, the time steps between
    ``deb`` and ``fin`` are extracted, and ``calc_stats`` is run in parallel
    on chunks of that time series. Count/min/max/mean/std are written to a
    new NetCDF file in ``ddirout`` and also returned as DataFrames.

    Side effects: changes the current working directory to the input
    directory, prints progress information, and creates the output file.

    Args:
        ddirout: Directory in which the output NetCDF file is created.
        deb: Start date, formatted ``YYYY-MM-DD``.
        fin: End date, formatted ``YYYY-MM-DD``.
        pays: Country label, used in shapefile and output file names.
        niveau: Aggregation level; ``"aire"`` selects the per-year
            ``fs_par_annee`` shapefiles, any other value selects
            ``carto/<niveau>/<pays>_<niveau>_sante.shp``.
        types: Path component of the input directory.
        sat: Path component of the input directory; the value ``'toms'``
            makes fill values become -999 instead of NaN.
        prod: Path component of the input directory.
        res_temp: Suffix used to select input files (``*<res_temp>.nc``) and
            appended to the output file name.
        res: Path component of the input directory.
        varname: Name of the NetCDF variable to aggregate.
        shape: Shapefile set name, only used when ``niveau == "aire"``.

    Returns:
        dict mapping ``"nbpx"``, ``"vmax"``, ``"vmean"``, ``"vmin"`` and
        ``"vstd"`` to DataFrames (rows: dates, columns: ``geodf.name``),
        rounded to 4 decimals.

    Raises:
        IndexError: If no input file matches ``*<res_temp>.nc``.
    """
    # Period bounds.
    datedeb = datetime.strptime(deb, "%Y-%m-%d")
    datefin = datetime.strptime(fin, "%Y-%m-%d")
    anfin = datefin.strftime("%Y")

    # Input files are globbed and later opened by relative name, so the
    # working directory has to be the input directory.
    ddirin = os.path.join(ddirDB, types, sat, prod, res)
    os.chdir(ddirin)
    files = sorted(glob.glob('*' + res_temp + '.nc'))  # candidate .nc files
    if not files:
        # Fail early with a clear message rather than on files[0] below.
        raise IndexError(
            "no '*%s.nc' file found in %s" % (res_temp, ddirin))

    print("\n#########################################################################################################################")
    print("############################################## PERIODE " + deb + " " + fin + " ############################################")

    # ---------------- shapefile loading ----------------
    if niveau == "aire":
        # Try the shapefile of the end year first; fall back to the 2015 one
        # when it cannot be read.
        if shape == "all_fs":
            fshape = os.path.join(ddirDB, 'carto/fs_par_annee', shape,
                                  '150409_BF_FS_' + anfin + '.shp')
            fallback = os.path.join(
                ddirDB, 'carto/fs_par_annee/all_fs/150409_BF_FS_2015.shp')
        else:
            fshape = os.path.join(
                ddirDB, 'carto/fs_par_annee', shape,
                '150409_BF_FS_' + shape + '_' + anfin + '.shp')
            fallback = os.path.join(ddirDB, 'carto/fs_par_annee', shape,
                                    '150409_BF_FS_' + shape + '_2015.shp')
        try:
            geodf = gpd.GeoDataFrame.from_file(fshape)
        except Exception:
            # The exact exception type depends on the I/O backend used by
            # geopandas; Exception keeps KeyboardInterrupt/SystemExit alive.
            fshape = fallback
            geodf = gpd.GeoDataFrame.from_file(fshape)
    else:
        fshape = os.path.join(ddirDB, 'carto', niveau,
                              pays + '_' + niveau + '_sante.shp')
        geodf = gpd.GeoDataFrame.from_file(fshape)

    # The second column of the shapefile holds the district/area labels.
    nbdist = len(geodf[geodf.columns[1]])  # number of districts/areas
    # Label list stored in the output file as an attribute of index_dist.
    listdist = '//'.join(geodf[geodf.columns[1]].tolist())
    print("%s %s" % ("shapefile utilise: ", fshape.split("/")[-1]))
    print("%s %s" % ("nombre de districts/aires de sante: ", nbdist))

    # ---------------- output NetCDF creation ----------------
    if varname == "Deep_Blue_Aerosol_Optical_Depth_550_Land":
        varname1 = "AOT"  # short alias used in the output file name
    else:
        varname1 = varname
    print(files[:])
    output = (varname1 + '_' + files[0][:-5] + '_' + niveau + '_' + shape +
              '_' + pays + '_' + deb.replace('-', '') +
              fin.replace('-', '') + res_temp + '.nc')
    ncnew = Dataset(ddirout + '/' + output, 'w')
    nc = None
    try:
        # Dimensions.
        ncnew.createDimension('time', None)
        ncnew.createDimension('index_dist', nbdist)
        # Variables.
        tp = ncnew.createVariable('time', 'f8', ('time', ))
        index = ncnew.createVariable('index_dist', 'f4', ('index_dist', ))
        index[:] = range(nbdist)
        nbpx = ncnew.createVariable('count', 'f4', ('time', 'index_dist'))
        vmin = ncnew.createVariable('min', 'f4', ('time', 'index_dist'))
        vmax = ncnew.createVariable('max', 'f4', ('time', 'index_dist'))
        vmean = ncnew.createVariable('mean', 'f4', ('time', 'index_dist'))
        vstd = ncnew.createVariable('std', 'f4', ('time', 'index_dist'))
        # Attributes.
        ncnew.Convention = 'CF-1.5'
        # Must be a single string: a stray comma here used to store a tuple.
        ncnew.description = 'moyenne districts pour la variable : ' + varname
        ncnew.history = 'Created ' + time.ctime(time.time())
        ncnew.source = ' '
        index.standard_name = listdist
        tp.standard_name = 'time'
        tp.calendar = 'gregorian'

        t0 = time.time()  # start of the timed section
        print("%s %s" % ("\nfichier en traitement: ", files[0]))
        nc = Dataset(files[0], 'r')
        var_in = nc.variables[varname]
        dates = nc.variables['time']

        # Period bounds expressed in the time unit of the input file.
        ndatedeb = date2num(datedeb, dates.units)
        ndatefin = date2num(datefin, dates.units)
        # Shift the bounds according to the hour of the first record
        # (0h, 3h, 6h, ...).
        # NOTE(review): "%H" is zero-padded ("00".."23"), so this test is
        # always true and data stamped at 0h is shifted by 24 as well. Left
        # unchanged to keep the selected time window identical -- confirm the
        # intent. The shift also presumes the time unit is hours.
        first_hour = num2date(dates[0], dates.units).strftime("%H")
        if first_hour != "0":
            shift = 24 - int(first_hour)
            ndatedeb += shift
            ndatefin += shift

        # Indices of the records closest to the bounds; the record closest
        # to the end bound is itself excluded (hence the -1).
        time_values = dates[:]
        iddeb = np.abs(time_values - ndatedeb).argmin()
        idfin = np.abs(time_values - ndatefin).argmin() - 1
        serie_dates = dates[iddeb:idfin + 1]

        print("%s %s" % ("date de debut: ",
                         num2date(serie_dates[0], dates.units)))
        print("%s %s" % ("date de fin: ",
                         num2date(serie_dates[-1], dates.units)))

        var = np.array(var_in[iddeb:idfin + 1, ...])
        # Replace fill values: -999 for 'toms', NaN otherwise.
        if sat == 'toms':
            var[var == var_in._FillValue] = -999
        else:
            var[var == var_in._FillValue] = np.nan
        if "scale_factor" in var_in.ncattrs():
            # NOTE(review): this is (value - add_offset) * scale_factor, not
            # the CF form value * scale_factor + add_offset -- confirm it
            # matches how these files were packed.
            var = (var - var_in.add_offset) * var_in.scale_factor

        # Grid geometry: upper-left corner and mean spacing.
        lat = nc.variables['latitude'][:]
        lon = nc.variables['longitude'][:]
        xo = min(lon)
        yo = max(lat)
        resx = np.abs(np.mean(np.diff(lon)))
        resy = np.abs(np.mean(np.diff(lat)))
        # NOTE(review): pixel size is hard-coded to 0.01 here although
        # resx/resy are computed above -- confirm against calc_stats.
        transform = [xo, 0.01, 0.0, yo, 0.0, -0.01]

        # Split the time series into about 8 chunks (at least one time step
        # each) and process them in parallel.
        idt = max(len(serie_dates) // 8, 1)
        ndt = range(0, len(serie_dates), idt)
        nb_mat_in = [var[ix:ix + idt, ...] for ix in ndt]
        results = Parallel(n_jobs=-1)(
            delayed(calc_stats)(resx, resy, geodf, nbdist, transform,
                                temps_x)
            for temps_x in nb_mat_in)

        # Each result is indexed as 0=count, 1=max, 2=mean, 3=median (not
        # used), 4=min, 5=std; chunks are stitched back along the time axis.
        nbpx_all = np.concatenate([r[0] for r in results], axis=0)
        vmax_all = np.concatenate([r[1] for r in results], axis=0)
        vmean_all = np.concatenate([r[2] for r in results], axis=0)
        vmin_all = np.concatenate([r[4] for r in results], axis=0)
        vstd_all = np.concatenate([r[5] for r in results], axis=0)

        t1 = time.time() - t0
        print("%s %s %s" % ("\nelapsed time: ", t1, "sec"))
        print("%s %s" % ("fichier en sortie: ", output))
        print("%s %s" % ("rep de sortie: ", ddirout))
        print("#########################################################################################################################")
        print("#########################################################################################################################")
        print("#########################################################################################################################\n\n")

        # Write everything to the output file; the time axis goes first so
        # the unlimited dimension has its final length.
        tp[:] = np.append(tp[:], serie_dates)
        tp.units = dates.units
        nbpx[:] = nbpx_all
        vmax[:] = vmax_all
        vmean[:] = vmean_all
        vmin[:] = vmin_all
        vstd[:] = vstd_all

        # Same statistics as DataFrames for the caller.
        date_index = [num2date(d, dates.units).date() for d in serie_dates]
        columns_name = geodf.name.values.tolist()
        stats = {
            "nbpx": nbpx_all,
            "vmax": vmax_all,
            "vmean": vmean_all,
            "vmin": vmin_all,
            "vstd": vstd_all,
        }
        list_df = {
            key: pd.DataFrame(values, index=date_index,
                              columns=columns_name).round(4)
            for key, values in stats.items()
        }
    finally:
        # Always release both files, even when processing fails midway.
        if nc is not None:
            nc.close()
        ncnew.close()
    return list_df
Пример #2
0
def calc_moy(ddirout,ncfile,fshape,deb,fin,pays,niveau,types,sat,prod,res_temp,res,varname,level):
    """Compute per-district statistics of one variable of a NetCDF file.

    The time steps of ``ncfile`` between ``deb`` and ``fin`` are extracted
    (optionally for a single vertical ``level``), ``calc_stats`` is run in
    parallel on chunks of that series, and count/min/max/mean/std are written
    to a new NetCDF file in ``ddirout`` and returned as DataFrames.

    Side effects: changes the current working directory to
    ``ddirDB/<types>/<sat>/<prod>/<res>`` and creates the output file.

    Args:
        ddirout: Directory in which the output NetCDF file is created.
        ncfile: Path of the input NetCDF file.
        fshape: Path of the shapefile describing the districts/areas.
        deb: Start date, formatted ``YYYY-MM-DD``.
        fin: End date, formatted ``YYYY-MM-DD``.
        pays: Country label, used in the output file name.
        niveau: Aggregation level label, used in the output file name.
        types: Path component of the working directory.
        sat: Path component of the working directory; the value ``'toms'``
            makes fill values become -999 instead of NaN.
        prod: Path component of the working directory.
        res_temp: Label appended to the output file name.
        res: Path component of the working directory.
        varname: Name of the NetCDF variable to aggregate.
        level: Index on the second axis of the variable, or -1 to read the
            variable without selecting a level.

    Returns:
        tuple ``(list_df, output)``: ``list_df`` maps ``"nbpx"``, ``"vmax"``,
        ``"vmean"``, ``"vmin"`` and ``"vstd"`` to DataFrames (rows: dates,
        columns: ``geodf.name``) rounded to 4 decimals; ``output`` is the
        path of the NetCDF file written.
    """
    # Period bounds.
    datedeb = datetime.strptime(deb,"%Y-%m-%d")
    datefin = datetime.strptime(fin,"%Y-%m-%d")

    ddirin = os.path.join(ddirDB, types, sat, prod, res)
    os.chdir(ddirin)

    geodf = gpd.GeoDataFrame.from_file(fshape)

    # The second column of the shapefile holds the district/area labels.
    nbdist = len(geodf[geodf.columns[1]]) # number of districts/areas
    # Label list stored in the output file as an attribute of index_dist.
    listdist = '//'.join(geodf[geodf.columns[1]].tolist())

    # ---------------- output NetCDF creation ----------------
    output = os.path.join(ddirout, varname + '_' + os.path.basename(ncfile)[:-5] + '_' + niveau + '_' + pays + '_' + deb.replace('-','') + fin.replace('-','')  + '_' + res_temp + '.nc')
    ncnew = Dataset(output, 'w')
    nc = None
    try:
        # Dimensions.
        ncnew.createDimension('time', None)
        ncnew.createDimension('index_dist', nbdist)
        # Variables.
        tp = ncnew.createVariable('time','f8',('time',))
        index = ncnew.createVariable('index_dist','f4',('index_dist',))
        index[:] = range(nbdist)
        nbpx = ncnew.createVariable('count','f4',('time','index_dist'))
        vmin = ncnew.createVariable('min','f4',('time','index_dist'))
        vmax = ncnew.createVariable('max','f4',('time','index_dist'))
        vmean = ncnew.createVariable('mean','f4',('time','index_dist'))
        vstd = ncnew.createVariable('std','f4',('time','index_dist'))
        # Attributes.
        ncnew.Convention = 'CF-1.5'
        # Must be a single string: a stray comma here used to store a tuple.
        ncnew.description = 'moyenne districts pour la variable : ' + varname
        ncnew.history = 'Created ' + time.ctime(time.time())
        ncnew.source = ' '
        index.standard_name = listdist
        tp.standard_name = 'time'
        tp.calendar = 'gregorian'

        nc = Dataset(ncfile, 'r')
        var_in = nc.variables[varname]
        dates = nc.variables['time']

        # Period bounds expressed in the time unit of the input file.
        ndatedeb = date2num(datedeb,dates.units)
        ndatefin = date2num(datefin,dates.units)
        # Shift the bounds according to the hour of the first record
        # (0h, 3h, 6h, ...).
        # NOTE(review): "%H" is zero-padded ("00".."23"), so this test is
        # always true and data stamped at 0h is shifted by 24 as well. Left
        # unchanged to keep the selected time window identical -- confirm the
        # intent. The shift also presumes the time unit is hours.
        first_hour = num2date(dates[0],dates.units).strftime("%H")
        if first_hour != "0":
            shift = 24 - int(first_hour)
            ndatedeb += shift
            ndatefin += shift

        # Indices of the records closest to the bounds; the record closest
        # to the end bound is itself excluded (hence the -1).
        time_values = dates[:]
        iddeb = np.abs(time_values - ndatedeb).argmin()
        idfin = np.abs(time_values - ndatefin).argmin() - 1
        serie_dates = dates[iddeb:idfin+1]

        if level == -1:
            var = np.array(var_in[iddeb:idfin+1,...])
        else:
            var = np.array(var_in[iddeb:idfin+1,level,...])
        # Replace fill values: -999 for 'toms', NaN otherwise.
        if sat == 'toms':
            var[var == var_in._FillValue] = -999
        else:
            var[var == var_in._FillValue] = np.nan
        if "scale_factor" in var_in.ncattrs():
            # NOTE(review): this is (value - add_offset) * scale_factor, not
            # the CF form value * scale_factor + add_offset -- confirm it
            # matches how these files were packed.
            var = (var - var_in.add_offset) * var_in.scale_factor

        # Grid geometry: upper-left corner and mean spacing.
        lat = nc.variables['latitude'][:]
        lon = nc.variables['longitude'][:]
        xo = min(lon)
        yo = max(lat)
        resx = np.abs(np.mean(np.diff(lon)))
        resy = np.abs(np.mean(np.diff(lat)))
        # NOTE(review): pixel size is hard-coded to 0.01 here although
        # resx/resy are computed above -- confirm against calc_stats.
        transform = [xo, 0.01, 0.0, yo, 0.0, -0.01]

        # Split the time series into about 8 chunks (at least one time step
        # each) and process them in parallel.
        idt = max(len(serie_dates) // 8, 1)
        ndt = range(0, len(serie_dates), idt)
        nb_mat_in = [var[ix:ix + idt, ...] for ix in ndt]
        results = Parallel(n_jobs=-1)(
            delayed(calc_stats)(resx, resy, geodf, nbdist, transform, temps_x)
            for temps_x in nb_mat_in)

        # Each result is indexed as 0=count, 1=max, 2=mean, 3=median (not
        # used), 4=min, 5=std; chunks are stitched back along the time axis.
        nbpx_all = np.concatenate([r[0] for r in results], axis=0)
        vmax_all = np.concatenate([r[1] for r in results], axis=0)
        vmean_all = np.concatenate([r[2] for r in results], axis=0)
        vmin_all = np.concatenate([r[4] for r in results], axis=0)
        vstd_all = np.concatenate([r[5] for r in results], axis=0)

        # Write everything to the output file; the time axis goes first so
        # the unlimited dimension has its final length.
        tp[:] = np.append(tp[:],serie_dates)
        tp.units = dates.units
        nbpx[:] = nbpx_all
        vmax[:] = vmax_all
        vmean[:] = vmean_all
        vmin[:] = vmin_all
        vstd[:] = vstd_all

        # Same statistics as DataFrames for the caller.
        date_index = [num2date(d,dates.units).date() for d in serie_dates]
        columns_name = geodf.name.values.tolist()
        stats = {"nbpx":nbpx_all,"vmax":vmax_all,"vmean":vmean_all,"vmin":vmin_all,"vstd":vstd_all}
        list_df = {
            key: pd.DataFrame(values, index=date_index, columns=columns_name).round(4)
            for key, values in stats.items()
        }
    finally:
        # Always release both files, even when processing fails midway.
        if nc is not None:
            nc.close()
        ncnew.close()
    return list_df, output