def preproc_ocn_wv(ds):
    """
    Read and preprocess one OCN WV dataset for training/usage of the model.

    The acquisition date is parsed from the fifth '-'-separated field of the
    source file name ('%Y%m%dt%H%M%S') and the platform is taken from the
    first three characters of that file name.

    :param ds: xarray.Dataset read from a single file; ``ds.encoding["source"]``
        must hold the path of that file.
    :return: xarray.Dataset with a single-element ``time`` dimension holding
        the model inputs ('cwave', 'dxdt', 'latlonSARcossin', 'todSAR',
        'incidence', 'satellite', cross spectra, ...) plus additional
        variables kept for validation.
    """
    filee = ds.encoding["source"]
    basename = os.path.basename(filee)
    logging.debug('filee %s', basename)
    fdatedt = datetime.datetime.strptime(
        basename.split('-')[4], '%Y%m%dt%H%M%S')
    logging.debug('fdatedt : %s %s', fdatedt, type(fdatedt))
    logging.debug('brut ds: %s', ds)
    try:
        # NOTE: per the original author, the simpler
        # xarray.DataArray([fdatedt], dims=['time']) works with the latest
        # xarray version but not with an old one.
        ds['time'] = xarray.DataArray(np.array([fdatedt]),
                                      dims=['time'],
                                      coords={'time': [0]})
        ds = ds.sortby('time', ascending=True)
    except Exception as err:
        # Best effort only: when this fails the 'time' variable of the output
        # is built from the file name further down.
        logging.debug('could not attach/sort time on input dataset: %s', err)
    newds = xarray.Dataset()

    # Format data for the computation of the 22 CWAVE parameters.
    cspcRe = ds['oswQualityCrossSpectraRe'].values.squeeze()
    cspcIm = ds['oswQualityCrossSpectraIm'].values.squeeze()
    ths1 = np.arange(0, 360, 5)
    ks1 = patch_oswK(ds['oswK'].values.squeeze(),
                     ipfvesion=None,
                     datedtsar=fdatedt)
    if cspcRe.shape == (36, 30):
        # Spectra on the (36, 30) grid are replaced by zero matrices on the
        # (72, 60) grid.
        logging.debug('put zero matrix X spectra')
        cspcRe = np.zeros((72, 60))
        cspcIm = np.zeros((72, 60))
    ta = ds['oswHeading'].values.squeeze()
    incidenceangle = ds['oswIncidenceAngle'].values.squeeze()
    s0 = ds['oswNrcs'].values.squeeze()
    nv = ds['oswNv'].values.squeeze()
    lonSAR = ds['oswLon'].values.squeeze()
    latSAR = ds['oswLat'].values.squeeze()
    satellite = basename[0:3]

    # Only subset_ok, ks1, ths1 and S are used below; the other returned
    # values are ignored.
    (subset_ok, _flag_k_corrupted, _cspc_re_x, _cspc_im_x, _, ks1, ths1,
     _kx, _ky, _cspc_re_x_not_conservativ,
     S) = format_input_CWAVE_vector_from_OCN(cspcRe=cspcRe.T,
                                             cspcIm=cspcIm.T,
                                             ths1=ths1,
                                             ta=ta,
                                             incidenceangle=incidenceangle,
                                             s0=s0,
                                             nv=nv,
                                             ks1=ks1,
                                             datedt=fdatedt,
                                             lonSAR=lonSAR,
                                             latSAR=latSAR,
                                             satellite=satellite)

    varstoadd = [
        'S', 'cwave', 'dxdt', 'latlonSARcossin', 'todSAR', 'incidence',
        'incidence_angle', 'satellite', 'oswQualityCrossSpectraRe',
        'oswQualityCrossSpectraIm'
    ]
    additional_vars_for_validation = [
        'oswLon', 'oswLat', 'oswLandFlag', 'oswIncidenceAngle',
        'oswWindSpeed', 'platformName', 'nrcs', 'nv', 'heading', 'oswK',
        'oswNrcs'
    ]
    varstoadd += additional_vars_for_validation
    logging.debug('varstoadd : %s', varstoadd)

    if 'time' in ds:
        newds['time'] = ds['time']
    else:
        newds['time'] = xarray.DataArray(np.array([fdatedt]),
                                         dims=['time'],
                                         coords={'time': [0]})

    def _kept_coords(name):
        # Coordinates of ds[name] for every dimension except the two grid
        # dimensions 'oswAzSize' and 'oswRaSize'.
        return {
            dim: ds[name].coords[dim].values
            for dim in ds[name].dims
            if dim not in ('oswAzSize', 'oswRaSize')
        }

    for vv in varstoadd:
        logging.debug('start format variable :%s', vv)
        if vv == 'cwave':
            # S (20 values) + sigma0 + normalized variance -> 22 features.
            cwave = np.hstack([S.T, s0.reshape(-1, 1), nv.reshape(-1, 1)])
            cwave = preprocess.conv_cwave(cwave)
            newds[vv] = xarray.DataArray(data=cwave,
                                         dims=['time', 'cwavedim'],
                                         coords={
                                             'time': [fdatedt],
                                             'cwavedim': np.arange(22)
                                         })
        elif vv == 'S':
            # Kept un-normalized to ease the comparison with Justin's files.
            newds[vv] = xarray.DataArray(data=S.T,
                                         dims=['time', 'Sdim'],
                                         coords={
                                             'time': [fdatedt],
                                             'Sdim': np.arange(20)
                                         })
        elif vv == 'dxdt':
            # dx/dt come from the colocation with altimeters (see
            # /home/cercache/users/jstopa/sar/empHs/cwaveV5); constant
            # placeholders are used at this stage.
            dx = np.array([0])
            dt = np.array([1])
            dxdt = np.column_stack([dx, dt])
            newds[vv] = xarray.DataArray(data=dxdt,
                                         dims=['time', 'dxdtdim'],
                                         coords={
                                             'time': [fdatedt],
                                             'dxdtdim': np.arange(2)
                                         })
        elif vv == 'latlonSARcossin':
            # conv_position gets cos and sin of each position.
            latSARcossin = preprocess.conv_position(subset_ok['latSAR'])
            lonSARcossin = preprocess.conv_position(subset_ok['lonSAR'])
            latlonSARcossin = np.hstack([latSARcossin, lonSARcossin])
            newds[vv] = xarray.DataArray(data=latlonSARcossin,
                                         dims=['time', 'latlondim'],
                                         coords={
                                             'time': [fdatedt],
                                             'latlondim': np.arange(4)
                                         })
        elif vv == 'todSAR':
            newds[vv] = xarray.DataArray(data=subset_ok['todSAR'],
                                         dims=['time'],
                                         coords={'time': [fdatedt]})
        elif vv == 'oswK':
            newds[vv] = xarray.DataArray(
                data=ks1.reshape((1, len(ks1))),
                dims=['time', 'oswWavenumberBinSize'],
                coords={
                    'time': [fdatedt],
                    'oswWavenumberBinSize': np.arange(len(ks1))
                })
        elif vv == 'incidence':
            incidence = preprocess.conv_incidence(
                ds['oswIncidenceAngle'].values.squeeze())
            newds[vv] = xarray.DataArray(data=incidence,
                                         dims=['time', 'incdim'],
                                         coords={
                                             'time': [fdatedt],
                                             'incdim': np.arange(2)
                                         })
        elif vv == 'incidence_angle':
            coordi = _kept_coords('oswIncidenceAngle')
            coordi['time'] = [fdatedt]
            incidence = np.array([ds['oswIncidenceAngle'].values.squeeze()])
            newds[vv] = xarray.DataArray(data=incidence,
                                         dims=['time'],
                                         coords=coordi)
        elif vv == 'satellite':
            # 1 when the third character of the file name is 'a', else 0.
            satellite_int = np.array([satellite[2] == 'a']).astype(int)
            newds[vv] = xarray.DataArray(data=satellite_int,
                                         dims=['time'],
                                         coords={'time': [fdatedt]})
        elif vv == 'platformName':
            newds[vv] = xarray.DataArray(data=np.array([satellite]),
                                         dims=['time'],
                                         coords={'time': [fdatedt]})
        elif vv == 'nrcs':
            newds[vv] = xarray.DataArray(data=s0.reshape((1, )),
                                         dims=['time'],
                                         coords={'time': [fdatedt]})
        elif vv == 'heading':
            newds[vv] = xarray.DataArray(
                data=ds['oswHeading'].values.reshape((1, )),
                dims=['time'],
                coords={'time': [fdatedt]})
        elif vv == 'nv':
            newds[vv] = xarray.DataArray(data=nv.reshape((1, )),
                                         dims=['time'],
                                         coords={'time': [fdatedt]})
        elif vv in ('oswQualityCrossSpectraRe', 'oswQualityCrossSpectraIm'):
            # Use the local (possibly zero-filled) spectra, not ds[vv].
            if vv == 'oswQualityCrossSpectraRe':
                datatmp = cspcRe
            else:
                datatmp = cspcIm
            if datatmp.shape == (72, 60):
                # Case of a single spectrum: add the leading time axis.
                datatmp = datatmp.reshape((1, 72, 60))
            newds[vv] = xarray.DataArray(
                data=datatmp,
                dims=['time', 'oswAngularBinSize', 'oswWavenumberBinSize'],
                coords={
                    'time': [fdatedt],
                    'oswAngularBinSize': np.arange(len(ths1)),
                    'oswWavenumberBinSize': np.arange(len(ks1))
                })
        else:
            # Generic variable: copied from the input on a 1-element time
            # axis.
            datatmp = ds[vv].values.squeeze()
            coordi = _kept_coords(vv)
            coordi['time'] = [fdatedt]
            newds[vv] = xarray.DataArray(data=[datatmp],
                                         dims=['time'],
                                         coords=coordi)
    logging.debug('newds: %s', newds)
    return newds
def preproc_ref_input(ds):
    """
    Build the normalized model-input dataset from a reference dataset.

    Two layouts are handled: one exposing 'fileNameL2' and/or 'S' (Stopa
    reference input/output version) and one exposing 'fileNameFull' and
    'py_S' (IFR training dataset version).

    :param ds: xarray.Dataset containing at least 'timeSAR', 'sigma0',
        'normalizedVariance', 'latSAR', 'lonSAR', 'incidenceAngle', 'cspcRe',
        'cspcIm'; ``ds.encoding["source"]`` must hold the source file path.
    :return: xarray.Dataset indexed by ``time`` (values of ds['timeSAR'])
        holding 'timeSAR', 'timeSARdt' and one variable per entry of the
        selected variable list.
    """
    filee = ds.encoding["source"]
    logging.debug('filee %s', os.path.basename(filee))
    fdate = ds['timeSAR'].values
    try:
        fdatedt = netCDF4.num2date(fdate, ds['timeSAR'].units)
    except Exception as err:
        # Best effort: fall back on the raw values when no conversion is
        # possible (e.g. no 'units' attribute on the variable).
        logging.debug('timeSAR kept as is, num2date failed: %s', err)
        fdatedt = fdate
    logging.debug('fdatedt : %s', fdatedt)

    if 'fileNameL2' in ds:  # version stopa ref input/output
        filesL2 = ds['fileNameL2'].values
    else:  # version ifr training dataset
        filesL2 = ds['fileNameFull'].values

    # Dates are re-read from the file names ('%Y%m%dt%H%M%S' in the fifth
    # '-'-separated field).
    real_dates = []
    nb_records = len(ds['timeSAR'])
    for tt in range(nb_records):
        if tt % 10000 == 0:
            print(tt, '/', nb_records)
        fileL2 = filesL2[tt]
        if not isinstance(fileL2, str):
            fileL2 = fileL2.decode()
        real_dates.append(
            datetime.datetime.strptime(
                os.path.basename(fileL2).split('-')[4], '%Y%m%dt%H%M%S'))

    newds = xarray.Dataset()
    s0 = ds['sigma0'].values.squeeze()
    nv = ds['normalizedVariance'].values.squeeze()
    # Column vectors, to allow concatenation with the 2D S variable.
    nv = nv.reshape((len(nv), 1))
    s0 = s0.reshape((len(s0), 1))
    logging.debug('s0: %s', s0.shape)

    common_vars = [
        'cwave', 'dxdt', 'latlonSARcossin', 'todSAR', 'incidence',
        'satellite', 'cspcRe', 'cspcIm'
    ]
    if 'S' in ds:
        varname_20CWAVEparam = 'S'
        varstoadd = common_vars + ['hsNN', 'hsNNSTD']
    else:
        varname_20CWAVEparam = 'py_S'
        varstoadd = common_vars + [
            'hsALT', 'hsALTmin', 'hsALTmax', 'hsWW3', 'hsSM', 'hsWW3v2'
        ]
    logging.debug('ds[S] %s %s', ds[varname_20CWAVEparam].shape,
                  ds[varname_20CWAVEparam])

    newds['timeSAR'] = xarray.DataArray(fdate,
                                        dims=['time'],
                                        coords={'time': fdate})
    newds['timeSARdt'] = xarray.DataArray(real_dates,
                                          dims='time',
                                          coords={'time': fdate})
    # NOTE(review): S1A is encoded as 0 here (from the path of the source
    # file), whereas preproc_ocn_wv and prepare_training_dataset_core encode
    # a file name whose third character is 'a' as 1 -- confirm which
    # convention the model expects.
    if 'S1A' in filee:
        satellite = 0
    else:
        satellite = 1
    logging.debug('newds with only time: %s', newds)

    for vv in varstoadd:
        logging.debug('vv : %s', vv)
        if vv == 'cwave':
            tmptmp = ds[varname_20CWAVEparam].values
            logging.debug('tmptmp : %s %s %s', tmptmp.shape, type(tmptmp),
                          tmptmp.dtype)
            logging.debug('s0 %s', s0.shape)
            logging.debug('nV : %s', nv.shape)
            # CWAVE parameters + sigma0 + normalized variance.
            cwave = np.hstack([tmptmp, s0, nv])
            logging.debug('cwave : %s', cwave.shape)
            cwave = preprocess.conv_cwave(cwave)
            logging.debug('cwave after normalization : %s,%s', cwave.shape,
                          type(cwave))
            newds[vv] = xarray.DataArray(cwave,
                                         coords={
                                             'time': fdate,
                                             'cwavedim': np.arange(22)
                                         },
                                         dims=['time', 'cwavedim'])
        elif vv == 'dxdt':
            # dx/dt come from the colocation with altimeters (see
            # /home/cercache/users/jstopa/sar/empHs/cwaveV5); zeros are used
            # at this stage.
            # NOTE(review): dt is 0 here but 1 in the sibling functions --
            # confirm the intended placeholder.
            dx = np.zeros(len(fdate))
            dt = np.zeros(len(fdate))
            dxdt = np.column_stack([dx, dt])
            logging.debug('dxdt: %s %s', dxdt.shape, dxdt)
            newds[vv] = xarray.DataArray(data=dxdt,
                                         dims=['time', 'dxdtdim'],
                                         coords={
                                             'time': fdate,
                                             'dxdtdim': np.arange(2)
                                         })
        elif vv == 'latlonSARcossin':
            # conv_position gets cos and sin of each position.
            latSARcossin = preprocess.conv_position(ds['latSAR'])
            lonSARcossin = preprocess.conv_position(ds['lonSAR'])
            latlonSARcossin = np.hstack([latSARcossin, lonSARcossin])
            newds[vv] = xarray.DataArray(data=latlonSARcossin,
                                         dims=['time', 'latlondim'],
                                         coords={
                                             'time': fdate,
                                             'latlondim': np.arange(4)
                                         })
        elif vv == 'todSAR':
            todSAR = preprocess.conv_time(fdate)
            logging.debug('todSAR : %s', todSAR)
            newds[vv] = xarray.DataArray(data=todSAR,
                                         dims=['time'],
                                         coords={'time': fdate})
        elif vv == 'incidence':
            incidence = preprocess.conv_incidence(
                ds['incidenceAngle'].values.squeeze())
            newds[vv] = xarray.DataArray(data=incidence,
                                         dims=['time', 'incdim'],
                                         coords={
                                             'time': fdate,
                                             'incdim': np.arange(2)
                                         })
        elif vv == 'satellite':
            # Same flag repeated for every record of the file.
            satellite_int = np.ones(
                (ds['timeSAR'].shape[0], ), dtype=float) * satellite
            logging.debug('satellite_int = %s', satellite_int.shape)
            newds[vv] = xarray.DataArray(data=satellite_int,
                                         dims=['time'],
                                         coords={'time': fdate})
        elif vv in ('cspcRe', 'cspcIm'):
            datatmp = ds[vv].values.squeeze()
            logging.debug('vv: %s shape : %s', vv, datatmp.shape)
            coordi = {
                dim: ds[vv].coords[dim].values
                for dim in ds[vv].dims
                if dim not in ('oswAzSize', 'oswRaSize')
            }
            coordi['time'] = fdate
            newds[vv] = xarray.DataArray(
                data=datatmp,
                dims=['time', 'directions', 'wavenumbers'],
                coords=coordi)
        else:
            # Generic per-record variable copied from the input.
            datatmp = ds[vv].values.squeeze()
            coordi = {
                dim: ds[vv].coords[dim].values
                for dim in ds[vv].dims
                if dim not in ('oswAzSize', 'oswRaSize')
            }
            coordi['time'] = fdate
            newds[vv] = xarray.DataArray(data=datatmp,
                                         dims=['time'],
                                         coords=coordi)
    return newds
def prepare_training_dataset_core(ds_train_raw, validation_dataset=False):
    """
    Normalize a raw training dataset; also used to build the validation
    dataset.

    :param ds_train_raw: xarray.Dataset containing at least 'timeSAR',
        'py_S', 'sigma0', 'normalizedVariance', 'fileNameL2', 'th', 'k',
        'cspcRe', 'cspcIm', 'latSAR', 'lonSAR' and 'incidenceAngle'.
    :param validation_dataset: when True, 'py_cspcImX', 'py_cspcReX' and
        'fileNameL2' are also copied to the output dataset.
    :return: tuple ``(spectrum, ds_training_normalized)`` where ``spectrum``
        stacks the converted real and imaginary cross spectra along axis 3
        and ``ds_training_normalized`` is an xarray.Dataset indexed by the
        acquisition dates parsed from the file names.
    """

    def _as_text(name):
        # File names may be stored as bytes or as str; always work on str so
        # that slicing/indexing yields characters and not integers.
        return name if isinstance(name, str) else name.decode()

    logging.info('Nb pts in dataset: %s', ds_train_raw['timeSAR'].size)
    varstoadd = [
        'S', 'cwave', 'dxdt', 'latlonSARcossin', 'todSAR', 'incidence',
        'satellite', 'oswQualityCrossSpectraRe', 'oswQualityCrossSpectraIm'
    ]
    if validation_dataset:
        varstoadd += ['py_cspcImX', 'py_cspcReX', 'fileNameL2']
    # NOTE(review): assumed to be independent of `validation_dataset` --
    # confirm.
    if 'hsSM' in ds_train_raw:
        varstoadd += ['hsSM']

    S = ds_train_raw['py_S'].values
    s0 = ds_train_raw['sigma0']
    nv = ds_train_raw['normalizedVariance'].values
    ds_training_normalized = xarray.Dataset()
    timeSAR_vals = ds_train_raw['timeSAR'].values  # hours since ....
    # Acquisition dates parsed from the L2 file names; used as the 'time'
    # coordinate of every output variable.
    time_coord = np.array([
        datetime.datetime.strptime(
            os.path.basename(_as_text(fup)).split('-')[4], '%Y%m%dt%H%M%S')
        for fup in ds_train_raw['fileNameL2'].values
    ])
    ths1 = ds_train_raw['th'].values
    ks1 = ds_train_raw['k'].values
    if 'fileNameFull' in ds_train_raw:
        fpaths = ds_train_raw['fileNameFull'].values
    else:
        # The 2019 dataset is a bit different.
        fpaths = ds_train_raw['fileNameL2'].values
    sattelites = np.array(
        [os.path.basename(_as_text(hhy))[0:3] for hhy in fpaths])
    # 1 when the third character of the file name is 'a', else 0.
    satellites_int = np.array([
        threelettersat[2] == 'a' for threelettersat in sattelites
    ]).astype(int)
    cspcRe = ds_train_raw['cspcRe'].values
    cspcIm = ds_train_raw['cspcIm'].values

    def _kept_coords(name):
        # Coordinates of ds_train_raw[name] for every dimension except the
        # two grid dimensions 'oswAzSize' and 'oswRaSize'.
        return {
            dim: ds_train_raw[name].coords[dim].values
            for dim in ds_train_raw[name].dims
            if dim not in ('oswAzSize', 'oswRaSize')
        }

    for vv in varstoadd:
        logging.info('start format variable :%s', vv)
        if vv == 'cwave':
            logging.debug('S %s s0: %s nv: %s', S.shape, s0.shape, nv.shape)
            # S + sigma0 + normalized variance -> 22 features per record.
            cwave = np.vstack([S.T, s0, nv]).T
            logging.debug('cwave vals: %s', cwave.shape)
            cwave = preprocess.conv_cwave(cwave)
            ds_training_normalized[vv] = xarray.DataArray(
                data=cwave,
                dims=['time', 'cwavedim'],
                coords={
                    'time': time_coord,
                    'cwavedim': np.arange(22)
                })
        elif vv in ('fileNameFull', 'fileNameL2'):
            ds_training_normalized[vv] = xarray.DataArray(
                data=fpaths, dims=['time'], coords={'time': time_coord})
        elif vv == 'S':
            # Kept un-normalized to ease the comparison with Justin's files.
            ds_training_normalized[vv] = xarray.DataArray(
                data=S,
                dims=['time', 'Sdim'],
                coords={
                    'time': time_coord,
                    'Sdim': np.arange(20)
                })
        elif vv == 'dxdt':
            # dx/dt come from the colocation with altimeters (see
            # /home/cercache/users/jstopa/sar/empHs/cwaveV5); constant
            # placeholders (dx=0, dt=1) are used at this stage.
            dxdt = np.column_stack([np.zeros(s0.shape), np.ones(s0.shape)])
            ds_training_normalized[vv] = xarray.DataArray(
                data=dxdt,
                dims=['time', 'dxdtdim'],
                coords={
                    'time': time_coord,
                    'dxdtdim': np.arange(2)
                })
        elif vv == 'latlonSARcossin':
            # conv_position gets cos and sin of each position.
            latSARcossin = preprocess.conv_position(
                ds_train_raw['latSAR'].values)
            lonSARcossin = preprocess.conv_position(
                ds_train_raw['lonSAR'].values)
            latlonSARcossin = np.hstack([latSARcossin, lonSARcossin])
            ds_training_normalized[vv] = xarray.DataArray(
                data=latlonSARcossin,
                dims=['time', 'latlondim'],
                coords={
                    'time': time_coord,
                    'latlondim': np.arange(4)
                })
        elif vv == 'todSAR':
            new_dates_dt = np.array(
                [from_np64_to_dt(dt64) for dt64 in timeSAR_vals])
            # see https://github.com/grouny/sar_hs_nn/blob/c05322e6635c6d77409e36537d7c3b58788e7322/sarhspredictor/lib/sarhs/preprocess.py#L11
            unit = "hours since 2010-01-01T00:00:00Z UTC"
            new_dates_num = np.array(
                [netCDF4.date2num(dfg, unit) for dfg in new_dates_dt])
            todSAR = conv_time(new_dates_num)
            ds_training_normalized[vv] = xarray.DataArray(
                data=todSAR, dims=['time'], coords={'time': time_coord})
        elif vv == 'oswK':
            ds_training_normalized[vv] = xarray.DataArray(
                data=ks1,
                dims=['time', 'oswWavenumberBinSize'],
                coords={
                    'time': time_coord,
                    'oswWavenumberBinSize': np.arange(len(ks1))
                })
        elif vv == 'incidence':
            incidence = preprocess.conv_incidence(
                ds_train_raw['incidenceAngle'].values.squeeze())
            ds_training_normalized[vv] = xarray.DataArray(
                data=incidence,
                dims=['time', 'incdim'],
                coords={
                    'time': time_coord,
                    'incdim': np.arange(2)
                })
        elif vv == 'incidence_angle':
            coordi = _kept_coords('incidenceAngle')
            coordi['time'] = time_coord
            incidence = np.array(
                [ds_train_raw['incidenceAngle'].values.squeeze()])
            ds_training_normalized[vv] = xarray.DataArray(data=incidence,
                                                          dims=['time'],
                                                          coords=coordi)
        elif vv == 'satellite':
            ds_training_normalized[vv] = xarray.DataArray(
                data=satellites_int,
                dims=['time'],
                coords={'time': time_coord})
        elif vv == 'platformName':
            ds_training_normalized[vv] = xarray.DataArray(
                data=sattelites, dims=['time'], coords={'time': time_coord})
        elif vv == 'nrcs':
            ds_training_normalized[vv] = xarray.DataArray(
                data=s0, dims=['time'], coords={'time': time_coord})
        elif vv == 'heading':
            ds_training_normalized[vv] = xarray.DataArray(
                data=ds_train_raw['trackAngle'].values,
                dims=['time'],
                coords={'time': time_coord})
        elif vv == 'nv':
            ds_training_normalized[vv] = xarray.DataArray(
                data=nv, dims=['time'], coords={'time': time_coord})
        elif vv in ('oswQualityCrossSpectraRe', 'oswQualityCrossSpectraIm'):
            # Stored under the OCN variable names but read from
            # 'cspcRe'/'cspcIm' of the raw dataset.
            if vv == 'oswQualityCrossSpectraRe':
                datatmp = cspcRe
            else:
                datatmp = cspcIm
            ds_training_normalized[vv] = xarray.DataArray(
                data=datatmp,
                dims=['time', 'oswAngularBinSize', 'oswWavenumberBinSize'],
                coords={
                    'time': time_coord,
                    'oswAngularBinSize': np.arange(len(ths1)),
                    'oswWavenumberBinSize': np.arange(len(ks1))
                })
        elif vv in ('py_cspcImX', 'py_cspcReX'):
            # Dimensions and coordinates are taken over from the raw
            # variable, with 'time' replaced by the parsed dates.
            datatmp = ds_train_raw[vv].values
            coordi = ds_train_raw[vv].coords
            coordi['time'] = time_coord
            dimsadd = ds_train_raw[vv].dims
            ds_training_normalized[vv] = xarray.DataArray(data=datatmp,
                                                          dims=dimsadd,
                                                          coords=coordi)
        else:
            # Generic per-record variable copied from the raw dataset.
            datatmp = ds_train_raw[vv].values.squeeze()
            coordi = _kept_coords(vv)
            coordi['time'] = time_coord
            logging.info('data: %s', datatmp.shape)
            ds_training_normalized[vv] = xarray.DataArray(data=datatmp,
                                                          dims=['time'],
                                                          coords=coordi)
    logging.debug('newds: %s', ds_training_normalized)
    logging.info('SAR data ready to be used')

    spec_re = preprocess.conv_real(cspcRe)
    spec_im = preprocess.conv_imaginary(cspcIm)
    logging.info('re : %s', spec_re.shape)
    logging.info('im : %s', spec_im.shape)
    spectrum = np.stack((spec_re, spec_im), axis=3)
    logging.info('spectrum shape : %s', spectrum.shape)
    return spectrum, ds_training_normalized
def split_aggregated_ds(file_src, file_dest):
    """
    Split an aggregated HDF5 file into one preprocessed group per period.

    The groups are '2015_2016', '2017' and '2018'. Records whose 'py_S',
    'sigma0' or 'normalizedVariance' contain NaN are left out.

    :param file_src: path of the aggregated HDF5 file to read.
    :param file_dest: path of the HDF5 file to create (opened in 'w' mode).
    :return: None
    """
    year_groups = {'2015_2016': [2015, 2016], '2017': [2017], '2018': [2018]}

    # Show the fields available in the source file.
    with h5py.File(file_src, 'r') as src:
        for field in list(src.keys()):
            print(f'{field}: {src[field].dtype}')

    with h5py.File(file_src, 'r') as fs, h5py.File(file_dest, 'w') as fd:
        for group_name, years in year_groups.items():
            grp = fd.create_group(group_name)

            def add_column(name, values, target=grp):
                # Store a per-record vector as an (n, 1) dataset.
                target.create_dataset(name,
                                      data=values,
                                      shape=(values.shape[0], 1))

            # Records belonging to one of the requested years ...
            all_years = fs['year'][:]
            keep = np.zeros_like(all_years, dtype='bool')
            for year in years:
                keep = np.logical_or(all_years == year, keep)
            # ... and free of NaN in the CWAVE inputs.
            keep[np.any(np.isnan(fs['py_S'][:]), axis=1)] = False
            keep[np.isnan(fs['sigma0'][:])] = False
            keep[np.isnan(fs['normalizedVariance'][:])] = False
            num_examples = keep.sum()
            print(f'Found {num_examples} events from years: ', years)

            grp.create_dataset('year', data=fs['year'][keep])

            # 22 CWAVE features: py_S, sigma0 and normalized variance, with
            # extrema removed and standardized with hardcoded mean/vars by
            # conv_cwave (per the original author's comment).
            cwave_raw = np.hstack([
                fs['py_S'][keep, ...],
                fs['sigma0'][keep].reshape(-1, 1),
                fs['normalizedVariance'][keep].reshape(-1, 1),
            ])
            grp.create_dataset('cwave',
                               data=preprocess.conv_cwave(cwave_raw))

            # Colocation distance / time difference.
            dx = preprocess.conv_dx(fs['dx'][keep])
            dt = preprocess.conv_dt(fs['dt'][keep])
            grp.create_dataset('dxdt', data=np.column_stack([dx, dt]))

            # Position, raw and as cos/sin.
            lat = fs['latSAR'][keep]
            lon = fs['lonSAR'][keep]
            lat_cossin = preprocess.conv_position(lat)
            lon_cossin = preprocess.conv_position(lon)
            grp.create_dataset('latlonSAR', data=np.column_stack([lat, lon]))
            grp.create_dataset('latlonSARcossin',
                               data=np.hstack([lat_cossin, lon_cossin]))

            # Acquisition time and its converted counterpart.
            time_sar = fs['timeSAR'][keep]
            tod_sar = preprocess.conv_time(time_sar)
            add_column('timeSAR', time_sar)
            add_column('todSAR', tod_sar)

            # conv_incidence separates the angle into 2 variables.
            grp.create_dataset(
                'incidence',
                data=preprocess.conv_incidence(fs['incidenceAngle'][keep]))

            add_column('satellite', fs['satellite'][keep])

            # Altimeter reference.
            add_column('hsALT', fs['hsALT'][keep])

            # Spectral data: real and imaginary parts stacked on axis 3.
            real_part = preprocess.conv_real(fs['cspcRe'][keep, ...])
            imag_part = preprocess.conv_imaginary(fs['cspcIm'][keep, ...])
            grp.create_dataset('spectrum',
                               data=np.stack((real_part, imag_part), axis=3))
            print(f'Done with {years}')
    print('Done')