Example #1
import os

import numpy as np
import matplotlib.pyplot as plt

import ml_utilities  # author's helper module, assumed importable

# filename = r'D:\vlachos\Documents\KV MSc thesis\Data\Satellite\Gulf Stream_1\npz_files_sral_slstr\S3A_2018-05-10 02_08_39__2018-05-10 02_05_24.npz'
models_path = r"C:\Users\vlachos\Desktop\SSTlevel4"
filespath = r'C:\Users\vlachos\Desktop\npz_files_sral_sstL4_1DCNN_real'
npz_files = os.listdir(filespath)

# Load npz files. The encoding argument is needed only if the npz files were
# saved with Python 2.x and are loaded with Python 3.x.
# Calculate the maximum distance vector size
d = []
fff = []
font = {'size': 18}
plt.rc('font', **font)
for filename in npz_files:
    plt.close('all')
    matrix, distance, _ = ml_utilities.feature_matrix_from_npz(
        os.path.join(filespath, filename))
    if distance.size < 2000:
        continue
    # Impute NaNs
    matrix, _ = ml_utilities.imputate_nans_feature_matrix(matrix,
                                                          method='Interpolate',
                                                          drop_nan=False)

    label = np.array(matrix['SSHA_105'])
    #    label = ml_utilities.matrix_min_max_rescale(label, 1, -1, axis=0)
    matrix = matrix.drop(columns=['SSHA_35', 'SSHA_71', 'SSHA_105'])
    #    matrix = ml_utilities.matrix_min_max_rescale(matrix, 0.5, -0.5, axis=0)
    matrix = np.array(matrix)
    matrix = ml_utilities.my_standardizer(matrix, matrix)  # standardize
    label = ml_utilities.my_standardizer(np.expand_dims(label, axis=1),
                                         np.expand_dims(label, axis=1))
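# my_standardizer lives in the author's private ml_utilities module and is not
# shown in these snippets. Judging from the calls above (data standardized
# against a reference set, here the data itself), a minimal sketch could look
# like the following; this is an assumption, not the author's implementation:
def my_standardizer_sketch(data, reference):
    # z-score `data` with the column-wise mean/std of `reference`
    mu = np.nanmean(reference, axis=0)
    sigma = np.nanstd(reference, axis=0)
    sigma = np.where(sigma == 0, 1.0, sigma)  # guard constant columns
    return (data - mu) / sigma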
Example #2
import os
import pickle

import numpy as np
import matplotlib.pyplot as plt

import ml_utilities  # author's helper module, assumed importable

path_npzfiles = r'D:\vlachos\Documents\KV MSc thesis\Data\Satellite\Gulf Stream_1\npz_files_sral_slstr'
path_models = r'C:\Users\vlachos\Desktop\models'  # assumed location of the saved .sav models
npz_files = os.listdir(path_npzfiles)
npz_files = [item for item in npz_files if 'npz' in item]

model_name = 'S3B_2019-03-28 14_55_41__2019-03-28 01_16_43_RF_slstr_model.sav'

for npz in npz_files:
    # npz[4:14] is the date stamp, e.g. '2019-03-28' in the model name above;
    # skip every npz file whose date does not match the model's.
    if npz[4:14] != model_name[4:14]:
        continue
    
    # Read model
    model = pickle.load(open(os.path.join(path_models, model_name), 'rb'))
    # Read npz file
    matrix, distance, _ = ml_utilities.feature_matrix_from_npz(os.path.join(path_npzfiles, npz))
    matrix, idx_nan = ml_utilities.imputate_nans_feature_matrix(matrix, method='Interpolate', drop_nan=True)
    
    label = np.array(matrix['SSHA_35'])

    matrix = matrix.drop(columns=['SSHA_35', 'SST_125km', 'SST_95km','SST_75km', 'SST_32km', 'SST_16km', 'SST_12.5km'])
    
    matrix_labels = list(matrix.columns) # keep feature matrix names
    matrix = np.array(matrix)
    
    # Predict
    y_hat = model.predict(matrix)
    
    # PLOT
    font = {'size' : 18}
    plt.rc('font', **font)
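    # The plotting code is cut off here. A hedged sketch of one way the fit
    # could be inspected (assumed; not necessarily the author's figure):
    rmse = np.sqrt(np.mean((y_hat - label) ** 2))
    plt.figure()
    plt.plot(label, label='SSHA_35 (observed)')
    plt.plot(y_hat, label='RF prediction')
    plt.xlabel('Sample index along track')
    plt.ylabel('SSHA')
    plt.legend()
    plt.title('RMSE = {0:.4f}'.format(rmse))
    plt.show()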
Example #3
# =============================================================================
# BEGIN
# =============================================================================
import os
import sys
import datetime as dt

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sklearn.ensemble as skensemble

import ml_utilities  # author's helper modules, assumed importable
import s3utilities

filespath = r'H:\MSc_Thesis_05082019\Data\Satellite\Gulf Stream_1\npz_files_sral_slstr'
npz_files = os.listdir(filespath)
npz_files = [filename for filename in npz_files if '.npz' in filename]
N_npz_files = len(npz_files)
models_path = r'C:\Users\vlachos\Desktop\MLP'

# Derive names of variables
var_to_drop = [
    'SSHA_35', 'SST_125km', 'SST_95km', 'SST_75km', 'SST_32km', 'SST_16km',
    'SST_12.5km'
]
matrix, _, _ = ml_utilities.feature_matrix_from_npz(
    os.path.join(filespath, npz_files[0]))
matrix = matrix.drop(columns=var_to_drop)
matrix_labels = list(matrix.columns)  # keep feature matrix names
del matrix

i = 1
bad = []
RMSE_test = []
RMSE_train = []
matrix = pd.DataFrame(columns=matrix_labels, dtype=np.float32)
label = pd.DataFrame(dtype=np.float32)

for filename in npz_files:
    try:
        # Progress
        sys.stdout.write("\rProgress ... {0:.2f} %".format(
            (i / N_npz_files) * 100))
        sys.stdout.flush()

        # Load the npz file. np.load's encoding argument is needed only if the
        # npz files were saved with Python 2.x and are loaded with Python 3.x:
        #     dat = np.load(os.path.join(filespath, filename), encoding='latin1', allow_pickle=True)
        #     dat = dat['arr_0'].item()      # retrieve the saved dictionary
        #     distance = dat['Distance']     # keep distance in a variable
        #     del dat['Metadata'], dat['Distance']
        fullpath = os.path.join(filespath, filename)
        data_temp, distance, _ = ml_utilities.feature_matrix_from_npz(
            fullpath)

        # =============================================================================
        # MISSING VALUES HANDLING
        # =============================================================================
        if True:  # 1) Imputation of NaNs: interpolate, then drop the remaining NaNs

            # Interpolate the NaN values inside the dataset
            data_temp = data_temp.interpolate(method='akima',
                                              limit=150,
                                              limit_direction='both',
                                              axis=0)
            # Interpolate (actually extrapolate) the values at the edges
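            # The snippet is cut off at this step. interpolate() with
            # limit_direction='both' already pads toward the edges; a common
            # follow-up (assumed, not shown in the original) is a forward and
            # backward fill for anything still missing at the boundaries:
            data_temp = data_temp.ffill(axis=0).bfill(axis=0)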
# skensemble refers to sklearn.ensemble (imported above). `params` is assumed
# to be defined earlier in the full script as the RandomForestRegressor
# hyperparameter dict, e.g. {'n_estimators': 100, 'n_jobs': -1}.
model = skensemble.RandomForestRegressor(**params)
i = 1
bad = []
RMSE_test = []
RMSE_train = []
importance_list = []
for filename in npz_files:
    try:
        plt.close('all')
        # Progress
        sys.stdout.write("\rFiles {0} out of {1}".format(i, N_npz_files))
        sys.stdout.flush()

        fullpath = os.path.join(filespath, filename)
        matrix, distance, metadata = ml_utilities.feature_matrix_from_npz(
            fullpath)

        # =============================================================================
        # MISSING VALUES IMPUTATION
        # =============================================================================
        matrix, _ = ml_utilities.imputate_nans_feature_matrix(
            matrix, method='Interpolate', drop_nan=True)

        label = np.array(matrix['SSHA_35'])

        # Alternative feature selections that were tried:
        #     matrix = matrix.drop(columns='SSHA_35')
        #     matrix_labels_out = [item for item in matrix.columns.to_list()
        #                          if ('OLCI' not in item) or ('CHL' not in item)]
        #     matrix_labels_out = ['SSHA_35', 'SST_12.5km', 'SST_32km', 'SST_125km']
        matrix_labels_out = [
            'SSHA_35', 'KD490_M07_OLCI_12.5km', 'CHL_OC4ME_OLCI_5km',
            'KD490_M07_OLCI_5km', 'ADG443_NN_OLCI_5km', 'TSM_NN_OLCI_12.5km',
paths = {
    'SRAL': r'C:\Users\vlachos\Desktop\SRAL',
    'OLCI': r'C:\Users\vlachos\Desktop\OLCI',
    'SLSTR': r'C:\Users\vlachos\Desktop\SLSTR'
}

# Read npz list
path = r'C:\Users\vlachos\Desktop\npz_files_sral_slstr_olci_RF'
npz_files = os.listdir(path)
npz_files = [item for item in npz_files if 'npz' in item]

# Correct for the missing OLCI name in the npz
olci_dates = []
for filename in npz_files:
    _, _, metadata = ml_utilities.feature_matrix_from_npz(
        os.path.join(path, filename))
    olci_temp = metadata.split(' ')[3]
    olci_temp = dt.datetime.strptime(olci_temp, '%Y%m%dT%H%M%S')
    olci_temp = dt.datetime.strftime(olci_temp, '%Y-%m-%d %H_%M_%S')
    olci_dates.append(olci_temp)

npz_files = [
    item[:-4] + '__' + item_2 + '.npz'
    for (item, item_2) in zip(npz_files, olci_dates)
]
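# Worked example of the renaming above: pairing an npz name with its parsed
# OLCI time stamp reproduces the double date-stamped names seen in Example #1.
name = 'S3A_2018-05-10 02_08_39.npz'
stamp = '2018-05-10 02_05_24'
assert name[:-4] + '__' + stamp + '.npz' == 'S3A_2018-05-10 02_08_39__2018-05-10 02_05_24.npz'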

# Folder names with the common dates
common_date = s3utilities.find_common_dates(paths)
common_date = ml_utilities.products_from_npz(common_date, npz_files)

# Remove duplicates
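# The snippet ends at this comment. A minimal, order-preserving way to drop
# duplicates (assumed; common_date is taken to be a flat list of strings):
common_date = list(dict.fromkeys(common_date))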