def organize_geo(self):

    domain_collection_list = {}
    for domain_name_step in self.domain_name_list:

        file_path_collections = self.define_domain_collection(domain_name_step)

        if not os.path.exists(file_path_collections):

            domain_info = self.get_domain_info(domain_name_step)
            domain_drainage_area = self.get_domain_drainage_area(domain_name_step)

            domain_collection = {**domain_drainage_area, **domain_info}

            folder_name_collections, file_name_collections = os.path.split(file_path_collections)
            make_folder(folder_name_collections)

            write_obj(file_path_collections, domain_collection)
        else:
            domain_collection = read_obj(file_path_collections)

        domain_collection_list[domain_name_step] = domain_collection

    return domain_collection_list
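
# Note: read_obj/write_obj are the caching helpers used above; they are defined elsewhere in
# the package and are not shown in this excerpt. A minimal sketch of the assumed behaviour
# (plain pickle serialization) follows; the *_sketch names are illustrative only and the real
# helpers may differ.
import pickle


def write_obj_sketch(file_path, obj):
    # Serialize an object (e.g. the domain collection dictionary) to disk
    with open(file_path, 'wb') as file_handle:
        pickle.dump(obj, file_handle, protocol=pickle.HIGHEST_PROTOCOL)


def read_obj_sketch(file_path):
    # Restore a previously cached object from disk
    with open(file_path, 'rb') as file_handle:
        return pickle.load(file_handle)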
def organize_discharge(self):

    time = self.time_run
    geo_data_collection = self.geo_data_collection

    logging.info(' --> Organize discharge datasets [' + str(time) + '] ... ')

    file_path_discharge = self.file_path_discharge
    file_time_discharge = self.file_time_discharge

    section_collection = {}
    for domain_name_step in self.domain_name_list:

        logging.info(' ---> Domain ' + domain_name_step + ' ... ')

        file_path_discharge = self.file_path_discharge[domain_name_step]
        file_path_ancillary = self.file_path_ancillary[domain_name_step]

        # Remove the ancillary file if the cleaning flag is active
        if self.flag_cleaning_dynamic_ancillary:
            if os.path.exists(file_path_ancillary):
                os.remove(file_path_ancillary)

        if not os.path.exists(file_path_ancillary):

            domain_discharge_index = geo_data_collection[domain_name_step][self.domain_discharge_index_tag]
            domain_grid_rows = geo_data_collection[domain_name_step][self.domain_grid_x_tag].shape[0]
            domain_grid_cols = geo_data_collection[domain_name_step][self.domain_grid_y_tag].shape[1]
            domain_section_db = geo_data_collection[domain_name_step][self.domain_sections_db_tag]

            section_workspace = {}
            for section_key, section_data in domain_section_db.items():

                section_description = section_data['description']
                section_name = section_data['name']
                section_idx = section_data['idx']
                section_discharge_default = section_data['discharge_default']
                section_id = self.format_group.format(section_data['group']['id'])

                logging.info(' ----> Section ' + section_description + ' ... ')

                section_file_path_list = file_path_discharge[section_id]

                if section_file_path_list:

                    # Collect the discharge time-series of each file in a frame
                    # indexed by the expected discharge times
                    section_dframe = pd.DataFrame(index=file_time_discharge)
                    for section_file_path_step in section_file_path_list:

                        section_folder_name_step, section_file_name_step = os.path.split(section_file_path_step)

                        section_file_ts_start, section_file_ts_end, \
                            section_file_mask, section_file_ens = parse_file_parts(section_file_name_step)

                        section_file_tag = create_file_tag(
                            section_file_ts_start, section_file_ts_end, section_file_ens)

                        section_ts = read_file_hydro(section_name, section_file_path_step)
                        section_dframe[section_file_tag] = section_ts

                    section_workspace[section_description] = section_dframe

                    logging.info(' ----> Section ' + section_description + ' ... DONE')
                else:
                    logging.info(' ----> Section ' + section_description +
                                 ' ... SKIPPED. Datasets are empty')
                    section_workspace[section_description] = None

            folder_name_ancillary, file_name_ancillary = os.path.split(file_path_ancillary)
            make_folder(folder_name_ancillary)

            # Save the workspace only if every section has datasets
            flag_save_obj = all(section_data is not None for section_data in section_workspace.values())

            if flag_save_obj:
                write_obj(file_path_ancillary, section_workspace)
                logging.info(' ---> Domain ' + domain_name_step + ' ... DONE')
            else:
                logging.info(' ---> Domain ' + domain_name_step +
                             ' ... SKIPPED. All or some datasets are empty')

        else:
            section_workspace = read_obj(file_path_ancillary)
            logging.info(' ---> Domain ' + domain_name_step + ' ... SKIPPED. Data previously computed')

        section_collection[domain_name_step] = section_workspace

    logging.info(' --> Organize discharge datasets [' + str(time) + '] ... DONE')

    return section_collection
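
# Illustration of the per-section collection pattern used in organize_discharge: an empty
# frame is created on the expected discharge time axis and each parsed forecast file becomes
# a column, so time steps without data are left as NaN. All names, times and values below are
# illustrative only and are not taken from the real discharge files.
import numpy as np
import pandas as pd


def _example_section_dframe():
    expected_times = pd.date_range('2021-01-01 00:00', periods=6, freq='H')
    section_dframe = pd.DataFrame(index=expected_times)
    # Two hypothetical ensemble members covering only part of the expected axis
    section_dframe['ens_001'] = pd.Series(np.random.rand(4), index=expected_times[:4])
    section_dframe['ens_002'] = pd.Series(np.random.rand(4), index=expected_times[:4])
    return section_dframe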
def save_analysis(self, group_analysis_sm, group_analysis_rain_map, group_analysis_rain_point, group_soilslip):

    logging.info(' ----> Save analysis [' + str(self.time_step) + '] ... ')

    time_step = self.time_step
    geo_data_alert_area = self.geo_data_alert_area
    group_data_alert_area = self.structure_data_group

    for (group_data_key, group_data_items), geo_data_dframe in zip(
            group_data_alert_area.items(), geo_data_alert_area.values()):

        logging.info(' -----> Alert Area ' + group_data_key + ' ... ')

        file_path_dest = collect_file_list(
            time_step, self.folder_name_dest_indicators_raw, self.file_name_dest_indicators_raw,
            self.alg_template_tags, alert_area_name=group_data_key)[0]

        if not os.path.exists(file_path_dest):

            group_soilslip_select = group_soilslip[group_data_key]

            # Select the soil moisture, rain map and rain point analyses (if available)
            if group_analysis_sm[group_data_key] is not None:
                group_analysis_sm_select = group_analysis_sm[group_data_key][self.template_struct_obj]
            else:
                group_analysis_sm_select = None

            if group_analysis_rain_map[group_data_key] is not None:
                group_analysis_rain_map_select = group_analysis_rain_map[group_data_key][self.template_struct_obj]
            else:
                group_analysis_rain_map_select = None

            if group_analysis_rain_point[group_data_key] is not None:
                group_analysis_rain_point_select = group_analysis_rain_point[group_data_key]
            else:
                group_analysis_rain_point_select = None

            # Select the soil slip events for the current time step (if available)
            if group_analysis_sm_select is not None:
                if time_step in list(group_soilslip_select.index):
                    soilslip_select = group_soilslip_select.loc[time_step.strftime('%Y-%m-%d 00:00:00')]
                else:
                    soilslip_select = None
            else:
                soilslip_select = None

            # Merge the analyses in a common dictionary (only if all datasets are defined)
            if (group_analysis_sm_select is not None) and \
                    (group_analysis_rain_map_select is not None) and \
                    (group_analysis_rain_point_select is not None):

                analysis_sm = self.unpack_analysis(group_analysis_sm_select)
                analysis_rain_map = self.unpack_analysis(group_analysis_rain_map_select)
                analysis_rain_point = self.unpack_analysis(group_analysis_rain_point_select)

                analysis_data = {**analysis_sm, **analysis_rain_map, **analysis_rain_point}
            else:
                analysis_data = None

                if (group_analysis_sm_select is None) and \
                        (group_analysis_rain_map_select is not None) and \
                        (group_analysis_rain_point_select is not None):
                    logging.warning(' ===> SoilMoisture datasets are undefined')
                elif (group_analysis_rain_map_select is None) and (group_analysis_sm_select is not None):
                    logging.warning(' ===> Rain map datasets are undefined')
                elif (group_analysis_rain_point_select is None) and (group_analysis_sm_select is not None):
                    logging.warning(' ===> Rain point datasets are undefined')
                else:
                    logging.warning(' ===> Rain and SoilMoisture datasets are undefined')

            if soilslip_select is not None:
                analysis_event = self.unpack_analysis(
                    soilslip_select, ['event_n', 'event_threshold', 'event_index'])
            else:
                analysis_event = self.analysis_event_undefined
                logging.warning(' ===> SoilSlip datasets are null. No events reported')

            if (analysis_data is not None) and (analysis_event is not None):

                analysis_obj = {self.flag_indicators_time: time_step,
                                self.flag_indicators_data: analysis_data,
                                self.flag_indicators_event: analysis_event}

                folder_name_dest, file_name_dest = os.path.split(file_path_dest)
                make_folder(folder_name_dest)

                write_obj(file_path_dest, analysis_obj)

                logging.info(' -----> Alert Area ' + group_data_key + ' ... DONE')
            else:
                logging.info(' -----> Alert Area ' + group_data_key +
                             ' ... SKIPPED. Some datasets are undefined')
        else:
            logging.info(' -----> Alert Area ' + group_data_key +
                         ' ... SKIPPED. Analysis file created previously')

    logging.info(' ----> Save analysis [' + str(self.time_step) + '] ... DONE')
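
# Note: unpack_analysis is a helper of the class and is not shown in this excerpt. A minimal
# sketch of the assumed behaviour follows: it flattens a dict-like record (or pandas Series)
# into a plain {name: value} dictionary, optionally restricted to a list of fields. The name
# unpack_analysis_sketch is illustrative and the real method may differ.
def unpack_analysis_sketch(data, fields=None):
    keys = fields if fields is not None else list(data.keys())
    return {key: data[key] for key in keys}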
def organize_data(self):

    # Starting info
    logging.info(' ----> Organize soil slips point information ... ')

    geo_point_db = self.dset_geo_point
    time_point_expected = self.dset_time_point
    file_path_dst = self.file_path_dst

    if self.flag_geo_updating:
        if os.path.exists(file_path_dst):
            os.remove(file_path_dst)

    if not os.path.exists(file_path_dst):

        soil_slip_collections = {}
        for group_key, group_data in self.structure_group_data.items():

            group_selection = group_data[self.structure_group_tag_name]
            group_threshold = group_data[self.structure_group_tag_threshold]
            group_index = group_data[self.structure_group_tag_index]

            # Select the soil slip points belonging to the current alert area
            geo_point_selection = geo_point_db.loc[
                geo_point_db[self.column_db_tag_alert_area] == group_selection]

            # geo_point_selection = geo_point_selection.reset_index()
            # geo_point_selection = geo_point_selection.set_index(self.column_db_tag_time)

            time_point_selection = pd.DatetimeIndex(
                geo_point_selection[self.column_db_tag_time].values).unique().sort_values()

            soil_slip_n = []
            soil_slip_features = []
            soil_slip_threshold = []
            soil_slip_index = []
            for time_point_step in time_point_selection:

                time_str_step = time_point_step.strftime('%Y-%m-%d')
                geo_point_step = geo_point_selection.loc[
                    geo_point_selection[self.column_db_tag_time] == time_str_step]

                # Classify the daily event count and map it to the severity index
                geo_point_threshold = find_category(geo_point_step.shape[0], group_threshold)
                geo_point_index = find_value(geo_point_threshold, group_index)

                soil_slip_n.append(geo_point_step.shape[0])
                soil_slip_features.append(geo_point_step)
                soil_slip_threshold.append(geo_point_threshold)
                soil_slip_index.append(geo_point_index)

            data_soilslip = {'event_n': soil_slip_n, 'event_threshold': soil_slip_threshold,
                             'event_index': soil_slip_index, 'event_features': soil_slip_features}
            dframe_soilslip = pd.DataFrame(data_soilslip, index=time_point_selection)

            soil_slip_collections[group_key] = dframe_soilslip

        # Write soil slips collections to disk
        folder_name_dst, file_name_dst = os.path.split(file_path_dst)
        make_folder(folder_name_dst)
        write_obj(file_path_dst, soil_slip_collections)

        # Ending info
        logging.info(' ----> Organize soil slips point information ... DONE')

    else:
        # Read soil slips collections from disk
        soil_slip_collections = read_obj(file_path_dst)
        logging.info(' ----> Organize soil slips point information ... LOADED. '
                     'Datasets were previously computed.')

    return soil_slip_collections
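
# Note: find_category and find_value are defined elsewhere. A minimal sketch of the assumed
# behaviour follows, where group thresholds are {class_name: [count_min, count_max]} ranges on
# the daily event count and group indices are {class_name: severity_value}. Both functions and
# the data layout are assumptions for illustration; the real helpers may differ.
def find_category_sketch(event_n, thresholds):
    # Return the threshold class whose count range contains the number of events
    for class_name, (count_min, count_max) in thresholds.items():
        if count_min <= event_n <= count_max:
            return class_name
    return None


def find_value_sketch(class_name, indices):
    # Map the threshold class to its severity index (None if the class is unknown)
    return indices.get(class_name)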
def organize_dynamic_data(self):

    time_str = self.time_str
    time_period = self.time_period

    geo_da_dst = self.geo_da_dst
    src_dict = self.src_dict

    var_name_obj = self.var_name_obj
    file_path_obj_src = self.file_path_obj_src
    file_path_obj_anc = self.file_path_obj_anc

    flag_cleaning_ancillary = self.flag_cleaning_dynamic_ancillary

    log_stream.info(' ---> Organize dynamic datasets [' + time_str + '] ... ')

    # Check if the ancillary files already exist
    file_check_list = []
    for file_path_tmp in file_path_obj_anc:
        if os.path.exists(file_path_tmp):
            if flag_cleaning_ancillary:
                os.remove(file_path_tmp)
                file_check_list.append(False)
            else:
                file_check_list.append(True)
        else:
            file_check_list.append(False)
    file_check = all(file_check_list)

    # If statement on ancillary availability
    if not file_check:

        dset_collection = {}
        for var_name in var_name_obj:

            log_stream.info(' ----> Variable "' + var_name + '" ... ')

            var_compute, var_tag, var_scale_factor, var_shift, file_compression, \
                file_geo_reference, file_type, file_coords, file_freq, compute_quality, var_decimal_digits = \
                self.extract_var_fields(src_dict[var_name])

            var_file_path_src = file_path_obj_src[var_name]

            if var_compute:

                var_geo_data = None
                for var_time, var_file_path_in in zip(time_period, var_file_path_src):

                    log_stream.info(' -----> Time "' + var_time.strftime(time_format_algorithm) + '" ... ')

                    if os.path.exists(var_file_path_in):

                        # Copy the source file to a temporary, domain-tagged file
                        var_file_path, var_file_name = os.path.split(var_file_path_in)
                        var_file_name_tmp = self.domain + '_' + var_file_name
                        copyfile(var_file_path_in, os.path.join(var_file_path, var_file_name_tmp))
                        var_file_path_in = os.path.join(var_file_path, var_file_name_tmp)

                        # Unzip the file (if needed)
                        if file_compression:
                            var_file_path_out = self.define_file_name_unzip(var_file_path_in)
                            unzip_filename(var_file_path_in, var_file_path_out)
                        else:
                            var_file_path_out = deepcopy(var_file_path_in)

                        # Read the source datasets according to the file type
                        if file_type == 'binary':

                            if var_geo_data is None:
                                log_stream.info(' ------> Select geo reference for binary datasets ... ')
                                var_geo_name = search_geo_reference(
                                    var_file_path_out, self.static_data_src,
                                    tag_geo_reference=file_geo_reference)
                                log_stream.info(' -------> Geo reference name: ' + var_geo_name)
                                var_geo_data, var_geo_x, var_geo_y, var_geo_attrs = \
                                    self.set_geo_attributes(self.static_data_src[var_geo_name])
                                log_stream.info(' ------> Select geo reference for binary datasets ... DONE')

                            var_da_src = read_data_binary(
                                var_file_path_out, var_geo_x, var_geo_y, var_geo_attrs,
                                var_scale_factor=var_scale_factor, var_time=var_time, var_name=var_name,
                                coord_name_geo_x=self.coord_name_geo_x, coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x, dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d)

                        elif file_type == 'netcdf':

                            if var_geo_data is None:
                                log_stream.info(' ------> Select geo reference for netcdf datasets ... ')
                                var_geo_data, var_geo_x, var_geo_y, var_geo_attrs = \
                                    self.set_geo_attributes(self.static_data_src[file_geo_reference])
                                log_stream.info(' ------> Select geo reference for netcdf datasets ... DONE')

                            var_da_src = read_data_nc(
                                var_file_path_out, var_geo_x, var_geo_y, var_geo_attrs, var_coords=file_coords,
                                var_scale_factor=var_scale_factor, var_name=var_tag, var_time=var_time,
                                coord_name_geo_x=self.coord_name_geo_x, coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x, dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d)

                        elif file_type == 'tiff' or file_type == 'asc':

                            var_da_src = read_data_tiff(
                                var_file_path_out,
                                var_scale_factor=var_scale_factor, var_name=var_tag, var_time=var_time,
                                coord_name_geo_x=self.coord_name_geo_x, coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x, dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d,
                                decimal_round_data=2, decimal_round_geo=7)

                        elif file_type == 'mat':

                            var_da_src = read_data_mat(
                                var_file_path_out,
                                var_scale_factor=var_scale_factor, var_name=var_tag, var_time=var_time,
                                coord_name_geo_x=self.coord_name_geo_x, coord_name_geo_y=self.coord_name_geo_y,
                                coord_name_time=self.coord_name_time,
                                dim_name_geo_x=self.dim_name_geo_x, dim_name_geo_y=self.dim_name_geo_y,
                                dim_name_time=self.dim_name_time,
                                dims_order=self.dims_order_3d,
                                decimal_round_data=2, decimal_round_geo=7,
                                src_dict=src_dict[var_name])

                        else:
                            log_stream.info(' -----> Time "' +
                                            var_time.strftime(time_format_algorithm) + '" ... FAILED')
                            log_stream.error(' ===> File type "' + file_type + '" is not allowed.')
                            raise NotImplementedError('Case not implemented yet')

                        # Delete (if needed) the uncompressed file(s)
                        if var_file_path_in != var_file_path_out:
                            if os.path.exists(var_file_path_out):
                                os.remove(var_file_path_out)

                        # Delete the temporary file
                        os.remove(var_file_path_in)

                        # Apply shift and scale factor to the values
                        if var_shift is not None:
                            var_da_src.values = var_da_src.values + var_shift
                        if var_scale_factor is not None:
                            var_da_src.values = var_da_src.values / var_scale_factor
                        # if var_shift is not None:
                        #     var_da_src = var_da_src + var_shift
                        # if var_scale_factor is not None:
                        #     var_da_src = var_da_src / var_scale_factor

                        # Organize the destination dataset
                        if var_da_src is not None:

                            # Activate (if needed) the interpolation method for the source data-array
                            active_interp = active_var_interp(var_da_src.attrs, geo_da_dst.attrs)

                            # Apply the interpolation method to the source data-array
                            if active_interp:
                                var_da_dst = apply_var_interp(
                                    var_da_src, geo_da_dst,
                                    var_name=var_name,
                                    dim_name_geo_x=self.dim_name_geo_x, dim_name_geo_y=self.dim_name_geo_y,
                                    coord_name_geo_x=self.coord_name_geo_x, coord_name_geo_y=self.coord_name_geo_y,
                                    interp_method=self.interp_method)
                            else:
                                if var_tag != var_name:
                                    var_da_dst = deepcopy(var_da_src)
                                    var_da_dst.name = var_name
                                else:
                                    var_da_dst = deepcopy(var_da_src)

                            # Mask the destination data-array with the nodata values
                            var_nodata = None
                            if 'nodata_value' in list(var_da_dst.attrs.keys()):
                                var_nodata = var_da_dst.attrs['nodata_value']
                            geo_nodata = None
                            if 'nodata_value' in list(geo_da_dst.attrs.keys()):
                                geo_nodata = geo_da_dst.attrs['nodata_value']

                            if (geo_nodata is not None) and (var_nodata is not None):
                                var_da_masked = var_da_dst.where(
                                    (geo_da_dst.values[:, :, np.newaxis] != geo_nodata) &
                                    (var_da_dst != var_nodata))
                            else:
                                var_da_masked = deepcopy(var_da_dst)

                            # Sanity check to remove nans
                            var_da_masked.values = np.where(
                                np.isnan(var_da_masked.values), var_nodata, var_da_masked.values)

                            # Round the values to the expected decimal digits
                            var_da_masked.values = np.round(var_da_masked.values, var_decimal_digits)

                            # Debug plots (disabled)
                            # plt.figure(1); plt.imshow(var_da_dst.values[:, :, 0]); plt.colorbar()
                            # plt.figure(2); plt.imshow(var_da_src.values[:, :, 0]); plt.colorbar()
                            # plt.figure(3); plt.imshow(var_da_masked.values[:, :, 0]); plt.colorbar()
                            # plt.figure(4); plt.imshow(geo_da_dst.values); plt.colorbar()
                            # plt.show()

                            # Organize data in a common dataset
                            var_dset_masked = create_dset(
                                var_data_time=var_time,
                                var_data_name=var_name, var_data_values=var_da_masked, var_data_attrs=None,
                                var_geo_1d=False,
                                file_attributes=geo_da_dst.attrs,
                                var_geo_name='terrain', var_geo_values=geo_da_dst.values,
                                var_geo_x=geo_da_dst['longitude'].values,
                                var_geo_y=geo_da_dst['latitude'].values,
                                var_geo_attrs=None)

                            # Organize data in merged datasets
                            if var_time not in list(dset_collection.keys()):
                                dset_collection[var_time] = var_dset_masked
                            else:
                                var_dset_tmp = deepcopy(dset_collection[var_time])
                                var_dset_tmp = var_dset_tmp.merge(var_dset_masked, join='right')
                                dset_collection[var_time] = var_dset_tmp

                            # Compute SQA if needed
                            if compute_quality:
                                log_stream.info(' ----> Variable "' + var_name + '" ... computing quality ')

                                SQA = compute_SQA(var_da_masked.values, geo_da_dst.values,
                                                  self.SQA_ground_and_snow)

                                SQA_dset = create_dset(
                                    var_data_time=var_time,
                                    var_data_name='SQA', var_data_values=SQA, var_data_attrs=None,
                                    var_geo_1d=False,
                                    file_attributes=geo_da_dst.attrs,
                                    var_geo_name='terrain', var_geo_values=geo_da_dst.values,
                                    var_geo_x=geo_da_dst['longitude'].values,
                                    var_geo_y=geo_da_dst['latitude'].values,
                                    var_geo_attrs=None)

                                var_dset_tmp = deepcopy(dset_collection[var_time])
                                var_dset_tmp = var_dset_tmp.merge(SQA_dset, join='right')
                                dset_collection[var_time] = var_dset_tmp

                            log_stream.info(' -----> Time "' +
                                            var_time.strftime(time_format_algorithm) + '" ... DONE')

                        else:
                            log_stream.info(' -----> Time "' +
                                            var_time.strftime(time_format_algorithm) +
                                            '" ... Datasets are not defined')

                    else:
                        var_da_src = None
                        log_stream.info(' -----> Time "' +
                                        var_time.strftime(time_format_algorithm) +
                                        '" ... Datasets are not defined')

                log_stream.info(' ----> Variable "' + var_name + '" ... DONE')

            else:
                log_stream.info(' ----> Variable "' + var_name +
                                '" ... SKIPPED. Compute flag not activated.')

        # Save ancillary datasets
        for file_path_anc, (dset_time, dset_anc) in zip(file_path_obj_anc, dset_collection.items()):

            folder_name_anc, file_name_anc = os.path.split(file_path_anc)
            if not os.path.exists(folder_name_anc):
                make_folder(folder_name_anc)

            write_obj(file_path_anc, dset_anc)

        log_stream.info(' ---> Organize dynamic datasets [' + time_str + '] ... DONE')

    else:
        log_stream.info(' ---> Organize dynamic datasets [' + time_str +
                        '] ... SKIPPED. All datasets were previously computed')
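
# Standalone illustration of the nodata masking and rounding steps applied in
# organize_dynamic_data, on a tiny synthetic 2D DataArray (the real data-arrays are 3D with a
# time dimension). Values, names and the -9999.0 nodata flag below are illustrative only.
import numpy as np
import xarray as xr


def _example_mask_and_round():
    var_da = xr.DataArray([[1.234, -9999.0], [2.567, 3.891]],
                          dims=('latitude', 'longitude'), attrs={'nodata_value': -9999.0})
    geo_da = xr.DataArray([[100.0, 200.0], [-9999.0, 300.0]],
                          dims=('latitude', 'longitude'), attrs={'nodata_value': -9999.0})
    # Mask cells flagged as nodata either in the variable or in the reference terrain
    var_masked = var_da.where((geo_da != geo_da.attrs['nodata_value']) &
                              (var_da != var_da.attrs['nodata_value']))
    # Put the nodata flag back in place of NaNs and round to the expected decimal digits
    return var_masked.fillna(var_da.attrs['nodata_value']).round(2)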
def organize_data(self):

    # Starting info
    logging.info(' ----> Organize weather stations point information ... ')

    df_point = self.df_geo_point
    max_distance = self.search_radius_degree
    inf_distance = float("inf")

    file_path_point = self.file_path_point_alert_area_tree_dst
    flag_geo_updating = self.flag_geo_updating

    if flag_geo_updating:
        if os.path.exists(file_path_point):
            os.remove(file_path_point)

    if not os.path.exists(file_path_point):

        code_points = df_point[self.point_code_tag].values
        name_points = df_point[self.point_name_tag].values
        lats_points = df_point[self.point_latitude_tag].values
        lons_points = df_point[self.point_longitude_tag].values
        aa_points = df_point[self.point_alert_area_tag].values

        # Build the kd-tree on the station coordinates (lat, lon pairs)
        coord_points = np.dstack([lats_points.ravel(), lons_points.ravel()])[0]
        coord_tree = cKDTree(coord_points)

        weather_stations_collections = {}
        for code_point, aa_point, coord_point in zip(code_points, aa_points, coord_points):

            # Search the neighbouring stations within the search radius
            distances, indices = coord_tree.query(
                coord_point, len(coord_points), p=2, distance_upper_bound=max_distance)

            code_points_neighbors = []
            name_points_neighbors = []
            coord_points_neighbors = []
            lats_points_neighbors = []
            lons_points_neighbors = []
            aa_points_neighbors = []
            for index, distance in zip(indices, distances):

                # Results beyond the search radius are padded with infinite distances
                if distance == inf_distance:
                    break

                coord_points_neighbors.append(coord_points[index])
                code_points_neighbors.append(code_points[index])
                name_points_neighbors.append(name_points[index])
                lons_points_neighbors.append(lons_points[index])
                lats_points_neighbors.append(lats_points[index])
                aa_points_neighbors.append(aa_points[index])

            coord_dict = {self.point_code_tag: code_points_neighbors,
                          self.point_name_tag: name_points_neighbors,
                          self.point_latitude_tag: lats_points_neighbors,
                          self.point_longitude_tag: lons_points_neighbors,
                          self.point_alert_area_tag: aa_points_neighbors}
            coord_dframe = pd.DataFrame(data=coord_dict)

            if aa_point not in list(weather_stations_collections.keys()):
                weather_stations_collections[aa_point] = {}
            weather_stations_collections[aa_point][code_point] = coord_dframe

        folder_name, file_name = os.path.split(file_path_point)
        make_folder(folder_name)
        write_obj(file_path_point, weather_stations_collections)

        # Ending info
        logging.info(' ----> Organize weather stations point information ... DONE')

    else:
        # Ending info
        weather_stations_collections = read_obj(file_path_point)
        logging.info(' ----> Organize weather stations point information ... LOADED. '
                     'Datasets were previously computed.')

    return weather_stations_collections
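
# Standalone illustration of the neighbour search performed in organize_data above: query a
# cKDTree built on (lat, lon) pairs with a search radius and drop the padded "infinite
# distance" entries returned beyond that radius. Coordinates and the 0.1 degree radius are
# illustrative only.
import numpy as np
from scipy.spatial import cKDTree


def _example_neighbour_search():
    coord_points = np.array([[44.40, 8.95], [44.41, 8.93], [45.07, 7.69]])
    coord_tree = cKDTree(coord_points)
    distances, indices = coord_tree.query(coord_points[0], k=len(coord_points),
                                          p=2, distance_upper_bound=0.1)
    # Entries beyond the search radius have an infinite distance and an out-of-range index
    return [int(index) for distance, index in zip(distances, indices) if np.isfinite(distance)]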