def maybe_cloud_after_all(is_land, is_supposed_free, vis):
    '''
    Apply the "unstable albedo test": classify as cloud the previously
    "allegedly cloud-free and snow-free" pixels which are much brighter than expected.

    :param is_land: land mask, forwarded unchanged to land_visible_test
    :param is_supposed_free: boolean array, True where the previous tests found the pixel free of cloud
        and snow (presumably the same shape as vis -- confirm against callers)
    :param vis: visible channel, array of shape (slots, lats, lons) covering at most 10 entire days
    :return: a boolean array of shape (slots, lats, lons) which is True if the pixel passes this visible
        cloud test but not the previous cloud tests
    '''
    # A pixel is kept only if it and its shifted neighbours are all supposed free.
    # NOTE(review): np.roll is called without `axis`, so the shifts are applied to the flattened
    # array and not along the time axis; there is also no +1 shift. The original expression repeated
    # `is_supposed_free` and the +2 shift, which does not change the result of the AND.
    # Confirm that shifts of -1, +1, -2, +2 along axis 0 were not the intent.
    is_supposed_free_for_long = (
        is_supposed_free
        & np.roll(is_supposed_free, -1)
        & np.roll(is_supposed_free, 2)
        & np.roll(is_supposed_free, -2)
    )

    slots, lats, lons = vis.shape
    slot_per_day = get_nb_slots_per_day(read_satellite_step(), 1)
    # floor division: the number of days is used as a dimension by reshape and must stay an integer
    entire_days = slots // slot_per_day
    assert entire_days < 11, 'please do not apply this test on strictly more than 10 days'
    daily_shape = (entire_days, slot_per_day, lats, lons)

    # pixels that are not reliably free get a high value (100) so that they
    # cannot win the minimum taken over the days below
    vis_copy = vis.copy()
    vis_copy[~is_supposed_free_for_long] = 100
    supposed_clear_sky = np.min(
        apply_rolling_on_time(vis_copy, 5, 'mean').reshape(daily_shape), axis=0)
    del vis_copy

    vis = vis.reshape(daily_shape)
    return is_supposed_free & land_visible_test(is_land, vis, supposed_clear_sky).reshape((slots, lats, lons))
def digital_low_cut_filtering_time(array, mask, satellite_step):
    '''
    High-pass filter `array` along its first axis with an 8th-order digital Butterworth filter.

    :param array: data to filter; the filter runs along axis 0
    :param mask: index (for instance a boolean mask) of the output entries forced to 0 after filtering
    :param satellite_step: satellite time step, forwarded to get_nb_slots_per_day
    :return: the filtered array, with the masked entries set to 0
    '''
    # half the number of slots per day; the slot step does not matter here, so 1 is used
    half_slots_per_day = 0.5 * get_nb_slots_per_day(satellite_step, 1)
    # critical frequency handed to the filter design: 20 divided by half the daily slot count
    normalized_cutoff = 20. / half_slots_per_day
    numerator, denominator = signal.butter(
        8, normalized_cutoff, btype='high', analog=False, output='ba')
    filtered = signal.lfilter(numerator, denominator, array, axis=0)
    filtered[mask] = 0
    return filtered
def read_channels(channels, latitudes, longitudes, dfb_beginning, dfb_ending, slot_step=1):
    '''
    Read several channels and stack them in a single array.

    :param channels: names of the channels; each one is used both in the file pattern and as variable name
    :param latitudes: latitudes of the extent to read
    :param longitudes: longitudes of the extent to read
    :param dfb_beginning: first dfb to read (inclusive)
    :param dfb_ending: last dfb to read (inclusive)
    :param slot_step: slot sampling, forwarded to get_nb_slots_per_day and used as step of the slot range
    :return: array of shape (nb_slots * nb_days, len(latitudes), len(longitudes), len(channels))
    '''
    channels_dir, raw_pattern = read_channels_dir_and_pattern()
    satellite = read_satellite_name()
    satellite_step = read_satellite_step()
    nb_slots = get_nb_slots_per_day(satellite_step, slot_step)
    patterns = [
        raw_pattern.replace("{SATELLITE}", satellite).replace('{CHANNEL}', chan)
        for chan in channels
    ]
    nb_days = dfb_ending - dfb_beginning + 1
    content = np.empty(
        (nb_slots * nb_days, len(latitudes), len(longitudes), len(patterns)))
    start = read_start_slot()

    for k, (chan, file_pattern) in enumerate(zip(channels, patterns)):
        dataset = DataSet.read(
            dirs=channels_dir,
            extent={
                'latitude': latitudes,
                'longitude': longitudes,
                'dfb': {
                    'start': dfb_beginning,
                    'end': dfb_ending,
                    "end_inclusive": True,
                    'start_inclusive': True,
                },
                'slot': np.arange(start, start + nb_slots, step=slot_step)
            },
            file_pattern=file_pattern,
            variable_name=chan,
            fill_value=np.nan,
            interpolation='N',
            max_processes=0,
        )
        data = dataset['data'].data
        # the data come day by day: lay the days one after the other along the first axis
        for day in range(nb_days):
            first_slot = day * nb_slots
            content[first_slot:first_slot + nb_slots, :, :, k] = data[day]
    return content
def read_classes(latitudes, longitudes, dfb_beginning, dfb_ending, slot_step=1):
    '''
    Read the 'Classes' variable of the 'classes' index files into a single array.

    :param latitudes: latitudes of the extent to read
    :param longitudes: longitudes of the extent to read
    :param dfb_beginning: first dfb to read (inclusive)
    :param dfb_ending: last dfb to read (inclusive)
    :param slot_step: slot sampling, forwarded to get_nb_slots_per_day and used as step of the slot enumeration
    :return: array of shape (nb_slots * nb_days, len(latitudes), len(longitudes))
    '''
    classes_dir, file_pattern = read_indexes_dir_and_pattern('classes')
    satellite_step = read_satellite_step()
    nb_slots = get_nb_slots_per_day(satellite_step, slot_step)
    nb_days = dfb_ending - dfb_beginning + 1
    content = np.empty((nb_slots * nb_days, len(latitudes), len(longitudes)))

    extent = {
        'latitude': latitudes,
        'longitude': longitudes,
        'dfb': {
            'start': dfb_beginning,
            'end': dfb_ending,
            "end_inclusive": True,
            'start_inclusive': True,
        },
        'slot': {
            "enumeration": np.arange(0, nb_slots, step=slot_step),
            "override_type": "slot"
        },
    }
    dataset = DataSet.read(
        dirs=classes_dir,
        extent=extent,
        file_pattern=file_pattern,
        variable_name='Classes',
        fill_value=np.nan,
        interpolation='N',
        max_processes=0,
    )
    data = dataset['data'].data

    # the data come day by day: lay the days one after the other along the first axis
    for day in range(nb_days):
        first_slot = day * nb_slots
        content[first_slot:first_slot + nb_slots, :, :] = data[day]
    return content
def train_solar_model(zen, classes, features, method_learning, meta_method, pca_components, training_rate):
    '''
    Fit a classifier on a sample of the features, save it to `path_` and print the timings.

    `coef_randomization` and `path_` are not parameters: they are read from the enclosing scope.

    :param zen: array with one entry per slot along its first axis; forwarded to
        mask_temporally_stratified_samples (presumably solar zenith angles -- confirm against callers)
    :param classes: target labels; flattened and sampled with the same selection as the features
    :param features: input features, reshaped by reshape_features before the sampling
    :param method_learning: 'knn', 'bayes', 'mlp' or 'forest'; any other value selects a decision tree
    :param meta_method: 'bagging' wraps the estimator with create_bagging_estimator; other values do nothing
    :param pca_components: number of PCA components to keep, or None to skip the PCA
    :param training_rate: sampling rate forwarded to mask_temporally_stratified_samples
    :return: None
    '''
    t_beg = time()
    # floor division keeps the integer number of days whatever the Python version
    nb_days_training = len(zen) // get_nb_slots_per_day(read_satellite_step(), 1)
    select = mask_temporally_stratified_samples(
        zen, training_rate, coef_randomization * nb_days_training)
    features = reshape_features(features)
    select = select.flatten()

    nb_features = features.shape[-1]
    if pca_components is not None:
        nb_features = pca_components
        features = immediate_pca(features, pca_components)

    # build the training matrix one column at a time, so that only one selected column is duplicated at once
    first_column = features[:, 0][select]
    training = np.empty((len(first_column), nb_features))
    training[:, 0] = first_column
    del first_column
    for k in range(1, nb_features):
        training[:, k] = features[:, k][select]

    if method_learning == 'knn':
        estimator = create_knn()
    elif method_learning == 'bayes':
        estimator = create_naive_bayes()
    elif method_learning == 'mlp':
        estimator = create_neural_network()
    elif method_learning == 'forest':
        estimator = create_random_forest()
    else:
        # any unrecognised method falls back on a decision tree
        estimator = create_decision_tree()
    if meta_method == 'bagging':
        estimator = create_bagging_estimator(estimator)

    model = fit_model(estimator, training, classes.flatten()[select])
    del training
    t_train = time()
    # single-argument print calls give the same output with Python 2 and Python 3
    print('time training: %s' % (t_train - t_beg))
    save(path_, model)
    t_save = time()
    print('time save: %s' % (t_save - t_train))
def prepare_temperature_mask(lats, lons, beginning, ending, slot_step=1):
    '''
    Create a temperature mask which has the same temporal sampling as the spectral channels.

    Every slot receives the forecast whose index is the nearest to the time of the slot expressed
    in hours, shifted by 273.15 (presumably a Celsius to Kelvin conversion -- confirm the unit
    returned by read_temperature_forecast).

    :param lats: latitudes array
    :param lons: longitudes array
    :param beginning: dfb beginning sampling
    :param ending: dfb ending sampling
    :param slot_step: slot sampling chosen by the user (probably 1)
    :return: array of shape (nb_slots, len(lats), len(lons)), nb_slots covering all the days
        from beginning to ending included
    '''
    satellite_step = read_satellite_step()
    nb_slots = get_nb_slots_per_day(satellite_step, slot_step) * (ending - beginning + 1)
    temperatures = read_temperature_forecast(lats, lons, beginning, ending)
    to_return = empty((nb_slots, len(lats), len(lons)))
    for slot in range(nb_slots):
        # explicit float division: with integer operands, Python 2 would floor the quotient
        # before 0.5 is added and the index would never be rounded up to the nearest forecast
        hours_elapsed = satellite_step * slot_step * slot / 60.
        try:
            forecast = temperatures[int(0.5 + hours_elapsed)]
        except IndexError:
            # rounding up went past the last forecast: fall back on the previous one
            forecast = temperatures[int(hours_elapsed)]
        to_return[slot] = forecast + 273.15
    return to_return
def read_labels(label_type, lat_beginning, lat_ending, lon_beginning, lon_ending, dfb_beginning, dfb_ending,
                slot_step=1, keep_holes=True):
    '''
    Read the label files of a period on a latitude/longitude window.

    This function assumes the labels are "well named": the file name starts with YYYYMMDDHHMMSS
    where SS=0 and 60*HH+MM is a multiple of the satellite time step.
    A slot whose file cannot be read is reported on the standard output and skipped.

    :param label_type: 'CSP' (clear sky probability) or 'CT' (cloud type), case insensitive
    :param lat_beginning: lower latitude of the window
    :param lat_ending: upper latitude of the window
    :param lon_beginning: lower longitude of the window
    :param lon_ending: upper longitude of the window
    :param dfb_beginning: first dfb to read (inclusive)
    :param dfb_ending: last dfb to read (inclusive)
    :param slot_step: slot sampling
    :param keep_holes: if True the result has one entry per slot of the period and the slots which
        could not be read are filled with -10; if False only the slots which were read are returned
    :return: (labels, selected_slots) where selected_slots lists the indexes of the slots which were read
    :raises ValueError: if no label grid is known for the current satellite
    '''
    label_type = label_type.upper()
    assert label_type in ['CSP', 'CT'], 'the type of labels you asked for does not exist'
    satellite_step = read_satellite_step()
    nb_slots_per_day = get_nb_slots_per_day(satellite_step, slot_step)
    res = 1 / 33.
    nb_lats = int((lat_ending - lat_beginning) / res)
    nb_lons = int((lon_ending - lon_beginning) / res)
    dir_ = read_labels_dir(label_type)
    var_ = {'CSP': 'clear_sky_probability', 'CT': 'cloud_type'}[label_type]

    # per satellite: minimal longitude and maximal latitude of the label grid, and file suffix
    label_grids = {
        'H08': (115., 60, '_LATLON-HIMAWARI8-AHI.nc'),
        'GOES16': (-10., 60, '_LATLON-GOES16.nc'),
    }
    sat_name = read_satellite_name()
    if sat_name not in label_grids:
        raise ValueError('no label grid is known for the satellite %s' % sat_name)
    lonmin, latmax, suffixe = label_grids[sat_name]

    # the latitude indexes are counted from latmax, hence the swapped beginning and ending
    # NOTE(review): the truncations below can make this window one pixel off (nb_lats, nb_lons);
    # in keep_holes mode such a slot fails to be stored and is reported like a missing file
    lat_window = slice(int((latmax - lat_ending) / res), int((latmax - lat_beginning) / res))
    lon_window = slice(int((lon_beginning - lonmin) / res), int((lon_ending - lonmin) / res))

    selected_slots = []
    if keep_holes:
        shape_ = ((dfb_ending - dfb_beginning + 1) * nb_slots_per_day, nb_lats, nb_lons)
        to_return = -10 * ones(shape_)
    else:
        to_return = []

    for dfb in range(dfb_beginning, dfb_ending + 1):
        pre_pattern = dfb2yyyymmdd(dfb)
        for slot_of_the_day in range(nb_slots_per_day):
            real_slot = slot_of_the_day + (dfb - dfb_beginning) * nb_slots_per_day
            # divmod and %02d give the zero-padded HHMM whatever the division rules of the interpreter
            hours, minutes = divmod(satellite_step * slot_step * slot_of_the_day, 60)
            filename = '%s%02d%02d00-%s%s' % (pre_pattern, hours, minutes, label_type, suffixe)
            try:
                content = Dataset(str(os.path.join(dir_, filename)))
                try:
                    labels = content.variables[var_][lat_window, lon_window]
                finally:
                    # release the file handle: a period can span hundreds of files
                    content.close()
                if keep_holes:
                    to_return[real_slot] = labels
                else:
                    to_return.append(labels)
                # NOTE(review): the slot is recorded in both modes; confirm that callers using
                # keep_holes=True do not rely on an empty list
                selected_slots.append(real_slot)
            except Exception as e:
                # best effort: the data for this slot does not exist or has not been loaded
                print(e)

    if keep_holes:
        # debug output of the values which were read; skipped in list mode, where the boolean
        # indexing of a list would fail or print an arbitrary slot
        print(to_return[to_return != -10])
    return asarray(to_return), selected_slots