Example #1
def create_groundtruth_mask_lake(data_dir, used_band, year_range,
                                 n_data_per_year, day_period,
                                 groundtruth_mask_lake_dir, resize_input):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                list_imgs = os.listdir(current_data_dir)
                band_filename = list(
                    filter(lambda x: used_band in x, list_imgs))[0]
                img = rio.open(os.path.join(current_data_dir, band_filename),
                               'r').read(1)
                if resize_input:
                    img = img[:resize_input, :resize_input]
                groundtruth_mask_lake = mask_lake_img(img, offset=1000)
                cur_mask_data_dir = os.path.join(groundtruth_mask_lake_dir,
                                                 prefix)
                if not os.path.exists(cur_mask_data_dir):
                    os.makedirs(cur_mask_data_dir)
                cache_data(groundtruth_mask_lake,
                           os.path.join(cur_mask_data_dir, 'masked.dat'))
            except Exception:
                print('Band {} not found for {}{:03} in {}.'.format(
                    used_band, year, day, current_data_dir))
Example #2
def create_predict_mask_lake(modis_utils_obj, data_type='test'):
    for idx in range(modis_utils_obj.get_n_tests(data_type)):
        pred = modis_utils_obj.get_inference(data_type, idx)
        pred = modis_utils_obj._preprocess_strategy_context.inverse(pred)
        predict_mask_lake_path = os.path.join(
            modis_utils_obj._predict_mask_lake_dir, data_type, '{}.dat'.format(idx))
        cache_data(mask_lake_img(pred), predict_mask_lake_path)
Example #3
def inference_all(sess, eclm, test_filenames, batch_size=1, keep_rate=1.0, inference_dir=None):
    if inference_dir:
        if not os.path.exists(inference_dir):
            os.makedirs(inference_dir)

    n_tests = len(test_filenames)
    steps = math.ceil(n_tests / batch_size)
    test_losses = []
    VERBOSE_STEP = eclm.verbose_step

    for i in range(steps):
        x, y, pw, y_pw = restore_data_batch(test_filenames[i*batch_size: (i + 1)*batch_size])
        loss, y_hat = sess.run([eclm.loss, eclm.y_hat],
                                feed_dict={eclm.x: x, eclm.y: y, eclm.pw: pw, eclm.y_pw: y_pw,
                                           eclm.is_training: False, eclm.keep_rate: keep_rate})
        if inference_dir:
            cache_data(y_hat, os.path.join(inference_dir, '{}.dat'.format(i)))
        if i % VERBOSE_STEP == 0:
            print('     test_{} - test_loss = {:0.7f}'.format(i, loss))
        test_losses.append(loss)
    test_losses = np.asarray(test_losses)
    avg_test_loss = np.mean(test_losses)
    print('Average Test Loss: {:0.7f}'.format(avg_test_loss))
    if inference_dir:
        cache_data(test_losses, os.path.join(inference_dir, 'loss.dat'))
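A hedged usage sketch for inference_all, assuming a TensorFlow 1.x graph in which eclm exposes the placeholders and tensors referenced above and test_filenames is the list of cached batch files; ECLM(...), model_params and the checkpoint path are hypothetical names, not taken from the examples:

import tensorflow as tf

eclm = ECLM(**model_params)  # hypothetical constructor
saver = tf.train.Saver()
with tf.Session() as sess:
    saver.restore(sess, 'checkpoints/eclm.ckpt')  # hypothetical path
    inference_all(sess, eclm, test_filenames,
                  batch_size=4, inference_dir='inference')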
Example #4
def inference(sess, eclm, test_inputs, pw_inputs, keep_rate=1.0, inference_path=None):
    y_hat = sess.run([eclm.y_hat],
                      feed_dict={eclm.x: test_inputs, eclm.pw: pw_inputs,
                                 eclm.is_training: False, eclm.keep_rate: keep_rate})
    if inference_path:
        cache_data(y_hat, inference_path)
    return y_hat
Example #5
def gen_data_2(data_dir, list_filenames, permanent_water_area, water_threshold,
               patch_size, output_dir):
    for filename in list_filenames:
        output_path = os.path.join(output_dir, filename)
        input_path = os.path.join(data_dir, filename)
        sequence_data = restore_data(input_path)
        res = gen_data_1(sequence_data, permanent_water_area, water_threshold,
                         patch_size)
        cache_data(res, output_path)
Example #6
    def inference(self, data_type='test', idx=0, model=None):
        if self.inference_model is None:
            if model is None:
                model = self._model
            self.inference_model = model
        assert self.inference_model is not None

        output = self.model_utils.inference(self, data_type, idx)
        cache_data(output, self._get_inference_path(data_type, idx))
        return output
Example #7
def main():
    m = n // 4
    if gpu_id < 3:
        list_idx = list(range(gpu_id*m, (gpu_id + 1)*m))
    else:
        list_idx = list(range(gpu_id*m, n))
    res = []
    for idx in list_idx:
        res_idx = predict_multisteps_single_point(input_test[:, :, idx : idx+1], idx, steps, batch_size, gpu_id)
        res.append(np.expand_dims(res_idx, axis=-1))
        with open('tmp/log_{}.txt'.format(gpu_id), 'a') as f:
            f.write(str(idx) + '\n')
    cache_data(res, 'tmp/out_{}.dat'.format(gpu_id))
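The per-GPU result files written above (tmp/out_{gpu_id}.dat) can be merged afterwards. A minimal sketch, assuming four GPUs were used (as the `n // 4` split implies) and that each file holds the list of per-point arrays produced by the loop:

import numpy as np

parts = []
for gpu_id in range(4):
    # Each file holds a list of arrays shaped (..., 1), one per point index.
    parts.extend(restore_data('tmp/out_{}.dat'.format(gpu_id)))
merged = np.concatenate(parts, axis=-1)
cache_data(merged, 'tmp/out_all.dat')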
Example #8
def _generate_on_boundaries(data_paths, target_paths, mask_paths,
                            data_augment_dir, crop_size, n_samples,
                            input_time_steps, output_timesteps):
    n_data = len(data_paths)
    cnt = 0
    half_crop_size = crop_size // 2
    for k in range(n_data):
        data_merged = get_data_merged_from_paths(data_paths[k],
                                                 target_paths[k],
                                                 mask_paths[k])
        target_img = data_merged[0, -2 * output_timesteps, :, :]
        h, w = data_merged.shape[2:4]
        already = set()
        boundaries = find_boundaries(target_img)
        pos = np.where(boundaries)
        n_pos = len(pos[0])
        ii = 1
        while n_pos < n_samples:
            target_img = data_merged[0, -2 * output_timesteps + ii, :, :]
            already = set()
            boundaries = find_boundaries(target_img)
            pos = np.where(boundaries)
            n_pos = len(pos[0])
            ii += 1
        for i in range(n_samples):
            while True:
                offset_x = pos[0][np.random.randint(n_pos)]
                offset_y = pos[1][np.random.randint(n_pos)]
                if offset_x + half_crop_size + 1 < w and offset_y + half_crop_size + 1 < h and \
                  offset_x - half_crop_size >= 0 and offset_y - half_crop_size >= 0 and \
                  (offset_x, offset_y) not in already:
                    break
            already.add((offset_x, offset_y))
            batch = _random_crop_func(data_merged,
                                      offset_x - half_crop_size,
                                      offset_y - half_crop_size,
                                      crop_size=crop_size)
            for j in range(batch.shape[0]):
                cur = batch[j]
                data = np.expand_dims(cur[:input_time_steps], axis=-1)
                target = np.expand_dims(
                    cur[-2 * output_timesteps:-output_timesteps], axis=-1)
                mask = np.expand_dims(cur[-output_timesteps:], axis=-1)
                target_mask = np.concatenate((target, mask), axis=-1)
                if not os.path.isdir(data_augment_dir):
                    os.makedirs(data_augment_dir)
                file_path = os.path.join(data_augment_dir,
                                         '{}.dat'.format(cnt))
                cnt += 1
                cache_data((data, target_mask), file_path)
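_random_crop_func is not shown in these examples. A minimal sketch under the assumption that it crops a crop_size x crop_size window from the spatial axes of the (batch, time, h, w) array at the given top-left offsets:

def _random_crop_func(data_merged, offset_x, offset_y, crop_size):
    # Sketch only: fixed-offset spatial crop; the caller above has already
    # validated that the window lies inside the image.
    return data_merged[:, :,
                       offset_x:offset_x + crop_size,
                       offset_y:offset_y + crop_size]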
Example #9
def _merge_last_data_augment(n_data, data_index_shuffle, list_data,
                             merge_data_dir, batch_size, thread_id, n_threads):
    m = n_data // batch_size
    k = m - m % n_threads
    i = k + thread_id
    merge_data = []
    merge_target_mask = []
    for j in data_index_shuffle[i * batch_size:(i + 1) * batch_size]:
        data = restore_data(list_data[j])
        merge_data.append(np.expand_dims(data[0], axis=0))
        if data[1].shape[0] > 1:
            data_1 = np.expand_dims(data[1], axis=0)
        else:
            data_1 = data[1]
        merge_target_mask.append(data_1)
    if len(merge_data) == batch_size:
        merge_data = np.vstack(merge_data)
        merge_target_mask = np.vstack(merge_target_mask)
        merge_data_path = os.path.join(merge_data_dir, '{}.dat'.format(i))
        cache_data((merge_data, merge_target_mask), merge_data_path)
Example #10
def create_one_only_mask(data_dir, used_band, year_range, n_data_per_year,
                         day_period, mask_data_dir, resize_input):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                data = restore_data(
                    os.path.join(current_data_dir, 'masked.dat'))
                mask = np.ones_like(data)
                if resize_input:
                    mask = mask[:resize_input, :resize_input]
                cur_mask_data_dir = os.path.join(mask_data_dir, prefix)
                if not os.path.exists(cur_mask_data_dir):
                    os.makedirs(cur_mask_data_dir)
                cache_data(mask, os.path.join(cur_mask_data_dir, 'masked.dat'))
            except Exception:
                print('Band {} not found for {}{:03} in {}.'.format(
                    used_band, year, day, current_data_dir))
Example #11
def change_fill_value(data_dir, used_band, year_range, n_data_per_year,
                      day_period, change_fill_value_data_dir):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                list_imgs = os.listdir(current_data_dir)
                band_filename = list(
                    filter(lambda x: used_band in x, list_imgs))[0]
                img = get_im(os.path.join(current_data_dir, band_filename))
                img[img == -3000] = -2001
                cur_dest_dir = os.path.join(change_fill_value_data_dir, prefix)
                if not os.path.exists(cur_dest_dir):
                    os.makedirs(cur_dest_dir)
                cache_data(img,
                           os.path.join(cur_dest_dir, 'change_fill_value.dat'))
            except Exception:
                print('Band {} not found for {}{:03} in {}.'.format(
                    used_band, year, day, current_data_dir))
Example #12
def gen_data_1(sequence_data,
               permanent_water_area,
               water_threshold,
               patch_size=32,
               output_path=None):
    inputs, targets, inputs_pw, targets_pw = sequence_data
    last_inputs_pw = inputs[-1]
    a = find_boundaries_mask_lake(last_inputs_pw, water_threshold)
    list_center_pos = get_pos(a, center_point_xs)

    patches_inputs = []
    patches_targets = []
    patches_inputs_pw = []
    patches_targets_pw = []

    outputs = [
        patches_inputs, patches_targets, patches_inputs_pw, patches_targets_pw
    ]

    def padding(x, sz):
        # Zero-pad the spatial dimensions of a (time, h, w) patch up to (sz, sz).
        res = np.zeros(x.shape[:-2] + (sz, sz), dtype=x.dtype)
        res[..., :x.shape[-2], :x.shape[-1]] = x
        return res

    for center_pos in list_center_pos:
        if len(center_pos) < 2:
            continue
        x1, x2, y1, y2 = get_patch_coor(center_pos, patch_size)
        for origin, patches in zip(sequence_data, outputs):
            patch = origin[:, x1:x2 + 1, y1:y2 + 1]
            if x2 - x1 < patch_size - 1 or y2 - y1 < patch_size - 1:
                patch = padding(patch, patch_size)
            patches.append(np.expand_dims(patch, axis=0))
    res = []
    for patches in outputs:
        res.append(np.vstack(patches))
    if output_path is not None:
        cache_data(res, output_path)
    return res
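get_patch_coor is not shown in any of these examples. A minimal sketch of what it plausibly does, assuming it returns an inclusive (x1, x2, y1, y2) window of side patch_size centred on center_pos, with negative starts clipped to 0 so the padding helper above can fill windows that run past the image edge:

def get_patch_coor(center_pos, patch_size):
    # Sketch only; the real helper may handle boundary cases differently.
    cx, cy = int(center_pos[0]), int(center_pos[1])
    half = patch_size // 2
    x1 = max(cx - half, 0)
    y1 = max(cy - half, 0)
    x2 = x1 + patch_size - 1
    y2 = y1 + patch_size - 1
    return x1, x2, y1, y2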
Example #13
def _generate_whole_image(data_paths, target_paths, mask_paths,
                          data_augment_dir, input_time_steps,
                          output_timesteps):
    n_data = len(data_paths)
    for k in range(n_data):
        data_merged = get_data_merged_from_paths(data_paths[k],
                                                 target_paths[k],
                                                 mask_paths[k])
        data = data_merged[:, :-2 * output_timesteps, :, :]
        data = np.expand_dims(data, axis=-1)
        target = data_merged[:, -2 * output_timesteps:-output_timesteps, :, :]
        target = np.expand_dims(target, axis=-1)
        mask = data_merged[:, -output_timesteps:, :, :]
        mask = np.expand_dims(mask, axis=-1)
        if target.shape[1] > 1:
            target = target.squeeze(axis=1)
            mask = mask.squeeze(axis=1)
        target_mask = np.concatenate((target, mask), axis=-1)
        if not os.path.isdir(data_augment_dir):
            os.makedirs(data_augment_dir)
        file_path = os.path.join(data_augment_dir, '{}.dat'.format(k))
        cache_data((data, target_mask), file_path)
Example #14
def _preprocess_data(self, data_dir, used_band, year_range, n_data_per_year,
                     day_period, preprocessed_data_dir, resize_input=None):
    for year in range(year_range[0], year_range[1] + 1):
        for d in range(n_data_per_year):
            day = d * day_period + 1
            prefix = os.path.join(str(year), str(year) + str(day).zfill(3))
            current_data_dir = os.path.join(data_dir, prefix)
            try:
                list_imgs = os.listdir(current_data_dir)
                filename = list(filter(lambda x: used_band in x, list_imgs))[0]
                img = restore_data(os.path.join(current_data_dir, filename))
                normalized_img = self.fn(img)
                if resize_input:
                    normalized_img = normalized_img[:resize_input, :resize_input]
                cur_dest_dir = os.path.join(preprocessed_data_dir, prefix)
                if not os.path.exists(cur_dest_dir):
                    os.makedirs(cur_dest_dir)
                cache_data(normalized_img,
                           os.path.join(cur_dest_dir, 'preprocessed.dat'))
            except Exception:
                print('Data {}{:03} not found in {}.'.format(
                    year, day, current_data_dir))
Example #15
def _generate(data_paths, target_paths, mask_paths, data_augment_dir,
              crop_size, n_samples, input_time_steps, output_timesteps):
    n_data = len(data_paths)
    cnt = 0
    for k in range(n_data):
        data_merged = get_data_merged_from_paths(data_paths[k],
                                                 target_paths[k],
                                                 mask_paths[k])
        for i in range(n_samples):
            batch = _random_crop_func_1(data_merged, crop_size)
            for j in range(batch.shape[0]):
                cur = batch[j]
                data = np.expand_dims(cur[:input_time_steps], axis=-1)
                target = np.expand_dims(
                    cur[-2 * output_timesteps:-output_timesteps], axis=-1)
                mask = np.expand_dims(cur[-output_timesteps:], axis=-1)
                target_mask = np.concatenate((target, mask), axis=-1)
                if not os.path.isdir(data_augment_dir):
                    os.makedirs(data_augment_dir)
                file_path = os.path.join(data_augment_dir,
                                         '{}.dat'.format(cnt))
                cnt += 1
                cache_data((data, target_mask), file_path)
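_random_crop_func_1 is another unshown helper, similar in spirit to the _random_crop_func sketch after Example #8 but with the offsets chosen at random. A minimal sketch under the assumption that it takes one random spatial crop from the merged (batch, time, h, w) array:

import numpy as np

def _random_crop_func_1(data_merged, crop_size):
    # Sketch only: random top-left corner, crop_size x crop_size window
    # over the last two (spatial) axes.
    h, w = data_merged.shape[2:4]
    offset_x = np.random.randint(0, h - crop_size + 1)
    offset_y = np.random.randint(0, w - crop_size + 1)
    return data_merged[:, :,
                       offset_x:offset_x + crop_size,
                       offset_y:offset_y + crop_size]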
Example #16
def create_sequence_data(data_file_paths, n_samples=10000):
    if not os.path.exists(sequence_data_dir):
        os.makedirs(sequence_data_dir)

    for data_type, n_data in zip(('train', 'val', 'test'),
                                 (n_train, n_val, n_test)):
        sequence_data_type_dir = os.path.join(sequence_data_dir, data_type)
        if not os.path.exists(sequence_data_type_dir):
            os.makedirs(sequence_data_type_dir)
        data_type_file_paths = data_file_paths[data_type]
        input_file = data_type_file_paths['input']
        target_file = data_type_file_paths['target']
        pw_target_file = data_type_file_paths['pw_target']
        for i in range(min(n_data, n_samples)):
            inputs = select_data(get_data_from_data_file(input_file, i))
            target = get_data_from_data_file(target_file, i)
            input_pixel_weights = np.array(
                list(
                    map(lambda x: get_pixel_weights(x, water_threshold),
                        inputs)))
            target_pixel_weights = get_data_from_data_file(pw_target_file, i)
            cache_data(
                (inputs, target, input_pixel_weights, target_pixel_weights),
                os.path.join(sequence_data_type_dir, '{}.dat'.format(i)))
Example #17
                _, results, _, _ = load_data_pickle(results)
            res.append(results.forecast(steps))
        return pd.concat(res, ignore_index=True, axis=1)

    def eval(self, groundtruth, metric=None):
        steps = groundtruth.shape[0]
        yhat = self.inference(steps)
        if metric is None:
            metric = mse
        return yhat, metric(groundtruth, yhat)


# In[71]:

vsarima = VSARIMA(df_train_12, df_test_12)
#vsarima = VSARIMA(df_train, df_test)

# In[72]:

vsarima.train()

# In[85]:

yhat, loss = vsarima.eval(df_test_12)
#yhat, loss = vsarima.eval(df_test)
print('mse =', loss)

cache_data(yhat, 'vsarima_inference.dat')
account.upload_file('vsarima_inference.dat', 'MODIS')
account.upload_file('log.csv', 'MODIS')
Example #18
# In[ ]:


lstm_2 = LSTM_2(data, data_train_1, list_idx, scaler, mode='inference')
losses, predictions = lstm_2.eval(return_original_range=True)
print(losses)
print(losses.mean())


# In[ ]:


inference_dir = 'inference'
if not os.path.exists(inference_dir):
    os.makedirs(inference_dir)
cache_data(predictions, os.path.join(inference_dir, 'lstm_2.dat'))


# In[ ]:


inputs = lstm_1.data['test_X'][:1]
groundtruths = lstm_1.data['test_y'][:1]
inputs.shape, groundtruths.shape


# In[ ]:


predictions.shape
Example #19
# In[153]:

vsarima_1 = VSARIMA(data_train_1, data_test_1, list_idx, mode='inference')
losses, predictions = vsarima_1.eval(data_test_1)

# In[155]:

print(losses.mean())

# In[154]:

inference_dir = 'inference'
if not os.path.exists(inference_dir):
    os.makedirs(inference_dir)
cache_data(predictions, os.path.join(inference_dir, 'sarima.dat'))

# # Calculate Polygon area

# In[166]:

predictions = predictions.reshape(predictions.shape[0], -1, 2)

# In[167]:

predictions.shape, data_test.shape

# In[171]:


def find_border(data_points):
Example #20
# In[14]:

inputs_np_whole_img = restore_data(
    '../../multiscale_predrnn/data/sequence_data/test/0.dat')[0]
inputs_np_whole_img.shape

# In[15]:

inference_convlstm_whole_img = InferenceConvLSTMWholeImg(params)

# In[19]:

inferences_np = {}
np_input_dir = '../../multiscale_predrnn/data/sequence_data'
steps_ahead = 80

for subset in ('test', 'val'):
    inference_dir = 'inferences/{}'.format(subset)
    if not os.path.exists(inference_dir):
        os.makedirs(inference_dir)
    np_input_dir_subset = os.path.join(np_input_dir, subset)
    n = len(os.listdir(np_input_dir_subset))
    res1 = []
    for i in tqdm(range(n)):
        inputs_np = restore_data(
            os.path.join(np_input_dir_subset, '{}.dat'.format(i)))[0]
        inputs_np = inputs_np[-timesteps:]
        res = inference_convlstm_whole_img.get_inference_from_np_array(
            inputs_np, steps_ahead)
        cache_data(res, os.path.join(inference_dir, '{}.dat'.format(i)))